feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
Build the two SD2 surfaces Phase 1 deferred:
- `set` override clause (D2): comma-separated per-column pins —
`= 'v'` (fixed), `in ('a','b')` (pick-list), `as <generator>`
(named), `between x and y` (range; numeric and quoted dates).
Type-aware via the typed `current_column_value` slot; an override
drops its column from the generic-fill advisory (D13). Folded from
the flat matched path (build_seed_overrides) and applied to the
per-column plan (apply_seed_overrides).
- `<table>.<column>` column-fill (D1 form 2): an UPDATE over existing
rows. Refuses PK/autogen targets, empty-table no-op, FK-samples the
parent, collision-free for UNIQUE/identifier targets, one undo step;
`set` may only adjust the filled column.
Supporting work: KNOWN_GENERATORS vocabulary + generator_for_name
(src/seed/vocabulary.rs, D9); a range Generator + range_bounds_reason;
IdentSource::Generators and HighlightClass::Function; completion of the
generator vocabulary after `as` and the set/.col column slots; the
typing-time validity indicator for an unknown generator; help,
parse-error pedagogy rows, and the D13 advisory's Phase-2/3 wording.
A bounded override (fixed value / too-short pick-list) on a
single-column-UNIQUE target is a friendly error rather than a silent
uniqueness cap (post-implementation /runda finding, user-chosen).
Dates in the range form are quoted (no date-literal token exists);
ADR-0048 D2 amended accordingly. Both modes (D5); reproducible (D4).
This commit is contained in:
@@ -81,6 +81,11 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
|
||||
let chosen: &String = pick(rng, values);
|
||||
literal_to_value(chosen, ty)
|
||||
}
|
||||
// The `set <col> between low and high` override (D2). Bounds are
|
||||
// interpreted per the destination type; the executor has already
|
||||
// validated they parse, so a defensive parse failure here falls
|
||||
// back to type-based generation rather than producing junk.
|
||||
Generator::Range { low, high } => range_value(low, high, ty, rng),
|
||||
// Un-intercepted markers + an empty pick list → type-based.
|
||||
Generator::PickFrom(_)
|
||||
| Generator::IdentitySequential
|
||||
@@ -89,6 +94,132 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
|
||||
}
|
||||
}
|
||||
|
||||
/// Uniform value in `[low, high]` for the `between` override (D2).
|
||||
///
|
||||
/// Bounds are interpreted by destination type. Returns the type-based
|
||||
/// fallback for a bound that does not parse or a type that has no range
|
||||
/// meaning — the executor pre-validates, so this is defensive only.
|
||||
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
|
||||
match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high)
|
||||
.map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string()))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high)
|
||||
.map(|(lo, hi)| {
|
||||
let v = rng.random::<f64>().mul_add(hi - lo, lo);
|
||||
Value::Number(format!("{v:.2}"))
|
||||
})
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Date => parse_date_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi))))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::DateTime => parse_datetime_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi)))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
_ => generic_for_type(ty, rng),
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate that `low`/`high` parse as bounds for `ty`.
|
||||
///
|
||||
/// The `between` override (D2) is checked by the executor *before*
|
||||
/// generation. Returns a short human reason on failure (the executor
|
||||
/// wraps it in a friendly error naming the column), `None` when valid.
|
||||
#[must_use]
|
||||
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
|
||||
let ok = match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high).is_some(),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high).is_some(),
|
||||
Type::Date => parse_date_range(low, high).is_some(),
|
||||
Type::DateTime => parse_datetime_range(low, high).is_some(),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => false,
|
||||
};
|
||||
if ok {
|
||||
return None;
|
||||
}
|
||||
Some(match ty {
|
||||
Type::Int | Type::Serial => "expected two whole numbers, e.g. `between 1 and 100`".to_string(),
|
||||
Type::Real | Type::Decimal => "expected two numbers, e.g. `between 1.0 and 9.99`".to_string(),
|
||||
Type::Date => "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`".to_string(),
|
||||
Type::DateTime => {
|
||||
"expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`"
|
||||
.to_string()
|
||||
}
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => {
|
||||
"a `between` range only applies to numeric and date/datetime columns".to_string()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Read both bounds as `i64` and return them as `(smaller, larger)`.
///
/// Surrounding whitespace is ignored. Yields `None` when either bound is
/// not an integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    // Reversed bounds are accepted and simply swapped into ascending order.
    Some((first.min(second), first.max(second)))
}
|
||||
|
||||
/// Read both bounds as finite `f64` values and return them in ascending
/// order.
///
/// Surrounding whitespace is ignored. Yields `None` when either bound does
/// not parse as a float or is not finite (infinity / NaN).
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    // One bound: trimmed, parsed, and rejected unless finite.
    let bound = |text: &str| text.trim().parse::<f64>().ok().filter(|v| v.is_finite());

    let first = bound(low)?;
    let second = bound(high)?;
    // Reversed bounds are accepted and swapped into ascending order.
    let ordered = if first > second {
        (second, first)
    } else {
        (first, second)
    };
    Some(ordered)
}
|
||||
|
||||
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
|
||||
let lo = NaiveDate::parse_from_str(low.trim(), "%Y-%m-%d").ok()?;
|
||||
let hi = NaiveDate::parse_from_str(high.trim(), "%Y-%m-%d").ok()?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Accept both the `T`-separated and space-separated datetime spellings
|
||||
/// the app validates (`bind_datetime` / `validate_datetime`).
|
||||
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
|
||||
let t = s.trim();
|
||||
chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%dT%H:%M:%S")
|
||||
.or_else(|_| chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%d %H:%M:%S"))
|
||||
.ok()
|
||||
}
|
||||
|
||||
fn parse_datetime_range(
|
||||
low: &str,
|
||||
high: &str,
|
||||
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
|
||||
let lo = parse_one_datetime(low)?;
|
||||
let hi = parse_one_datetime(high)?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Uniform date in `[lo, hi]` (inclusive).
|
||||
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
|
||||
let lo_ce = lo.num_days_from_ce();
|
||||
let hi_ce = hi.num_days_from_ce();
|
||||
let day = rng.random_range(lo_ce..=hi_ce);
|
||||
NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo)
|
||||
}
|
||||
|
||||
/// Uniform datetime in `[lo, hi]`, rendered `YYYY-MM-DDTHH:MM:SS`.
|
||||
fn random_datetime_between(
|
||||
rng: &mut SeedRng,
|
||||
lo: chrono::NaiveDateTime,
|
||||
hi: chrono::NaiveDateTime,
|
||||
) -> String {
|
||||
let lo_s = lo.and_utc().timestamp();
|
||||
let hi_s = hi.and_utc().timestamp();
|
||||
let secs = if lo_s <= hi_s {
|
||||
rng.random_range(lo_s..=hi_s)
|
||||
} else {
|
||||
rng.random_range(hi_s..=lo_s)
|
||||
};
|
||||
let dt = chrono::DateTime::from_timestamp(secs, 0)
|
||||
.map_or(lo, |d| d.naive_utc());
|
||||
dt.format("%Y-%m-%dT%H:%M:%S").to_string()
|
||||
}
|
||||
|
||||
/// Type-based fallback generation (D8). Never produces NULL for a
|
||||
/// generatable type; `blob`/`serial`/`shortid` are handled by the
|
||||
/// executor (autogen / block guard) and yield NULL here only as a
|
||||
@@ -358,6 +489,76 @@ mod tests {
|
||||
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
|
||||
}
|
||||
|
||||
#[test]
fn int_range_stays_within_inclusive_bounds() {
    // 200 draws from a `between 10 and 20` override on an int column must
    // all be numbers inside the inclusive bounds.
    let mut rng = make_rng(Some(5));
    let generator = Generator::Range { low: "10".into(), high: "20".into() };
    for _ in 0..200 {
        let value = generate_value(&generator, Type::Int, &mut rng);
        let Value::Number(text) = value else {
            panic!("int range should be a number")
        };
        let n: i64 = text.parse().unwrap();
        assert!((10..=20).contains(&n), "int {n} out of [10,20]");
    }
}
|
||||
|
||||
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
    // 200 draws from a `between 1.0 and 9.0` override on a real column
    // must stay inside the bounds and carry a decimal point.
    let mut rng = make_rng(Some(5));
    let generator = Generator::Range { low: "1.0".into(), high: "9.0".into() };
    for _ in 0..200 {
        let value = generate_value(&generator, Type::Real, &mut rng);
        let Value::Number(s) = value else {
            panic!("real range should be a number")
        };
        let n: f64 = s.parse().unwrap();
        assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
        assert!(s.contains('.'), "real should be formatted with cents: {s}");
    }
}
|
||||
|
||||
#[test]
fn date_range_stays_within_quoted_bounds() {
    // 200 draws from a date `between` override must each be text that
    // parses as a date inside the inclusive bounds.
    const DATE_FORMAT: &str = "%Y-%m-%d";
    let lo = NaiveDate::parse_from_str("2023-01-01", DATE_FORMAT).unwrap();
    let hi = NaiveDate::parse_from_str("2023-12-31", DATE_FORMAT).unwrap();

    let generator = Generator::Range {
        low: "2023-01-01".into(),
        high: "2023-12-31".into(),
    };
    let mut rng = make_rng(Some(9));
    for _ in 0..200 {
        let value = generate_value(&generator, Type::Date, &mut rng);
        let Value::Text(text) = value else {
            panic!("date range should be text")
        };
        let d = NaiveDate::parse_from_str(&text, DATE_FORMAT).expect("valid date");
        assert!((lo..=hi).contains(&d), "date {d} out of range");
    }
}
|
||||
|
||||
#[test]
fn reversed_bounds_are_tolerated() {
    // `between 20 and 10` is written high-to-low; the draw must still be a
    // number inside [10, 20].
    let mut rng = make_rng(Some(1));
    let generator = Generator::Range { low: "20".into(), high: "10".into() };
    let value = generate_value(&generator, Type::Int, &mut rng);
    let Value::Number(text) = value else {
        panic!("number")
    };
    let n: i64 = text.parse().unwrap();
    assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
|
||||
|
||||
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
    // Numeric / date / datetime bounds that parse are accepted (no reason).
    let accepted = [
        (Type::Int, "1", "10"),
        (Type::Real, "1.5", "9.9"),
        (Type::Date, "2023-01-01", "2024-01-01"),
        (Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00"),
    ];
    for (ty, low, high) in accepted {
        assert!(range_bounds_reason(ty, low, high).is_none());
    }

    let rejected = [
        // Non-numeric bound on a numeric column.
        (Type::Int, "abc", "10"),
        // A range on a text or bool column is meaningless.
        (Type::Text, "a", "z"),
        (Type::Bool, "0", "1"),
    ];
    for (ty, low, high) in rejected {
        assert!(range_bounds_reason(ty, low, high).is_some());
    }
}
|
||||
|
||||
#[test]
|
||||
fn markers_fall_back_to_type_based_generation() {
|
||||
// An un-intercepted marker must not panic; it generates by type.
|
||||
|
||||
Reference in New Issue
Block a user