feat(seed): set override clause + column-fill (ADR-0048 Phase 2)

Build the two SD2 surfaces Phase 1 deferred:

- `set` override clause (D2): comma-separated per-column pins —
  `= 'v'` (fixed), `in ('a','b')` (pick-list), `as <generator>`
  (named), `between x and y` (range; numeric and quoted dates).
  Type-aware via the typed `current_column_value` slot; an override
  drops its column from the generic-fill advisory (D13). Folded from
  the flat matched path (build_seed_overrides) and applied to the
  per-column plan (apply_seed_overrides).
- `<table>.<column>` column-fill (D1 form 2): an UPDATE over existing
  rows. It refuses PK/autogen targets, is a no-op on an empty table,
  FK-samples the parent, stays collision-free for UNIQUE/identifier
  targets, and is a single undo step; `set` may only adjust the filled
  column.

Supporting work: KNOWN_GENERATORS vocabulary + generator_for_name
(src/seed/vocabulary.rs, D9); a range Generator + range_bounds_reason;
IdentSource::Generators and HighlightClass::Function; completion of the
generator vocabulary after `as` and the set/.col column slots; the
typing-time validity indicator for an unknown generator; help,
parse-error pedagogy rows, and the D13 advisory's Phase-2/3 wording.

A bounded override (fixed value / too-short pick-list) on a
single-column-UNIQUE target is a friendly error rather than a silent
uniqueness cap (post-implementation /runda finding, user-chosen).

Dates in the range form are quoted (no date-literal token exists);
ADR-0048 D2 amended accordingly. Both modes (D5); reproducible (D4).
This commit is contained in:
claude@clouddev1
2026-06-12 09:44:30 +00:00
parent 78c38e8b33
commit a12facc784
20 changed files with 1913 additions and 65 deletions
+201
View File
@@ -81,6 +81,11 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
let chosen: &String = pick(rng, values);
literal_to_value(chosen, ty)
}
// The `set <col> between low and high` override (D2). Bounds are
// interpreted per the destination type; the executor has already
// validated they parse, so a defensive parse failure here falls
// back to type-based generation rather than producing junk.
Generator::Range { low, high } => range_value(low, high, ty, rng),
// Un-intercepted markers + an empty pick list → type-based.
Generator::PickFrom(_)
| Generator::IdentitySequential
@@ -89,6 +94,132 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
}
}
/// Random value between `low` and `high` for the `between` override (D2).
///
/// The textual bounds are interpreted according to the destination type:
///
/// - `Int` / `Serial`: an integer drawn from the inclusive range.
/// - `Real` / `Decimal`: a linear interpolation between the bounds,
///   rendered with two decimal places.
/// - `Date` / `DateTime`: a date or datetime between the bounds, as text.
/// - `Text` / `Bool` / `Blob` / `ShortId`: no range meaning.
///
/// Whenever the bounds do not parse for `ty`, or `ty` has no range
/// meaning, the type-based fallback (`generic_for_type`) is returned
/// instead — the executor pre-validates, so this is defensive only.
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
    match ty {
        Type::Int | Type::Serial => parse_int_range(low, high)
            .map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string()))
            .unwrap_or_else(|| generic_for_type(ty, rng)),
        Type::Real | Type::Decimal => parse_real_range(low, high)
            .map(|(lo, hi)| {
                let t = rng.random::<f64>();
                let v = t.mul_add(hi - lo, lo);
                // `hi - lo` overflows to infinity when the (finite) bounds
                // are extremely far apart, which would render as `inf` or
                // `NaN`. Only in that case, interpolate as a weighted sum of
                // the bounds, which cannot overflow. The common path is left
                // untouched so seeded output stays the same.
                let v = if v.is_finite() {
                    v
                } else {
                    lo.mul_add(1.0 - t, hi * t)
                };
                Value::Number(format!("{v:.2}"))
            })
            .unwrap_or_else(|| generic_for_type(ty, rng)),
        Type::Date => parse_date_range(low, high)
            .map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi))))
            .unwrap_or_else(|| generic_for_type(ty, rng)),
        Type::DateTime => parse_datetime_range(low, high)
            .map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi)))
            .unwrap_or_else(|| generic_for_type(ty, rng)),
        // text / bool / blob / shortid have no range meaning. The variants
        // are spelled out (rather than `_`) so that adding a new `Type`
        // forces a decision here.
        Type::Text | Type::Bool | Type::Blob | Type::ShortId => generic_for_type(ty, rng),
    }
}
/// Check whether `low`/`high` are usable `between` bounds for a column of
/// type `ty`.
///
/// The executor calls this for the `between` override (D2) *before* any
/// value is generated. The result is `None` when the bounds are valid;
/// otherwise it is a short, human-readable reason that the executor wraps
/// in a friendly error naming the column.
#[must_use]
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
    // Each type family pairs its validity check with the hint to show
    // when that check fails.
    let (valid, hint): (bool, &str) = match ty {
        Type::Int | Type::Serial => (
            parse_int_range(low, high).is_some(),
            "expected two whole numbers, e.g. `between 1 and 100`",
        ),
        Type::Real | Type::Decimal => (
            parse_real_range(low, high).is_some(),
            "expected two numbers, e.g. `between 1.0 and 9.99`",
        ),
        Type::Date => (
            parse_date_range(low, high).is_some(),
            "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`",
        ),
        Type::DateTime => (
            parse_datetime_range(low, high).is_some(),
            "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`",
        ),
        // text / bool / blob / shortid have no range meaning, so they are
        // always rejected.
        Type::Text | Type::Bool | Type::Blob | Type::ShortId => (
            false,
            "a `between` range only applies to numeric and date/datetime columns",
        ),
    };
    if valid {
        None
    } else {
        Some(hint.to_string())
    }
}
/// Read both bounds as `i64` and return them as `(smaller, larger)`.
///
/// Surrounding whitespace is ignored. Yields `None` when either bound is
/// not an integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    Some((first.min(second), first.max(second)))
}
/// Read both bounds as finite `f64` values and return them in ascending
/// order.
///
/// Surrounding whitespace is ignored. Yields `None` when either bound
/// does not parse as a number, or parses to an infinity or NaN.
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let finite = |text: &str| text.trim().parse::<f64>().ok().filter(|v| v.is_finite());
    let first = finite(low)?;
    let second = finite(high)?;
    Some(if first > second {
        (second, first)
    } else {
        (first, second)
    })
}
/// Read both bounds as `YYYY-MM-DD` dates and return them as
/// `(earlier, later)`.
///
/// Surrounding whitespace is ignored. Yields `None` when either bound is
/// not a valid date in that format.
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
    const DATE_FORMAT: &str = "%Y-%m-%d";
    let first = NaiveDate::parse_from_str(low.trim(), DATE_FORMAT).ok()?;
    let second = NaiveDate::parse_from_str(high.trim(), DATE_FORMAT).ok()?;
    if first > second {
        Some((second, first))
    } else {
        Some((first, second))
    }
}
/// Parse a single datetime bound, ignoring surrounding whitespace.
///
/// Two spellings are tried in order: `T`-separated first, then
/// space-separated — the forms the app validates (`bind_datetime` /
/// `validate_datetime`). Yields `None` when neither matches.
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
    let text = s.trim();
    ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"]
        .iter()
        .find_map(|&format| chrono::NaiveDateTime::parse_from_str(text, format).ok())
}
/// Read both bounds with `parse_one_datetime` and return them as
/// `(earlier, later)`.
///
/// Yields `None` when either bound is not a recognised datetime.
fn parse_datetime_range(
    low: &str,
    high: &str,
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
    parse_one_datetime(low)
        .zip(parse_one_datetime(high))
        .map(|(first, second)| {
            if first > second {
                (second, first)
            } else {
                (first, second)
            }
        })
}
/// Uniform date in `[lo, hi]` (inclusive).
///
/// The bounds are converted to day numbers and one day number is drawn
/// from the inclusive range between them. Reversed bounds are tolerated
/// (the range is normalized before sampling), matching
/// `random_datetime_between`. If the drawn day number cannot be turned
/// back into a date, `lo` is returned.
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
    let lo_ce = lo.num_days_from_ce();
    let hi_ce = hi.num_days_from_ce();
    // Sampling from an empty range panics, so never hand `random_range` a
    // reversed pair even if a caller forgets to order the bounds.
    let day = if lo_ce <= hi_ce {
        rng.random_range(lo_ce..=hi_ce)
    } else {
        rng.random_range(hi_ce..=lo_ce)
    };
    NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo)
}
/// Pick a datetime uniformly (at whole-second resolution) between `lo`
/// and `hi`, rendered as `YYYY-MM-DDTHH:MM:SS`.
///
/// Both bounds are converted to timestamps in seconds and the draw is
/// made over the inclusive span between them, in either order. If the
/// drawn timestamp cannot be converted back to a datetime, `lo` is
/// rendered instead.
fn random_datetime_between(
    rng: &mut SeedRng,
    lo: chrono::NaiveDateTime,
    hi: chrono::NaiveDateTime,
) -> String {
    let start = lo.and_utc().timestamp();
    let end = hi.and_utc().timestamp();
    // Order the endpoints so the sampled range is never empty.
    let (earliest, latest) = (start.min(end), start.max(end));
    let picked = rng.random_range(earliest..=latest);
    let moment = match chrono::DateTime::from_timestamp(picked, 0) {
        Some(utc) => utc.naive_utc(),
        None => lo,
    };
    moment.format("%Y-%m-%dT%H:%M:%S").to_string()
}
/// Type-based fallback generation (D8). Never produces NULL for a
/// generatable type; `blob`/`serial`/`shortid` are handled by the
/// executor (autogen / block guard) and yield NULL here only as a
@@ -358,6 +489,76 @@ mod tests {
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
}
/// Every integer drawn from `between 10 and 20` lies in `[10, 20]`.
#[test]
fn int_range_stays_within_inclusive_bounds() {
    let mut rng = make_rng(Some(5));
    let generator = Generator::Range { low: "10".into(), high: "20".into() };
    for _ in 0..200 {
        let text = match generate_value(&generator, Type::Int, &mut rng) {
            Value::Number(text) => text,
            _ => panic!("int range should be a number"),
        };
        let n: i64 = text.parse().unwrap();
        assert!((10..=20).contains(&n), "int {n} out of [10,20]");
    }
}
/// Every real drawn from `between 1.0 and 9.0` lies in `[1.0, 9.0]` and
/// is rendered with a decimal point.
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
    let mut rng = make_rng(Some(5));
    let generator = Generator::Range { low: "1.0".into(), high: "9.0".into() };
    for _ in 0..200 {
        let s = match generate_value(&generator, Type::Real, &mut rng) {
            Value::Number(s) => s,
            _ => panic!("real range should be a number"),
        };
        let n: f64 = s.parse().unwrap();
        assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
        assert!(s.contains('.'), "real should be formatted with cents: {s}");
    }
}
/// Every date drawn from a 2023 range parses as `YYYY-MM-DD` and falls
/// inside the inclusive bounds.
#[test]
fn date_range_stays_within_quoted_bounds() {
    let parse = |text: &str| NaiveDate::parse_from_str(text, "%Y-%m-%d");
    let lo = parse("2023-01-01").unwrap();
    let hi = parse("2023-12-31").unwrap();
    let generator = Generator::Range {
        low: "2023-01-01".into(),
        high: "2023-12-31".into(),
    };
    let mut rng = make_rng(Some(9));
    for _ in 0..200 {
        let text = match generate_value(&generator, Type::Date, &mut rng) {
            Value::Text(text) => text,
            _ => panic!("date range should be text"),
        };
        let d = parse(&text).expect("valid date");
        assert!((lo..=hi).contains(&d), "date {d} out of range");
    }
}
/// A range written high-to-low (`between 20 and 10`) still yields a value
/// inside `[10, 20]`.
#[test]
fn reversed_bounds_are_tolerated() {
    let mut rng = make_rng(Some(1));
    let generator = Generator::Range { low: "20".into(), high: "10".into() };
    let text = match generate_value(&generator, Type::Int, &mut rng) {
        Value::Number(text) => text,
        _ => panic!("number"),
    };
    let n: i64 = text.parse().unwrap();
    assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
/// `range_bounds_reason` returns no reason for bounds that fit the column
/// type, and a reason for unparseable bounds or range-less types.
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
    // `true` when the bounds are accepted, i.e. no failure reason is given.
    let accepts =
        |ty: Type, low: &str, high: &str| range_bounds_reason(ty, low, high).is_none();

    // Numeric / date / datetime accept.
    assert!(accepts(Type::Int, "1", "10"));
    assert!(accepts(Type::Real, "1.5", "9.9"));
    assert!(accepts(Type::Date, "2023-01-01", "2024-01-01"));
    assert!(accepts(Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00"));

    // Non-numeric bound on a numeric column.
    assert!(!accepts(Type::Int, "abc", "10"));

    // A range on a text or bool column is meaningless.
    assert!(!accepts(Type::Text, "a", "z"));
    assert!(!accepts(Type::Bool, "0", "1"));
}
#[test]
fn markers_fall_back_to_type_based_generation() {
// An un-intercepted marker must not panic; it generates by type.