feat(seed): year-as-int + conventional choice-set heuristics (#33, #34)

Two additive D7 catalogue rules, surfaced while writing the website seed
docs. No change to the type fallback, executor, or grammar.

#33 — year-like int columns. `published`/`birth_year` were just `int`, so
they fell to the unbounded int path and produced nonsense (`9419`). Add an
int-gated year rule (after the quantity rule, so `year_count` stays a
count): `year`/`*_year`/`published`/`founded` -> a bounded 1950-2025 year
(new `YearRecent`), or the dob-style birth window 1945-2007 for
`birth`/`born`/`dob` (new `YearBirth`). Plain int; not added to the D9
named-generator vocabulary.

#34 — conventional choice sets. A few enum-ish names have a near-canonical
small set that reads far better than lorem text. Add a type-gated PickFrom
lookup (reusing the existing generator): priority/prio, severity,
rating/stars. `status` is deliberately excluded (values too
domain-specific) and keeps the D12 advisory; a user IN-CHECK still wins.
`priority` leaves ENUM_TOKENS.

ADR-0048 Amendment 1; +8 tests (incl. a column-fill integration test that
also closes a pre-existing gap on that path).
This commit is contained in:
claude@clouddev1
2026-06-12 20:36:20 +00:00
parent fde50ce3bf
commit deb0948d6c
7 changed files with 374 additions and 4 deletions
+117
View File
@@ -281,6 +281,123 @@ fn seed_populates_a_table_and_persists_rows() {
assert!(csv.contains('@'), "seeded emails should appear in the CSV:\n{csv}");
}
/// Break a seeded table's CSV text into its header names and its data rows.
///
/// Whitespace-only lines are dropped; the first remaining line becomes the
/// header and each later line becomes one row. Fields are separated on bare
/// commas with no quote or escape handling, which is enough for the
/// comma-free values these tests inspect.
///
/// # Panics
///
/// Panics when `csv` has no non-blank line, because there is no header to read.
fn csv_columns(csv: &str) -> (Vec<String>, Vec<Vec<String>>) {
    /// Split one CSV line into owned fields.
    fn fields(line: &str) -> Vec<String> {
        line.split(',').map(String::from).collect()
    }

    let mut records = csv
        .lines()
        .filter(|line| !line.trim().is_empty())
        .map(fields);
    let header = records.next().unwrap();
    (header, records.collect())
}
/// Collect, in row order, the values stored under the column named `col` in
/// the CSV text `csv`.
///
/// # Panics
///
/// Panics with "column present" when the header has no column called `col`,
/// and on any row that has fewer fields than the column's position requires.
fn column_values(csv: &str, col: &str) -> Vec<String> {
    let (names, records) = csv_columns(csv);
    let at = names
        .iter()
        .position(|name| name.as_str() == col)
        .expect("column present");

    let mut values = Vec::with_capacity(records.len());
    for record in &records {
        values.push(record[at].clone());
    }
    values
}
/// Seeds a table whose column names match the year (#33) and choice-set (#34)
/// heuristics and checks every generated value lands in the expected window
/// or value set.
#[test]
fn seed_year_and_choice_set_heuristics() {
    // Issues #33 (year-like int columns) + #34 (conventional choice
    // sets). A fixed `--seed` makes the values deterministic; we assert
    // membership in the bounded windows / value sets rather than exact
    // strings (robust to RNG-internals changes, still proves the
    // heuristic fired — the type fallback would produce 9419 / lorem).
    let (project, db, _dir) = open_project_db();
    let rt = rt();
    rt.block_on(db.create_table(
        "Records".to_string(),
        vec![
            ColumnSpec::new("id", Type::Serial),
            ColumnSpec::new("birth_year", Type::Int),
            ColumnSpec::new("published", Type::Int),
            ColumnSpec::new("priority", Type::Text),
            ColumnSpec::new("severity", Type::Text),
            ColumnSpec::new("rating", Type::Int),
        ],
        vec!["id".to_string()],
        None,
    ))
    .expect("create Records");
    rt.block_on(db.seed(
        "Records".into(),
        None,
        Some(30),
        Vec::new(),
        Some(99),
        Some("seed Records 30".into()),
    ))
    .expect("seed succeeds");
    let csv = read_csv(&project, "Records").expect("Records CSV exists");

    // Every check below is a loop over the seeded rows, so an empty table
    // would make the whole test pass vacuously. Guard against that once;
    // all columns share the same row count.
    let birth_years = column_values(&csv, "birth_year");
    assert!(!birth_years.is_empty(), "seed should have produced rows:\n{csv}");

    for y in birth_years {
        let n: i32 = y.parse().expect("birth_year is an int");
        assert!((1945..=2007).contains(&n), "birth_year {n} must be a plausible birth year");
    }
    for y in column_values(&csv, "published") {
        let n: i32 = y.parse().expect("published is an int");
        assert!((1950..=2025).contains(&n), "published {n} must be a plausible recent year");
    }
    for p in column_values(&csv, "priority") {
        assert!(
            ["low", "medium", "high"].contains(&p.as_str()),
            "priority `{p}` must be low/medium/high",
        );
    }
    for s in column_values(&csv, "severity") {
        assert!(
            ["low", "medium", "high", "critical"].contains(&s.as_str()),
            "severity `{s}` must be low/medium/high/critical",
        );
    }
    for r in column_values(&csv, "rating") {
        let n: i32 = r.parse().expect("rating is an int");
        // The message previously read "must be 15", contradicting the 1..=5 check.
        assert!((1..=5).contains(&n), "rating {n} must be 1-5");
    }
}
/// Column-fill variant of the issue #34 check: inserts rows that set only
/// `title`, fills just `priority` via `seed`, and confirms every filled value
/// is one of low/medium/high.
#[test]
fn seed_column_fill_uses_choice_set_heuristic() {
    // `seed <table>.<column>` is the column-fill path (an UPDATE over rows
    // that already exist). It shares `choose_generator` with the row-seeding
    // path, so the issue #34 value sets are expected here as well.
    let (project, db, _dir) = open_project_db();
    let runtime = rt();

    let columns = vec![
        ColumnSpec::new("id", Type::Serial),
        ColumnSpec::new("title", Type::Text),
        ColumnSpec::new("priority", Type::Text),
    ];
    let primary_key = vec![String::from("id")];
    runtime
        .block_on(db.create_table(String::from("Tasks"), columns, primary_key, None))
        .expect("create Tasks");

    // Only `title` is supplied, so `priority` is left unset on every row.
    for title in ["a", "b", "c", "d"] {
        let insert = db.insert(
            String::from("Tasks"),
            Some(vec![String::from("title")]),
            vec![Value::Text(String::from(title))],
            None,
        );
        runtime.block_on(insert).expect("insert row");
    }

    // Fill the single `priority` column with a fixed RNG seed.
    let fill = db.seed(
        "Tasks".into(),
        Some("priority".into()),
        None,
        Vec::new(),
        Some(5),
        Some("seed Tasks.priority".into()),
    );
    runtime.block_on(fill).expect("column-fill priority");

    let csv = read_csv(&project, "Tasks").expect("Tasks CSV");
    let priorities = column_values(&csv, "priority");
    assert_eq!(priorities.len(), 4, "every existing row is filled:\n{csv}");
    for p in priorities {
        assert!(
            matches!(p.as_str(), "low" | "medium" | "high"),
            "column-fill priority `{p}` must be low/medium/high",
        );
    }
}
#[test]
fn seed_count_defaults_to_twenty() {
let (project, db, _dir) = open_project_db();