feat(seed): year-as-int + conventional choice-set heuristics (#33, #34)

Two additive D7 catalogue rules, surfaced while writing the website seed docs. No change to the type fallback, executor, or grammar. #33 — year-like int columns. `published`/`birth_year` were just `int`, so they fell to the unbounded int path and produced nonsense (`9419`). Add an int-gated year rule (after the quantity rule, so `year_count` stays a count): `year`/`*_year`/`published`/`founded` -> a bounded 1950-2025 year (new `YearRecent`), or the dob-style birth window 1945-2007 for `birth`/`born`/`dob` (new `YearBirth`). Plain int; not added to the D9 named-generator vocabulary. #34 — conventional choice sets. A few enum-ish names have a near-canonical small set that reads far better than lorem text. Add a type-gated PickFrom lookup (reusing the existing generator): priority/prio, severity, rating/stars. `status` is deliberately excluded (values too domain-specific) and keeps the D12 advisory; a user IN-CHECK still wins. `priority` leaves ENUM_TOKENS. ADR-0048 Amendment 1; +8 tests (incl. a column-fill integration test that also closes a pre-existing gap on that path).
2026-06-12 20:36:20 +00:00
parent fde50ce3bf
commit deb0948d6c
7 changed files with 374 additions and 4 deletions
@@ -281,6 +281,123 @@ fn seed_populates_a_table_and_persists_rows() {
    assert!(csv.contains('@'), "seeded emails should appear in the CSV:\n{csv}");
 }

+/// Split a seeded table's CSV into `(header names, data rows)`, skipping blank lines. Naive
+/// comma-split: the values under test carry no commas/quotes. Panics if there is no header line.
+fn csv_columns(csv: &str) -> (Vec<String>, Vec<Vec<String>>) {
+    let mut lines = csv.lines().filter(|l| !l.trim().is_empty());
+    let header: Vec<String> = lines.next().unwrap().split(',').map(str::to_string).collect();
+    let rows: Vec<Vec<String>> =
+        lines.map(|l| l.split(',').map(str::to_string).collect()).collect();
+    (header, rows) // rows are row-major: one inner Vec of fields per non-blank CSV line
+}
+
+fn column_values(csv: &str, col: &str) -> Vec<String> { // every value of column `col`, in row order
+    let (header, rows) = csv_columns(csv);
+    let idx = header.iter().position(|h| h == col).expect("column present"); // panics if `col` is not in the header
+    rows.iter().map(|r| r[idx].clone()).collect() // indexing panics if a row has fewer fields than the header position
+}
+
+#[test]
+fn seed_year_and_choice_set_heuristics() {
+    // Issues #33 (year-like int columns) + #34 (conventional choice sets). Seeds 30 rows
+    // with a fixed seed (99, presumably the RNG seed) and asserts membership in the bounded
+    // windows / value sets rather than exact strings: robust to RNG-internals changes, yet
+    // still proves the heuristic fired (the type fallback would produce 9419 / lorem).
+    // NOTE(review): the row count is not asserted, so an empty CSV would pass vacuously.
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    rt.block_on(db.create_table(
+        "Records".to_string(),
+        vec![
+            ColumnSpec::new("id", Type::Serial),
+            ColumnSpec::new("birth_year", Type::Int),
+            ColumnSpec::new("published", Type::Int),
+            ColumnSpec::new("priority", Type::Text),
+            ColumnSpec::new("severity", Type::Text),
+            ColumnSpec::new("rating", Type::Int),
+        ],
+        vec!["id".to_string()],
+        None,
+    ))
+    .expect("create Records");
+
+    rt.block_on(db.seed("Records".into(), None, Some(30), Vec::new(), Some(99), Some("seed Records 30".into())))
+        .expect("seed succeeds");
+    let csv = read_csv(&project, "Records").expect("Records CSV exists");
+
+    for y in column_values(&csv, "birth_year") {
+        let n: i32 = y.parse().expect("birth_year is an int");
+        assert!((1945..=2007).contains(&n), "birth_year {n} must be a plausible birth year");
+    }
+    for y in column_values(&csv, "published") {
+        let n: i32 = y.parse().expect("published is an int");
+        assert!((1950..=2025).contains(&n), "published {n} must be a plausible recent year");
+    }
+    for p in column_values(&csv, "priority") {
+        assert!(["low", "medium", "high"].contains(&p.as_str()), "priority `{p}` must be low/medium/high");
+    }
+    for s in column_values(&csv, "severity") {
+        assert!(
+            ["low", "medium", "high", "critical"].contains(&s.as_str()),
+            "severity `{s}` must be low/medium/high/critical",
+        );
+    }
+    for r in column_values(&csv, "rating") {
+        let n: i32 = r.parse().expect("rating is an int");
+        assert!((1..=5).contains(&n), "rating {n} must be 1–5");
+    }
+}
+
+#[test]
+fn seed_column_fill_uses_choice_set_heuristic() {
+    // Column-fill path: `seed <table>.<column>` (an UPDATE over existing rows) shares
+    // `choose_generator`, so issue #34's value sets apply there too. Insert four rows
+    // supplying only `title` (so `priority` is left unset), then fill just that column
+    // and confirm every row is filled with a value from the low/medium/high set.
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    rt.block_on(db.create_table(
+        "Tasks".to_string(),
+        vec![
+            ColumnSpec::new("id", Type::Serial),
+            ColumnSpec::new("title", Type::Text),
+            ColumnSpec::new("priority", Type::Text),
+        ],
+        vec!["id".to_string()],
+        None,
+    ))
+    .expect("create Tasks");
+    for t in ["a", "b", "c", "d"] {
+        rt.block_on(db.insert(
+            "Tasks".to_string(),
+            Some(vec!["title".to_string()]),
+            vec![Value::Text(t.to_string())],
+            None,
+        ))
+        .expect("insert row");
+    }
+
+    rt.block_on(db.seed(
+        "Tasks".into(),
+        Some("priority".into()),
+        None,
+        Vec::new(),
+        Some(5),
+        Some("seed Tasks.priority".into()),
+    ))
+    .expect("column-fill priority");
+
+    let csv = read_csv(&project, "Tasks").expect("Tasks CSV");
+    let priorities = column_values(&csv, "priority");
+    assert_eq!(priorities.len(), 4, "every existing row is filled:\n{csv}");
+    for p in priorities {
+        assert!(
+            ["low", "medium", "high"].contains(&p.as_str()),
+            "column-fill priority `{p}` must be low/medium/high",
+        );
+    }
+}
+
 #[test]
 fn seed_count_defaults_to_twenty() {
    let (project, db, _dir) = open_project_db();