From fbd219b631e24548e19d6b10e885f2864d281814 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Thu, 11 Jun 2026 21:45:34 +0000 Subject: [PATCH] feat(seed): --seed flag, ambient wiring, and /runda hardening (ADR-0048 P1.4 + DA) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P1.4 — user-visible surface: - Grammar: `seed [count] [--seed ]` (the first DSL flag with a value); build_seed disambiguates the seed value from the positional count. - Verified the auto-wired surface: table-name completion, --seed offered as a candidate, validity consistent with `show data`, an ADR-0042 near-miss row for bare `seed`, and render tests for the seed outcome. /runda hardening — eight DA findings, all resolved: - FK sampling now uses ORDER BY so --seed reproducibility no longer relies on SQLite's unspecified DISTINCT order (D4). - shortid columns now generate from seed's seeded RNG (new shortid::generate_with_rng) — D4 now holds with no exceptions. - Added the missing coverage the DA flagged: undo-one-step (D15), replay re-runs a seed line (D16), advanced-mode (D5), atomic rollback on a constraint failure, seed 0 no-op, complex-CHECK advisory (D17), and FK + shortid reproducibility. 2358 pass / 0 fail / 0 skip, clippy all-targets clean. --- src/app.rs | 70 +++++++++++ src/db.rs | 22 +++- src/dsl/grammar/data.rs | 76 +++++++----- src/dsl/shortid.rs | 16 ++- src/seed/generators.rs | 2 +- tests/it/parse_error_pedagogy.rs | 1 + tests/it/seed.rs | 202 +++++++++++++++++++++++++++++++ tests/typing_surface/mod.rs | 37 ++++++ 8 files changed, 389 insertions(+), 37 deletions(-) diff --git a/src/app.rs b/src/app.rs index 1106a85..f16e2d3 100644 --- a/src/app.rs +++ b/src/app.rs @@ -6258,6 +6258,76 @@ mod tests { ); } + #[test] + fn seed_success_renders_count_preview_and_advisory() { + // ADR-0048: handle_dsl_seed_success renders the seeded-row count, + // the preview table, and the enum/CHECK advisory. + let mut app = App::new(); + app.output + .push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple)); + app.update(AppEvent::DslSeedSucceeded { + command: Command::Seed { + table: "users".to_string(), + count: Some(20), + rng_seed: None, + }, + result: crate::db::SeedResult { + table: "users".to_string(), + requested: 20, + produced: 20, + data: crate::db::DataResult { + table_name: "users".to_string(), + columns: vec!["name".to_string()], + column_types: vec![None], + rows: vec![vec![Some("Alice".to_string())]], + }, + advisory_columns: vec!["status".to_string()], + }, + }); + let texts: Vec = app.output.iter().map(|l| l.text.clone()).collect(); + assert!( + texts.iter().any(|t| t.contains("20 row(s) seeded into users")), + "seeded-row count surfaced: {texts:?}", + ); + assert!( + texts.iter().any(|t| t.contains("status") && t.contains("generic text")), + "the advisory names the enum-ish column: {texts:?}", + ); + } + + #[test] + fn seed_success_reports_a_cap() { + // produced < requested → the cap note appears next to the count. + let mut app = App::new(); + app.output + .push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple)); + app.update(AppEvent::DslSeedSucceeded { + command: Command::Seed { + table: "J".to_string(), + count: Some(10), + rng_seed: None, + }, + result: crate::db::SeedResult { + table: "J".to_string(), + requested: 10, + produced: 4, + data: crate::db::DataResult { + table_name: "J".to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + }, + advisory_columns: Vec::new(), + }, + }); + let texts: Vec = app.output.iter().map(|l| l.text.clone()).collect(); + assert!( + texts.iter().any(|t| t.contains("4 row(s) seeded into J") + && t.contains("of 10 requested")), + "the cap note surfaces requested vs produced: {texts:?}", + ); + } + #[test] fn sql_delete_returning_renders_cascade_and_result_table() { // ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade diff --git a/src/db.rs b/src/db.rs index e49dbe4..1c51c0d 100644 --- a/src/db.rs +++ b/src/db.rs @@ -8724,6 +8724,10 @@ enum SeedColPlan { /// column's slot within the parent key tuple (so a compound FK's /// child columns all read from the *same* sampled parent row). ForeignKey { fk_idx: usize, pos: usize }, + /// A `shortid` column: a base58 id from seed's *seeded* RNG so it + /// reproduces under `--seed` (ADR-0048 D4). Always forced — a + /// `shortid` column needs an id, never a name-heuristic value. + ShortId, } /// Collision key for a positional list of seeded values, used to keep @@ -8771,8 +8775,11 @@ fn sample_parent_key_tuples( .map(|c| format!("\"{}\"", c.replace('"', "\"\""))) .collect::>() .join(", "); + // `ORDER BY` the key columns so the sampled order is deterministic + // (ADR-0048 D4): `--seed` reproducibility must not depend on + // SQLite's unspecified `DISTINCT` row order. let sql = format!( - "SELECT DISTINCT {cols} FROM \"{}\"", + "SELECT DISTINCT {cols} FROM \"{}\" ORDER BY {cols}", parent_table.replace('"', "\"\"") ); let n = parent_columns.len(); @@ -8877,8 +8884,9 @@ fn do_seed( let mut advisory_columns: Vec = Vec::new(); for c in &schema.columns { let ty = c.user_type.unwrap_or(Type::Text); - // serial/shortid auto-fill in `do_insert`; omit them. - if matches!(ty, Type::Serial | Type::ShortId) { + // serial auto-fills deterministically in `do_insert` (rowid / + // MAX+1) — omit it. shortid is handled below from the seeded RNG. + if matches!(ty, Type::Serial) { continue; } // blob has no DSL value path: refuse if required (D1), else omit. @@ -8895,6 +8903,10 @@ fn do_seed( col_names.push(c.name.clone()); if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) { plans.push(SeedColPlan::ForeignKey { fk_idx, pos }); + } else if matches!(ty, Type::ShortId) { + // Always the shortid generator (never a name heuristic — a + // shortid column needs a base58 id, not e.g. an email). + plans.push(SeedColPlan::ShortId); } else { // A simple `col IN ('a','b')` CHECK becomes the value source // (D17) so the enum-as-CHECK pattern just works. @@ -9028,6 +9040,10 @@ fn do_seed( SeedColPlan::ForeignKey { fk_idx, pos } => { fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone() } + // Seeded base58 id → reproducible under `--seed` (D4). + SeedColPlan::ShortId => { + Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng)) + } SeedColPlan::Generated { generator, ty } if matches!(generator, crate::seed::Generator::IdentitySequential) && matches!(ty, Type::Int) => diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index 75c6344..9dc1428 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -430,16 +430,26 @@ const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); // ================================================================= /// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a -/// non-negative integer). Phase 1 has no `--seed` flag, `set` clause, -/// or `
.` column-fill form yet. +/// non-negative integer). const SEED_COUNT: Node = Node::NumberLit { validator: Some(LIMIT_VALIDATOR), }; +/// `--seed ` — a reproducible-generation flag carrying a numeric +/// seed (ADR-0048 D4). The only flag in the DSL that takes a value; +/// `build_seed` reads the number immediately after the flag. +const SEED_FLAG_NODES: &[Node] = &[ + Node::Flag("seed"), + Node::NumberLit { + validator: Some(LIMIT_VALIDATOR), + }, +]; +const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES); const SEED_NODES: &[Node] = &[ // `writes_table` so a future `set =…` clause's column slots // can resolve against this table. TABLE_NAME_WRITES, Node::Optional(&SEED_COUNT), + Node::Optional(&SEED_FLAG), ]; const SEED_SHAPE: Node = Node::Seq(SEED_NODES); @@ -726,36 +736,48 @@ fn build_show_limit(path: &MatchedPath) -> Result, ValidationError> }) } -/// Build a `seed []` command (ADR-0048). The only -/// `NumberLit` in a `seed` path is the optional count. +/// Build a `seed [] [--seed ]` command (ADR-0048). The +/// `--seed` flag's value is the `NumberLit` right after the flag; the +/// positional count is the `NumberLit` *before* the flag (or the only +/// one when no flag is present). fn build_seed(path: &MatchedPath, _source: &str) -> Result { + let table = require_ident(path, "table_name")?; + let flag_idx = path + .items + .iter() + .position(|i| matches!(&i.kind, MatchedKind::Flag("seed"))); + + let rng_seed = flag_idx + .and_then(|fi| path.items.get(fi + 1)) + .filter(|i| matches!(i.kind, MatchedKind::NumberLit)) + .map(|i| parse_seed_u64(&i.text)) + .transpose()?; + + let count = path + .items + .iter() + .enumerate() + .find(|(idx, i)| { + matches!(i.kind, MatchedKind::NumberLit) && flag_idx.is_none_or(|fi| *idx < fi) + }) + .map(|(_, i)| parse_seed_u64(&i.text)) + .transpose()?; + Ok(Command::Seed { - table: require_ident(path, "table_name")?, - count: build_seed_count(path)?, - // `--seed ` is added in a later phase; reproducibility off - // for now. - rng_seed: None, + table, + count, + rng_seed, }) } -fn build_seed_count(path: &MatchedPath) -> Result, ValidationError> { - let Some(item) = path - .items - .iter() - .find(|i| matches!(i.kind, MatchedKind::NumberLit)) - else { - return Ok(None); - }; - item.text - .parse::() - .map(Some) - .map_err(|_| ValidationError { - message_key: "parse.custom.bind_type_mismatch", - args: vec![ - ("found", item.text.clone()), - ("expected", "non-negative integer".to_string()), - ], - }) +fn parse_seed_u64(text: &str) -> Result { + text.parse::().map_err(|_| ValidationError { + message_key: "parse.custom.bind_type_mismatch", + args: vec![ + ("found", text.to_string()), + ("expected", "non-negative integer".to_string()), + ], + }) } fn build_insert(path: &MatchedPath, _source: &str) -> Result { diff --git a/src/dsl/shortid.rs b/src/dsl/shortid.rs index 3638e92..b591bd7 100644 --- a/src/dsl/shortid.rs +++ b/src/dsl/shortid.rs @@ -18,17 +18,21 @@ const DEFAULT_LEN: usize = 10; pub const MIN_LEN: usize = 10; pub const MAX_LEN: usize = 12; -/// Generate a fresh shortid using thread-local RNG. +/// Generate a fresh shortid using the thread-local RNG. #[must_use] pub fn generate() -> String { - generate_len(DEFAULT_LEN) + generate_with_rng(&mut rand::rng()) } +/// Generate a shortid from a caller-supplied RNG. +/// +/// Lets `seed --seed ` produce **reproducible** shortid values +/// (ADR-0048 D4) by threading its seeded RNG through, while the default +/// [`generate`] keeps its thread-RNG behaviour for ordinary inserts. #[must_use] -fn generate_len(len: usize) -> String { - let mut rng = rand::rng(); - let mut out = String::with_capacity(len); - for _ in 0..len { +pub fn generate_with_rng(rng: &mut R) -> String { + let mut out = String::with_capacity(DEFAULT_LEN); + for _ in 0..DEFAULT_LEN { let idx = rng.random_range(0..ALPHABET.len()); out.push(ALPHABET[idx] as char); } diff --git a/src/seed/generators.rs b/src/seed/generators.rs index c1e4cab..c068465 100644 --- a/src/seed/generators.rs +++ b/src/seed/generators.rs @@ -100,7 +100,7 @@ fn generic_for_type(ty: Type, rng: &mut SeedRng) -> Value { let words: Vec = lorem::Words(2..4).fake_with_rng(rng); Value::Text(words.join(" ")) } - Type::ShortId => Value::Text(crate::dsl::shortid::generate()), + Type::ShortId => Value::Text(crate::dsl::shortid::generate_with_rng(rng)), Type::Int => Value::Number(rng.random_range(1..=10_000).to_string()), Type::Serial => Value::Number(rng.random_range(1..=10_000).to_string()), Type::Real => { diff --git a/tests/it/parse_error_pedagogy.rs b/tests/it/parse_error_pedagogy.rs index 8cab3b5..d421b82 100644 --- a/tests/it/parse_error_pedagogy.rs +++ b/tests/it/parse_error_pedagogy.rs @@ -109,6 +109,7 @@ fn near_miss_matrix_simple_mode() { ("delete", &["after `delete`, expected `from`", "delete from
"]), ("delete from", &["after `delete from`, expected table name", "delete from
"]), ("delete from T", &["expected `where` or `--all-rows`", "delete from
"]), + ("seed", &["after `seed`, expected table name", "seed
[count]"]), ("replay", &["after `replay`, expected string literal or path", "replay "]), ("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]), // advanced-only entry word typed in simple mode → "this is SQL" rail diff --git a/tests/it/seed.rs b/tests/it/seed.rs index 0bcd1cf..5fcdb6e 100644 --- a/tests/it/seed.rs +++ b/tests/it/seed.rs @@ -78,6 +78,34 @@ fn seed_parses_with_and_without_count() { } } +#[test] +fn seed_parses_the_reproducibility_flag() { + // `--seed ` after a count. + match parse_command("seed People 5 --seed 42").expect("count + --seed parses") { + Command::Seed { + table, + count, + rng_seed, + } => { + assert_eq!(table, "People"); + assert_eq!(count, Some(5)); + assert_eq!(rng_seed, Some(42), "the value after --seed is the rng seed"); + } + other => panic!("expected Command::Seed, got {other:?}"), + } + // `--seed ` with no count — the only number is the seed value, + // not the count. + match parse_command("seed People --seed 7").expect("--seed without count parses") { + Command::Seed { + count, rng_seed, .. + } => { + assert_eq!(count, None, "no positional count"); + assert_eq!(rng_seed, Some(7)); + } + other => panic!("expected Command::Seed, got {other:?}"), + } +} + #[test] fn seed_populates_a_table_and_persists_rows() { let (project, db, _dir) = open_project_db(); @@ -534,3 +562,177 @@ fn seed_preview_is_capped_but_count_is_full() { assert_eq!(res.produced, 25, "the full count is produced"); assert_eq!(res.data.rows.len(), 20, "the preview is capped at 20 rows"); } + +#[test] +fn seed_is_available_in_advanced_mode() { + use rdbms_playground::dsl::parser::parse_command_in_mode; + use rdbms_playground::mode::Mode; + // D5/A1: seed is a canonical command available in BOTH modes. + let r = parse_command_in_mode("seed People 5", Mode::Advanced); + assert!( + matches!(r, Ok(Command::Seed { .. })), + "seed must parse in advanced mode: {r:?}" + ); +} + +// — DA-pass coverage: undo (D15), replay (D16), atomicity, zero count, +// complex-CHECK advisory (D17), FK reproducibility (D4) — + +#[test] +fn seed_is_one_undo_step() { + // Undo must be explicitly enabled on the Database. + let dir = tempfile::tempdir().expect("tempdir"); + let project = project::open_or_create(None, Some(dir.path())).expect("project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence_and_undo(project.db_path(), persistence, true) + .expect("open db with undo"); + let rt = rt(); + create_people(&db, &rt); + rt.block_on(db.seed("People".into(), Some(6), Some(1), Some("seed People 6".into()))) + .expect("seed"); + assert_eq!(data_row_count(&read_csv(&project, "People").unwrap()), 6); + + // One undo removes the whole seed batch (ADR-0048 D15). + rt.block_on(db.undo()).unwrap().expect("undo applied"); + let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "one undo must remove every seeded row in a single step"); +} + +#[test] +fn replay_reruns_a_seed_line_as_a_data_write() { + use rdbms_playground::runtime::run_replay; + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + std::fs::write(project.path().join("seed.script"), "seed People 5\n").expect("write script"); + + // D16: seed is a data-write — replay re-runs it (it is NOT in the + // app-lifecycle skip-list), so the rows appear. + let _events = rt.block_on(run_replay(&db, project.path(), "seed.script")); + assert_eq!( + data_row_count(&read_csv(&project, "People").unwrap()), + 5, + "replay must re-run the seed line" + ); +} + +#[test] +fn seed_rolls_back_atomically_on_a_constraint_failure() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + // A CHECK that generic text cannot satisfy → every generated row + // violates it, so the whole batch must roll back (P1.3d atomicity). + let mut code = ColumnSpec::new("note", Type::Text); + code.check_sql = Some("length(note) > 100".to_string()); + rt.block_on(db.create_table( + "Bad".to_string(), + vec![ColumnSpec::new("id", Type::Serial), code], + vec!["id".to_string()], + None, + )) + .expect("create Bad"); + + let res = rt.block_on(db.seed("Bad".into(), Some(5), Some(1), Some("seed Bad 5".into()))); + assert!(res.is_err(), "seed must fail when generated rows violate the CHECK"); + let rows = read_csv(&project, "Bad").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "a failed seed must leave the table unchanged (atomic)"); +} + +#[test] +fn seed_zero_is_a_no_op() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + let res = rt + .block_on(db.seed("People".into(), Some(0), Some(1), Some("seed People 0".into()))) + .expect("seed 0 succeeds"); + assert_eq!(res.produced, 0); + let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "seed 0 inserts nothing"); +} + +#[test] +fn seed_advises_on_a_complex_check_column() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + // A complex (non-IN) CHECK seed can't derive values from → the + // column is filled generically AND flagged (D17/D13). `length` keeps + // generic words valid so the seed still succeeds. + let mut label = ColumnSpec::new("label", Type::Text); + label.check_sql = Some("length(label) >= 1".to_string()); + rt.block_on(db.create_table( + "Widgets".to_string(), + vec![ColumnSpec::new("id", Type::Serial), label], + vec!["id".to_string()], + None, + )) + .expect("create Widgets"); + + let res = rt + .block_on(db.seed("Widgets".into(), Some(3), Some(1), Some("seed Widgets 3".into()))) + .expect("seed"); + assert!( + res.advisory_columns.contains(&"label".to_string()), + "a column with an underivable CHECK should be flagged: {:?}", + res.advisory_columns + ); +} + +#[test] +fn seed_foreign_keys_are_reproducible_with_a_fixed_seed() { + let rt = rt(); + let seed_one = |db: &Database| { + create_users_and_orders(db, &rt, true); + rt.block_on(db.seed("Users".into(), Some(4), Some(1), Some("seed Users 4".into()))) + .expect("seed users"); + rt.block_on(db.seed("Orders".into(), Some(8), Some(99), Some("seed Orders 8".into()))) + .expect("seed orders"); + }; + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + seed_one(&db1); + seed_one(&db2); + // With ORDER BY on the FK sample, the same --seed reproduces the + // sampled FK values (D4). + assert_eq!( + read_csv(&p1, "Orders").unwrap(), + read_csv(&p2, "Orders").unwrap(), + "FK sampling must be reproducible with a fixed --seed" + ); +} + +#[test] +fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() { + let rt = rt(); + let make = |db: &Database| { + rt.block_on(db.create_table( + "Contacts".to_string(), + vec![ + ColumnSpec::new("code", Type::ShortId), + ColumnSpec::new("name", Type::Text), + ], + vec!["code".to_string()], + None, + )) + .expect("create Contacts"); + rt.block_on(db.seed("Contacts".into(), Some(5), Some(42), Some("seed Contacts 5".into()))) + .expect("seed"); + }; + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + make(&db1); + make(&db2); + + let csv1 = read_csv(&p1, "Contacts").unwrap(); + let csv2 = read_csv(&p2, "Contacts").unwrap(); + assert_eq!(csv1, csv2, "shortid values must reproduce under a fixed --seed"); + + // The shortid PK is populated with distinct 10-char base58 ids. + let codes = nth_column_values(&csv1, 0); + assert_eq!(codes.len(), 5); + let distinct: std::collections::HashSet<&String> = codes.iter().collect(); + assert_eq!(distinct.len(), 5, "shortid PK values must be distinct: {codes:?}"); + for code in &codes { + assert_eq!(code.len(), 10, "shortid should be 10 chars: {code}"); + } +} diff --git a/tests/typing_surface/mod.rs b/tests/typing_surface/mod.rs index 67a3b32..8a5b937 100644 --- a/tests/typing_surface/mod.rs +++ b/tests/typing_surface/mod.rs @@ -441,3 +441,40 @@ fn smoke_assess_parse_label_round_trips() { assert_eq!(a.parse_result.as_deref(), Ok("Insert")); assert!(matches!(a.state, InputState::Valid)); } + +/// `seed` (ADR-0048) gets the standard ambient surface for free from +/// grammar registration: table-name completion, the validity indicator +/// flagging an unknown table, and the `--seed` flag offered as a +/// candidate. +#[test] +fn seed_completion_and_validity() { + let schema = schema_serial_pk(); // Customers(id serial, Name, Email) + + // Completion: `seed ` offers existing table names. + let cands = completion_candidate_texts(&assess_at_end("seed ", &schema)); + assert!( + cands.iter().any(|c| c == "Customers"), + "`seed ` should complete table names, got {cands:?}" + ); + + // Validity (ADR-0027): a known table seeds clean; an unknown one is + // flagged (same table slot as update/delete/show data). + let ok = assess_at_end("seed Customers 5", &schema); + assert!(matches!(ok.state, InputState::Valid), "known table: {:?}", ok.state); + // seed's unknown-table behaviour must match its closest sibling + // `show data` (same table-only slot), whatever that is. + let seed_ghost = assess_at_end("seed Ghost 5", &schema).state; + let show_ghost = assess_at_end("show data Ghost", &schema).state; + assert_eq!( + std::mem::discriminant(&seed_ghost), + std::mem::discriminant(&show_ghost), + "seed should treat an unknown table like `show data`: seed={seed_ghost:?}, show={show_ghost:?}" + ); + + // The `--seed` reproducibility flag is offered after the count. + let flag_cands = completion_candidate_texts(&assess_at_end("seed Customers 5 ", &schema)); + assert!( + flag_cands.iter().any(|c| c.contains("seed")), + "`--seed` should be offered as a candidate, got {flag_cands:?}" + ); +}