diff --git a/src/app.rs b/src/app.rs index 56fc7fc..e7c7e12 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2390,6 +2390,9 @@ impl App { // the executor), like the named DSL drop. C::SqlDropIndex { .. } => (Operation::DropIndex, None, None), C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None), + // Seed generates inserts; FK/constraint failures read as + // insert errors (ADR-0048). + C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None), C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None), C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None), C::ShowData { name, .. } | C::ShowTable { name } => { diff --git a/src/db.rs b/src/db.rs index 562b8d8..f6df666 100644 --- a/src/db.rs +++ b/src/db.rs @@ -702,6 +702,15 @@ enum Request { source: Option, reply: oneshot::Sender>, }, + /// Populate a table with generated fake data (ADR-0048). One undo + /// snapshot wraps the whole seed via `snapshot_then`. + Seed { + table: String, + count: Option, + rng_seed: Option, + source: Option, + reply: oneshot::Sender>, + }, Update { table: String, assignments: Vec<(String, Value)>, @@ -1491,6 +1500,26 @@ impl Database { recv.await.map_err(|_| DbError::WorkerGone)? } + /// Populate a table with generated fake data (ADR-0048, SD1). + pub async fn seed( + &self, + table: String, + count: Option, + rng_seed: Option, + source: Option, + ) -> Result { + let (reply, recv) = oneshot::channel(); + self.send(Request::Seed { + table, + count, + rng_seed, + source, + reply, + }) + .await?; + recv.await.map_err(|_| DbError::WorkerGone)? + } + pub async fn update( &self, table: String, @@ -2646,6 +2675,24 @@ fn handle_request( &values, )); } + Request::Seed { + table, + count, + rng_seed, + source, + reply, + } => { + // One snapshot wraps the whole seed (ADR-0048 D15 — one undo + // step), exactly like a single insert. + snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_seed( + conn, + persistence, + source.as_deref(), + &table, + count, + rng_seed, + )); + } Request::Update { table, assignments, @@ -8636,6 +8683,108 @@ fn count_rows(conn: &Connection, table: &str) -> Result { .map_err(DbError::from_rusqlite) } +/// Default row count when `seed ` omits the count (ADR-0048 D6). +const DEFAULT_SEED_COUNT: u64 = 20; + +/// Populate a table with generated fake data (ADR-0048, SD1). +/// +/// **Phase 1 walking skeleton.** Generates whole rows for every user +/// column that is not an autogen `serial`/`shortid` and not a foreign +/// key, inserting them one at a time through [`do_insert`] — which +/// reuses all the existing per-value validation, autogen autofill, +/// FK-error enrichment and persistence machinery. The whole seed is a +/// single undo step (the worker wraps the call in one `snapshot_then`) +/// and writes exactly one `history.log` line (only the first row +/// carries the `source`). +/// +/// Deferred to the next phase (ADR-0048): FK sampling from parent rows +/// (D14), the efficient single-transaction multi-row path, identifier +/// uniqueness (D10), the `IN`-CHECK value derivation (D17), the +/// required-column block guard (D1), the capped auto-show preview +/// (D18), and the enum/CHECK advisory (D12/D13). +fn do_seed( + conn: &Connection, + persistence: Option<&Persistence>, + source: Option<&str>, + table: &str, + count: Option, + rng_seed: Option, +) -> Result { + use crate::seed; + + let canonical_table = require_canonical_table(conn, table)?; + let table = canonical_table.as_str(); + let n = count.unwrap_or(DEFAULT_SEED_COUNT); + debug!(table = %table, count = n, "seed"); + + let schema = read_schema(conn, table)?; + + // FK child columns are filled by the executor in a later phase; for + // now they are omitted (left to NULL / default). + let fk_children: std::collections::HashSet<&str> = schema + .foreign_keys + .iter() + .flat_map(|fk| fk.child_columns.iter().map(String::as_str)) + .collect(); + + // Columns we generate values for: every user column that is not an + // autogen serial/shortid and not an FK child. + let gen_columns: Vec<&ReadColumn> = schema + .columns + .iter() + .filter(|c| { + !matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId)) + && !fk_children.contains(c.name.as_str()) + }) + .collect(); + let col_names: Vec = gen_columns.iter().map(|c| c.name.clone()).collect(); + + let mut rng = seed::make_rng(rng_seed); + let mut rows_affected = 0usize; + let mut last_data: Option = None; + + for i in 0..n { + let values: Vec = gen_columns + .iter() + .map(|c| { + let ty = c.user_type.unwrap_or(Type::Text); + let spec = seed::ColumnSpec { + name: c.name.clone(), + ty, + not_null: c.notnull, + primary_key: c.primary_key, + unique: c.unique, + // FK children are already filtered out above. + is_foreign_key: false, + // `IN`-CHECK derivation is a later phase. + check_in_values: None, + }; + let generator = seed::choose_generator(table, &spec); + seed::generate_value(&generator, ty, &mut rng) + }) + .collect(); + + // Only the first row carries the `source`, so the whole seed + // writes exactly one `history.log` line. + let row_source = if i == 0 { source } else { None }; + let result = do_insert(conn, persistence, row_source, table, Some(&col_names), &values)?; + rows_affected += result.rows_affected; + last_data = Some(result.data); + } + + Ok(InsertResult { + rows_affected, + // `None` only when count was 0 — an empty result for the + // auto-show (the zero-no-op refinement lands in a later phase). + data: last_data.unwrap_or_else(|| DataResult { + table_name: table.to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + }), + }) +} + fn do_insert( conn: &Connection, persistence: Option<&Persistence>, diff --git a/src/dsl/command.rs b/src/dsl/command.rs index 68046e4..a1f834b 100644 --- a/src/dsl/command.rs +++ b/src/dsl/command.rs @@ -402,6 +402,16 @@ pub enum Command { filter: Option, limit: Option, }, + /// Populate a table with generated fake data (ADR-0048, SD1). + /// `count` defaults to 20 when omitted; `rng_seed` (from a future + /// `--seed ` flag) makes generation reproducible. Phase 1 is + /// whole-row generation; the `set` override clause and the + /// `.` column-fill form arrive in later phases. + Seed { + table: String, + count: Option, + rng_seed: Option, + }, /// Replay a sequence of DSL commands from a file. Each line /// is parsed and dispatched through the same pipeline as /// interactive input. Blank lines and lines whose first @@ -949,6 +959,7 @@ impl Command { } => "show index", Self::ShowList { kind, .. } => kind.command_name(), Self::Insert { .. } => "insert into", + Self::Seed { .. } => "seed", Self::Update { .. } => "update", Self::Delete { .. } => "delete from", Self::ShowData { .. } => "show data", @@ -997,6 +1008,7 @@ impl Command { | Self::AddConstraint { table, .. } | Self::DropConstraint { table, .. } | Self::Insert { table, .. } + | Self::Seed { table, .. } | Self::Update { table, .. } | Self::Delete { table, .. } => table, // For relationships we focus on the parent (1-side): diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index b6a8a34..75c6344 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -425,6 +425,24 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[ ]; const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); +// ================================================================= +// seed — `seed []` (ADR-0048, SD1) +// ================================================================= + +/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a +/// non-negative integer). Phase 1 has no `--seed` flag, `set` clause, +/// or `
.` column-fill form yet. +const SEED_COUNT: Node = Node::NumberLit { + validator: Some(LIMIT_VALIDATOR), +}; +const SEED_NODES: &[Node] = &[ + // `writes_table` so a future `set =…` clause's column slots + // can resolve against this table. + TABLE_NAME_WRITES, + Node::Optional(&SEED_COUNT), +]; +const SEED_SHAPE: Node = Node::Seq(SEED_NODES); + const UPDATE_NODES: &[Node] = &[ TABLE_NAME_WRITES, Node::Word(Word::keyword("set")), @@ -708,6 +726,38 @@ fn build_show_limit(path: &MatchedPath) -> Result, ValidationError> }) } +/// Build a `seed []` command (ADR-0048). The only +/// `NumberLit` in a `seed` path is the optional count. +fn build_seed(path: &MatchedPath, _source: &str) -> Result { + Ok(Command::Seed { + table: require_ident(path, "table_name")?, + count: build_seed_count(path)?, + // `--seed ` is added in a later phase; reproducibility off + // for now. + rng_seed: None, + }) +} + +fn build_seed_count(path: &MatchedPath) -> Result, ValidationError> { + let Some(item) = path + .items + .iter() + .find(|i| matches!(i.kind, MatchedKind::NumberLit)) + else { + return Ok(None); + }; + item.text + .parse::() + .map(Some) + .map_err(|_| ValidationError { + message_key: "parse.custom.bind_type_mismatch", + args: vec![ + ("found", item.text.clone()), + ("expected", "non-negative integer".to_string()), + ], + }) +} + fn build_insert(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; @@ -1452,6 +1502,14 @@ pub static SHOW: CommandNode = CommandNode { "parse.usage.show_index", ],}; +pub static SEED: CommandNode = CommandNode { + entry: Word::keyword("seed"), + shape: SEED_SHAPE, + ast_builder: build_seed, + help_id: Some("data.seed"), + usage_ids: &["parse.usage.seed"], +}; + pub static INSERT: CommandNode = CommandNode { entry: Word::keyword("insert"), shape: INSERT_SHAPE, diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index 30a5b3b..30ebbf5 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -714,6 +714,7 @@ pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[ (&ddl::CREATE, CommandCategory::Simple), (&ddl::CREATE_M2N, CommandCategory::Simple), (&data::SHOW, CommandCategory::Simple), + (&data::SEED, CommandCategory::Simple), (&data::INSERT, CommandCategory::Simple), (&data::UPDATE, CommandCategory::Simple), (&data::DELETE, CommandCategory::Simple), diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 389a22a..d2a97f5 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -207,6 +207,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("help.ddl.rename", &[]), ("help.ddl.change", &[]), ("help.data.show", &[]), + ("help.data.seed", &[]), ("help.data.insert", &[]), ("help.data.update", &[]), ("help.data.delete", &[]), @@ -308,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("parse.usage.undo", &[]), ("parse.usage.save", &[]), ("parse.usage.select", &[]), + ("parse.usage.seed", &[]), ("parse.usage.show_data", &[]), ("parse.usage.show_table", &[]), ("parse.usage.show_tables", &[]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 88778ee..53bb040 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -333,6 +333,10 @@ help: show indexes — list all indexes show relationship — show one relationship's detail show index — show one index's detail + seed: |- + seed [] — fill a table with generated sample rows + (default 20). Existing rows are kept; + foreign keys draw from existing parent rows. insert: |- insert into [(cols)] [values] (vals) — add a row update: |- @@ -569,6 +573,7 @@ parse: change_column: |- change column [in] [table]
: () [--force-conversion | --dont-convert] + seed: "seed
[count]" show_data: "show data
" show_table: "show table
" show_tables: "show tables" diff --git a/src/runtime.rs b/src/runtime.rs index ba9c056..d0f873d 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -2911,6 +2911,17 @@ async fn execute_command_typed( .insert(table, columns, values, src) .await .map(CommandOutcome::Insert), + // ADR-0048 (SD1). Phase 1 reuses the insert outcome for the + // auto-show; a dedicated `SeedResult` (capped preview + + // enum/CHECK advisory) replaces this in a later phase. + Command::Seed { + table, + count, + rng_seed, + } => database + .seed(table, count, rng_seed, src) + .await + .map(CommandOutcome::Insert), Command::Update { table, assignments, diff --git a/tests/it/main.rs b/tests/it/main.rs index a6d300d..cbc6d4b 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -23,6 +23,7 @@ mod m2n; mod parse_error_pedagogy; mod project_lifecycle; mod replay_command; +mod seed; mod sql_alter_table; mod sql_create_index; mod sql_create_table; diff --git a/tests/it/seed.rs b/tests/it/seed.rs new file mode 100644 index 0000000..871b589 --- /dev/null +++ b/tests/it/seed.rs @@ -0,0 +1,150 @@ +//! Tier-3 integration tests for the `seed` command (ADR-0048, the +//! Phase-1 walking skeleton). Covers the parse path (grammar → AST), +//! the worker round-trip (rows generated + persisted to CSV), +//! reproducibility via a fixed `--seed`, and the single `history.log` +//! line for the whole command (ADR-0048 D15 / U3). + +use rdbms_playground::db::Database; +use rdbms_playground::dsl::{ColumnSpec, Command, Type, parse_command}; +use rdbms_playground::persistence::Persistence; +use rdbms_playground::project; + +fn rt() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio rt") +} + +fn open_project_db() -> (project::Project, Database, tempfile::TempDir) { + let dir = tempfile::tempdir().expect("create tempdir"); + let project = + project::open_or_create(None, Some(dir.path())).expect("open or create project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence(project.db_path(), persistence) + .expect("open db with persistence"); + (project, db, dir) +} + +fn read_csv(project: &project::Project, table: &str) -> Option { + std::fs::read_to_string(project.path().join("data").join(format!("{table}.csv"))).ok() +} + +/// `People(id serial pk, name text, email text)` — `id` is autogen +/// (excluded from generation, so no PK collisions), `name`/`email` +/// are generated. +fn create_people(db: &Database, rt: &tokio::runtime::Runtime) { + rt.block_on(db.create_table( + "People".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ColumnSpec::new("email", Type::Text), + ], + vec!["id".to_string()], + None, + )) + .expect("create People"); +} + +/// Data rows in a CSV = non-empty lines minus the header. +fn data_row_count(csv: &str) -> usize { + csv.lines() + .filter(|l| !l.trim().is_empty()) + .count() + .saturating_sub(1) +} + +#[test] +fn seed_parses_with_and_without_count() { + match parse_command("seed People 5").expect("`seed People 5` parses") { + Command::Seed { + table, + count, + rng_seed, + } => { + assert_eq!(table, "People"); + assert_eq!(count, Some(5)); + assert_eq!(rng_seed, None); + } + other => panic!("expected Command::Seed, got {other:?}"), + } + match parse_command("seed People").expect("`seed People` parses") { + Command::Seed { table, count, .. } => { + assert_eq!(table, "People"); + assert_eq!(count, None, "omitted count is None (executor defaults to 20)"); + } + other => panic!("expected Command::Seed, got {other:?}"), + } +} + +#[test] +fn seed_populates_a_table_and_persists_rows() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + let result = rt + .block_on(db.seed("People".into(), Some(7), Some(42), Some("seed People 7".into()))) + .expect("seed succeeds"); + assert_eq!(result.rows_affected, 7); + + let csv = read_csv(&project, "People").expect("People CSV exists after seed"); + assert_eq!( + data_row_count(&csv), + 7, + "CSV should hold 7 generated rows:\n{csv}" + ); + // The generated `email` column produces address-shaped values. + assert!(csv.contains('@'), "seeded emails should appear in the CSV:\n{csv}"); +} + +#[test] +fn seed_count_defaults_to_twenty() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + let result = rt + .block_on(db.seed("People".into(), None, Some(1), Some("seed People".into()))) + .expect("seed succeeds"); + assert_eq!(result.rows_affected, 20, "omitted count defaults to 20"); + let csv = read_csv(&project, "People").expect("People CSV exists"); + assert_eq!(data_row_count(&csv), 20); +} + +#[test] +fn seed_is_reproducible_with_a_fixed_seed() { + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + let rt = rt(); + create_people(&db1, &rt); + create_people(&db2, &rt); + + rt.block_on(db1.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) + .expect("seed run 1"); + rt.block_on(db2.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) + .expect("seed run 2"); + + let csv1 = read_csv(&p1, "People").expect("csv 1"); + let csv2 = read_csv(&p2, "People").expect("csv 2"); + assert_eq!(csv1, csv2, "the same --seed must reproduce identical data"); +} + +#[test] +fn seed_writes_exactly_one_history_line() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + rt.block_on(db.seed("People".into(), Some(5), Some(1), Some("seed People 5".into()))) + .expect("seed succeeds"); + + let history = std::fs::read_to_string(project.path().join("history.log")) + .expect("history.log exists"); + let seed_lines = history.lines().filter(|l| l.contains("seed People 5")).count(); + assert_eq!( + seed_lines, 1, + "a seed of 5 rows must write exactly one history line:\n{history}" + ); +} diff --git a/tests/typing_surface/mod.rs b/tests/typing_surface/mod.rs index c2d4307..67a3b32 100644 --- a/tests/typing_surface/mod.rs +++ b/tests/typing_surface/mod.rs @@ -237,6 +237,7 @@ fn command_kind_label(cmd: &rdbms_playground::dsl::Command) -> String { ShowTable { .. } => "ShowTable".into(), ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()), Insert { .. } => "Insert".into(), + Seed { .. } => "Seed".into(), Update { .. } => "Update".into(), Delete { .. } => "Delete".into(), ShowData { .. } => "ShowData".into(),