diff --git a/src/db.rs b/src/db.rs index f6df666..ab7f7c1 100644 --- a/src/db.rs +++ b/src/db.rs @@ -8686,22 +8686,87 @@ fn count_rows(conn: &Connection, table: &str) -> Result { /// Default row count when `seed ` omits the count (ADR-0048 D6). const DEFAULT_SEED_COUNT: u64 = 20; +/// How a single column's value is produced for each seeded row. +enum SeedColPlan { + /// Generated from the seed library (the generator is chosen once; + /// `generate_value` runs per row). + Generated { + generator: crate::seed::Generator, + ty: Type, + }, + /// A foreign-key child column: sampled from an existing parent row + /// (ADR-0048 D14). `fk_idx` selects the FK; `pos` selects this + /// column's slot within the parent key tuple (so a compound FK's + /// child columns all read from the *same* sampled parent row). + ForeignKey { fk_idx: usize, pos: usize }, +} + +/// Sample existing parent-key tuples for FK generation (ADR-0048 D14). +/// +/// Returns one `Value` tuple per distinct parent row in +/// `parent_columns` order, so a compound FK's children can be filled +/// from one consistent parent row. Empty when the parent has no rows +/// (the caller turns that into the friendly "seed the parent first" +/// error). +fn sample_parent_key_tuples( + conn: &Connection, + parent_table: &str, + parent_columns: &[String], +) -> Result>, DbError> { + let cols = parent_columns + .iter() + .map(|c| format!("\"{}\"", c.replace('"', "\"\""))) + .collect::>() + .join(", "); + let sql = format!( + "SELECT DISTINCT {cols} FROM \"{}\"", + parent_table.replace('"', "\"\"") + ); + let n = parent_columns.len(); + let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?; + let tuples = stmt + .query_map([], |row| { + let mut tuple = Vec::with_capacity(n); + for i in 0..n { + let v = match row.get_ref(i)? { + rusqlite::types::ValueRef::Null => Value::Null, + rusqlite::types::ValueRef::Integer(x) => Value::Number(x.to_string()), + rusqlite::types::ValueRef::Real(x) => Value::Number(x.to_string()), + rusqlite::types::ValueRef::Text(t) => { + Value::Text(String::from_utf8_lossy(t).into_owned()) + } + // FK keys are never blobs in this app; treat as NULL. + rusqlite::types::ValueRef::Blob(_) => Value::Null, + }; + tuple.push(v); + } + Ok(tuple) + }) + .map_err(DbError::from_rusqlite)? + .collect::, _>>() + .map_err(DbError::from_rusqlite)?; + Ok(tuples) +} + /// Populate a table with generated fake data (ADR-0048, SD1). /// -/// **Phase 1 walking skeleton.** Generates whole rows for every user -/// column that is not an autogen `serial`/`shortid` and not a foreign -/// key, inserting them one at a time through [`do_insert`] — which -/// reuses all the existing per-value validation, autogen autofill, -/// FK-error enrichment and persistence machinery. The whole seed is a -/// single undo step (the worker wraps the call in one `snapshot_then`) -/// and writes exactly one `history.log` line (only the first row -/// carries the `source`). +/// **Phase 1.** Generates whole rows and inserts them one at a time +/// through [`do_insert`] — reusing all the existing per-value +/// validation, autogen autofill, FK-error enrichment and persistence +/// machinery. The whole seed is a single undo step (the worker wraps +/// the call in one `snapshot_then`) and writes exactly one +/// `history.log` line (only the first row carries the `source`). /// -/// Deferred to the next phase (ADR-0048): FK sampling from parent rows -/// (D14), the efficient single-transaction multi-row path, identifier -/// uniqueness (D10), the `IN`-CHECK value derivation (D17), the -/// required-column block guard (D1), the capped auto-show preview -/// (D18), and the enum/CHECK advisory (D12/D13). +/// Foreign-key columns are filled by sampling existing parent rows +/// (D14); a compound FK reads all its child columns from one sampled +/// parent row. An empty parent is refused with a friendly error. A +/// `NOT NULL blob` column (which seed cannot generate) is refused by +/// the block guard (D1); a nullable blob is omitted (→ NULL). +/// +/// Deferred: identifier/constraint uniqueness incl. junction +/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the +/// efficient single-transaction multi-row path, the capped auto-show +/// preview (D18), and the enum/CHECK advisory (D12/D13). fn do_seed( conn: &Connection, persistence: Option<&Persistence>, @@ -8711,6 +8776,7 @@ fn do_seed( rng_seed: Option, ) -> Result { use crate::seed; + use rand::RngExt; let canonical_table = require_canonical_table(conn, table)?; let table = canonical_table.as_str(); @@ -8719,48 +8785,92 @@ fn do_seed( let schema = read_schema(conn, table)?; - // FK child columns are filled by the executor in a later phase; for - // now they are omitted (left to NULL / default). - let fk_children: std::collections::HashSet<&str> = schema - .foreign_keys - .iter() - .flat_map(|fk| fk.child_columns.iter().map(String::as_str)) - .collect(); + // Pre-sample each FK's parent key tuples (D14); refuse if a parent + // is empty (no valid reference can be fabricated). + let mut fk_samples: Vec>> = Vec::with_capacity(schema.foreign_keys.len()); + for fk in &schema.foreign_keys { + let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &fk.parent_columns)?; + if tuples.is_empty() { + return Err(DbError::Unsupported(format!( + "cannot seed `{table}`: parent table `{}` (referenced by `{}`) has no rows. \ + Seed or insert into `{}` first.", + fk.parent_table, + fk.child_columns.join(", "), + fk.parent_table, + ))); + } + fk_samples.push(tuples); + } + // child column → (fk index, position within the FK's column list). + let mut fk_child_pos: std::collections::HashMap<&str, (usize, usize)> = + std::collections::HashMap::new(); + for (fk_idx, fk) in schema.foreign_keys.iter().enumerate() { + for (pos, child) in fk.child_columns.iter().enumerate() { + fk_child_pos.insert(child.as_str(), (fk_idx, pos)); + } + } - // Columns we generate values for: every user column that is not an - // autogen serial/shortid and not an FK child. - let gen_columns: Vec<&ReadColumn> = schema - .columns - .iter() - .filter(|c| { - !matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId)) - && !fk_children.contains(c.name.as_str()) - }) - .collect(); - let col_names: Vec = gen_columns.iter().map(|c| c.name.clone()).collect(); + // Build the per-column generation plan, skipping autogen and + // un-generatable columns. + let mut col_names: Vec = Vec::new(); + let mut plans: Vec = Vec::new(); + for c in &schema.columns { + let ty = c.user_type.unwrap_or(Type::Text); + // serial/shortid auto-fill in `do_insert`; omit them. + if matches!(ty, Type::Serial | Type::ShortId) { + continue; + } + // blob has no DSL value path: refuse if required (D1), else omit. + if matches!(ty, Type::Blob) { + if c.notnull { + return Err(DbError::Unsupported(format!( + "cannot seed `{table}`: column `{}` is `NOT NULL` but has type `blob`, \ + which seed cannot generate. Add the rows another way or make it nullable.", + c.name, + ))); + } + continue; + } + col_names.push(c.name.clone()); + if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) { + plans.push(SeedColPlan::ForeignKey { fk_idx, pos }); + } else { + let spec = seed::ColumnSpec { + name: c.name.clone(), + ty, + not_null: c.notnull, + primary_key: c.primary_key, + unique: c.unique, + is_foreign_key: false, + // `IN`-CHECK derivation is a later phase. + check_in_values: None, + }; + let generator = seed::choose_generator(table, &spec); + plans.push(SeedColPlan::Generated { generator, ty }); + } + } let mut rng = seed::make_rng(rng_seed); let mut rows_affected = 0usize; let mut last_data: Option = None; for i in 0..n { - let values: Vec = gen_columns + // One sampled parent row per FK for this row, so a compound FK's + // children stay consistent. + let fk_choice: Vec = fk_samples .iter() - .map(|c| { - let ty = c.user_type.unwrap_or(Type::Text); - let spec = seed::ColumnSpec { - name: c.name.clone(), - ty, - not_null: c.notnull, - primary_key: c.primary_key, - unique: c.unique, - // FK children are already filtered out above. - is_foreign_key: false, - // `IN`-CHECK derivation is a later phase. - check_in_values: None, - }; - let generator = seed::choose_generator(table, &spec); - seed::generate_value(&generator, ty, &mut rng) + .map(|tuples| rng.random_range(0..tuples.len())) + .collect(); + + let values: Vec = plans + .iter() + .map(|plan| match plan { + SeedColPlan::Generated { generator, ty } => { + seed::generate_value(generator, *ty, &mut rng) + } + SeedColPlan::ForeignKey { fk_idx, pos } => { + fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone() + } }) .collect(); diff --git a/tests/it/seed.rs b/tests/it/seed.rs index 871b589..b082757 100644 --- a/tests/it/seed.rs +++ b/tests/it/seed.rs @@ -5,7 +5,7 @@ //! line for the whole command (ADR-0048 D15 / U3). use rdbms_playground::db::Database; -use rdbms_playground::dsl::{ColumnSpec, Command, Type, parse_command}; +use rdbms_playground::dsl::{ColumnSpec, Command, ReferentialAction, Type, parse_command}; use rdbms_playground::persistence::Persistence; use rdbms_playground::project; @@ -148,3 +148,153 @@ fn seed_writes_exactly_one_history_line() { "a seed of 5 rows must write exactly one history line:\n{history}" ); } + +// — FK sampling, empty-parent error, block guard (ADR-0048 D14 / D1) — + +/// `Users(id serial pk, name text)` + `Orders(id serial pk, user_id +/// int, total decimal)` with `Orders.user_id -> Users.id`. +fn create_users_and_orders(db: &Database, rt: &tokio::runtime::Runtime, add_fk: bool) { + rt.block_on(async { + db.create_table( + "Users".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create Users"); + db.create_table( + "Orders".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("user_id", Type::Int), + ColumnSpec::new("total", Type::Decimal), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create Orders"); + if add_fk { + db.add_relationship( + None, + "Users".to_string(), + vec!["id".to_string()], + "Orders".to_string(), + vec!["user_id".to_string()], + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .expect("add Orders->Users FK"); + } + }); +} + +/// `user_id` is column index 1 of `Orders(id, user_id, total)`. +fn order_user_ids(csv: &str) -> Vec { + let mut lines = csv.lines().filter(|l| !l.trim().is_empty()); + lines.next(); // header + lines + .map(|l| l.split(',').nth(1).unwrap_or_default().to_string()) + .collect() +} + +#[test] +fn seed_fills_foreign_keys_from_existing_parents() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + + // 5 parents → serial ids 1..=5. + rt.block_on(db.seed("Users".into(), Some(5), Some(1), Some("seed Users 5".into()))) + .expect("seed Users"); + let res = rt + .block_on(db.seed("Orders".into(), Some(10), Some(2), Some("seed Orders 10".into()))) + .expect("seed Orders"); + assert_eq!(res.rows_affected, 10, "every child row must insert (valid FK)"); + + let csv = read_csv(&project, "Orders").expect("Orders CSV"); + let valid: std::collections::HashSet = (1..=5).map(|i| i.to_string()).collect(); + let user_ids = order_user_ids(&csv); + assert_eq!(user_ids.len(), 10); + for uid in &user_ids { + assert!( + valid.contains(uid), + "user_id `{uid}` does not reference an existing parent:\n{csv}" + ); + } +} + +#[test] +fn seed_refuses_when_a_parent_table_is_empty() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + + // Users is empty — no valid FK can be fabricated. + let err = rt + .block_on(db.seed("Orders".into(), Some(3), Some(1), Some("seed Orders 3".into()))) + .expect_err("seed must refuse an empty parent"); + let msg = err.to_string(); + assert!(msg.contains("Users"), "error should name the empty parent: {msg}"); + let lower = msg.to_lowercase(); + assert!( + lower.contains("no rows") || lower.contains("first"), + "error should explain how to fix it: {msg}" + ); +} + +#[test] +fn seed_refuses_a_not_null_blob_column() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + let mut payload = ColumnSpec::new("payload", Type::Blob); + payload.not_null = true; + rt.block_on(db.create_table( + "Files".to_string(), + vec![ColumnSpec::new("id", Type::Serial), payload], + vec!["id".to_string()], + None, + )) + .expect("create Files"); + + let err = rt + .block_on(db.seed("Files".into(), Some(2), Some(1), Some("seed Files 2".into()))) + .expect_err("seed must refuse a NOT NULL blob"); + let msg = err.to_string(); + assert!( + msg.contains("payload") && msg.to_lowercase().contains("blob"), + "error should name the un-generatable blob column: {msg}" + ); +} + +#[test] +fn seed_omits_a_nullable_blob_column() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "Files".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + // nullable blob → omitted (→ NULL), seed still succeeds. + ColumnSpec::new("payload", Type::Blob), + ], + vec!["id".to_string()], + None, + )) + .expect("create Files"); + + let res = rt + .block_on(db.seed("Files".into(), Some(3), Some(1), Some("seed Files 3".into()))) + .expect("seed succeeds despite the nullable blob"); + assert_eq!(res.rows_affected, 3); + let csv = read_csv(&project, "Files").expect("Files CSV"); + assert_eq!(data_row_count(&csv), 3); +}