diff --git a/src/app.rs b/src/app.rs index f16e2d3..dae22a3 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2097,9 +2097,14 @@ impl App { self.note_system(line); } if !result.advisory_columns.is_empty() { + // `column` (the first advised column) seeds the concrete + // repair examples (D13 Phase 2/3 wording); `columns` lists + // them all. self.push_category_three_prose(crate::t!( "seed.advisory_generic", - columns = result.advisory_columns.join(", ") + columns = result.advisory_columns.join(", "), + column = result.advisory_columns[0], + table = result.table )); } } @@ -6268,7 +6273,9 @@ mod tests { app.update(AppEvent::DslSeedSucceeded { command: Command::Seed { table: "users".to_string(), + target_column: None, count: Some(20), + overrides: Vec::new(), rng_seed: None, }, result: crate::db::SeedResult { @@ -6304,7 +6311,9 @@ mod tests { app.update(AppEvent::DslSeedSucceeded { command: Command::Seed { table: "J".to_string(), + target_column: None, count: Some(10), + overrides: Vec::new(), rng_seed: None, }, result: crate::db::SeedResult { diff --git a/src/completion.rs b/src/completion.rs index 5ca535a..aa9d605 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -120,7 +120,13 @@ impl SchemaCache { IdentSource::Columns => &self.columns, IdentSource::Relationships => &self.relationships, IdentSource::Indexes => &self.indexes, - IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], + // Curated / invented sources never come from the schema + // cache — `Generators` candidates are supplied separately + // from the `seed` vocabulary (ADR-0048 D9). + IdentSource::NewName + | IdentSource::Types + | IdentSource::Generators + | IdentSource::Free => &[], } } @@ -709,6 +715,22 @@ pub fn candidates_at_cursor_with_in_mode( } else { Vec::new() }; + // Source 1.9: fake-data generator names (ADR-0048 D9). 
At the + // `seed … set as ⟨here⟩` slot (`IdentSource::Generators`) the + // curated vocabulary is offered so a learner can discover `email` / + // `product` / … by Tab. Same `Function` kind / `tok_function` colour + // as SQL functions (no new theme colour — ADR-0048 §Grammar). + let has_generator_slot = expected + .iter() + .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. })); + if has_generator_slot { + functions.extend( + crate::seed::KNOWN_GENERATORS + .iter() + .filter(|g| matches_prefix(g)) + .map(|g| (*g).to_string()), + ); + } // Source 2: schema identifiers — accumulated across every // matching schema-listable `Ident { source }` expectation. @@ -1200,6 +1222,24 @@ pub fn invalid_ident_at_cursor_in_mode( if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) { return None; } + // ADR-0048 D9: the `seed … set as ` slot is a curated + // vocabulary (`IdentSource::Generators`), not a schema source, so the + // schema-column check below would never see it. A partial that + // prefix-matches a known generator is an in-progress name; anything + // else is an unknown generator → flag it `[ERR]` while typing. + let has_generator_slot = expected + .iter() + .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. })); + if has_generator_slot { + if crate::seed::is_known_generator_prefix(partial) { + return None; + } + return Some(InvalidIdent { + range: (start, cursor), + found: partial.to_string(), + source: IdentSource::Generators, + }); + } // Find every schema-listable source in the expected list. let sources: Vec = expected .iter() @@ -2606,6 +2646,48 @@ mod tests { ); } + #[test] + fn invalid_ident_fires_for_unknown_generator_after_as() { + // ADR-0048 D9: an unknown name at the `set as ` slot is + // flagged `[ERR]` while typing. 
+ let cache = two_table_schema(); + let input = "seed a set name as bogus"; + let inv = invalid_ident_at_cursor(input, input.len(), &cache) + .expect("unknown generator must flag"); + assert_eq!(inv.found, "bogus"); + assert_eq!(inv.source, IdentSource::Generators); + } + + #[test] + fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() { + // ADR-0048: an unknown column at the `set ` slot and the + // `.` column-fill slot is flagged like any other + // column slot (both are `IdentSource::Columns`). + let cache = two_table_schema(); // table `a`; columns id, name + let set_in = invalid_ident_at_cursor("seed a set xyz", 14, &cache) + .expect("unknown column in `set` must flag"); + assert_eq!(set_in.found, "xyz"); + assert_eq!(set_in.source, IdentSource::Columns); + + let fill = invalid_ident_at_cursor("seed a.xyz", 10, &cache) + .expect("unknown column in column-fill must flag"); + assert_eq!(fill.source, IdentSource::Columns); + } + + #[test] + fn invalid_ident_does_not_fire_for_generator_prefix() { + // A prefix of a known generator is an in-progress name, not a typo. 
+ let cache = two_table_schema(); + assert!( + invalid_ident_at_cursor("seed a set name as ema", 22, &cache).is_none(), + "`ema` prefixes `email` — must not flag", + ); + assert!( + invalid_ident_at_cursor("seed a set name as email", 24, &cache).is_none(), + "`email` is a known generator — must not flag", + ); + } + fn two_table_schema() -> SchemaCache { use crate::dsl::types::Type; let mut s = SchemaCache::default(); diff --git a/src/db.rs b/src/db.rs index 1c51c0d..19e4d07 100644 --- a/src/db.rs +++ b/src/db.rs @@ -33,7 +33,8 @@ use tracing::{debug, info, warn}; use crate::dsl::action::ReferentialAction; use crate::dsl::command::{ ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector, - Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey, + Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind, + SqlForeignKey, }; use crate::dsl::ColumnSpec; use crate::dsl::shortid; @@ -723,7 +724,9 @@ enum Request { /// snapshot wraps the whole seed via `snapshot_then`. Seed { table: String, + target_column: Option, count: Option, + overrides: Vec, rng_seed: Option, source: Option, reply: oneshot::Sender>, @@ -1517,18 +1520,22 @@ impl Database { recv.await.map_err(|_| DbError::WorkerGone)? } - /// Populate a table with generated fake data (ADR-0048, SD1). + /// Populate a table with generated fake data (ADR-0048, SD1/SD2). 
pub async fn seed( &self, table: String, + target_column: Option, count: Option, + overrides: Vec, rng_seed: Option, source: Option, ) -> Result { let (reply, recv) = oneshot::channel(); self.send(Request::Seed { table, + target_column, count, + overrides, rng_seed, source, reply, @@ -2694,7 +2701,9 @@ fn handle_request( } Request::Seed { table, + target_column, count, + overrides, rng_seed, source, reply, @@ -2706,7 +2715,9 @@ fn handle_request( persistence, source.as_deref(), &table, + target_column.as_deref(), count, + &overrides, rng_seed, )); } @@ -2938,7 +2949,10 @@ fn do_list_names_for( } Ok(out) } - IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()), + IdentSource::NewName + | IdentSource::Types + | IdentSource::Generators + | IdentSource::Free => Ok(Vec::new()), } } @@ -8808,14 +8822,13 @@ fn sample_parent_key_tuples( Ok(tuples) } -/// Populate a table with generated fake data (ADR-0048, SD1). +/// Populate a table with generated fake data (ADR-0048, SD1/SD2). /// -/// **Phase 1.** Generates whole rows and inserts them one at a time -/// through [`do_insert`] — reusing all the existing per-value -/// validation, autogen autofill, FK-error enrichment and persistence -/// machinery. The whole seed is a single undo step (the worker wraps -/// the call in one `snapshot_then`) and writes exactly one -/// `history.log` line (only the first row carries the `source`). +/// Generates whole rows and inserts them in one transaction, reusing the +/// per-value validation, autogen autofill, FK-error enrichment and +/// persistence machinery via [`insert_one_row`]. The whole seed is a +/// single undo step (the worker wraps the call in one `snapshot_then`) +/// and writes exactly one `history.log` line. 
/// /// Foreign-key columns are filled by sampling existing parent rows /// (D14); a compound FK reads all its child columns from one sampled @@ -8823,16 +8836,20 @@ fn sample_parent_key_tuples( /// `NOT NULL blob` column (which seed cannot generate) is refused by /// the block guard (D1); a nullable blob is omitted (→ NULL). /// -/// Deferred: identifier/constraint uniqueness incl. junction -/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the -/// efficient single-transaction multi-row path, the capped auto-show -/// preview (D18), and the enum/CHECK advisory (D12/D13). +/// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to +/// [`do_seed_column_fill`] (fill one column across existing rows, D1 +/// form 2). `overrides` carries the `set …` clause (D2): per-column +/// pins that replace the heuristic generator and drop the column from the +/// generic-fill advisory (D13). +#[allow(clippy::too_many_arguments)] fn do_seed( conn: &Connection, persistence: Option<&Persistence>, source: Option<&str>, table: &str, + target_column: Option<&str>, count: Option, + overrides: &[SeedOverride], rng_seed: Option, ) -> Result { use crate::seed; @@ -8840,6 +8857,14 @@ fn do_seed( let canonical_table = require_canonical_table(conn, table)?; let table = canonical_table.as_str(); + + // Column-fill (D1 form 2) is a distinct UPDATE path. + if let Some(col) = target_column { + return do_seed_column_fill( + conn, persistence, source, table, col, count, overrides, rng_seed, + ); + } + let n = count.unwrap_or(DEFAULT_SEED_COUNT); debug!(table = %table, count = n, "seed"); if n > MAX_SEED_COUNT { @@ -8937,6 +8962,17 @@ fn do_seed( } } + // Apply the `set …` overrides (D2): each replaces the named + // column's plan with the pinned generator and removes it from the + // generic-fill advisory (the user chose its values deliberately, + // D13). 
An override that names a non-fillable column is a friendly + // error; a bounded value source (fixed / pick-list) that can't supply + // enough distinct values for a single-column-UNIQUE target is refused + // up front rather than silently capped (DA finding). FK / type binding + // still apply — a value that violates a constraint surfaces through the + // existing FK-error guard. + apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?; + // Uniqueness groups (ADR-0048 D10): value tuples that must stay // distinct across the batch and against existing rows — the // user-fillable PK (so junction distinct-combos fall out of this), @@ -9131,6 +9167,434 @@ fn do_seed( }) } +/// Apply the `set …` overrides (ADR-0048 D2) to the per-column +/// generation plan. Each override replaces the named column's plan and +/// drops it from the generic-fill advisory (D13 — the user chose those +/// values). An override naming a column that is not in the fillable set +/// (unknown, or an auto-generated `serial`) is a friendly error. 
+fn apply_seed_overrides(
+    schema: &ReadSchema,
+    overrides: &[SeedOverride],
+    row_count: u64,
+    col_names: &[String],
+    plans: &mut [SeedColPlan],
+    advisory_columns: &mut Vec<String>,
+) -> Result<(), DbError> {
+    for ov in overrides {
+        // `idx` is looked up in `col_names` and then used to index `plans`,
+        // so the two slices are presumably index-aligned — confirm at the
+        // call site in `do_seed`.
+        let Some(idx) = col_names
+            .iter()
+            .position(|c| c.eq_ignore_ascii_case(&ov.column))
+        else {
+            return Err(DbError::Unsupported(format!(
+                "cannot apply `set {col} …`: `{col}` is not a fillable column of this \
+                 table (it is unknown, or an auto-generated column).",
+                col = ov.column,
+            )));
+        };
+        // A column without a recorded user type is treated as text.
+        let ty = schema
+            .columns
+            .iter()
+            .find(|c| c.name.eq_ignore_ascii_case(&ov.column))
+            .and_then(|c| c.user_type)
+            .unwrap_or(Type::Text);
+        // Guard first, so a too-small bounded source is refused before the
+        // plan is replaced.
+        seed_override_capacity_guard(schema, &ov.column, &ov.kind, row_count)?;
+        plans[idx] = seed_override_plan(&ov.kind, ty, &ov.column)?;
+        // The user chose these values deliberately: no generic-fill advisory.
+        advisory_columns.retain(|c| !c.eq_ignore_ascii_case(&ov.column));
+    }
+    Ok(())
+}
+
+/// Refuse up front when a **bounded** override (a fixed value or a
+/// pick-list) cannot supply enough *distinct* values to fill a
+/// single-column-UNIQUE target across `row_count` rows — otherwise the
+/// uniqueness machinery would silently cap the run to the achievable
+/// count (DA finding; the ADR left this interaction open and the user
+/// chose a friendly error). Generators and ranges are treated as
+/// effectively unbounded sources here; if one does exhaust, the existing
+/// distinct-combination cap (D14) still applies.
+///
+/// # Errors
+///
+/// Returns `DbError::Unsupported` when the override offers fewer distinct
+/// values than `row_count` and `column` is single-column UNIQUE (or the
+/// sole primary-key column), or when a pick-list entry is `null`.
+fn seed_override_capacity_guard(
+    schema: &ReadSchema,
+    column: &str,
+    kind: &SeedOverrideKind,
+    row_count: u64,
+) -> Result<(), DbError> {
+    let distinct = match kind {
+        SeedOverrideKind::Fixed(_) => 1,
+        // Duplicates in the list do not add capacity, so count the set of
+        // literals; a `null` entry fails here via `seed_override_literal`.
+        SeedOverrideKind::PickList(values) => values
+            .iter()
+            .map(|v| seed_override_literal(v, column))
+            .collect::<Result<std::collections::HashSet<_>, _>>()?
+            .len(),
+        // Unbounded-enough sources — leave to the cap if they exhaust.
+        SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()),
+    };
+    if distinct as u64 >= row_count.max(1) {
+        return Ok(());
+    }
+    // Single-column uniqueness only: a compound UNIQUE / compound PK can
+    // still be satisfied by varying the *other* columns, so a pinned
+    // value there does not force a cap.
+    let single_unique = schema
+        .columns
+        .iter()
+        .find(|c| c.name.eq_ignore_ascii_case(column))
+        .is_some_and(|c| c.unique)
+        || (schema.primary_key.len() == 1
+            && schema.primary_key[0].eq_ignore_ascii_case(column));
+    if single_unique {
+        return Err(DbError::Unsupported(format!(
+            "cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \
+             value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \
+             of at least {row_count} values."
+        )));
+    }
+    Ok(())
+}
+
+/// Turn one `set` override into the `SeedColPlan` that produces its
+/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the
+/// literal(s); `Generator` resolves the curated name (unknown → friendly
+/// error); `Range` validates its bounds against the column type *before*
+/// generation (an incompatible bound → friendly error).
+///
+/// # Errors
+///
+/// Returns `DbError::Unsupported` for a `null` literal, an unknown
+/// generator name, or range bounds rejected by `range_bounds_reason`.
+fn seed_override_plan(
+    kind: &SeedOverrideKind,
+    ty: Type,
+    column: &str,
+) -> Result<SeedColPlan, DbError> {
+    use crate::seed::Generator;
+    let generator = match kind {
+        SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]),
+        SeedOverrideKind::PickList(vs) => {
+            let lits = vs
+                .iter()
+                .map(|v| seed_override_literal(v, column))
+                .collect::<Result<Vec<_>, _>>()?;
+            Generator::PickFrom(lits)
+        }
+        SeedOverrideKind::Generator(name) => {
+            crate::seed::generator_for_name(name).ok_or_else(|| {
+                DbError::Unsupported(format!(
+                    "unknown generator `{name}` in `set {column} as {name}`. \
+                     Known generators: {}.",
+                    crate::seed::KNOWN_GENERATORS.join(", "),
+                ))
+            })?
+        }
+        SeedOverrideKind::Range { low, high } => {
+            let lo = seed_override_literal(low, column)?;
+            let hi = seed_override_literal(high, column)?;
+            if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) {
+                return Err(DbError::Unsupported(format!(
+                    "cannot apply `set {column} between …`: {reason}."
+                )));
+            }
+            Generator::Range { low: lo, high: hi }
+        }
+    };
+    Ok(SeedColPlan::Generated { generator, ty })
+}
+
+/// Extract the literal string an override value contributes to a
+/// `PickFrom` / `Range` (re-typed per column by `generate_value`). A
+/// `null` override is refused — seed always fills a value (NULL
+/// injection is out of scope, ADR-0048 Out-of-scope).
+fn seed_override_literal(value: &Value, column: &str) -> Result<String, DbError> {
+    match value {
+        Value::Number(s) | Value::Text(s) => Ok(s.clone()),
+        Value::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()),
+        Value::Null => Err(DbError::Unsupported(format!(
+            "`set {column} = null` is not supported — seed always fills a value."
+        ))),
+    }
+}
+
+/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's
+/// **existing** rows (an UPDATE), the natural follow-up to `add column`.
+///
+/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets;
+/// an empty table is a friendly no-op. The `set` clause may only adjust
+/// the column being filled (the rest of the per-column heuristics do not
+/// apply — there is exactly one column). A UNIQUE / identifier target
+/// gets collision-free values (generated distinct from *every* existing
+/// value in the column, so no row-by-row UPDATE can transiently collide);
+/// an FK target samples an existing parent key (D14). The whole fill is
+/// one transaction → one undo step (D15), persisted once (commit-db-last).
+#[allow(clippy::too_many_arguments)]
+fn do_seed_column_fill(
+    conn: &Connection,
+    persistence: Option<&Persistence>,
+    source: Option<&str>,
+    table: &str,
+    column: &str,
+    count: Option<u64>,
+    overrides: &[SeedOverride],
+    // NOTE(review): the generic argument was lost in extraction; `u64` is
+    // assumed here — confirm against `seed::make_rng`.
+    rng_seed: Option<u64>,
+) -> Result<SeedResult, DbError> {
+    use crate::seed;
+    use rand::RngExt;
+
+    debug!(table = %table, column = %column, "seed column-fill");
+
+    // A row count is meaningless when filling existing rows (D1 form 2).
+    if count.is_some() {
+        return Err(DbError::Unsupported(format!(
+            "`seed {table}.{column}` fills existing rows, so it takes no row count \
+             (drop the number)."
+        )));
+    }
+
+    let schema = read_schema(conn, table)?;
+    let col = schema
+        .columns
+        .iter()
+        .find(|c| c.name.eq_ignore_ascii_case(column))
+        .ok_or_else(|| {
+            DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column."))
+        })?;
+    let canonical_col = col.name.clone();
+    let ty = col.user_type.unwrap_or(Type::Text);
+
+    // Refuse identity / auto-generated / un-generatable targets (D1).
+    if col.primary_key {
+        return Err(DbError::Unsupported(format!(
+            "cannot fill `{table}.{canonical_col}`: it is part of the primary key — \
+             you don't fill an identity column."
+        )));
+    }
+    if matches!(ty, Type::Serial | Type::ShortId) {
+        return Err(DbError::Unsupported(format!(
+            "cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \
+             values automatically.",
+            ty.keyword(),
+        )));
+    }
+    if matches!(ty, Type::Blob) {
+        return Err(DbError::Unsupported(format!(
+            "cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values."
+        )));
+    }
+
+    // The `set` clause may only adjust the filled column (user decision).
+    for ov in overrides {
+        if !ov.column.eq_ignore_ascii_case(&canonical_col) {
+            return Err(DbError::Unsupported(format!(
+                "in `seed {table}.{canonical_col}`, `set` can only adjust \
+                 `{canonical_col}` (the column being filled), not `{}`.",
+                ov.column,
+            )));
+        }
+    }
+
+    // Existing rowids in a deterministic order (D4 reproducibility).
+    let rowids: Vec<i64> = {
+        let sql = format!(
+            "SELECT rowid FROM \"{}\" ORDER BY rowid",
+            table.replace('"', "\"\"")
+        );
+        let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
+        stmt.query_map([], |r| r.get::<_, i64>(0))
+            .map_err(DbError::from_rusqlite)?
+            .collect::<Result<Vec<_>, _>>()
+            .map_err(DbError::from_rusqlite)?
+    };
+
+    // Empty table → friendly no-op (D1).
+    if rowids.is_empty() {
+        return Ok(SeedResult {
+            table: table.to_string(),
+            requested: 0,
+            produced: 0,
+            data: DataResult {
+                table_name: table.to_string(),
+                columns: Vec::new(),
+                column_types: Vec::new(),
+                rows: Vec::new(),
+            },
+            advisory_columns: Vec::new(),
+        });
+    }
+
+    // FK target → sample an existing parent key column (D14).
+    let fk_sample: Option<Vec<Value>> = {
+        let fk = schema.foreign_keys.iter().find(|fk| {
+            fk.child_columns
+                .iter()
+                .any(|c| c.eq_ignore_ascii_case(&canonical_col))
+        });
+        match fk {
+            Some(fk) => {
+                // Single-column position within the FK (column-fill targets
+                // one column; a compound FK filled one column at a time is
+                // unusual but we sample that column's parent values).
+                let pos = fk
+                    .child_columns
+                    .iter()
+                    .position(|c| c.eq_ignore_ascii_case(&canonical_col))
+                    .unwrap_or(0);
+                let parent_col = fk.parent_columns.get(pos).cloned().unwrap_or_default();
+                let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?;
+                if tuples.is_empty() {
+                    return Err(DbError::Unsupported(format!(
+                        "cannot fill `{table}.{canonical_col}`: parent table `{}` has no \
+                         rows to reference. Seed or insert into `{}` first.",
+                        fk.parent_table, fk.parent_table,
+                    )));
+                }
+                // Each sampled tuple has exactly the one requested column.
+                Some(tuples.into_iter().map(|mut t| t.remove(0)).collect())
+            }
+            None => None,
+        }
+    };
+
+    // The value source: an override (if present) else the heuristic.
+    let mut advisory_columns: Vec<String> = Vec::new();
+    let plan: SeedColPlan = if let Some(ov) = overrides
+        .iter()
+        .find(|o| o.column.eq_ignore_ascii_case(&canonical_col))
+    {
+        // Same capacity guard as whole-row: a bounded override that can't
+        // give enough distinct values for a UNIQUE column across the
+        // existing rows is refused up front, not silently capped.
+        seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?;
+        seed_override_plan(&ov.kind, ty, &canonical_col)?
+    } else if fk_sample.is_some() {
+        // The indices are placeholders: this path reads `fk_sample` directly.
+        SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 }
+    } else if matches!(ty, Type::ShortId) {
+        SeedColPlan::ShortId // unreachable (refused above), kept for totality
+    } else {
+        let check_in_values = col
+            .check
+            .as_deref()
+            .and_then(|chk| seed::parse_in_check_values(chk, &canonical_col));
+        let spec = seed::ColumnSpec {
+            name: canonical_col.clone(),
+            ty,
+            not_null: col.notnull,
+            primary_key: col.primary_key,
+            unique: col.unique,
+            is_foreign_key: false,
+            check_in_values,
+        };
+        let generator = seed::choose_generator(table, &spec);
+        // Advise when the generic fallback lands on an enum-looking column
+        // or on a CHECK that could not be parsed into a value list.
+        if matches!(generator, crate::seed::Generator::Generic)
+            && (seed::is_enum_ish(&canonical_col)
+                || (col.check.is_some() && spec.check_in_values.is_none()))
+        {
+            advisory_columns.push(canonical_col.clone());
+        }
+        SeedColPlan::Generated { generator, ty }
+    };
+
+    // Collision-free generation for UNIQUE / identifier targets: seed the
+    // used-set with EVERY existing value of the column so a generated
+    // value never matches a not-yet-updated row (no transient UNIQUE
+    // violation) nor a value already assigned this batch (ADR-0048 D10).
+    let is_identity_sequential = matches!(
+        &plan,
+        SeedColPlan::Generated {
+            generator: crate::seed::Generator::IdentitySequential,
+            ..
+        }
+    );
+    let enforce_unique = col.unique || is_identity_sequential;
+    let mut used: std::collections::HashSet<_> = std::collections::HashSet::new();
+    if enforce_unique {
+        for tuple in
+            sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))?
+        {
+            used.insert(seed_value_list_key(&tuple));
+        }
+    }
+    // Sequential integer identities continue after the current maximum.
+    let seq_base = if is_identity_sequential && matches!(ty, Type::Int) {
+        Some(seed_max_int(conn, table, &canonical_col)?)
+    } else {
+        None
+    };
+
+    const MAX_ATTEMPTS: u32 = 200;
+    let mut rng = seed::make_rng(rng_seed);
+    let tx = conn
+        .unchecked_transaction()
+        .map_err(DbError::from_rusqlite)?;
+
+    let update_sql = format!(
+        "UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2",
+        table.replace('"', "\"\""),
+        canonical_col.replace('"', "\"\""),
+    );
+    let mut produced: u64 = 0;
+    for rowid in &rowids {
+        let mut attempt = 0u32;
+        let value = loop {
+            let v = match &plan {
+                SeedColPlan::ForeignKey { .. } => {
+                    let samples = fk_sample.as_ref().expect("fk plan implies samples");
+                    samples[rng.random_range(0..samples.len())].clone()
+                }
+                SeedColPlan::ShortId => {
+                    Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
+                }
+                SeedColPlan::Generated { generator, ty }
+                    if matches!(generator, crate::seed::Generator::IdentitySequential)
+                        && matches!(ty, Type::Int) =>
+                {
+                    Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string())
+                }
+                SeedColPlan::Generated { generator, ty } => {
+                    seed::generate_value(generator, *ty, &mut rng)
+                }
+            };
+            if enforce_unique {
+                let key = seed_value_list_key(std::slice::from_ref(&v));
+                if used.contains(&key) {
+                    attempt += 1;
+                    if attempt >= MAX_ATTEMPTS {
+                        break v; // give up on distinctness; DB may reject
+                    }
+                    continue;
+                }
+                used.insert(key);
+            }
+            break v;
+        };
+        let bound = impl_value_for(&schema, &canonical_col, &value)?;
+        let params: Vec<rusqlite::types::Value> =
+            vec![bound_to_sqlite_value(&bound), rusqlite::types::Value::Integer(*rowid)];
+        execute_with_fk_enrichment(conn, table, &update_sql, &params)?;
+        produced += 1;
+    }
+
+    let changes = Changes {
+        schema_dirty: false,
+        rewritten_tables: vec![table.to_string()],
+        ..Changes::default()
+    };
+    // Persist first, commit last: an error on any `?` above drops `tx`
+    // uncommitted.
+    finalize_persistence(conn, persistence, source, &changes)?;
+    tx.commit().map_err(DbError::from_rusqlite)?;
+
+    // Preview the first capped rows (D18).
+    let preview: Vec<i64> = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect();
+    let data = query_rows_by_rowid(conn, table, &preview)?;
+
+    Ok(SeedResult {
+        table: table.to_string(),
+        requested: produced,
+        produced,
+        data,
+        advisory_columns,
+    })
+}
+
 /// Build and execute a single-row `INSERT` — column resolution, value
 /// binding, `serial`/`shortid` autofill, and the FK-enriched execute —
 /// returning `(rows_affected, new rowid)`.
diff --git a/src/dsl/command.rs b/src/dsl/command.rs index a1f834b..99304a3 100644 --- a/src/dsl/command.rs +++ b/src/dsl/command.rs @@ -402,14 +402,23 @@ pub enum Command { filter: Option, limit: Option, }, - /// Populate a table with generated fake data (ADR-0048, SD1). - /// `count` defaults to 20 when omitted; `rng_seed` (from a future - /// `--seed ` flag) makes generation reproducible. Phase 1 is - /// whole-row generation; the `set` override clause and the - /// `
<table>.<column>` column-fill form arrive in later phases.
+    /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
+    /// `count` defaults to 20 when omitted; `rng_seed` (from the
+    /// `--seed <n>` flag) makes generation reproducible.
+    ///
+    /// Phase 2 surfaces (ADR-0048 D1/D2):
+    /// - `target_column` is `Some` for the **column-fill** form
+    ///   `seed <table>
.` — fill one column across the table's + /// *existing* rows (an UPDATE), rather than generating new rows. + /// - `overrides` carries the `set …` clause: per-column pins + /// that take precedence over the heuristic generator (D2). Seed { table: String, + /// `Some(col)` → column-fill mode (UPDATE existing rows); + /// `None` → whole-row generation (INSERT new rows). + target_column: Option, count: Option, + overrides: Vec, rng_seed: Option, }, /// Replay a sequence of DSL commands from a file. Each line @@ -647,6 +656,38 @@ impl RowFilter { } } +/// One `set …` override on a `seed` command (ADR-0048 D2, Phase 2). +/// +/// The user can pin a column's generated values to a constant, a +/// pick-list, an explicit named generator, or a range — overriding the +/// per-column heuristic the executor would otherwise pick. `column` is +/// the user-typed column name (validated against the table at execution, +/// like every other column slot). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SeedOverride { + pub column: String, + pub kind: SeedOverrideKind, +} + +/// The four `set` override forms (ADR-0048 D2). +/// +/// Values arrive as the DSL's `Value` (quoted text / unquoted number — +/// dates are quoted text per the D2 amendment); the `Generator` name is +/// a raw string validated at execution because `src/dsl` cannot depend +/// on `src/seed` (the curated vocabulary lives there). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SeedOverrideKind { + /// `set status = 'pending'` — every row gets the constant. + Fixed(Value), + /// `set role in ('admin', 'editor')` — uniform pick from the list. + PickList(Vec), + /// `set work_addr as email` — force the named generator (D9). + Generator(String), + /// `set price between 10 and 100` — uniform in `[low, high]`; + /// numeric or (quoted) date bounds per the destination column type. + Range { low: Value, high: Value }, +} + /// A complex WHERE expression (ADR-0026 §4). 
/// /// Built by `grammar::expr::build_expr` from the flat diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index 9dc1428..b111075 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -24,7 +24,9 @@ //! later swap that capture for the same typed slots used here, adding //! live hints/highlighting. -use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind}; +use crate::dsl::command::{ + Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind, +}; use crate::dsl::grammar::{ CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr, shared::{ @@ -426,7 +428,9 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[ const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); // ================================================================= -// seed — `seed []` (ADR-0048, SD1) +// seed — `seed [.] [] [set ] [--seed ]` +// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause / +// column-fill) // ================================================================= /// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a @@ -444,11 +448,127 @@ const SEED_FLAG_NODES: &[Node] = &[ }, ]; const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES); + +// --- column-fill target: the optional `.` (ADR-0048 D1 +// form 2) ---------------------------------------------------- +// +// `seed users.email …` fills one column across existing rows. The +// table ident stops at `.` (idents are alnum/underscore), so an +// `Optional(Seq['.', column])` after the table cleanly discriminates: +// when the next token is not `.`, the `Punct('.')` first-child +// NoMatches and `walk_optional` skips it; once `.` commits, a missing +// column propagates as the user mid-typing `seed users.` (driver +// `walk_optional` semantics). The column resolves against +// `current_table_columns` (populated by `TABLE_NAME_WRITES`). 
+const SEED_TARGET_COLUMN: Node = Node::Ident {
+    source: IdentSource::Columns,
+    role: "seed_target_column",
+    validator: None,
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
+const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
+
+// --- the `set <override> [, …]` clause (ADR-0048 D2) --------
+//
+// Each override pins one column's generation. The column slot
+// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
+// same `current_column_value` dispatch `update … set` uses) narrow to
+// the column's type — so list/range/fixed values get the column's
+// typed slot (quoted text, unquoted number, quoted date) and a
+// type-mismatched literal is flagged. The four tails each start with a
+// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
+// discriminates cleanly (no Optional-first branch).
+
+/// The `set <column>` column slot. Distinct role from `update`'s
+/// `update_set_column` and the expression `expr_column`.
+const SEED_SET_COLUMN: Node = Node::Ident {
+    source: IdentSource::Columns,
+    role: "seed_set_column",
+    validator: None,
+    highlight_override: None,
+    writes_table: false,
+    writes_column: true,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+/// `as <generator>` — the curated generator-name vocabulary (D9),
+/// highlighted in the `tok_function` colour. The slot is structural
+/// (any identifier matches); the name is validated at execution and
+/// flagged live by the validity indicator.
+const SEED_GENERATOR: Node = Node::Ident {
+    source: IdentSource::Generators,
+    role: "seed_generator",
+    validator: None,
+    highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+/// `= <value>` — a fixed constant for every row.
+const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
+/// `in ( <value> [, <value>]* )` — uniform pick from the list.
+const SEED_OV_IN_VALUES: Node = Node::Repeated {
+    inner: &PER_COLUMN_VALUE,
+    separator: Some(&Node::Punct(',')),
+    min: 1,
+};
+const SEED_OV_IN_NODES: &[Node] = &[
+    Node::Word(Word::keyword("in")),
+    Node::Punct('('),
+    SEED_OV_IN_VALUES,
+    Node::Punct(')'),
+];
+/// `between <low> and <high>` — uniform in the (typed) range.
+const SEED_OV_BETWEEN_NODES: &[Node] = &[
+    Node::Word(Word::keyword("between")),
+    PER_COLUMN_VALUE,
+    Node::Word(Word::keyword("and")),
+    PER_COLUMN_VALUE,
+];
+/// `as <generator>` — force a named generator.
+const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
+
+// One alternative per override tail, in the order `=`, `in`, `between`, `as`.
+const SEED_OV_TAIL_CHOICES: &[Node] = &[
+    Node::Seq(SEED_OV_FIXED_NODES),
+    Node::Seq(SEED_OV_IN_NODES),
+    Node::Seq(SEED_OV_BETWEEN_NODES),
+    Node::Seq(SEED_OV_AS_NODES),
+];
+const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
+
+// A single override is the column slot followed by exactly one tail; the
+// clause is `set` plus one or more comma-separated overrides.
+const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
+const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
+const SEED_OVERRIDES: Node = Node::Repeated {
+    inner: &SEED_OVERRIDE,
+    separator: Some(&Node::Punct(',')),
+    min: 1,
+};
+const SEED_SET_CLAUSE_NODES: &[Node] =
+    &[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
+const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
+
 const SEED_NODES: &[Node] = &[
-    // `writes_table` so a future `set =…` clause's column slots
-    // can resolve against this table.
+    // `writes_table` so the `.column` target, the `set <col>=…`
+    // clause's column slots, and the typed value slots all resolve
+    // against this table.
     TABLE_NAME_WRITES,
+    SEED_DOT_COLUMN,
     Node::Optional(&SEED_COUNT),
+    Node::Optional(&SEED_SET_CLAUSE),
     Node::Optional(&SEED_FLAG),
 ];
 const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
@@ -736,16 +856,29 @@ fn build_show_limit(path: &MatchedPath) -> Result, ValidationError>
     })
 }
 
-/// Build a `seed [] [--seed ]` command (ADR-0048). The
-/// `--seed` flag's value is the `NumberLit` right after the flag; the
-/// positional count is the `NumberLit` *before* the flag (or the only
-/// one when no flag is present).
+/// Build a `seed <table>[.<column>] [<n>] [set <overrides>] [--seed <n>]`
+/// command (ADR-0048, SD1 + SD2 Phase 2).
+///
+/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
+///   ident, present only for the `seed <table>.<column>` form.
+/// - The positional `count` is the `NumberLit` that precedes both the
+///   `set` keyword and the `--seed` flag — bounding it that way keeps a
+///   `set age between 18 and 80` value (also a `NumberLit`) from being
+///   mistaken for the count.
+/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
+/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
fn build_seed(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; + let target_column = ident_text(path, "seed_target_column").map(str::to_string); + let flag_idx = path .items .iter() .position(|i| matches!(&i.kind, MatchedKind::Flag("seed"))); + let set_idx = path + .items + .iter() + .position(|i| matches!(&i.kind, MatchedKind::Word("set"))); let rng_seed = flag_idx .and_then(|fi| path.items.get(fi + 1)) @@ -753,23 +886,155 @@ fn build_seed(path: &MatchedPath, _source: &str) -> Result, + flag_idx: Option, +) -> Result, ValidationError> { + let Some(set_idx) = set_idx else { + return Ok(Vec::new()); + }; + let end = flag_idx.unwrap_or(path.items.len()); + let region = &path.items[set_idx + 1..end]; + + let mut overrides = Vec::new(); + let mut i = 0; + while i < region.len() { + // The next override starts at its column ident; skip the + // top-level comma separators (and any stray token) between them. + let MatchedKind::Ident { + role: "seed_set_column", + .. + } = ®ion[i].kind + else { + i += 1; + continue; + }; + let column = region[i].text.clone(); + i += 1; + let kind = parse_seed_override_tail(region, &mut i, &column)?; + overrides.push(SeedOverride { column, kind }); + } + Ok(overrides) +} + +/// Parse one override tail starting at `region[*i]` (just past the +/// column ident), advancing `*i` past the consumed tokens. 
+fn parse_seed_override_tail( + region: &[MatchedItem], + i: &mut usize, + column: &str, +) -> Result { + let head = region.get(*i).ok_or_else(|| seed_set_error(column))?; + match &head.kind { + MatchedKind::Punct('=') => { + *i += 1; + let value = seed_take_value(region, i, column)?; + Ok(SeedOverrideKind::Fixed(value)) + } + MatchedKind::Word("in") => { + *i += 1; // `in` + // `(` + if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) { + *i += 1; + } + let mut values = Vec::new(); + while let Some(item) = region.get(*i) { + match &item.kind { + MatchedKind::Punct(')') => { + *i += 1; + break; + } + MatchedKind::Punct(',') => { + *i += 1; + } + _ => values.push(seed_take_value(region, i, column)?), + } + } + Ok(SeedOverrideKind::PickList(values)) + } + MatchedKind::Word("between") => { + *i += 1; // `between` + let low = seed_take_value(region, i, column)?; + if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) { + *i += 1; + } + let high = seed_take_value(region, i, column)?; + Ok(SeedOverrideKind::Range { low, high }) + } + MatchedKind::Word("as") => { + *i += 1; // `as` + let gen_item = region + .get(*i) + .filter(|t| matches!(t.kind, MatchedKind::Ident { role: "seed_generator", .. })) + .ok_or_else(|| seed_set_error(column))?; + *i += 1; + Ok(SeedOverrideKind::Generator(gen_item.text.clone())) + } + _ => Err(seed_set_error(column)), + } +} + +/// Take one value literal at `region[*i]`, advancing past it. +/// +/// The grammar's typed value slots only ever match value literals (a +/// bare unquoted word fails to match the slot and is rejected *before* +/// this fold runs — D2's quoting requirement enforced structurally), so +/// a non-literal here can only mean a grammar/builder drift bug; the +/// `Err` is a drift guard (mirrors `expr::build_expr`). 
+fn seed_take_value( + region: &[MatchedItem], + i: &mut usize, + column: &str, +) -> Result { + let item = region.get(*i).ok_or_else(|| seed_set_error(column))?; + let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?; + *i += 1; + Ok(value) +} + +/// Drift-guard error for the `set`-clause fold (see `seed_take_value`). +fn seed_set_error(column: &str) -> ValidationError { + ValidationError { + message_key: "parse.error_wrapper", + args: vec![("detail", format!("malformed `set` clause for `{column}`"))], + } +} + fn parse_seed_u64(text: &str) -> Result { text.parse::().map_err(|_| ValidationError { message_key: "parse.custom.bind_type_mismatch", diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index 30ebbf5..f06cf3f 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -57,6 +57,12 @@ pub enum HighlightClass { String, Punct, Flag, + /// A curated function-vocabulary name — the `seed … set as + /// ` generator names (ADR-0048 D2/§Grammar). Rendered in + /// the existing `tok_function` colour (ADR-0022 Amд6 blue — no new + /// theme colour), assigned via a generator slot's + /// `highlight_override`, not by byte shape. + Function, Error, } @@ -86,6 +92,14 @@ pub enum IdentSource { /// content validator on column-type slots; not user-listable /// from the schema. Types, + /// Closed, curated set of fake-data generator names (ADR-0048 + /// D9) — the `seed … set as ` slot. Like + /// `Types`, not user-listable from the schema; the vocabulary + /// lives in `src/seed` and the completion engine offers it. The + /// grammar slot is purely structural (matches any identifier); + /// an unknown name is flagged live (validity) and rejected at + /// execution. + Generators, /// Any identifier shape; used by synthetic catch-all branches /// (e.g., the unknown-value branch of `mode `). 
Free, @@ -117,6 +131,7 @@ impl IdentSource { Self::Relationships => "relationship name", Self::Indexes => "index name", Self::Types => "type", + Self::Generators => "generator name", } } @@ -134,6 +149,7 @@ impl IdentSource { "relationship name" => Some(Self::Relationships), "index name" => Some(Self::Indexes), "type" => Some(Self::Types), + "generator name" => Some(Self::Generators), _ => None, } } diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 9e260c8..42ad61d 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String { IdentSource::Relationships => "relationship name".to_string(), IdentSource::Indexes => "index name".to_string(), IdentSource::Types => "type".to_string(), + IdentSource::Generators => "generator name".to_string(), IdentSource::NewName | IdentSource::Free => "identifier".to_string(), }, Expectation::Punct(c) => format!("`{c}`"), diff --git a/src/dsl/walker/highlight.rs b/src/dsl/walker/highlight.rs index f2bd732..0a29ef4 100644 --- a/src/dsl/walker/highlight.rs +++ b/src/dsl/walker/highlight.rs @@ -240,6 +240,18 @@ mod tests { ); } + #[test] + fn seed_generator_name_highlighted_as_function() { + // ADR-0048 D9: the `set as ` generator name carries the + // `Function` highlight class (via the slot's `highlight_override`), + // rendered in the shared `tok_function` colour. + let runs = run("seed Members set role as email"); + assert!( + runs.iter().any(|(_, _, c)| *c == HighlightClass::Function), + "generator name `email` should be Function-highlighted: {runs:?}" + ); + } + #[test] fn unknown_command_word_classified_by_byte_shape() { // Walker doesn't engage; fallback classifies as Identifier. 
diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index d3cf55b..f32ffa1 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics( IdentSource::Relationships | IdentSource::Indexes | IdentSource::Types + // `Generators` (the `set … as ` slot, ADR-0048 D9) is a + // curated vocabulary; its unknown-name validity is handled by + // the completion-layer indicator, not this walker diagnostic. + | IdentSource::Generators | IdentSource::Free => {} } } diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index cb950ee..b26d01e 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -553,7 +553,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("ok.rows_seeded", &["count", "table"]), ("ok.rows_updated", &["count"]), ("seed.capped", &["requested"]), - ("seed.advisory_generic", &["columns"]), + ("seed.advisory_generic", &["columns", "column", "table"]), // ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ---- ("client_side.auto_fill_add_serial", &["count"]), ("client_side.auto_fill_add_shortid", &["count"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 883e06a..8bfb9c5 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -337,6 +337,13 @@ help: seed [] — fill a table with generated sample rows (default 20). Existing rows are kept; foreign keys draw from existing parent rows. + seed ... set = 'v' | in ('a','b') | as | between x and y + — pin how a column is generated: a fixed + value, a pick-list, a named generator + (email, name, product, ...), or a range. + seed .[set ...] — fill one column across the EXISTING rows + (the follow-up to `add column`). + seed ... --seed — reproducible: same data for the same n. insert: |- insert into [(cols)] [values] (vals) — add a row update: |- @@ -573,7 +580,7 @@ parse: change_column: |- change column [in] [table]
: () [--force-conversion | --dont-convert] - seed: "seed
[count]" + seed: "seed
[count] [set = ... | in (...) | as | between x and y] | seed
." show_data: "show data
" show_table: "show table
" show_tables: "show tables" @@ -988,7 +995,11 @@ db: # generic text that look like fixed value sets. seed: capped: "(of {requested} requested — ran out of distinct value combinations)" - advisory_generic: "{columns} filled with generic text — they look like fixed value sets." + # ADR-0048 D13 (Phase 2/3 wording): name the generically-filled + # enum-ish / CHECK columns and point at the concrete repairs — the + # `set` clause on a fresh seed, or the column-fill form for the rows + # just created. + advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`." ok: # ADR-0040: the generic `[ok] ` summary line was diff --git a/src/input_render.rs b/src/input_render.rs index e0bac70..7358ff6 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -817,6 +817,9 @@ fn ambient_hint_core_in_mode( crate::dsl::grammar::IdentSource::Tables => "table", crate::dsl::grammar::IdentSource::Columns => "column", crate::dsl::grammar::IdentSource::Relationships => "relationship", + // The `seed … set as ` curated vocabulary + // (ADR-0048 D9) flags an unknown name here. + crate::dsl::grammar::IdentSource::Generators => "generator", // `NewName`, `Types`, `Free` are filtered out by // `invalid_ident_at_cursor` (it only fires for // known-set sources via `completes_from_schema`), so diff --git a/src/runtime.rs b/src/runtime.rs index ce86895..df5b8fb 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -2916,13 +2916,15 @@ async fn execute_command_typed( .insert(table, columns, values, src) .await .map(CommandOutcome::Insert), - // ADR-0048 (SD1). + // ADR-0048 (SD1/SD2 Phase 2). 
Command::Seed { table, + target_column, count, + overrides, rng_seed, } => database - .seed(table, count, rng_seed, src) + .seed(table, target_column, count, overrides, rng_seed, src) .await .map(CommandOutcome::Seed), Command::Update { diff --git a/src/seed/generators.rs b/src/seed/generators.rs index c068465..a5d6a99 100644 --- a/src/seed/generators.rs +++ b/src/seed/generators.rs @@ -81,6 +81,11 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val let chosen: &String = pick(rng, values); literal_to_value(chosen, ty) } + // The `set between low and high` override (D2). Bounds are + // interpreted per the destination type; the executor has already + // validated they parse, so a defensive parse failure here falls + // back to type-based generation rather than producing junk. + Generator::Range { low, high } => range_value(low, high, ty, rng), // Un-intercepted markers + an empty pick list → type-based. Generator::PickFrom(_) | Generator::IdentitySequential @@ -89,6 +94,132 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val } } +/// Uniform value in `[low, high]` for the `between` override (D2). +/// +/// Bounds are interpreted by destination type. Returns the type-based +/// fallback for a bound that does not parse or a type that has no range +/// meaning — the executor pre-validates, so this is defensive only. 
+fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value { + match ty { + Type::Int | Type::Serial => parse_int_range(low, high) + .map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string())) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::Real | Type::Decimal => parse_real_range(low, high) + .map(|(lo, hi)| { + let v = rng.random::().mul_add(hi - lo, lo); + Value::Number(format!("{v:.2}")) + }) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::Date => parse_date_range(low, high) + .map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi)))) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::DateTime => parse_datetime_range(low, high) + .map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi))) + .unwrap_or_else(|| generic_for_type(ty, rng)), + // text / bool / blob / shortid have no range meaning. + _ => generic_for_type(ty, rng), + } +} + +/// Validate that `low`/`high` parse as bounds for `ty`. +/// +/// The `between` override (D2) is checked by the executor *before* +/// generation. Returns a short human reason on failure (the executor +/// wraps it in a friendly error naming the column), `None` when valid. +#[must_use] +pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option { + let ok = match ty { + Type::Int | Type::Serial => parse_int_range(low, high).is_some(), + Type::Real | Type::Decimal => parse_real_range(low, high).is_some(), + Type::Date => parse_date_range(low, high).is_some(), + Type::DateTime => parse_datetime_range(low, high).is_some(), + // text / bool / blob / shortid have no range meaning. + Type::Text | Type::Bool | Type::Blob | Type::ShortId => false, + }; + if ok { + return None; + } + Some(match ty { + Type::Int | Type::Serial => "expected two whole numbers, e.g. `between 1 and 100`".to_string(), + Type::Real | Type::Decimal => "expected two numbers, e.g. `between 1.0 and 9.99`".to_string(), + Type::Date => "expected two quoted dates, e.g. 
`between '2023-01-01' and '2024-12-31'`".to_string(), + Type::DateTime => { + "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`" + .to_string() + } + Type::Text | Type::Bool | Type::Blob | Type::ShortId => { + "a `between` range only applies to numeric and date/datetime columns".to_string() + } + }) +} + +/// Parse and order an integer range; `None` if either bound is not an +/// integer. +fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> { + let lo: i64 = low.trim().parse().ok()?; + let hi: i64 = high.trim().parse().ok()?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> { + let lo: f64 = low.trim().parse().ok()?; + let hi: f64 = high.trim().parse().ok()?; + if !lo.is_finite() || !hi.is_finite() { + return None; + } + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> { + let lo = NaiveDate::parse_from_str(low.trim(), "%Y-%m-%d").ok()?; + let hi = NaiveDate::parse_from_str(high.trim(), "%Y-%m-%d").ok()?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +/// Accept both the `T`-separated and space-separated datetime spellings +/// the app validates (`bind_datetime` / `validate_datetime`). +fn parse_one_datetime(s: &str) -> Option { + let t = s.trim(); + chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%dT%H:%M:%S") + .or_else(|_| chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%d %H:%M:%S")) + .ok() +} + +fn parse_datetime_range( + low: &str, + high: &str, +) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> { + let lo = parse_one_datetime(low)?; + let hi = parse_one_datetime(high)?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +/// Uniform date in `[lo, hi]` (inclusive). 
+fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate { + let lo_ce = lo.num_days_from_ce(); + let hi_ce = hi.num_days_from_ce(); + let day = rng.random_range(lo_ce..=hi_ce); + NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo) +} + +/// Uniform datetime in `[lo, hi]`, rendered `YYYY-MM-DDTHH:MM:SS`. +fn random_datetime_between( + rng: &mut SeedRng, + lo: chrono::NaiveDateTime, + hi: chrono::NaiveDateTime, +) -> String { + let lo_s = lo.and_utc().timestamp(); + let hi_s = hi.and_utc().timestamp(); + let secs = if lo_s <= hi_s { + rng.random_range(lo_s..=hi_s) + } else { + rng.random_range(hi_s..=lo_s) + }; + let dt = chrono::DateTime::from_timestamp(secs, 0) + .map_or(lo, |d| d.naive_utc()); + dt.format("%Y-%m-%dT%H:%M:%S").to_string() +} + /// Type-based fallback generation (D8). Never produces NULL for a /// generatable type; `blob`/`serial`/`shortid` are handled by the /// executor (autogen / block guard) and yield NULL here only as a @@ -358,6 +489,76 @@ mod tests { assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}"); } + #[test] + fn int_range_stays_within_inclusive_bounds() { + let g = Generator::Range { low: "10".into(), high: "20".into() }; + let mut rng = make_rng(Some(5)); + for _ in 0..200 { + let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else { + panic!("int range should be a number") + }; + let n: i64 = s.parse().unwrap(); + assert!((10..=20).contains(&n), "int {n} out of [10,20]"); + } + } + + #[test] + fn real_range_stays_within_bounds_and_has_cents() { + let g = Generator::Range { low: "1.0".into(), high: "9.0".into() }; + let mut rng = make_rng(Some(5)); + for _ in 0..200 { + let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else { + panic!("real range should be a number") + }; + let n: f64 = s.parse().unwrap(); + assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]"); + assert!(s.contains('.'), "real should be formatted with cents: {s}"); + } + } + 
+ #[test] + fn date_range_stays_within_quoted_bounds() { + let g = Generator::Range { + low: "2023-01-01".into(), + high: "2023-12-31".into(), + }; + let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap(); + let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap(); + let mut rng = make_rng(Some(9)); + for _ in 0..200 { + let Value::Text(s) = generate_value(&g, Type::Date, &mut rng) else { + panic!("date range should be text") + }; + let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid date"); + assert!(d >= lo && d <= hi, "date {d} out of range"); + } + } + + #[test] + fn reversed_bounds_are_tolerated() { + let g = Generator::Range { low: "20".into(), high: "10".into() }; + let mut rng = make_rng(Some(1)); + let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else { + panic!("number") + }; + let n: i64 = s.parse().unwrap(); + assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}"); + } + + #[test] + fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() { + // Numeric / date / datetime accept; text / bool reject. + assert!(range_bounds_reason(Type::Int, "1", "10").is_none()); + assert!(range_bounds_reason(Type::Real, "1.5", "9.9").is_none()); + assert!(range_bounds_reason(Type::Date, "2023-01-01", "2024-01-01").is_none()); + assert!(range_bounds_reason(Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00").is_none()); + // Non-numeric bound on a numeric column. + assert!(range_bounds_reason(Type::Int, "abc", "10").is_some()); + // A range on a text column is meaningless. + assert!(range_bounds_reason(Type::Text, "a", "z").is_some()); + assert!(range_bounds_reason(Type::Bool, "0", "1").is_some()); + } + #[test] fn markers_fall_back_to_type_based_generation() { // An un-intercepted marker must not panic; it generates by type. 
diff --git a/src/seed/mod.rs b/src/seed/mod.rs index daeca2c..1a4d424 100644 --- a/src/seed/mod.rs +++ b/src/seed/mod.rs @@ -27,10 +27,12 @@ mod check; mod generators; mod heuristics; +mod vocabulary; pub use check::parse_in_check_values; -pub use generators::generate_value; +pub use generators::{generate_value, range_bounds_reason}; pub use heuristics::{choose_generator, is_enum_ish}; +pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS}; use rand::rngs::StdRng; use rand::{RngExt, SeedableRng}; @@ -162,10 +164,19 @@ pub enum Generator { IdentitySequential, /// FK column (D14): the executor samples an existing parent key. ForeignKeySample, - // — List / fallback — + // — List / range (the `set` override clause, D2) — /// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an - /// enum, or a future `set in (…)` override. + /// enum, or a `set in (…)` / `= ` override (D2). PickFrom(Vec), + /// Uniform value in `[low, high]` — the `set between low and + /// high` override (D2). Bounds are the raw literal strings; their + /// interpretation (int / real / date / datetime) follows the + /// destination column type at generation time. The executor + /// validates type-compatibility *before* generation (a bound that + /// does not parse for the column type is a friendly error), so + /// [`generate_value`] only ever sees parseable bounds; a defensive + /// parse failure falls back to type-based generation. + Range { low: String, high: String }, /// Type-based fallback (D8) when no name heuristic matches. Generic, } diff --git a/src/seed/vocabulary.rs b/src/seed/vocabulary.rs new file mode 100644 index 0000000..578bee3 --- /dev/null +++ b/src/seed/vocabulary.rs @@ -0,0 +1,149 @@ +//! The curated named-generator vocabulary (ADR-0048 D9). +//! +//! This is the **single source of truth** for "what generator names can +//! a learner write after `set as …`", shared by three consumers +//! 
(mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amд6): +//! +//! - **Tab completion** — the `seed … set as ⟨here⟩` slot offers +//! these names (`src/completion.rs`). +//! - **The typing-time validity indicator (ADR-0027)** — an unknown +//! name after `as` is flagged `[ERR]` while typing. +//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`] +//! via [`generator_for_name`]; an unknown name is a friendly error. +//! +//! The list is a deliberately *curated pedagogical set* — the generators +//! a learner reaches for, not every internal [`Generator`] variant +//! (stateful markers like `ForeignKeySample` are executor-only and have +//! no name). It is lowercase + sorted (pinned by a unit test). + +use crate::seed::Generator; + +/// The curated generator names, lowercase and **sorted** (invariant +/// pinned by a test — completion relies on stable order and a +/// case-insensitive prefix match against these canonical spellings). +pub const KNOWN_GENERATORS: &[&str] = &[ + "age", + "bool", + "city", + "color", + "company", + "country", + "date", + "datetime", + "email", + "first_name", + "job", + "last_name", + "name", + "paragraph", + "password", + "phone", + "price", + "product", + "sentence", + "state", + "street", + "url", + "username", + "zip", +]; + +/// Map a generator name (case-insensitive) to its [`Generator`]. +/// +/// `None` for an unrecognised name — the executor turns that into a +/// friendly "unknown generator" error naming the curated set. A couple +/// of common spelling variants (`firstname`, `lastname`, `colour`, +/// `full_name`) are accepted as aliases even though only the canonical +/// spelling is offered for completion. 
+#[must_use] +pub fn generator_for_name(name: &str) -> Option { + let n = name.to_ascii_lowercase(); + let g = match n.as_str() { + "name" | "full_name" => Generator::FullName, + "first_name" | "firstname" => Generator::FirstName, + "last_name" | "lastname" | "surname" => Generator::LastName, + "email" => Generator::Email, + "username" => Generator::Username, + "password" => Generator::Password, + "phone" => Generator::Phone, + "city" => Generator::City, + "country" => Generator::Country, + "state" => Generator::StateName, + "street" => Generator::Street, + "zip" => Generator::ZipCode, + "company" => Generator::Company, + "job" => Generator::JobTitle, + "product" => Generator::ProductName, + "sentence" => Generator::Sentence, + "paragraph" => Generator::Paragraph, + "url" => Generator::Url, + "color" | "colour" => Generator::HexColor, + "price" => Generator::CurrencyAmount, + "age" => Generator::Age, + "date" => Generator::DateRecent, + "datetime" => Generator::DateTimeRecent, + "bool" => Generator::Boolean, + _ => return None, + }; + Some(g) +} + +/// Whether `partial` is a case-insensitive prefix of at least one known +/// generator name. +/// +/// An empty `partial` matches every generator (it is a prefix of all) — +/// mirrors `is_known_function_prefix`. Used by the validity indicator to +/// avoid flagging a still-being-typed name. 
+#[must_use] +pub fn is_known_generator_prefix(partial: &str) -> bool { + let lowered = partial.to_ascii_lowercase(); + KNOWN_GENERATORS.iter().any(|g| g.starts_with(&lowered)) +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn known_generators_is_sorted_and_lowercase() { + let mut sorted = KNOWN_GENERATORS.to_vec(); + sorted.sort_unstable(); + assert_eq!(KNOWN_GENERATORS, sorted.as_slice(), "must be sorted"); + for g in KNOWN_GENERATORS { + assert_eq!(*g, g.to_ascii_lowercase(), "must be lowercase: {g}"); + } + } + + #[test] + fn every_listed_name_maps_to_a_generator() { + for g in KNOWN_GENERATORS { + assert!( + generator_for_name(g).is_some(), + "listed generator name `{g}` has no mapping" + ); + } + } + + #[test] + fn mapping_is_case_insensitive_and_has_aliases() { + assert_eq!(generator_for_name("EMAIL"), Some(Generator::Email)); + assert_eq!(generator_for_name("FirstName"), Some(Generator::FirstName)); + assert_eq!(generator_for_name("colour"), Some(Generator::HexColor)); + assert_eq!(generator_for_name("full_name"), Some(Generator::FullName)); + } + + #[test] + fn unknown_name_has_no_mapping() { + assert_eq!(generator_for_name("bogus"), None); + assert_eq!(generator_for_name(""), None); + } + + #[test] + fn prefix_check_matches_known_and_rejects_unknown() { + assert!(is_known_generator_prefix("ema")); + assert!(is_known_generator_prefix("EMA")); + assert!(is_known_generator_prefix("")); // empty is a prefix of all + assert!(!is_known_generator_prefix("zzz")); + } +} diff --git a/src/theme.rs b/src/theme.rs index ad2a424..8ffeabb 100644 --- a/src/theme.rs +++ b/src/theme.rs @@ -163,6 +163,7 @@ impl Theme { HighlightClass::String => self.tok_string, HighlightClass::Punct => self.tok_punct, HighlightClass::Flag => self.tok_flag, + HighlightClass::Function => self.tok_function, HighlightClass::Error => self.tok_error, } } @@ -228,6 +229,7 @@ mod tests { assert_eq!(t.highlight_class_color(HighlightClass::String), 
t.tok_string); assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct); assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag); + assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function); assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error); } diff --git a/tests/it/parse_error_pedagogy.rs b/tests/it/parse_error_pedagogy.rs index d421b82..919c07a 100644 --- a/tests/it/parse_error_pedagogy.rs +++ b/tests/it/parse_error_pedagogy.rs @@ -110,6 +110,13 @@ fn near_miss_matrix_simple_mode() { ("delete from", &["after `delete from`, expected table name", "delete from
"]), ("delete from T", &["expected `where` or `--all-rows`", "delete from
"]), ("seed", &["after `seed`, expected table name", "seed
[count]"]), + // Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill. + ("seed T set", &["after `seed T set`, expected column name", "seed
."]), + ( + "seed T set role", + &["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed
."], + ), + ("seed T.", &["after `seed T.`, expected column name", "seed
."]), ("replay", &["after `replay`, expected string literal or path", "replay "]), ("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]), // advanced-only entry word typed in simple mode → "this is SQL" rail @@ -540,3 +547,4 @@ fn caret_aligns_under_offending_token() { + diff --git a/tests/it/seed.rs b/tests/it/seed.rs index 5fcdb6e..a4a1bc2 100644 --- a/tests/it/seed.rs +++ b/tests/it/seed.rs @@ -60,11 +60,15 @@ fn seed_parses_with_and_without_count() { match parse_command("seed People 5").expect("`seed People 5` parses") { Command::Seed { table, + target_column, count, + overrides, rng_seed, } => { assert_eq!(table, "People"); + assert_eq!(target_column, None); assert_eq!(count, Some(5)); + assert!(overrides.is_empty()); assert_eq!(rng_seed, None); } other => panic!("expected Command::Seed, got {other:?}"), @@ -86,6 +90,7 @@ fn seed_parses_the_reproducibility_flag() { table, count, rng_seed, + .. } => { assert_eq!(table, "People"); assert_eq!(count, Some(5)); @@ -106,6 +111,155 @@ fn seed_parses_the_reproducibility_flag() { } } +// — Phase 2 (SD2): set-clause + column-fill parse path (ADR-0048 D2/D1) — + +use rdbms_playground::dsl::command::{SeedOverride, SeedOverrideKind}; +use rdbms_playground::dsl::value::Value; + +/// Pull the `overrides` out of a parsed `seed` command (panics on a +/// non-seed command), for the builder-fold assertions below. +fn seed_overrides(input: &str) -> (Option, Vec) { + match parse_command(input).unwrap_or_else(|e| panic!("`{input}` should parse: {e:?}")) { + Command::Seed { + target_column, + overrides, + .. 
+ } => (target_column, overrides), + other => panic!("expected Command::Seed, got {other:?}"), + } +} + +#[test] +fn seed_set_fixed_value_override_parses() { + let (_t, ov) = seed_overrides("seed users 5 set status = 'active'"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "status"); + assert_eq!(ov[0].kind, SeedOverrideKind::Fixed(Value::Text("active".into()))); +} + +#[test] +fn seed_set_pick_list_override_parses() { + let (_t, ov) = seed_overrides("seed users set role in ('admin', 'editor', 'viewer')"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "role"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::PickList(vec![ + Value::Text("admin".into()), + Value::Text("editor".into()), + Value::Text("viewer".into()), + ]) + ); +} + +#[test] +fn seed_set_generator_override_parses() { + let (_t, ov) = seed_overrides("seed users set work_addr as email"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "work_addr"); + assert_eq!(ov[0].kind, SeedOverrideKind::Generator("email".into())); +} + +#[test] +fn seed_set_numeric_range_override_parses() { + let (_t, ov) = seed_overrides("seed products set price between 10 and 100"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "price"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::Range { + low: Value::Number("10".into()), + high: Value::Number("100".into()), + } + ); +} + +#[test] +fn seed_set_date_range_override_parses_with_quoted_dates() { + // ADR-0048 D2 amendment: dates in the range form are quoted strings. 
+ let (_t, ov) = + seed_overrides("seed users set signup between '2023-01-01' and '2024-12-31'"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::Range { + low: Value::Text("2023-01-01".into()), + high: Value::Text("2024-12-31".into()), + } + ); +} + +#[test] +fn seed_multiple_overrides_combine() { + let (_t, ov) = seed_overrides( + "seed users 20 set role in ('admin', 'user'), status = 'active', signup between '2023-01-01' and '2024-12-31'", + ); + assert_eq!(ov.len(), 3, "three comma-separated overrides: {ov:?}"); + assert_eq!(ov[0].column, "role"); + assert!(matches!(ov[0].kind, SeedOverrideKind::PickList(_))); + assert_eq!(ov[1].column, "status"); + assert!(matches!(ov[1].kind, SeedOverrideKind::Fixed(_))); + assert_eq!(ov[2].column, "signup"); + assert!(matches!(ov[2].kind, SeedOverrideKind::Range { .. })); +} + +#[test] +fn seed_count_is_not_confused_by_a_range_value() { + // No positional count, but `between 18 and 80` carries NumberLits — + // they must not be read as the count (bounded to before `set`). + match parse_command("seed users set age between 18 and 80").expect("parses") { + Command::Seed { count, overrides, .. } => { + assert_eq!(count, None, "the count is None, not 18"); + assert_eq!(overrides.len(), 1); + } + other => panic!("expected seed, got {other:?}"), + } +} + +#[test] +fn seed_set_combines_with_count_and_flag() { + match parse_command("seed users 30 set status = 'x' --seed 42").expect("parses") { + Command::Seed { + count, + overrides, + rng_seed, + .. 
+ } => { + assert_eq!(count, Some(30)); + assert_eq!(rng_seed, Some(42)); + assert_eq!(overrides.len(), 1); + } + other => panic!("expected seed, got {other:?}"), + } +} + +#[test] +fn seed_column_fill_target_parses() { + let (target, ov) = seed_overrides("seed users.work_addr"); + assert_eq!(target.as_deref(), Some("work_addr")); + assert!(ov.is_empty()); +} + +#[test] +fn seed_column_fill_with_set_parses() { + let (target, ov) = seed_overrides("seed users.work_addr set work_addr as email"); + assert_eq!(target.as_deref(), Some("work_addr")); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].kind, SeedOverrideKind::Generator("email".into())); +} + +#[test] +fn seed_bare_word_set_value_is_rejected() { + // A bare (unquoted) word is not a value — D2 requires quoting. The + // typed value slot rejects `active` at the grammar level (it is not a + // quoted string / number), so the command does not parse. + assert!( + parse_command("seed users set status = active").is_err(), + "a bare-word `set` value must be rejected (quoting required, D2)" + ); + // The quoted form parses. 
+ assert!(parse_command("seed users set status = 'active'").is_ok()); +} + #[test] fn seed_populates_a_table_and_persists_rows() { let (project, db, _dir) = open_project_db(); @@ -113,7 +267,7 @@ fn seed_populates_a_table_and_persists_rows() { create_people(&db, &rt); let result = rt - .block_on(db.seed("People".into(), Some(7), Some(42), Some("seed People 7".into()))) + .block_on(db.seed("People".into(), None, Some(7), Vec::new(), Some(42), Some("seed People 7".into()))) .expect("seed succeeds"); assert_eq!(result.produced, 7); @@ -134,7 +288,7 @@ fn seed_count_defaults_to_twenty() { create_people(&db, &rt); let result = rt - .block_on(db.seed("People".into(), None, Some(1), Some("seed People".into()))) + .block_on(db.seed("People".into(), None, None, Vec::new(), Some(1), Some("seed People".into()))) .expect("seed succeeds"); assert_eq!(result.produced, 20, "omitted count defaults to 20"); let csv = read_csv(&project, "People").expect("People CSV exists"); @@ -149,9 +303,9 @@ fn seed_is_reproducible_with_a_fixed_seed() { create_people(&db1, &rt); create_people(&db2, &rt); - rt.block_on(db1.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) + rt.block_on(db1.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into()))) .expect("seed run 1"); - rt.block_on(db2.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) + rt.block_on(db2.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into()))) .expect("seed run 2"); let csv1 = read_csv(&p1, "People").expect("csv 1"); @@ -165,7 +319,7 @@ fn seed_writes_exactly_one_history_line() { let rt = rt(); create_people(&db, &rt); - rt.block_on(db.seed("People".into(), Some(5), Some(1), Some("seed People 5".into()))) + rt.block_on(db.seed("People".into(), None, Some(5), Vec::new(), Some(1), Some("seed People 5".into()))) .expect("seed succeeds"); let history = std::fs::read_to_string(project.path().join("history.log")) @@ -240,10 
+394,10 @@ fn seed_fills_foreign_keys_from_existing_parents() { create_users_and_orders(&db, &rt, true); // 5 parents → serial ids 1..=5. - rt.block_on(db.seed("Users".into(), Some(5), Some(1), Some("seed Users 5".into()))) + rt.block_on(db.seed("Users".into(), None, Some(5), Vec::new(), Some(1), Some("seed Users 5".into()))) .expect("seed Users"); let res = rt - .block_on(db.seed("Orders".into(), Some(10), Some(2), Some("seed Orders 10".into()))) + .block_on(db.seed("Orders".into(), None, Some(10), Vec::new(), Some(2), Some("seed Orders 10".into()))) .expect("seed Orders"); assert_eq!(res.produced, 10, "every child row must insert (valid FK)"); @@ -267,7 +421,7 @@ fn seed_refuses_when_a_parent_table_is_empty() { // Users is empty — no valid FK can be fabricated. let err = rt - .block_on(db.seed("Orders".into(), Some(3), Some(1), Some("seed Orders 3".into()))) + .block_on(db.seed("Orders".into(), None, Some(3), Vec::new(), Some(1), Some("seed Orders 3".into()))) .expect_err("seed must refuse an empty parent"); let msg = err.to_string(); assert!(msg.contains("Users"), "error should name the empty parent: {msg}"); @@ -293,7 +447,7 @@ fn seed_refuses_a_not_null_blob_column() { .expect("create Files"); let err = rt - .block_on(db.seed("Files".into(), Some(2), Some(1), Some("seed Files 2".into()))) + .block_on(db.seed("Files".into(), None, Some(2), Vec::new(), Some(1), Some("seed Files 2".into()))) .expect_err("seed must refuse a NOT NULL blob"); let msg = err.to_string(); assert!( @@ -320,7 +474,7 @@ fn seed_omits_a_nullable_blob_column() { .expect("create Files"); let res = rt - .block_on(db.seed("Files".into(), Some(3), Some(1), Some("seed Files 3".into()))) + .block_on(db.seed("Files".into(), None, Some(3), Vec::new(), Some(1), Some("seed Files 3".into()))) .expect("seed succeeds despite the nullable blob"); assert_eq!(res.produced, 3); let csv = read_csv(&project, "Files").expect("Files CSV"); @@ -354,7 +508,7 @@ fn seed_keeps_unique_columns_distinct() { 
.expect("create Tags"); let res = rt - .block_on(db.seed("Tags".into(), Some(8), Some(3), Some("seed Tags 8".into()))) + .block_on(db.seed("Tags".into(), None, Some(8), Vec::new(), Some(3), Some("seed Tags 8".into()))) .expect("seed"); assert_eq!(res.produced, 8); @@ -383,7 +537,7 @@ fn seed_sequences_identifier_int_columns() { .expect("create Items"); let res = rt - .block_on(db.seed("Items".into(), Some(5), Some(1), Some("seed Items 5".into()))) + .block_on(db.seed("Items".into(), None, Some(5), Vec::new(), Some(1), Some("seed Items 5".into()))) .expect("seed"); assert_eq!(res.produced, 5); @@ -414,7 +568,7 @@ fn seed_junction_produces_distinct_combinations_and_caps() { ) .await .expect("create parent"); - db.seed(t.into(), Some(2), Some(1), Some(format!("seed {t} 2"))) + db.seed(t.into(), None, Some(2), Vec::new(), Some(1), Some(format!("seed {t} 2"))) .await .expect("seed parent"); } @@ -456,7 +610,7 @@ fn seed_junction_produces_distinct_combinations_and_caps() { // Requesting 10 caps at the 4 available distinct combinations. let res = db - .seed("J".into(), Some(10), Some(7), Some("seed J 10".into())) + .seed("J".into(), None, Some(10), Vec::new(), Some(7), Some("seed J 10".into())) .await .expect("seed J"); assert_eq!(res.produced, 4, "junction caps at available combos"); @@ -490,7 +644,7 @@ fn seed_draws_enum_values_from_an_in_check() { // Every generated status must satisfy the CHECK, so all rows insert. 
let res = rt - .block_on(db.seed("Tickets".into(), Some(12), Some(2), Some("seed Tickets 12".into()))) + .block_on(db.seed("Tickets".into(), None, Some(12), Vec::new(), Some(2), Some("seed Tickets 12".into()))) .expect("seed"); assert_eq!(res.produced, 12, "all rows insert — values satisfy the CHECK"); @@ -527,7 +681,7 @@ fn seed_advises_on_enum_ish_columns() { .expect("create Tasks"); let res = rt - .block_on(db.seed("Tasks".into(), Some(3), Some(1), Some("seed Tasks 3".into()))) + .block_on(db.seed("Tasks".into(), None, Some(3), Vec::new(), Some(1), Some("seed Tasks 3".into()))) .expect("seed"); assert!( res.advisory_columns.contains(&"status".to_string()), @@ -542,7 +696,7 @@ fn seed_refuses_an_excessive_count() { let rt = rt(); create_people(&db, &rt); let err = rt - .block_on(db.seed("People".into(), Some(1_000_000), Some(1), Some("seed People 1000000".into()))) + .block_on(db.seed("People".into(), None, Some(1_000_000), Vec::new(), Some(1), Some("seed People 1000000".into()))) .expect_err("an excessive count must be refused"); assert!( err.to_string().to_lowercase().contains("maximum"), @@ -557,7 +711,7 @@ fn seed_preview_is_capped_but_count_is_full() { create_people(&db, &rt); let res = rt - .block_on(db.seed("People".into(), Some(25), Some(1), Some("seed People 25".into()))) + .block_on(db.seed("People".into(), None, Some(25), Vec::new(), Some(1), Some("seed People 25".into()))) .expect("seed"); assert_eq!(res.produced, 25, "the full count is produced"); assert_eq!(res.data.rows.len(), 20, "the preview is capped at 20 rows"); @@ -573,6 +727,25 @@ fn seed_is_available_in_advanced_mode() { matches!(r, Ok(Command::Seed { .. })), "seed must parse in advanced mode: {r:?}" ); + // The Phase 2 surfaces (set clause + column-fill) also parse in + // advanced mode — same grammar, no mode gate. + assert!( + matches!( + parse_command_in_mode("seed People 5 set status = 'active'", Mode::Advanced), + Ok(Command::Seed { .. 
}) + ), + "set clause must parse in advanced mode" + ); + assert!( + matches!( + parse_command_in_mode("seed People.email set email as email", Mode::Advanced), + Ok(Command::Seed { + target_column: Some(_), + .. + }) + ), + "column-fill must parse in advanced mode" + ); } // — DA-pass coverage: undo (D15), replay (D16), atomicity, zero count, @@ -588,7 +761,7 @@ fn seed_is_one_undo_step() { .expect("open db with undo"); let rt = rt(); create_people(&db, &rt); - rt.block_on(db.seed("People".into(), Some(6), Some(1), Some("seed People 6".into()))) + rt.block_on(db.seed("People".into(), None, Some(6), Vec::new(), Some(1), Some("seed People 6".into()))) .expect("seed"); assert_eq!(data_row_count(&read_csv(&project, "People").unwrap()), 6); @@ -598,6 +771,32 @@ fn seed_is_one_undo_step() { assert_eq!(rows, 0, "one undo must remove every seeded row in a single step"); } +#[test] +fn seed_column_fill_is_one_undo_step() { + // ADR-0048 D15: column-fill's bulk UPDATE is one undo step too. + let dir = tempfile::tempdir().expect("tempdir"); + let project = project::open_or_create(None, Some(dir.path())).expect("project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence_and_undo(project.db_path(), persistence, true) + .expect("open db with undo"); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 5 --seed 1").expect("seed"); + // Fill `status` across all 5 rows with a constant, then undo once. 
+ run_seed(&db, &rt, "seed Members.status set status = 'flagged' --seed 2") + .expect("column-fill"); + let before = named_column_values(&read_csv(&project, "Members").unwrap(), "status"); + assert!(before.iter().all(|s| s == "flagged"), "all rows filled: {before:?}"); + + rt.block_on(db.undo()).unwrap().expect("undo applied"); + let after = named_column_values(&read_csv(&project, "Members").unwrap(), "status"); + assert!( + after.iter().all(|s| s != "flagged"), + "one undo reverts the whole column-fill in a single step: {after:?}" + ); + assert_eq!(after.len(), 5, "undo restores the original rows, not removes them"); +} + #[test] fn replay_reruns_a_seed_line_as_a_data_write() { use rdbms_playground::runtime::run_replay; @@ -632,7 +831,7 @@ fn seed_rolls_back_atomically_on_a_constraint_failure() { )) .expect("create Bad"); - let res = rt.block_on(db.seed("Bad".into(), Some(5), Some(1), Some("seed Bad 5".into()))); + let res = rt.block_on(db.seed("Bad".into(), None, Some(5), Vec::new(), Some(1), Some("seed Bad 5".into()))); assert!(res.is_err(), "seed must fail when generated rows violate the CHECK"); let rows = read_csv(&project, "Bad").map_or(0, |c| data_row_count(&c)); assert_eq!(rows, 0, "a failed seed must leave the table unchanged (atomic)"); @@ -644,7 +843,7 @@ fn seed_zero_is_a_no_op() { let rt = rt(); create_people(&db, &rt); let res = rt - .block_on(db.seed("People".into(), Some(0), Some(1), Some("seed People 0".into()))) + .block_on(db.seed("People".into(), None, Some(0), Vec::new(), Some(1), Some("seed People 0".into()))) .expect("seed 0 succeeds"); assert_eq!(res.produced, 0); let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); @@ -669,7 +868,7 @@ fn seed_advises_on_a_complex_check_column() { .expect("create Widgets"); let res = rt - .block_on(db.seed("Widgets".into(), Some(3), Some(1), Some("seed Widgets 3".into()))) + .block_on(db.seed("Widgets".into(), None, Some(3), Vec::new(), Some(1), Some("seed Widgets 3".into()))) 
.expect("seed"); assert!( res.advisory_columns.contains(&"label".to_string()), @@ -683,9 +882,9 @@ fn seed_foreign_keys_are_reproducible_with_a_fixed_seed() { let rt = rt(); let seed_one = |db: &Database| { create_users_and_orders(db, &rt, true); - rt.block_on(db.seed("Users".into(), Some(4), Some(1), Some("seed Users 4".into()))) + rt.block_on(db.seed("Users".into(), None, Some(4), Vec::new(), Some(1), Some("seed Users 4".into()))) .expect("seed users"); - rt.block_on(db.seed("Orders".into(), Some(8), Some(99), Some("seed Orders 8".into()))) + rt.block_on(db.seed("Orders".into(), None, Some(8), Vec::new(), Some(99), Some("seed Orders 8".into()))) .expect("seed orders"); }; let (p1, db1, _d1) = open_project_db(); @@ -715,7 +914,7 @@ fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() { None, )) .expect("create Contacts"); - rt.block_on(db.seed("Contacts".into(), Some(5), Some(42), Some("seed Contacts 5".into()))) + rt.block_on(db.seed("Contacts".into(), None, Some(5), Vec::new(), Some(42), Some("seed Contacts 5".into()))) .expect("seed"); }; let (p1, db1, _d1) = open_project_db(); @@ -736,3 +935,343 @@ fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() { assert_eq!(code.len(), 10, "shortid should be 10 chars: {code}"); } } + +// ================================================================= +// Phase 2 (SD2) executor: set-clause overrides + column-fill, +// exercised full-stack (parse → worker) — ADR-0048 D2 / D1. +// ================================================================= + +/// Parse `input` as a `seed` command and run it through the worker — +/// the full stack minus UI render (grammar → builder → executor). 
+fn run_seed( + db: &Database, + rt: &tokio::runtime::Runtime, + input: &str, +) -> Result { + match parse_command(input).unwrap_or_else(|e| panic!("`{input}` should parse: {e:?}")) { + Command::Seed { + table, + target_column, + count, + overrides, + rng_seed, + } => rt.block_on(db.seed( + table, + target_column, + count, + overrides, + rng_seed, + Some(input.to_string()), + )), + other => panic!("expected a seed command, got {other:?}"), + } +} + +/// Values of the column named `col` (by header lookup) across the CSV's +/// data rows. +fn named_column_values(csv: &str, col: &str) -> Vec { + let header = csv.lines().next().unwrap_or_default(); + let idx = header + .split(',') + .position(|h| h.trim() == col) + .unwrap_or_else(|| panic!("column `{col}` not in header `{header}`")); + nth_column_values(csv, idx) +} + +/// `Members(id serial pk, name text, status text, role text, age int)`. +/// `status`/`role` are enum-ish names (advisory targets without an +/// override); `name`/`age` exercise the generator / range overrides. 
+fn create_members(db: &Database, rt: &tokio::runtime::Runtime) { + rt.block_on(db.create_table( + "Members".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ColumnSpec::new("status", Type::Text), + ColumnSpec::new("role", Type::Text), + ColumnSpec::new("age", Type::Int), + ], + vec!["id".to_string()], + None, + )) + .expect("create Members"); +} + +#[test] +fn seed_set_fixed_value_fills_every_row() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 6 set status = 'active' --seed 1").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let statuses = named_column_values(&csv, "status"); + assert_eq!(statuses.len(), 6); + assert!(statuses.iter().all(|s| s == "active"), "every status pinned: {statuses:?}"); +} + +#[test] +fn seed_set_pick_list_draws_only_from_the_list() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 20 set role in ('admin', 'user') --seed 2").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let roles = named_column_values(&csv, "role"); + assert!( + roles.iter().all(|r| r == "admin" || r == "user"), + "roles only from the list: {roles:?}" + ); +} + +#[test] +fn seed_set_as_generator_forces_the_shape() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // Force the `name` column (a person-name heuristic) to emails. 
+ run_seed(&db, &rt, "seed Members 5 set name as email --seed 3").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let names = named_column_values(&csv, "name"); + assert!(names.iter().all(|n| n.contains('@')), "name forced to email shape: {names:?}"); +} + +#[test] +fn seed_set_numeric_range_stays_within_bounds() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 30 set age between 30 and 40 --seed 4").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + for a in named_column_values(&csv, "age") { + let n: i64 = a.parse().unwrap_or_else(|_| panic!("age `{a}` not an int")); + assert!((30..=40).contains(&n), "age {n} out of [30,40]"); + } +} + +#[test] +fn seed_override_drops_the_column_from_the_advisory() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // Without an override, `status` (enum-ish) is flagged in the advisory. + let plain = run_seed(&db, &rt, "seed Members 3 --seed 5").expect("seed"); + assert!( + plain.advisory_columns.iter().any(|c| c == "status"), + "status should be advised without an override: {:?}", + plain.advisory_columns + ); + // With an override on status, it must not appear in the advisory. 
+ let overridden = + run_seed(&db, &rt, "seed Members 3 set status in ('a', 'b') --seed 5").expect("seed"); + assert!( + !overridden.advisory_columns.iter().any(|c| c == "status"), + "overridden status must drop from advisory: {:?}", + overridden.advisory_columns + ); +} + +#[test] +fn seed_unknown_generator_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + let err = run_seed(&db, &rt, "seed Members 3 set name as bogus").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("unknown generator") && msg.contains("bogus"), + "should name the unknown generator: {msg}" + ); +} + +#[test] +fn seed_incompatible_range_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // A numeric range on a text column (`name`) is rejected. + let err = run_seed(&db, &rt, "seed Members 3 set name between 1 and 10").unwrap_err(); + let msg = format!("{err}"); + assert!(msg.contains("between"), "range error should mention `between`: {msg}"); +} + +#[test] +fn seed_with_set_is_reproducible() { + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + let rt = rt(); + create_members(&db1, &rt); + create_members(&db2, &rt); + let cmd = "seed Members 10 set role in ('a', 'b', 'c'), age between 20 and 60 --seed 77"; + run_seed(&db1, &rt, cmd).expect("seed 1"); + run_seed(&db2, &rt, cmd).expect("seed 2"); + assert_eq!( + read_csv(&p1, "Members").unwrap(), + read_csv(&p2, "Members").unwrap(), + "the same --seed + set clause must reproduce identical data" + ); +} + +// — column-fill (ADR-0048 D1 form 2) — + +#[test] +fn seed_column_fill_updates_existing_rows_without_adding() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 5 --seed 1").expect("initial seed"); + let before = data_row_count(&read_csv(&project, "Members").unwrap()); + assert_eq!(before, 5); + + 
let res = run_seed(&db, &rt, "seed Members.status set status in ('x', 'y') --seed 2") + .expect("column-fill"); + assert_eq!(res.produced, 5, "column-fill touches the 5 existing rows"); + let csv = read_csv(&project, "Members").unwrap(); + assert_eq!(data_row_count(&csv), 5, "no new rows added"); + let statuses = named_column_values(&csv, "status"); + assert!( + statuses.iter().all(|s| s == "x" || s == "y"), + "every existing row's status refilled from the list: {statuses:?}" + ); +} + +#[test] +fn seed_column_fill_refuses_a_pk_target() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 3 --seed 1").expect("seed"); + let err = run_seed(&db, &rt, "seed Members.id").unwrap_err(); + assert!(format!("{err}").contains("primary key"), "PK target refused: {err}"); +} + +#[test] +fn seed_column_fill_empty_table_is_a_noop() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // No rows yet → friendly no-op, not an error. + let res = run_seed(&db, &rt, "seed Members.status set status in ('a', 'b')").expect("no-op"); + assert_eq!(res.produced, 0, "empty table → nothing filled"); +} + +#[test] +fn seed_column_fill_set_may_only_target_the_filled_column() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 3 --seed 1").expect("seed"); + let err = run_seed(&db, &rt, "seed Members.status set role = 'x'").unwrap_err(); + assert!( + format!("{err}").contains("can only adjust"), + "set targeting another column is refused: {err}" + ); +} + +#[test] +fn seed_column_fill_rejects_a_row_count() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // `seed T.col 5` parses, but a count is meaningless for column-fill. 
+ let err = rt + .block_on(db.seed( + "Members".into(), + Some("status".into()), + Some(5), + Vec::new(), + Some(1), + Some("seed Members.status 5".into()), + )) + .unwrap_err(); + assert!(format!("{err}").contains("no row count"), "count refused: {err}"); +} + +#[test] +fn seed_column_fill_fk_target_samples_the_parent() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + run_seed(&db, &rt, "seed Users 4 --seed 1").expect("seed users"); + run_seed(&db, &rt, "seed Orders 8 --seed 2").expect("seed orders"); + // Re-fill the FK column across existing orders; every value must be a + // valid parent key (the UPDATE would fail FK enforcement otherwise). + let res = run_seed(&db, &rt, "seed Orders.user_id --seed 3").expect("column-fill FK"); + assert_eq!(res.produced, 8); + let csv = read_csv(&project, "Orders").unwrap(); + let user_ids = named_column_values(&csv, "user_id"); + assert!(user_ids.iter().all(|v| (1..=4).contains(&v.parse::().unwrap()))); +} + +#[test] +fn seed_fixed_override_on_unique_column_is_a_friendly_error() { + // DA finding (user-chosen: friendly error). A fixed value can't fill a + // UNIQUE column for more than one row — refuse up front rather than + // silently capping to 1. + let (_p, db, _d) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "U".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + { + let mut c = ColumnSpec::new("email", Type::Text); + c.unique = true; + c + }, + ], + vec!["id".to_string()], + None, + )) + .expect("create U"); + let err = run_seed(&db, &rt, "seed U 5 set email = 'x@y.com'").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("UNIQUE") && msg.contains("distinct"), + "fixed value on a UNIQUE column should be a friendly capacity error: {msg}" + ); + // A short pick-list (< count) is likewise refused... 
+ let err2 = run_seed(&db, &rt, "seed U 5 set email in ('a@b.c', 'd@e.f')").unwrap_err(); + assert!(format!("{err2}").contains("distinct"), "short list refused: {err2}"); + // ...but a pick-list with enough distinct values succeeds. + let ok = run_seed( + &db, + &rt, + "seed U 3 set email in ('a@b.c', 'd@e.f', 'g@h.i') --seed 1", + ) + .expect("a list >= count fills cleanly"); + assert_eq!(ok.produced, 3); + // A generator is unbounded — also fine. + assert_eq!( + run_seed(&db, &rt, "seed U 4 set email as email --seed 2") + .expect("generator fills a unique column") + .produced, + 4 + ); +} + +#[test] +fn seed_column_fill_fixed_on_unique_column_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "U".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + { + let mut c = ColumnSpec::new("email", Type::Text); + c.unique = true; + c + }, + ], + vec!["id".to_string()], + None, + )) + .expect("create U"); + run_seed(&db, &rt, "seed U 4 set email as email --seed 1").expect("seed 4 rows"); + // Filling the UNIQUE column on 4 rows with one fixed value is refused. + let err = run_seed(&db, &rt, "seed U.email set email = 'same@x.com'").unwrap_err(); + assert!( + format!("{err}").contains("UNIQUE"), + "column-fill of a fixed value on a UNIQUE column should refuse: {err}" + ); +} diff --git a/tests/typing_surface/mod.rs b/tests/typing_surface/mod.rs index 8a5b937..53bef3b 100644 --- a/tests/typing_surface/mod.rs +++ b/tests/typing_surface/mod.rs @@ -477,4 +477,32 @@ fn seed_completion_and_validity() { flag_cands.iter().any(|c| c.contains("seed")), "`--seed` should be offered as a candidate, got {flag_cands:?}" ); + + // Phase 2 (ADR-0048 D2): the `set` clause is offered after the count. + assert!( + flag_cands.iter().any(|c| c == "set"), + "`set` should be offered after the count, got {flag_cands:?}" + ); + + // `set ` offers the active table's columns (narrowed to Customers). 
+ let set_cands = completion_candidate_texts(&assess_at_end("seed Customers set ", &schema)); + assert!( + set_cands.iter().any(|c| c == "Name") && set_cands.iter().any(|c| c == "Email"), + "`set ` should complete this table's columns, got {set_cands:?}" + ); + + // `set as ` offers the curated generator vocabulary (D9). + let gen_cands = + completion_candidate_texts(&assess_at_end("seed Customers set Email as ", &schema)); + assert!( + gen_cands.iter().any(|c| c == "email") && gen_cands.iter().any(|c| c == "product"), + "`as ` should complete generator names, got {gen_cands:?}" + ); + + // Column-fill (D1 form 2): `seed Customers.` offers the columns. + let fill_cands = completion_candidate_texts(&assess_at_end("seed Customers.", &schema)); + assert!( + fill_cands.iter().any(|c| c == "Name"), + "`seed Customers.` should complete column names, got {fill_cands:?}" + ); }