as ⟨here⟩` slot (`IdentSource::Generators`) the
+ // curated vocabulary is offered so a learner can discover `email` /
+ // `product` / … by Tab. Same `Function` kind / `tok_function` colour
+ // as SQL functions (no new theme colour — ADR-0048 §Grammar).
+ let has_generator_slot = expected
+ .iter()
+ .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
+ if has_generator_slot {
+ functions.extend(
+ crate::seed::KNOWN_GENERATORS
+ .iter()
+ .filter(|g| matches_prefix(g))
+ .map(|g| (*g).to_string()),
+ );
+ }
// Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation.
@@ -1200,6 +1222,24 @@ pub fn invalid_ident_at_cursor_in_mode(
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None;
}
+ // ADR-0048 D9: the `seed … set <col> as <generator>` slot is a curated
+ // vocabulary (`IdentSource::Generators`), not a schema source, so the
+ // schema-column check below would never see it. A partial that
+ // prefix-matches a known generator is an in-progress name; anything
+ // else is an unknown generator → flag it `[ERR]` while typing.
+ let has_generator_slot = expected
+ .iter()
+ .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
+ if has_generator_slot {
+ if crate::seed::is_known_generator_prefix(partial) {
+ return None;
+ }
+ return Some(InvalidIdent {
+ range: (start, cursor),
+ found: partial.to_string(),
+ source: IdentSource::Generators,
+ });
+ }
// Find every schema-listable source in the expected list.
let sources: Vec = expected
.iter()
@@ -2606,6 +2646,48 @@ mod tests {
);
}
+ #[test]
+ fn invalid_ident_fires_for_unknown_generator_after_as() {
+ // ADR-0048 D9: an unknown name at the `set <col> as <generator>` slot is
+ // flagged `[ERR]` while typing.
+ let cache = two_table_schema();
+ let input = "seed a set name as bogus";
+ let inv = invalid_ident_at_cursor(input, input.len(), &cache)
+ .expect("unknown generator must flag");
+ assert_eq!(inv.found, "bogus");
+ assert_eq!(inv.source, IdentSource::Generators);
+ }
+
+ #[test]
+ fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
+ // ADR-0048: an unknown column at the `set <col>` slot and the
+ // `<table>.<col>` column-fill slot is flagged like any other
+ // column slot (both are `IdentSource::Columns`).
+ let cache = two_table_schema(); // table `a`; columns id, name
+ let set_in = invalid_ident_at_cursor("seed a set xyz", 14, &cache)
+ .expect("unknown column in `set` must flag");
+ assert_eq!(set_in.found, "xyz");
+ assert_eq!(set_in.source, IdentSource::Columns);
+
+ let fill = invalid_ident_at_cursor("seed a.xyz", 10, &cache)
+ .expect("unknown column in column-fill must flag");
+ assert_eq!(fill.source, IdentSource::Columns);
+ }
+
+ #[test]
+ fn invalid_ident_does_not_fire_for_generator_prefix() {
+ // A prefix of a known generator is an in-progress name, not a typo.
+ let cache = two_table_schema();
+ assert!(
+ invalid_ident_at_cursor("seed a set name as ema", 22, &cache).is_none(),
+ "`ema` prefixes `email` — must not flag",
+ );
+ assert!(
+ invalid_ident_at_cursor("seed a set name as email", 24, &cache).is_none(),
+ "`email` is a known generator — must not flag",
+ );
+ }
+
fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type;
let mut s = SchemaCache::default();
diff --git a/src/db.rs b/src/db.rs
index 1c51c0d..19e4d07 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -33,7 +33,8 @@ use tracing::{debug, info, warn};
use crate::dsl::action::ReferentialAction;
use crate::dsl::command::{
ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector,
- Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey,
+ Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind,
+ SqlForeignKey,
};
use crate::dsl::ColumnSpec;
use crate::dsl::shortid;
@@ -723,7 +724,9 @@ enum Request {
/// snapshot wraps the whole seed via `snapshot_then`.
Seed {
table: String,
+ target_column: Option,
count: Option,
+ overrides: Vec,
rng_seed: Option,
source: Option,
reply: oneshot::Sender>,
@@ -1517,18 +1520,22 @@ impl Database {
recv.await.map_err(|_| DbError::WorkerGone)?
}
- /// Populate a table with generated fake data (ADR-0048, SD1).
+ /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
pub async fn seed(
&self,
table: String,
+ target_column: Option,
count: Option,
+ overrides: Vec,
rng_seed: Option,
source: Option,
) -> Result {
let (reply, recv) = oneshot::channel();
self.send(Request::Seed {
table,
+ target_column,
count,
+ overrides,
rng_seed,
source,
reply,
@@ -2694,7 +2701,9 @@ fn handle_request(
}
Request::Seed {
table,
+ target_column,
count,
+ overrides,
rng_seed,
source,
reply,
@@ -2706,7 +2715,9 @@ fn handle_request(
persistence,
source.as_deref(),
&table,
+ target_column.as_deref(),
count,
+ &overrides,
rng_seed,
));
}
@@ -2938,7 +2949,10 @@ fn do_list_names_for(
}
Ok(out)
}
- IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()),
+ IdentSource::NewName
+ | IdentSource::Types
+ | IdentSource::Generators
+ | IdentSource::Free => Ok(Vec::new()),
}
}
@@ -8808,14 +8822,13 @@ fn sample_parent_key_tuples(
Ok(tuples)
}
-/// Populate a table with generated fake data (ADR-0048, SD1).
+/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
///
-/// **Phase 1.** Generates whole rows and inserts them one at a time
-/// through [`do_insert`] — reusing all the existing per-value
-/// validation, autogen autofill, FK-error enrichment and persistence
-/// machinery. The whole seed is a single undo step (the worker wraps
-/// the call in one `snapshot_then`) and writes exactly one
-/// `history.log` line (only the first row carries the `source`).
+/// Generates whole rows and inserts them in one transaction, reusing the
+/// per-value validation, autogen autofill, FK-error enrichment and
+/// persistence machinery via [`insert_one_row`]. The whole seed is a
+/// single undo step (the worker wraps the call in one `snapshot_then`)
+/// and writes exactly one `history.log` line.
///
/// Foreign-key columns are filled by sampling existing parent rows
/// (D14); a compound FK reads all its child columns from one sampled
@@ -8823,16 +8836,20 @@ fn sample_parent_key_tuples(
/// `NOT NULL blob` column (which seed cannot generate) is refused by
/// the block guard (D1); a nullable blob is omitted (→ NULL).
///
-/// Deferred: identifier/constraint uniqueness incl. junction
-/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the
-/// efficient single-transaction multi-row path, the capped auto-show
-/// preview (D18), and the enum/CHECK advisory (D12/D13).
+/// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to
+/// [`do_seed_column_fill`] (fill one column across existing rows, D1
+/// form 2). `overrides` carries the `set <col> …` clause (D2): per-column
+/// pins that replace the heuristic generator and drop the column from the
+/// generic-fill advisory (D13).
+#[allow(clippy::too_many_arguments)]
fn do_seed(
conn: &Connection,
persistence: Option<&Persistence>,
source: Option<&str>,
table: &str,
+ target_column: Option<&str>,
count: Option,
+ overrides: &[SeedOverride],
rng_seed: Option,
) -> Result {
use crate::seed;
@@ -8840,6 +8857,14 @@ fn do_seed(
let canonical_table = require_canonical_table(conn, table)?;
let table = canonical_table.as_str();
+
+ // Column-fill (D1 form 2) is a distinct UPDATE path.
+ if let Some(col) = target_column {
+ return do_seed_column_fill(
+ conn, persistence, source, table, col, count, overrides, rng_seed,
+ );
+ }
+
let n = count.unwrap_or(DEFAULT_SEED_COUNT);
debug!(table = %table, count = n, "seed");
if n > MAX_SEED_COUNT {
@@ -8937,6 +8962,17 @@ fn do_seed(
}
}
+ // Apply the `set <col> …` overrides (D2): each replaces the named
+ // column's plan with the pinned generator and removes it from the
+ // generic-fill advisory (the user chose its values deliberately,
+ // D13). An override that names a non-fillable column is a friendly
+ // error; a bounded value source (fixed / pick-list) that can't supply
+ // enough distinct values for a single-column-UNIQUE target is refused
+ // up front rather than silently capped (DA finding). FK / type binding
+ // still apply — a value that violates a constraint surfaces through the
+ // existing FK-error guard.
+ apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?;
+
// Uniqueness groups (ADR-0048 D10): value tuples that must stay
// distinct across the batch and against existing rows — the
// user-fillable PK (so junction distinct-combos fall out of this),
@@ -9131,6 +9167,434 @@ fn do_seed(
})
}
+/// Apply the `set <col> …` overrides (ADR-0048 D2) to the per-column
+/// generation plan. Each override replaces the named column's plan and
+/// drops it from the generic-fill advisory (D13 — the user chose those
+/// values). An override naming a column that is not in the fillable set
+/// (unknown, or an auto-generated `serial`) is a friendly error.
+fn apply_seed_overrides(
+ schema: &ReadSchema,
+ overrides: &[SeedOverride],
+ row_count: u64,
+ col_names: &[String],
+ plans: &mut [SeedColPlan],
+ advisory_columns: &mut Vec,
+) -> Result<(), DbError> {
+ for ov in overrides {
+ let Some(idx) = col_names
+ .iter()
+ .position(|c| c.eq_ignore_ascii_case(&ov.column))
+ else {
+ return Err(DbError::Unsupported(format!(
+ "cannot apply `set {col} …`: `{col}` is not a fillable column of this \
+ table (it is unknown, or an auto-generated column).",
+ col = ov.column,
+ )));
+ };
+ let ty = schema
+ .columns
+ .iter()
+ .find(|c| c.name.eq_ignore_ascii_case(&ov.column))
+ .and_then(|c| c.user_type)
+ .unwrap_or(Type::Text);
+ seed_override_capacity_guard(schema, &ov.column, &ov.kind, row_count)?;
+ plans[idx] = seed_override_plan(&ov.kind, ty, &ov.column)?;
+ advisory_columns.retain(|c| !c.eq_ignore_ascii_case(&ov.column));
+ }
+ Ok(())
+}
+
+/// Refuse up front when a **bounded** override (a fixed value or a
+/// pick-list) cannot supply enough *distinct* values to fill a
+/// single-column-UNIQUE target across `row_count` rows — otherwise the
+/// uniqueness machinery would silently cap the run to the achievable
+/// count (DA finding; the ADR left this interaction open and the user
+/// chose a friendly error). Generators and ranges are treated as
+/// effectively unbounded sources here; if one does exhaust, the existing
+/// distinct-combination cap (D14) still applies.
+fn seed_override_capacity_guard(
+ schema: &ReadSchema,
+ column: &str,
+ kind: &SeedOverrideKind,
+ row_count: u64,
+) -> Result<(), DbError> {
+ let distinct = match kind {
+ SeedOverrideKind::Fixed(_) => 1,
+ SeedOverrideKind::PickList(values) => {
+ let mut set = std::collections::HashSet::new();
+ for v in values {
+ set.insert(seed_override_literal(v, column)?);
+ }
+ set.len()
+ }
+ // Unbounded-enough sources — leave to the cap if they exhaust.
+ SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()),
+ };
+ if distinct as u64 >= row_count.max(1) {
+ return Ok(());
+ }
+ // Single-column uniqueness only: a compound UNIQUE / compound PK can
+ // still be satisfied by varying the *other* columns, so a pinned
+ // value there does not force a cap.
+ let single_unique = schema
+ .columns
+ .iter()
+ .find(|c| c.name.eq_ignore_ascii_case(column))
+ .is_some_and(|c| c.unique)
+ || (schema.primary_key.len() == 1
+ && schema.primary_key[0].eq_ignore_ascii_case(column));
+ if single_unique {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \
+ value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \
+ of at least {row_count} values."
+ )));
+ }
+ Ok(())
+}
+
+/// Turn one `set` override into the `SeedColPlan` that produces its
+/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the
+/// literal(s); `Generator` resolves the curated name (unknown → friendly
+/// error); `Range` validates its bounds against the column type *before*
+/// generation (an incompatible bound → friendly error).
+fn seed_override_plan(
+ kind: &SeedOverrideKind,
+ ty: Type,
+ column: &str,
+) -> Result {
+ use crate::seed::Generator;
+ let generator = match kind {
+ SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]),
+ SeedOverrideKind::PickList(vs) => {
+ let lits = vs
+ .iter()
+ .map(|v| seed_override_literal(v, column))
+ .collect::, _>>()?;
+ Generator::PickFrom(lits)
+ }
+ SeedOverrideKind::Generator(name) => {
+ crate::seed::generator_for_name(name).ok_or_else(|| {
+ DbError::Unsupported(format!(
+ "unknown generator `{name}` in `set {column} as {name}`. \
+ Known generators: {}.",
+ crate::seed::KNOWN_GENERATORS.join(", "),
+ ))
+ })?
+ }
+ SeedOverrideKind::Range { low, high } => {
+ let lo = seed_override_literal(low, column)?;
+ let hi = seed_override_literal(high, column)?;
+ if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) {
+ return Err(DbError::Unsupported(format!(
+ "cannot apply `set {column} between …`: {reason}."
+ )));
+ }
+ Generator::Range { low: lo, high: hi }
+ }
+ };
+ Ok(SeedColPlan::Generated { generator, ty })
+}
+
+/// Extract the literal string an override value contributes to a
+/// `PickFrom` / `Range` (re-typed per column by `generate_value`). A
+/// `null` override is refused — seed always fills a value (NULL
+/// injection is out of scope, ADR-0048 Out-of-scope).
+fn seed_override_literal(value: &Value, column: &str) -> Result {
+ match value {
+ Value::Number(s) | Value::Text(s) => Ok(s.clone()),
+ Value::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()),
+ Value::Null => Err(DbError::Unsupported(format!(
+ "`set {column} = null` is not supported — seed always fills a value."
+ ))),
+ }
+}
+
+/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's
+/// **existing** rows (an UPDATE), the natural follow-up to `add column`.
+///
+/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets;
+/// an empty table is a friendly no-op. The `set` clause may only adjust
+/// the column being filled (the rest of the per-column heuristics do not
+/// apply — there is exactly one column). A UNIQUE / identifier target
+/// gets collision-free values (generated distinct from *every* existing
+/// value in the column, so no row-by-row UPDATE can transiently collide);
+/// an FK target samples an existing parent key (D14). The whole fill is
+/// one transaction → one undo step (D15), persisted once (commit-db-last).
+#[allow(clippy::too_many_arguments)]
+fn do_seed_column_fill(
+ conn: &Connection,
+ persistence: Option<&Persistence>,
+ source: Option<&str>,
+ table: &str,
+ column: &str,
+ count: Option,
+ overrides: &[SeedOverride],
+ rng_seed: Option,
+) -> Result {
+ use crate::seed;
+ use rand::RngExt;
+
+ debug!(table = %table, column = %column, "seed column-fill");
+
+ // A row count is meaningless when filling existing rows (D1 form 2).
+ if count.is_some() {
+ return Err(DbError::Unsupported(format!(
+ "`seed {table}.{column}` fills existing rows, so it takes no row count \
+ (drop the number)."
+ )));
+ }
+
+ let schema = read_schema(conn, table)?;
+ let col = schema
+ .columns
+ .iter()
+ .find(|c| c.name.eq_ignore_ascii_case(column))
+ .ok_or_else(|| {
+ DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column."))
+ })?;
+ let canonical_col = col.name.clone();
+ let ty = col.user_type.unwrap_or(Type::Text);
+
+ // Refuse identity / auto-generated / un-generatable targets (D1).
+ if col.primary_key {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: it is part of the primary key — \
+ you don't fill an identity column."
+ )));
+ }
+ if matches!(ty, Type::Serial | Type::ShortId) {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \
+ values automatically.",
+ ty.keyword(),
+ )));
+ }
+ if matches!(ty, Type::Blob) {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values."
+ )));
+ }
+
+ // The `set` clause may only adjust the filled column (user decision).
+ for ov in overrides {
+ if !ov.column.eq_ignore_ascii_case(&canonical_col) {
+ return Err(DbError::Unsupported(format!(
+ "in `seed {table}.{canonical_col}`, `set` can only adjust \
+ `{canonical_col}` (the column being filled), not `{}`.",
+ ov.column,
+ )));
+ }
+ }
+
+ // Existing rowids in a deterministic order (D4 reproducibility).
+ let rowids: Vec = {
+ let sql = format!(
+ "SELECT rowid FROM \"{}\" ORDER BY rowid",
+ table.replace('"', "\"\"")
+ );
+ let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
+ stmt.query_map([], |r| r.get::<_, i64>(0))
+ .map_err(DbError::from_rusqlite)?
+ .collect::, _>>()
+ .map_err(DbError::from_rusqlite)?
+ };
+
+ // Empty table → friendly no-op (D1).
+ if rowids.is_empty() {
+ return Ok(SeedResult {
+ table: table.to_string(),
+ requested: 0,
+ produced: 0,
+ data: DataResult {
+ table_name: table.to_string(),
+ columns: Vec::new(),
+ column_types: Vec::new(),
+ rows: Vec::new(),
+ },
+ advisory_columns: Vec::new(),
+ });
+ }
+
+ // FK target → sample an existing parent key column (D14).
+ let fk_sample: Option> = {
+ let fk = schema.foreign_keys.iter().find(|fk| {
+ fk.child_columns
+ .iter()
+ .any(|c| c.eq_ignore_ascii_case(&canonical_col))
+ });
+ match fk {
+ Some(fk) => {
+ // Single-column position within the FK (column-fill targets
+ // one column; a compound FK filled one column at a time is
+ // unusual but we sample that column's parent values).
+ let pos = fk
+ .child_columns
+ .iter()
+ .position(|c| c.eq_ignore_ascii_case(&canonical_col))
+ .unwrap_or(0);
+ let parent_col = fk.parent_columns.get(pos).cloned().unwrap_or_default();
+ let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?;
+ if tuples.is_empty() {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: parent table `{}` has no \
+ rows to reference. Seed or insert into `{}` first.",
+ fk.parent_table, fk.parent_table,
+ )));
+ }
+ Some(tuples.into_iter().map(|mut t| t.remove(0)).collect())
+ }
+ None => None,
+ }
+ };
+
+ // The value source: an override (if present) else the heuristic.
+ let mut advisory_columns: Vec = Vec::new();
+ let plan: SeedColPlan = if let Some(ov) = overrides
+ .iter()
+ .find(|o| o.column.eq_ignore_ascii_case(&canonical_col))
+ {
+ // Same capacity guard as whole-row: a bounded override that can't
+ // give enough distinct values for a UNIQUE column across the
+ // existing rows is refused up front, not silently capped.
+ seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?;
+ seed_override_plan(&ov.kind, ty, &canonical_col)?
+ } else if fk_sample.is_some() {
+ SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 }
+ } else if matches!(ty, Type::ShortId) {
+ SeedColPlan::ShortId // unreachable (refused above), kept for totality
+ } else {
+ let check_in_values = col
+ .check
+ .as_deref()
+ .and_then(|chk| seed::parse_in_check_values(chk, &canonical_col));
+ let spec = seed::ColumnSpec {
+ name: canonical_col.clone(),
+ ty,
+ not_null: col.notnull,
+ primary_key: col.primary_key,
+ unique: col.unique,
+ is_foreign_key: false,
+ check_in_values,
+ };
+ let generator = seed::choose_generator(table, &spec);
+ if matches!(generator, crate::seed::Generator::Generic)
+ && (seed::is_enum_ish(&canonical_col)
+ || (col.check.is_some() && spec.check_in_values.is_none()))
+ {
+ advisory_columns.push(canonical_col.clone());
+ }
+ SeedColPlan::Generated { generator, ty }
+ };
+
+ // Collision-free generation for UNIQUE / identifier targets: seed the
+ // used-set with EVERY existing value of the column so a generated
+ // value never matches a not-yet-updated row (no transient UNIQUE
+ // violation) nor a value already assigned this batch (ADR-0048 D10).
+ let enforce_unique = col.unique
+ || matches!(
+ &plan,
+ SeedColPlan::Generated {
+ generator: crate::seed::Generator::IdentitySequential,
+ ..
+ }
+ );
+ let mut used: std::collections::HashSet = std::collections::HashSet::new();
+ if enforce_unique {
+ for tuple in
+ sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))?
+ {
+ used.insert(seed_value_list_key(&tuple));
+ }
+ }
+ let seq_base = if matches!(
+ &plan,
+ SeedColPlan::Generated {
+ generator: crate::seed::Generator::IdentitySequential,
+ ..
+ }
+ ) && matches!(ty, Type::Int)
+ {
+ Some(seed_max_int(conn, table, &canonical_col)?)
+ } else {
+ None
+ };
+
+ const MAX_ATTEMPTS: u32 = 200;
+ let mut rng = seed::make_rng(rng_seed);
+ let tx = conn
+ .unchecked_transaction()
+ .map_err(DbError::from_rusqlite)?;
+
+ let update_sql = format!(
+ "UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2",
+ table.replace('"', "\"\""),
+ canonical_col.replace('"', "\"\""),
+ );
+ let mut produced: u64 = 0;
+ for (offset, rowid) in rowids.iter().enumerate() {
+ let mut attempt = 0u32;
+ let value = loop {
+ let v = match &plan {
+ SeedColPlan::ForeignKey { .. } => {
+ let samples = fk_sample.as_ref().expect("fk plan implies samples");
+ samples[rng.random_range(0..samples.len())].clone()
+ }
+ SeedColPlan::ShortId => {
+ Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
+ }
+ SeedColPlan::Generated { generator, ty }
+ if matches!(generator, crate::seed::Generator::IdentitySequential)
+ && matches!(ty, Type::Int) =>
+ {
+ Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string())
+ }
+ SeedColPlan::Generated { generator, ty } => {
+ seed::generate_value(generator, *ty, &mut rng)
+ }
+ };
+ if enforce_unique {
+ let key = seed_value_list_key(std::slice::from_ref(&v));
+ if used.contains(&key) {
+ attempt += 1;
+ if attempt >= MAX_ATTEMPTS {
+ break v; // give up on distinctness; DB may reject
+ }
+ continue;
+ }
+ used.insert(key);
+ }
+ break v;
+ };
+ let bound = impl_value_for(&schema, &canonical_col, &value)?;
+ let params: Vec =
+ vec![bound_to_sqlite_value(&bound), rusqlite::types::Value::Integer(*rowid)];
+ execute_with_fk_enrichment(conn, table, &update_sql, &params)?;
+ produced += 1;
+ let _ = offset;
+ }
+
+ let changes = Changes {
+ schema_dirty: false,
+ rewritten_tables: vec![table.to_string()],
+ ..Changes::default()
+ };
+ finalize_persistence(conn, persistence, source, &changes)?;
+ tx.commit().map_err(DbError::from_rusqlite)?;
+
+ // Preview the first capped rows (D18).
+ let preview: Vec = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect();
+ let data = query_rows_by_rowid(conn, table, &preview)?;
+
+ Ok(SeedResult {
+ table: table.to_string(),
+ requested: produced,
+ produced,
+ data,
+ advisory_columns,
+ })
+}
+
/// Build and execute a single-row `INSERT` — column resolution, value
/// binding, `serial`/`shortid` autofill, and the FK-enriched execute —
/// returning `(rows_affected, new rowid)`.
diff --git a/src/dsl/command.rs b/src/dsl/command.rs
index a1f834b..99304a3 100644
--- a/src/dsl/command.rs
+++ b/src/dsl/command.rs
@@ -402,14 +402,23 @@ pub enum Command {
filter: Option,
limit: Option,
},
- /// Populate a table with generated fake data (ADR-0048, SD1).
- /// `count` defaults to 20 when omitted; `rng_seed` (from a future
- /// `--seed <n>` flag) makes generation reproducible. Phase 1 is
- /// whole-row generation; the `set` override clause and the
- /// `<table>.<column>` column-fill form arrive in later phases.
+ /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
+ /// `count` defaults to 20 when omitted; `rng_seed` (from the
+ /// `--seed <n>` flag) makes generation reproducible.
+ ///
+ /// Phase 2 surfaces (ADR-0048 D1/D2):
+ /// - `target_column` is `Some` for the **column-fill** form
+ /// `seed <table>.<column>` — fill one column across the table's
+ /// *existing* rows (an UPDATE), rather than generating new rows.
+ /// - `overrides` carries the `set <col> …` clause: per-column pins
+ /// that take precedence over the heuristic generator (D2).
Seed {
table: String,
+ /// `Some(col)` → column-fill mode (UPDATE existing rows);
+ /// `None` → whole-row generation (INSERT new rows).
+ target_column: Option,
count: Option,
+ overrides: Vec,
rng_seed: Option,
},
/// Replay a sequence of DSL commands from a file. Each line
@@ -647,6 +656,38 @@ impl RowFilter {
}
}
+/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
+///
+/// The user can pin a column's generated values to a constant, a
+/// pick-list, an explicit named generator, or a range — overriding the
+/// per-column heuristic the executor would otherwise pick. `column` is
+/// the user-typed column name (validated against the table at execution,
+/// like every other column slot).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SeedOverride {
+ pub column: String,
+ pub kind: SeedOverrideKind,
+}
+
+/// The four `set` override forms (ADR-0048 D2).
+///
+/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
+/// dates are quoted text per the D2 amendment); the `Generator` name is
+/// a raw string validated at execution because `src/dsl` cannot depend
+/// on `src/seed` (the curated vocabulary lives there).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SeedOverrideKind {
+ /// `set status = 'pending'` — every row gets the constant.
+ Fixed(Value),
+ /// `set role in ('admin', 'editor')` — uniform pick from the list.
+ PickList(Vec),
+ /// `set work_addr as email` — force the named generator (D9).
+ Generator(String),
+ /// `set price between 10 and 100` — uniform in `[low, high]`;
+ /// numeric or (quoted) date bounds per the destination column type.
+ Range { low: Value, high: Value },
+}
+
/// A complex WHERE expression (ADR-0026 §4).
///
/// Built by `grammar::expr::build_expr` from the flat
diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs
index 9dc1428..b111075 100644
--- a/src/dsl/grammar/data.rs
+++ b/src/dsl/grammar/data.rs
@@ -24,7 +24,9 @@
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.
-use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
+use crate::dsl::command::{
+ Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
+};
use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{
@@ -426,7 +428,9 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
// =================================================================
-// seed — `seed []` (ADR-0048, SD1)
+// seed — `seed <table>[.<col>] [<n>] [set <overrides>] [--seed <n>]`
+// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
+// column-fill)
// =================================================================
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
@@ -444,11 +448,127 @@ const SEED_FLAG_NODES: &[Node] = &[
},
];
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
+
+// --- column-fill target: the optional `.<column>` (ADR-0048 D1
+// form 2) ----------------------------------------------------
+//
+// `seed users.email …` fills one column across existing rows. The
+// table ident stops at `.` (idents are alnum/underscore), so an
+// `Optional(Seq['.', column])` after the table cleanly discriminates:
+// when the next token is not `.`, the `Punct('.')` first-child
+// NoMatches and `walk_optional` skips it; once `.` commits, a missing
+// column propagates as the user mid-typing `seed users.` (driver
+// `walk_optional` semantics). The column resolves against
+// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
+const SEED_TARGET_COLUMN: Node = Node::Ident {
+ source: IdentSource::Columns,
+ role: "seed_target_column",
+ validator: None,
+ highlight_override: None,
+ writes_table: false,
+ writes_column: false,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
+const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
+
+// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
+//
+// Each override pins one column's generation. The column slot
+// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
+// same `current_column_value` dispatch `update … set` uses) narrow to
+// the column's type — so list/range/fixed values get the column's
+// typed slot (quoted text, unquoted number, quoted date) and a
+// type-mismatched literal is flagged. The four tails each start with a
+// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
+// discriminates cleanly (no Optional-first branch).
+
+/// The `set <col>` column slot. Distinct role from `update`'s
+/// `update_set_column` and the expression `expr_column`.
+const SEED_SET_COLUMN: Node = Node::Ident {
+ source: IdentSource::Columns,
+ role: "seed_set_column",
+ validator: None,
+ highlight_override: None,
+ writes_table: false,
+ writes_column: true,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+
+/// `as <generator>` — the curated generator-name vocabulary (D9),
+/// highlighted in the `tok_function` colour. The slot is structural
+/// (any identifier matches); the name is validated at execution and
+/// flagged live by the validity indicator.
+const SEED_GENERATOR: Node = Node::Ident {
+ source: IdentSource::Generators,
+ role: "seed_generator",
+ validator: None,
+ highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
+ writes_table: false,
+ writes_column: false,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+
+/// `= <value>` — a fixed constant for every row.
+const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
+/// `in ( <value> [, <value>]* )` — uniform pick from the list.
+const SEED_OV_IN_VALUES: Node = Node::Repeated {
+ inner: &PER_COLUMN_VALUE,
+ separator: Some(&Node::Punct(',')),
+ min: 1,
+};
+const SEED_OV_IN_NODES: &[Node] = &[
+ Node::Word(Word::keyword("in")),
+ Node::Punct('('),
+ SEED_OV_IN_VALUES,
+ Node::Punct(')'),
+];
+/// `between <low> and <high>` — uniform in the (typed) range.
+const SEED_OV_BETWEEN_NODES: &[Node] = &[
+ Node::Word(Word::keyword("between")),
+ PER_COLUMN_VALUE,
+ Node::Word(Word::keyword("and")),
+ PER_COLUMN_VALUE,
+];
+/// `as <generator>` — force a named generator.
+const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
+
+const SEED_OV_TAIL_CHOICES: &[Node] = &[
+ Node::Seq(SEED_OV_FIXED_NODES),
+ Node::Seq(SEED_OV_IN_NODES),
+ Node::Seq(SEED_OV_BETWEEN_NODES),
+ Node::Seq(SEED_OV_AS_NODES),
+];
+const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
+
+const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
+const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
+const SEED_OVERRIDES: Node = Node::Repeated {
+ inner: &SEED_OVERRIDE,
+ separator: Some(&Node::Punct(',')),
+ min: 1,
+};
+const SEED_SET_CLAUSE_NODES: &[Node] =
+ &[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
+const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
+
const SEED_NODES: &[Node] = &[
- // `writes_table` so a future `set <col>=…` clause's column slots
- // can resolve against this table.
+ // `writes_table` so the `.column` target, the `set <col>=…`
+ // clause's column slots, and the typed value slots all resolve
+ // against this table.
TABLE_NAME_WRITES,
+ SEED_DOT_COLUMN,
Node::Optional(&SEED_COUNT),
+ Node::Optional(&SEED_SET_CLAUSE),
Node::Optional(&SEED_FLAG),
];
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
@@ -736,16 +856,29 @@ fn build_show_limit(path: &MatchedPath) -> Result