feat(seed): set override clause + column-fill (ADR-0048 Phase 2)

Build the two SD2 surfaces Phase 1 deferred:

- `set` override clause (D2): comma-separated per-column pins —
  `= 'v'` (fixed), `in ('a','b')` (pick-list), `as <generator>`
  (named), `between x and y` (range; numeric and quoted dates).
  Type-aware via the typed `current_column_value` slot; an override
  drops its column from the generic-fill advisory (D13). Folded from
  the flat matched path (build_seed_overrides) and applied to the
  per-column plan (apply_seed_overrides).
- `<table>.<column>` column-fill (D1 form 2): an UPDATE over existing
  rows. Refuses PK/autogen targets, empty-table no-op, FK-samples the
  parent, collision-free for UNIQUE/identifier targets, one undo step;
  `set` may only adjust the filled column.

Supporting work: KNOWN_GENERATORS vocabulary + generator_for_name
(src/seed/vocabulary.rs, D9); a range Generator + range_bounds_reason;
IdentSource::Generators and HighlightClass::Function; completion of the
generator vocabulary after `as` and the set/.col column slots; the
typing-time validity indicator for an unknown generator; help,
parse-error pedagogy rows, and the D13 advisory's Phase-2/3 wording.

A bounded override (fixed value / too-short pick-list) on a
single-column-UNIQUE target is a friendly error rather than a silent
uniqueness cap (post-implementation /runda finding, user-chosen).

Dates in the range form are quoted (no date-literal token exists);
ADR-0048 D2 amended accordingly. Both modes (D5); reproducible (D4).
This commit is contained in:
claude@clouddev1
2026-06-12 09:44:30 +00:00
parent 78c38e8b33
commit a12facc784
20 changed files with 1913 additions and 65 deletions
+10 -1
View File
@@ -2097,9 +2097,14 @@ impl App {
self.note_system(line); self.note_system(line);
} }
if !result.advisory_columns.is_empty() { if !result.advisory_columns.is_empty() {
// `column` (the first advised column) seeds the concrete
// repair examples (D13 Phase 2/3 wording); `columns` lists
// them all.
self.push_category_three_prose(crate::t!( self.push_category_three_prose(crate::t!(
"seed.advisory_generic", "seed.advisory_generic",
columns = result.advisory_columns.join(", ") columns = result.advisory_columns.join(", "),
column = result.advisory_columns[0],
table = result.table
)); ));
} }
} }
@@ -6268,7 +6273,9 @@ mod tests {
app.update(AppEvent::DslSeedSucceeded { app.update(AppEvent::DslSeedSucceeded {
command: Command::Seed { command: Command::Seed {
table: "users".to_string(), table: "users".to_string(),
target_column: None,
count: Some(20), count: Some(20),
overrides: Vec::new(),
rng_seed: None, rng_seed: None,
}, },
result: crate::db::SeedResult { result: crate::db::SeedResult {
@@ -6304,7 +6311,9 @@ mod tests {
app.update(AppEvent::DslSeedSucceeded { app.update(AppEvent::DslSeedSucceeded {
command: Command::Seed { command: Command::Seed {
table: "J".to_string(), table: "J".to_string(),
target_column: None,
count: Some(10), count: Some(10),
overrides: Vec::new(),
rng_seed: None, rng_seed: None,
}, },
result: crate::db::SeedResult { result: crate::db::SeedResult {
+83 -1
View File
@@ -120,7 +120,13 @@ impl SchemaCache {
IdentSource::Columns => &self.columns, IdentSource::Columns => &self.columns,
IdentSource::Relationships => &self.relationships, IdentSource::Relationships => &self.relationships,
IdentSource::Indexes => &self.indexes, IdentSource::Indexes => &self.indexes,
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], // Curated / invented sources never come from the schema
// cache — `Generators` candidates are supplied separately
// from the `seed` vocabulary (ADR-0048 D9).
IdentSource::NewName
| IdentSource::Types
| IdentSource::Generators
| IdentSource::Free => &[],
} }
} }
@@ -709,6 +715,22 @@ pub fn candidates_at_cursor_with_in_mode(
} else { } else {
Vec::new() Vec::new()
}; };
// Source 1.9: fake-data generator names (ADR-0048 D9). At the
// `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
// curated vocabulary is offered so a learner can discover `email` /
// `product` / … by Tab. Same `Function` kind / `tok_function` colour
// as SQL functions (no new theme colour — ADR-0048 §Grammar).
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
functions.extend(
crate::seed::KNOWN_GENERATORS
.iter()
.filter(|g| matches_prefix(g))
.map(|g| (*g).to_string()),
);
}
// Source 2: schema identifiers — accumulated across every // Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation. // matching schema-listable `Ident { source }` expectation.
@@ -1200,6 +1222,24 @@ pub fn invalid_ident_at_cursor_in_mode(
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) { if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None; return None;
} }
// ADR-0048 D9: the `seed … set <col> as <gen>` slot is a curated
// vocabulary (`IdentSource::Generators`), not a schema source, so the
// schema-column check below would never see it. A partial that
// prefix-matches a known generator is an in-progress name; anything
// else is an unknown generator → flag it `[ERR]` while typing.
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
if crate::seed::is_known_generator_prefix(partial) {
return None;
}
return Some(InvalidIdent {
range: (start, cursor),
found: partial.to_string(),
source: IdentSource::Generators,
});
}
// Find every schema-listable source in the expected list. // Find every schema-listable source in the expected list.
let sources: Vec<IdentSource> = expected let sources: Vec<IdentSource> = expected
.iter() .iter()
@@ -2606,6 +2646,48 @@ mod tests {
); );
} }
#[test]
fn invalid_ident_fires_for_unknown_generator_after_as() {
    // ADR-0048 D9: the `set <col> as <gen>` slot takes a name from the
    // curated generator vocabulary, so a name outside it must be flagged
    // `[ERR]` while the user is still typing.
    let schema = two_table_schema();
    let typed = "seed a set name as bogus";
    let Some(flagged) = invalid_ident_at_cursor(typed, typed.len(), &schema) else {
        panic!("unknown generator must flag");
    };
    assert_eq!(flagged.found, "bogus");
    assert_eq!(flagged.source, IdentSource::Generators);
}
#[test]
fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
    // ADR-0048: both the `set <col>` slot and the `<table>.<col>`
    // column-fill slot are `IdentSource::Columns`, so an unknown column
    // is flagged there exactly like at any other column slot.
    // The fixture schema has table `a` with columns `id` and `name`.
    let schema = two_table_schema();

    // Cursor sits at the end of the input in both cases.
    let set_clause = "seed a set xyz";
    let in_set = invalid_ident_at_cursor(set_clause, set_clause.len(), &schema)
        .expect("unknown column in `set` must flag");
    assert_eq!(in_set.found, "xyz");
    assert_eq!(in_set.source, IdentSource::Columns);

    let column_fill = "seed a.xyz";
    let in_fill = invalid_ident_at_cursor(column_fill, column_fill.len(), &schema)
        .expect("unknown column in column-fill must flag");
    assert_eq!(in_fill.source, IdentSource::Columns);
}
#[test]
fn invalid_ident_does_not_fire_for_generator_prefix() {
    // Anything that prefix-matches a known generator — including the
    // complete name itself — is a name in progress, not a typo.
    let schema = two_table_schema();
    let cases = [
        (
            "seed a set name as ema",
            "`ema` prefixes `email` — must not flag",
        ),
        (
            "seed a set name as email",
            "`email` is a known generator — must not flag",
        ),
    ];
    for (typed, why) in cases {
        // Cursor at end of input.
        assert!(
            invalid_ident_at_cursor(typed, typed.len(), &schema).is_none(),
            "{why}",
        );
    }
}
fn two_table_schema() -> SchemaCache { fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type; use crate::dsl::types::Type;
let mut s = SchemaCache::default(); let mut s = SchemaCache::default();
+478 -14
View File
@@ -33,7 +33,8 @@ use tracing::{debug, info, warn};
use crate::dsl::action::ReferentialAction; use crate::dsl::action::ReferentialAction;
use crate::dsl::command::{ use crate::dsl::command::{
ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector, ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector,
Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey, Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind,
SqlForeignKey,
}; };
use crate::dsl::ColumnSpec; use crate::dsl::ColumnSpec;
use crate::dsl::shortid; use crate::dsl::shortid;
@@ -723,7 +724,9 @@ enum Request {
/// snapshot wraps the whole seed via `snapshot_then`. /// snapshot wraps the whole seed via `snapshot_then`.
Seed { Seed {
table: String, table: String,
target_column: Option<String>,
count: Option<u64>, count: Option<u64>,
overrides: Vec<SeedOverride>,
rng_seed: Option<u64>, rng_seed: Option<u64>,
source: Option<String>, source: Option<String>,
reply: oneshot::Sender<Result<SeedResult, DbError>>, reply: oneshot::Sender<Result<SeedResult, DbError>>,
@@ -1517,18 +1520,22 @@ impl Database {
recv.await.map_err(|_| DbError::WorkerGone)? recv.await.map_err(|_| DbError::WorkerGone)?
} }
/// Populate a table with generated fake data (ADR-0048, SD1). /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
pub async fn seed( pub async fn seed(
&self, &self,
table: String, table: String,
target_column: Option<String>,
count: Option<u64>, count: Option<u64>,
overrides: Vec<SeedOverride>,
rng_seed: Option<u64>, rng_seed: Option<u64>,
source: Option<String>, source: Option<String>,
) -> Result<SeedResult, DbError> { ) -> Result<SeedResult, DbError> {
let (reply, recv) = oneshot::channel(); let (reply, recv) = oneshot::channel();
self.send(Request::Seed { self.send(Request::Seed {
table, table,
target_column,
count, count,
overrides,
rng_seed, rng_seed,
source, source,
reply, reply,
@@ -2694,7 +2701,9 @@ fn handle_request(
} }
Request::Seed { Request::Seed {
table, table,
target_column,
count, count,
overrides,
rng_seed, rng_seed,
source, source,
reply, reply,
@@ -2706,7 +2715,9 @@ fn handle_request(
persistence, persistence,
source.as_deref(), source.as_deref(),
&table, &table,
target_column.as_deref(),
count, count,
&overrides,
rng_seed, rng_seed,
)); ));
} }
@@ -2938,7 +2949,10 @@ fn do_list_names_for(
} }
Ok(out) Ok(out)
} }
IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()), IdentSource::NewName
| IdentSource::Types
| IdentSource::Generators
| IdentSource::Free => Ok(Vec::new()),
} }
} }
@@ -8808,14 +8822,13 @@ fn sample_parent_key_tuples(
Ok(tuples) Ok(tuples)
} }
/// Populate a table with generated fake data (ADR-0048, SD1). /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
/// ///
/// **Phase 1.** Generates whole rows and inserts them one at a time /// Generates whole rows and inserts them in one transaction, reusing the
/// through [`do_insert`] — reusing all the existing per-value /// per-value validation, autogen autofill, FK-error enrichment and
/// validation, autogen autofill, FK-error enrichment and persistence /// persistence machinery via [`insert_one_row`]. The whole seed is a
/// machinery. The whole seed is a single undo step (the worker wraps /// single undo step (the worker wraps the call in one `snapshot_then`)
/// the call in one `snapshot_then`) and writes exactly one /// and writes exactly one `history.log` line.
/// `history.log` line (only the first row carries the `source`).
/// ///
/// Foreign-key columns are filled by sampling existing parent rows /// Foreign-key columns are filled by sampling existing parent rows
/// (D14); a compound FK reads all its child columns from one sampled /// (D14); a compound FK reads all its child columns from one sampled
@@ -8823,16 +8836,20 @@ fn sample_parent_key_tuples(
/// `NOT NULL blob` column (which seed cannot generate) is refused by /// `NOT NULL blob` column (which seed cannot generate) is refused by
/// the block guard (D1); a nullable blob is omitted (→ NULL). /// the block guard (D1); a nullable blob is omitted (→ NULL).
/// ///
/// Deferred: identifier/constraint uniqueness incl. junction /// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to
/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the /// [`do_seed_column_fill`] (fill one column across existing rows, D1
/// efficient single-transaction multi-row path, the capped auto-show /// form 2). `overrides` carries the `set <col> …` clause (D2): per-column
/// preview (D18), and the enum/CHECK advisory (D12/D13). /// pins that replace the heuristic generator and drop the column from the
/// generic-fill advisory (D13).
#[allow(clippy::too_many_arguments)]
fn do_seed( fn do_seed(
conn: &Connection, conn: &Connection,
persistence: Option<&Persistence>, persistence: Option<&Persistence>,
source: Option<&str>, source: Option<&str>,
table: &str, table: &str,
target_column: Option<&str>,
count: Option<u64>, count: Option<u64>,
overrides: &[SeedOverride],
rng_seed: Option<u64>, rng_seed: Option<u64>,
) -> Result<SeedResult, DbError> { ) -> Result<SeedResult, DbError> {
use crate::seed; use crate::seed;
@@ -8840,6 +8857,14 @@ fn do_seed(
let canonical_table = require_canonical_table(conn, table)?; let canonical_table = require_canonical_table(conn, table)?;
let table = canonical_table.as_str(); let table = canonical_table.as_str();
// Column-fill (D1 form 2) is a distinct UPDATE path.
if let Some(col) = target_column {
return do_seed_column_fill(
conn, persistence, source, table, col, count, overrides, rng_seed,
);
}
let n = count.unwrap_or(DEFAULT_SEED_COUNT); let n = count.unwrap_or(DEFAULT_SEED_COUNT);
debug!(table = %table, count = n, "seed"); debug!(table = %table, count = n, "seed");
if n > MAX_SEED_COUNT { if n > MAX_SEED_COUNT {
@@ -8937,6 +8962,17 @@ fn do_seed(
} }
} }
// Apply the `set <col> …` overrides (D2): each replaces the named
// column's plan with the pinned generator and removes it from the
// generic-fill advisory (the user chose its values deliberately,
// D13). An override that names a non-fillable column is a friendly
// error; a bounded value source (fixed / pick-list) that can't supply
// enough distinct values for a single-column-UNIQUE target is refused
// up front rather than silently capped (DA finding). FK / type binding
// still apply — a value that violates a constraint surfaces through the
// existing FK-error guard.
apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?;
// Uniqueness groups (ADR-0048 D10): value tuples that must stay // Uniqueness groups (ADR-0048 D10): value tuples that must stay
// distinct across the batch and against existing rows — the // distinct across the batch and against existing rows — the
// user-fillable PK (so junction distinct-combos fall out of this), // user-fillable PK (so junction distinct-combos fall out of this),
@@ -9131,6 +9167,434 @@ fn do_seed(
}) })
} }
/// Fold the `set <col> …` clause (ADR-0048 D2) into the per-column
/// generation plan.
///
/// For every override, in order:
/// 1. resolve the column (case-insensitively) to its slot in `col_names`
///    — a name outside the fillable set (unknown, or auto-generated) is a
///    friendly error;
/// 2. run the UNIQUE capacity guard for `row_count` rows;
/// 3. replace that slot in `plans` with the pinned generator;
/// 4. remove the column from `advisory_columns` (D13 — the user chose
///    those values deliberately, so no generic-fill advice is due).
///
/// A column with no declared user type is treated as `Type::Text`.
///
/// # Errors
/// `DbError::Unsupported` for a non-fillable column, or whatever the
/// capacity guard / plan builder reports for the override itself.
fn apply_seed_overrides(
    schema: &ReadSchema,
    overrides: &[SeedOverride],
    row_count: u64,
    col_names: &[String],
    plans: &mut [SeedColPlan],
    advisory_columns: &mut Vec<String>,
) -> Result<(), DbError> {
    for pin in overrides {
        let name = pin.column.as_str();

        let slot = col_names
            .iter()
            .position(|candidate| candidate.eq_ignore_ascii_case(name))
            .ok_or_else(|| {
                DbError::Unsupported(format!(
                    "cannot apply `set {col} …`: `{col}` is not a fillable column of this \
                     table (it is unknown, or an auto-generated column).",
                    col = pin.column,
                ))
            })?;

        // Destination type drives how the literal(s) are re-typed later.
        let column_type = match schema
            .columns
            .iter()
            .find(|c| c.name.eq_ignore_ascii_case(name))
        {
            Some(c) => c.user_type.unwrap_or(Type::Text),
            None => Type::Text,
        };

        seed_override_capacity_guard(schema, name, &pin.kind, row_count)?;
        plans[slot] = seed_override_plan(&pin.kind, column_type, name)?;
        advisory_columns.retain(|advised| !advised.eq_ignore_ascii_case(name));
    }
    Ok(())
}
/// Up-front refusal for a **bounded** override that cannot fill a
/// single-column-UNIQUE target.
///
/// A fixed value offers one distinct value; a pick-list offers as many as
/// it has distinct literals. If that is fewer than `row_count` (treated
/// as at least 1) and `column` is UNIQUE on its own — declared `unique`,
/// or the sole primary-key column — the request is rejected with a
/// friendly error instead of letting the uniqueness machinery silently
/// cap the run (DA finding; user-chosen behaviour).
///
/// Named generators and ranges are treated as effectively unbounded and
/// always pass; if one does exhaust, the existing distinct-combination
/// cap (D14) still applies. Compound UNIQUE / compound PK targets also
/// pass, because the *other* columns can vary.
///
/// # Errors
/// `DbError::Unsupported` when the override is too small, or when a
/// pick-list entry is not a usable literal (e.g. `null`).
fn seed_override_capacity_guard(
    schema: &ReadSchema,
    column: &str,
    kind: &SeedOverrideKind,
    row_count: u64,
) -> Result<(), DbError> {
    let distinct = match kind {
        // Unbounded-enough sources — nothing to check here.
        SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()),
        SeedOverrideKind::Fixed(_) => 1,
        // Count distinct literals; the first unusable entry aborts.
        SeedOverrideKind::PickList(values) => values
            .iter()
            .map(|v| seed_override_literal(v, column))
            .collect::<Result<std::collections::HashSet<_>, _>>()?
            .len(),
    };

    let needed = row_count.max(1);
    if distinct as u64 >= needed {
        return Ok(());
    }

    let declared_unique = match schema
        .columns
        .iter()
        .find(|c| c.name.eq_ignore_ascii_case(column))
    {
        Some(c) => c.unique,
        None => false,
    };
    let sole_primary_key =
        schema.primary_key.len() == 1 && schema.primary_key[0].eq_ignore_ascii_case(column);

    if !(declared_unique || sole_primary_key) {
        return Ok(());
    }

    Err(DbError::Unsupported(format!(
        "cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \
         value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \
         of at least {row_count} values."
    )))
}
/// Turn one `set` override into the `SeedColPlan` that produces its
/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the
/// literal(s); `Generator` resolves the curated name (unknown → friendly
/// error); `Range` validates its bounds against the column type *before*
/// generation (an incompatible bound → friendly error).
fn seed_override_plan(
kind: &SeedOverrideKind,
ty: Type,
column: &str,
) -> Result<SeedColPlan, DbError> {
use crate::seed::Generator;
let generator = match kind {
SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]),
SeedOverrideKind::PickList(vs) => {
let lits = vs
.iter()
.map(|v| seed_override_literal(v, column))
.collect::<Result<Vec<_>, _>>()?;
Generator::PickFrom(lits)
}
SeedOverrideKind::Generator(name) => {
crate::seed::generator_for_name(name).ok_or_else(|| {
DbError::Unsupported(format!(
"unknown generator `{name}` in `set {column} as {name}`. \
Known generators: {}.",
crate::seed::KNOWN_GENERATORS.join(", "),
))
})?
}
SeedOverrideKind::Range { low, high } => {
let lo = seed_override_literal(low, column)?;
let hi = seed_override_literal(high, column)?;
if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) {
return Err(DbError::Unsupported(format!(
"cannot apply `set {column} between …`: {reason}."
)));
}
Generator::Range { low: lo, high: hi }
}
};
Ok(SeedColPlan::Generated { generator, ty })
}
/// Render one override value as the literal string fed to a `PickFrom` /
/// `Range` generator (it is re-typed per column by `generate_value`).
///
/// Numbers and text pass through verbatim; booleans become `"true"` /
/// `"false"`. `null` is refused: seed always fills a value (NULL
/// injection is out of scope, ADR-0048 Out-of-scope).
///
/// # Errors
/// `DbError::Unsupported` when `value` is `Value::Null`.
fn seed_override_literal(value: &Value, column: &str) -> Result<String, DbError> {
    let text = match value {
        Value::Null => {
            return Err(DbError::Unsupported(format!(
                "`set {column} = null` is not supported — seed always fills a value."
            )));
        }
        Value::Bool(true) => "true".to_string(),
        Value::Bool(false) => "false".to_string(),
        Value::Number(raw) | Value::Text(raw) => raw.clone(),
    };
    Ok(text)
}
/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's
/// **existing** rows (an UPDATE), the natural follow-up to `add column`.
///
/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets;
/// an empty table is a friendly no-op. The `set` clause may only adjust
/// the column being filled (the rest of the per-column heuristics do not
/// apply — there is exactly one column). A UNIQUE / identifier target
/// gets collision-free values (generated distinct from *every* existing
/// value in the column, so no row-by-row UPDATE can transiently collide);
/// an FK target samples an existing parent key (D14). The whole fill is
/// one transaction → one undo step (D15), persisted once (commit-db-last).
///
/// `table` is expected to be the canonical table name (the caller
/// resolves it); `column` is matched case-insensitively and reported back
/// in its canonical spelling.
///
/// # Errors
/// `DbError::Unsupported` when a row count is given, the column is
/// unknown or not fillable, `set` names another column, the FK parent has
/// no rows (or names no matching parent column), or the override itself
/// is rejected; database errors from the reads / UPDATEs are propagated.
/// Any error before the final commit drops the transaction, so no partial
/// fill is left behind.
#[allow(clippy::too_many_arguments)]
fn do_seed_column_fill(
    conn: &Connection,
    persistence: Option<&Persistence>,
    source: Option<&str>,
    table: &str,
    column: &str,
    count: Option<u64>,
    overrides: &[SeedOverride],
    rng_seed: Option<u64>,
) -> Result<SeedResult, DbError> {
    use crate::seed;
    use rand::RngExt;

    debug!(table = %table, column = %column, "seed column-fill");

    // A row count is meaningless when filling existing rows (D1 form 2).
    if count.is_some() {
        return Err(DbError::Unsupported(format!(
            "`seed {table}.{column}` fills existing rows, so it takes no row count \
             (drop the number)."
        )));
    }

    let schema = read_schema(conn, table)?;
    let col = schema
        .columns
        .iter()
        .find(|c| c.name.eq_ignore_ascii_case(column))
        .ok_or_else(|| {
            DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column."))
        })?;
    let canonical_col = col.name.clone();
    let ty = col.user_type.unwrap_or(Type::Text);

    // Refuse identity / auto-generated / un-generatable targets (D1).
    if col.primary_key {
        return Err(DbError::Unsupported(format!(
            "cannot fill `{table}.{canonical_col}`: it is part of the primary key — \
             you don't fill an identity column."
        )));
    }
    if matches!(ty, Type::Serial | Type::ShortId) {
        return Err(DbError::Unsupported(format!(
            "cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \
             values automatically.",
            ty.keyword(),
        )));
    }
    if matches!(ty, Type::Blob) {
        return Err(DbError::Unsupported(format!(
            "cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values."
        )));
    }

    // The `set` clause may only adjust the filled column (user decision).
    for ov in overrides {
        if !ov.column.eq_ignore_ascii_case(&canonical_col) {
            return Err(DbError::Unsupported(format!(
                "in `seed {table}.{canonical_col}`, `set` can only adjust \
                 `{canonical_col}` (the column being filled), not `{}`.",
                ov.column,
            )));
        }
    }

    // Existing rowids in a deterministic order (D4 reproducibility).
    let rowids: Vec<i64> = {
        let sql = format!(
            "SELECT rowid FROM \"{}\" ORDER BY rowid",
            table.replace('"', "\"\"")
        );
        let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
        stmt.query_map([], |r| r.get::<_, i64>(0))
            .map_err(DbError::from_rusqlite)?
            .collect::<Result<Vec<_>, _>>()
            .map_err(DbError::from_rusqlite)?
    };

    // Empty table → friendly no-op (D1).
    if rowids.is_empty() {
        return Ok(SeedResult {
            table: table.to_string(),
            requested: 0,
            produced: 0,
            data: DataResult {
                table_name: table.to_string(),
                columns: Vec::new(),
                column_types: Vec::new(),
                rows: Vec::new(),
            },
            advisory_columns: Vec::new(),
        });
    }

    // FK target → sample an existing parent key column (D14). Column-fill
    // targets one column; a compound FK filled one column at a time is
    // unusual, but we sample the parent column at the same position as the
    // filled child column.
    let fk_target = schema.foreign_keys.iter().find_map(|fk| {
        fk.child_columns
            .iter()
            .position(|c| c.eq_ignore_ascii_case(&canonical_col))
            .map(|pos| (fk, pos))
    });
    let fk_sample: Option<Vec<Value>> = match fk_target {
        Some((fk, pos)) => {
            // Never fall back to an empty column name: sampling `""` would
            // at best fail obscurely and at worst read garbage values.
            let parent_col = fk.parent_columns.get(pos).cloned().ok_or_else(|| {
                DbError::Unsupported(format!(
                    "cannot fill `{table}.{canonical_col}`: its foreign key to `{}` does \
                     not name a matching parent column.",
                    fk.parent_table,
                ))
            })?;
            let samples: Vec<Value> =
                sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?
                    .into_iter()
                    .filter_map(|tuple| tuple.into_iter().next())
                    .collect();
            if samples.is_empty() {
                return Err(DbError::Unsupported(format!(
                    "cannot fill `{table}.{canonical_col}`: parent table `{}` has no \
                     rows to reference. Seed or insert into `{}` first.",
                    fk.parent_table, fk.parent_table,
                )));
            }
            Some(samples)
        }
        None => None,
    };

    // The value source: an override (if present) else the heuristic.
    let mut advisory_columns: Vec<String> = Vec::new();
    let plan: SeedColPlan = if let Some(ov) = overrides
        .iter()
        .find(|o| o.column.eq_ignore_ascii_case(&canonical_col))
    {
        // Same capacity guard as whole-row: a bounded override that can't
        // give enough distinct values for a UNIQUE column across the
        // existing rows is refused up front, not silently capped.
        seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?;
        seed_override_plan(&ov.kind, ty, &canonical_col)?
    } else if fk_sample.is_some() {
        // `fk_idx` / `pos` are placeholders: this path reads `fk_sample`
        // directly and never indexes the FK list through the plan.
        SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 }
    } else {
        // (`shortid` targets were refused above, so only the generic
        // heuristic remains.)
        let check_in_values = col
            .check
            .as_deref()
            .and_then(|chk| seed::parse_in_check_values(chk, &canonical_col));
        let spec = seed::ColumnSpec {
            name: canonical_col.clone(),
            ty,
            not_null: col.notnull,
            primary_key: col.primary_key,
            unique: col.unique,
            is_foreign_key: false,
            check_in_values,
        };
        let generator = seed::choose_generator(table, &spec);
        // Advise when a generic fill lands on an enum-looking column or on
        // a CHECK we could not derive values from (D13).
        if matches!(generator, crate::seed::Generator::Generic)
            && (seed::is_enum_ish(&canonical_col)
                || (col.check.is_some() && spec.check_in_values.is_none()))
        {
            advisory_columns.push(canonical_col.clone());
        }
        SeedColPlan::Generated { generator, ty }
    };

    let identity_sequential = matches!(
        &plan,
        SeedColPlan::Generated {
            generator: crate::seed::Generator::IdentitySequential,
            ..
        }
    );

    // Collision-free generation for UNIQUE / identifier targets: seed the
    // used-set with EVERY existing value of the column so a generated
    // value never matches a not-yet-updated row (no transient UNIQUE
    // violation) nor a value already assigned this batch (ADR-0048 D10).
    let enforce_unique = col.unique || identity_sequential;
    let mut used: std::collections::HashSet<String> = std::collections::HashSet::new();
    if enforce_unique {
        for tuple in sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))? {
            used.insert(seed_value_list_key(&tuple));
        }
    }

    // Sequential integer identifiers continue after the column's current
    // maximum.
    let seq_base = if identity_sequential && matches!(ty, Type::Int) {
        Some(seed_max_int(conn, table, &canonical_col)?)
    } else {
        None
    };

    const MAX_ATTEMPTS: u32 = 200;
    let mut rng = seed::make_rng(rng_seed);

    // Dropping `tx` on an early `?` return rolls the whole fill back.
    let tx = conn
        .unchecked_transaction()
        .map_err(DbError::from_rusqlite)?;
    let update_sql = format!(
        "UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2",
        table.replace('"', "\"\""),
        canonical_col.replace('"', "\"\""),
    );

    let mut produced: u64 = 0;
    for rowid in &rowids {
        let mut attempt = 0u32;
        let value = loop {
            let v = match &plan {
                SeedColPlan::ForeignKey { .. } => {
                    let samples = fk_sample.as_ref().expect("fk plan implies samples");
                    samples[rng.random_range(0..samples.len())].clone()
                }
                // Not reachable from this function (no plan above is
                // `ShortId`); present so the match stays exhaustive.
                SeedColPlan::ShortId => {
                    Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
                }
                SeedColPlan::Generated { generator, ty }
                    if matches!(generator, crate::seed::Generator::IdentitySequential)
                        && matches!(ty, Type::Int) =>
                {
                    Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string())
                }
                SeedColPlan::Generated { generator, ty } => {
                    seed::generate_value(generator, *ty, &mut rng)
                }
            };
            if enforce_unique {
                let key = seed_value_list_key(std::slice::from_ref(&v));
                if used.contains(&key) {
                    attempt += 1;
                    if attempt >= MAX_ATTEMPTS {
                        break v; // give up on distinctness; DB may reject
                    }
                    continue;
                }
                used.insert(key);
            }
            break v;
        };
        let bound = impl_value_for(&schema, &canonical_col, &value)?;
        let params: Vec<rusqlite::types::Value> = vec![
            bound_to_sqlite_value(&bound),
            rusqlite::types::Value::Integer(*rowid),
        ];
        execute_with_fk_enrichment(conn, table, &update_sql, &params)?;
        produced += 1;
    }

    let changes = Changes {
        schema_dirty: false,
        rewritten_tables: vec![table.to_string()],
        ..Changes::default()
    };
    // Persist first, commit the database last (commit-db-last).
    finalize_persistence(conn, persistence, source, &changes)?;
    tx.commit().map_err(DbError::from_rusqlite)?;

    // Preview the first capped rows (D18).
    let preview: Vec<i64> = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect();
    let data = query_rows_by_rowid(conn, table, &preview)?;

    Ok(SeedResult {
        table: table.to_string(),
        requested: produced,
        produced,
        data,
        advisory_columns,
    })
}
/// Build and execute a single-row `INSERT` — column resolution, value /// Build and execute a single-row `INSERT` — column resolution, value
/// binding, `serial`/`shortid` autofill, and the FK-enriched execute — /// binding, `serial`/`shortid` autofill, and the FK-enriched execute —
/// returning `(rows_affected, new rowid)`. /// returning `(rows_affected, new rowid)`.
+46 -5
View File
@@ -402,14 +402,23 @@ pub enum Command {
filter: Option<Expr>, filter: Option<Expr>,
limit: Option<u64>, limit: Option<u64>,
}, },
/// Populate a table with generated fake data (ADR-0048, SD1). /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
/// `count` defaults to 20 when omitted; `rng_seed` (from a future /// `count` defaults to 20 when omitted; `rng_seed` (from the
/// `--seed <n>` flag) makes generation reproducible. Phase 1 is /// `--seed <n>` flag) makes generation reproducible.
/// whole-row generation; the `set` override clause and the ///
/// `<table>.<column>` column-fill form arrive in later phases. /// Phase 2 surfaces (ADR-0048 D1/D2):
/// - `target_column` is `Some` for the **column-fill** form
/// `seed <table>.<column>` — fill one column across the table's
/// *existing* rows (an UPDATE), rather than generating new rows.
/// - `overrides` carries the `set <col> …` clause: per-column pins
/// that take precedence over the heuristic generator (D2).
Seed { Seed {
table: String, table: String,
/// `Some(col)` → column-fill mode (UPDATE existing rows);
/// `None` → whole-row generation (INSERT new rows).
target_column: Option<String>,
count: Option<u64>, count: Option<u64>,
overrides: Vec<SeedOverride>,
rng_seed: Option<u64>, rng_seed: Option<u64>,
}, },
/// Replay a sequence of DSL commands from a file. Each line /// Replay a sequence of DSL commands from a file. Each line
@@ -647,6 +656,38 @@ impl RowFilter {
} }
} }
/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
///
/// The user can pin a column's generated values to a constant, a
/// pick-list, an explicit named generator, or a range — overriding the
/// per-column heuristic the executor would otherwise pick. `column` is
/// the user-typed column name (validated against the table at execution,
/// like every other column slot).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SeedOverride {
/// Column name exactly as the user typed it; not yet resolved to the
/// table's canonical spelling.
pub column: String,
/// Which of the four override forms was written, with its value(s).
pub kind: SeedOverrideKind,
}
/// The four `set` override forms (ADR-0048 D2).
///
/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
/// dates are quoted text per the D2 amendment); the `Generator` name is
/// a raw string validated at execution because `src/dsl` cannot depend
/// on `src/seed` (the curated vocabulary lives there).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeedOverrideKind {
/// `set status = 'pending'` — every row gets the constant.
Fixed(Value),
/// `set role in ('admin', 'editor')` — uniform pick from the list.
PickList(Vec<Value>),
/// `set work_addr as email` — force the named generator (D9). The
/// name is kept unvalidated here; an unknown name is reported when the
/// command executes.
Generator(String),
/// `set price between 10 and 100` — uniform in `[low, high]`;
/// numeric or (quoted) date bounds per the destination column type.
Range { low: Value, high: Value },
}
/// A complex WHERE expression (ADR-0026 §4). /// A complex WHERE expression (ADR-0026 §4).
/// ///
/// Built by `grammar::expr::build_expr` from the flat /// Built by `grammar::expr::build_expr` from the flat
+276 -11
View File
@@ -24,7 +24,9 @@
//! later swap that capture for the same typed slots used here, adding //! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting. //! live hints/highlighting.
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind}; use crate::dsl::command::{
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
};
use crate::dsl::grammar::{ use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr, CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{ shared::{
@@ -426,7 +428,9 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
// ================================================================= // =================================================================
// seed — `seed <T> [<count>]` (ADR-0048, SD1) // seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
// column-fill)
// ================================================================= // =================================================================
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a /// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
@@ -444,11 +448,127 @@ const SEED_FLAG_NODES: &[Node] = &[
}, },
]; ];
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES); const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
// form 2) ----------------------------------------------------
//
// `seed users.email …` fills one column across existing rows. The
// table ident stops at `.` (idents are alnum/underscore), so an
// `Optional(Seq['.', column])` after the table cleanly discriminates:
// when the next token is not `.`, the `Punct('.')` first-child
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
// column propagates as the user mid-typing `seed users.` (driver
// `walk_optional` semantics). The column resolves against
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
const SEED_TARGET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_target_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// The `.` punctuation followed by the column-fill target column.
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
/// The optional `.<column>` suffix after the table name; when it is
/// absent the command is the whole-row `seed <T>` form.
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
//
// Each override pins one column's generation. The column slot
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
// same `current_column_value` dispatch `update … set` uses) narrow to
// the column's type — so list/range/fixed values get the column's
// typed slot (quoted text, unquoted number, quoted date) and a
// type-mismatched literal is flagged. The four tails each start with a
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
// discriminates cleanly (no Optional-first branch).
/// The `set <col>` column slot. Distinct role from `update`'s
/// `update_set_column` and the expression `expr_column`.
const SEED_SET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_set_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: true,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `as <generator>` — the curated generator-name vocabulary (D9),
/// highlighted in the `tok_function` colour. The slot is structural
/// (any identifier matches); the name is validated at execution and
/// flagged live by the validity indicator.
const SEED_GENERATOR: Node = Node::Ident {
source: IdentSource::Generators,
role: "seed_generator",
validator: None,
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `= <value>` — a fixed constant for every row.
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
const SEED_OV_IN_VALUES: Node = Node::Repeated {
inner: &PER_COLUMN_VALUE,
separator: Some(&Node::Punct(',')),
min: 1,
};
const SEED_OV_IN_NODES: &[Node] = &[
Node::Word(Word::keyword("in")),
Node::Punct('('),
SEED_OV_IN_VALUES,
Node::Punct(')'),
];
/// `between <value> and <value>` — uniform in the (typed) range.
const SEED_OV_BETWEEN_NODES: &[Node] = &[
Node::Word(Word::keyword("between")),
PER_COLUMN_VALUE,
Node::Word(Word::keyword("and")),
PER_COLUMN_VALUE,
];
/// `as <generator>` — force a named generator.
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
/// The four override tails, one per `set` form, in the order
/// `= <value>`, `in ( … )`, `between … and …`, `as <generator>`.
const SEED_OV_TAIL_CHOICES: &[Node] = &[
Node::Seq(SEED_OV_FIXED_NODES),
Node::Seq(SEED_OV_IN_NODES),
Node::Seq(SEED_OV_BETWEEN_NODES),
Node::Seq(SEED_OV_AS_NODES),
];
/// Exactly one of the tails listed in `SEED_OV_TAIL_CHOICES`.
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
/// One override: the `set` column slot followed by its tail.
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
/// One or more overrides separated by `,`.
const SEED_OVERRIDES: Node = Node::Repeated {
inner: &SEED_OVERRIDE,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// The whole clause: the `set` keyword followed by the override list.
const SEED_SET_CLAUSE_NODES: &[Node] =
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
const SEED_NODES: &[Node] = &[ const SEED_NODES: &[Node] = &[
// `writes_table` so a future `set <col>=…` clause's column slots // `writes_table` so the `.column` target, the `set <col>=…`
// can resolve against this table. // clause's column slots, and the typed value slots all resolve
// against this table.
TABLE_NAME_WRITES, TABLE_NAME_WRITES,
SEED_DOT_COLUMN,
Node::Optional(&SEED_COUNT), Node::Optional(&SEED_COUNT),
Node::Optional(&SEED_SET_CLAUSE),
Node::Optional(&SEED_FLAG), Node::Optional(&SEED_FLAG),
]; ];
const SEED_SHAPE: Node = Node::Seq(SEED_NODES); const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
@@ -736,16 +856,29 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
}) })
} }
/// Build a `seed <T> [<count>] [--seed <n>]` command (ADR-0048). The /// Build a `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
/// `--seed` flag's value is the `NumberLit` right after the flag; the /// command (ADR-0048, SD1 + SD2 Phase 2).
/// positional count is the `NumberLit` *before* the flag (or the only ///
/// one when no flag is present). /// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
/// ident, present only for the `seed <T>.<col>` form.
/// - The positional `count` is the `NumberLit` that precedes both the
/// `set` keyword and the `--seed` flag — bounding it that way keeps a
/// `set age between 18 and 80` value (also a `NumberLit`) from being
/// mistaken for the count.
/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> { fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
let table = require_ident(path, "table_name")?; let table = require_ident(path, "table_name")?;
let target_column = ident_text(path, "seed_target_column").map(str::to_string);
let flag_idx = path let flag_idx = path
.items .items
.iter() .iter()
.position(|i| matches!(&i.kind, MatchedKind::Flag("seed"))); .position(|i| matches!(&i.kind, MatchedKind::Flag("seed")));
let set_idx = path
.items
.iter()
.position(|i| matches!(&i.kind, MatchedKind::Word("set")));
let rng_seed = flag_idx let rng_seed = flag_idx
.and_then(|fi| path.items.get(fi + 1)) .and_then(|fi| path.items.get(fi + 1))
@@ -753,23 +886,155 @@ fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationEr
.map(|i| parse_seed_u64(&i.text)) .map(|i| parse_seed_u64(&i.text))
.transpose()?; .transpose()?;
// The count is bounded to before the `set` clause and the flag, so a
// numeric value inside `set` (e.g. `between 18 and 80`) is never read
// as the count.
let count_boundary = [set_idx, flag_idx]
.into_iter()
.flatten()
.min()
.unwrap_or(path.items.len());
let count = path let count = path
.items .items
.iter() .iter()
.enumerate() .enumerate()
.find(|(idx, i)| { .find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary)
matches!(i.kind, MatchedKind::NumberLit) && flag_idx.is_none_or(|fi| *idx < fi)
})
.map(|(_, i)| parse_seed_u64(&i.text)) .map(|(_, i)| parse_seed_u64(&i.text))
.transpose()?; .transpose()?;
let overrides = build_seed_overrides(path, set_idx, flag_idx)?;
Ok(Command::Seed { Ok(Command::Seed {
table, table,
target_column,
count, count,
overrides,
rng_seed, rng_seed,
}) })
} }
/// Turn the flat terminals of a `set` clause into [`SeedOverride`]s
/// (ADR-0048 D2).
///
/// `set_idx` is the position of `Word("set")` in `path.items` and
/// `flag_idx` the position of the `--seed` flag, when present. The
/// clause is everything strictly between them (or up to the end of the
/// path when there is no flag). With no `set` keyword the result is an
/// empty list.
///
/// Every override starts at a `seed_set_column` ident; the tail that
/// follows is consumed by `parse_seed_override_tail`, which moves the
/// cursor past it. Anything else met at the top level — in practice the
/// commas between overrides — is stepped over.
///
/// # Errors
///
/// Propagates the error of `parse_seed_override_tail` when a tail is
/// malformed.
fn build_seed_overrides(
    path: &MatchedPath,
    set_idx: Option<usize>,
    flag_idx: Option<usize>,
) -> Result<Vec<SeedOverride>, ValidationError> {
    let Some(set_idx) = set_idx else {
        return Ok(Vec::new());
    };
    let clause_end = flag_idx.unwrap_or(path.items.len());
    let clause = &path.items[set_idx + 1..clause_end];

    let mut folded = Vec::new();
    let mut cursor = 0;
    while let Some(item) = clause.get(cursor) {
        // Always step past the current token; only a column ident opens
        // an override, everything else is a separator to ignore.
        cursor += 1;
        if !matches!(
            item.kind,
            MatchedKind::Ident {
                role: "seed_set_column",
                ..
            }
        ) {
            continue;
        }
        let column = item.text.clone();
        let kind = parse_seed_override_tail(clause, &mut cursor, &column)?;
        folded.push(SeedOverride { column, kind });
    }
    Ok(folded)
}
/// Read a single override tail whose first token is `region[*i]` (the
/// token right after the column ident) and leave `*i` just past the
/// last token consumed.
///
/// The first token selects the form:
/// - `=` → one value, [`SeedOverrideKind::Fixed`];
/// - `in` → values up to the closing `)`, [`SeedOverrideKind::PickList`];
/// - `between` → two values around an `and`, [`SeedOverrideKind::Range`];
/// - `as` → a `seed_generator` ident, [`SeedOverrideKind::Generator`].
///
/// # Errors
///
/// Returns the `seed_set_error` for `column` when the region ends
/// early, a value slot holds a non-literal, the token after `as` is not
/// a generator ident, or the first token is none of the four above.
fn parse_seed_override_tail(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<SeedOverrideKind, ValidationError> {
    let Some(head) = region.get(*i) else {
        return Err(seed_set_error(column));
    };
    match &head.kind {
        MatchedKind::Punct('=') => {
            *i += 1;
            seed_take_value(region, i, column).map(SeedOverrideKind::Fixed)
        }
        MatchedKind::Word("in") => {
            *i += 1;
            // The opening parenthesis is stepped over when present.
            if let Some(MatchedKind::Punct('(')) = region.get(*i).map(|t| &t.kind) {
                *i += 1;
            }
            let mut picks = Vec::new();
            loop {
                match region.get(*i).map(|t| &t.kind) {
                    // Region exhausted without a `)`: keep what we have.
                    None => break,
                    Some(MatchedKind::Punct(')')) => {
                        *i += 1;
                        break;
                    }
                    // Inner separators belong to this list, not to the
                    // top-level override separators.
                    Some(MatchedKind::Punct(',')) => *i += 1,
                    Some(_) => picks.push(seed_take_value(region, i, column)?),
                }
            }
            Ok(SeedOverrideKind::PickList(picks))
        }
        MatchedKind::Word("between") => {
            *i += 1;
            let low = seed_take_value(region, i, column)?;
            if let Some(MatchedKind::Word("and")) = region.get(*i).map(|t| &t.kind) {
                *i += 1;
            }
            let high = seed_take_value(region, i, column)?;
            Ok(SeedOverrideKind::Range { low, high })
        }
        MatchedKind::Word("as") => {
            *i += 1;
            match region.get(*i) {
                Some(token)
                    if matches!(
                        token.kind,
                        MatchedKind::Ident {
                            role: "seed_generator",
                            ..
                        }
                    ) =>
                {
                    *i += 1;
                    Ok(SeedOverrideKind::Generator(token.text.clone()))
                }
                _ => Err(seed_set_error(column)),
            }
        }
        _ => Err(seed_set_error(column)),
    }
}
/// Consume the value literal at `region[*i]` and step `*i` past it.
///
/// The cursor only moves on success. The typed value slots of the
/// grammar match value literals only (a bare unquoted word is rejected
/// by the slot *before* this fold runs — D2's quoting requirement is
/// structural), so reaching the error path means the grammar and the
/// builder have drifted apart; the `Err` is a drift guard, in the same
/// spirit as `expr::build_expr`.
///
/// # Errors
///
/// Returns the `seed_set_error` for `column` when the region has no
/// token at `*i` or that token does not convert to a `Value`.
fn seed_take_value(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<Value, ValidationError> {
    match region.get(*i).and_then(|item| item_to_value(item)) {
        Some(value) => {
            *i += 1;
            Ok(value)
        }
        None => Err(seed_set_error(column)),
    }
}
/// Build the drift-guard error used by the `set`-clause fold: a
/// `parse.error_wrapper` whose `detail` names the offending `column`.
fn seed_set_error(column: &str) -> ValidationError {
    let detail = format!("malformed `set` clause for `{column}`");
    ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", detail)],
    }
}
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> { fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
text.parse::<u64>().map_err(|_| ValidationError { text.parse::<u64>().map_err(|_| ValidationError {
message_key: "parse.custom.bind_type_mismatch", message_key: "parse.custom.bind_type_mismatch",
+16
View File
@@ -57,6 +57,12 @@ pub enum HighlightClass {
String, String,
Punct, Punct,
Flag, Flag,
/// A curated function-vocabulary name — the `seed … set <col> as
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
/// the existing `tok_function` colour (ADR-0022 Amд6 blue — no new
/// theme colour), assigned via a generator slot's
/// `highlight_override`, not by byte shape.
Function,
Error, Error,
} }
@@ -86,6 +92,14 @@ pub enum IdentSource {
/// content validator on column-type slots; not user-listable /// content validator on column-type slots; not user-listable
/// from the schema. /// from the schema.
Types, Types,
/// Closed, curated set of fake-data generator names (ADR-0048
/// D9) — the `seed … set <col> as <generator>` slot. Like
/// `Types`, not user-listable from the schema; the vocabulary
/// lives in `src/seed` and the completion engine offers it. The
/// grammar slot is purely structural (matches any identifier);
/// an unknown name is flagged live (validity) and rejected at
/// execution.
Generators,
/// Any identifier shape; used by synthetic catch-all branches /// Any identifier shape; used by synthetic catch-all branches
/// (e.g., the unknown-value branch of `mode <value>`). /// (e.g., the unknown-value branch of `mode <value>`).
Free, Free,
@@ -117,6 +131,7 @@ impl IdentSource {
Self::Relationships => "relationship name", Self::Relationships => "relationship name",
Self::Indexes => "index name", Self::Indexes => "index name",
Self::Types => "type", Self::Types => "type",
Self::Generators => "generator name",
} }
} }
@@ -134,6 +149,7 @@ impl IdentSource {
"relationship name" => Some(Self::Relationships), "relationship name" => Some(Self::Relationships),
"index name" => Some(Self::Indexes), "index name" => Some(Self::Indexes),
"type" => Some(Self::Types), "type" => Some(Self::Types),
"generator name" => Some(Self::Generators),
_ => None, _ => None,
} }
} }
+1
View File
@@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String {
IdentSource::Relationships => "relationship name".to_string(), IdentSource::Relationships => "relationship name".to_string(),
IdentSource::Indexes => "index name".to_string(), IdentSource::Indexes => "index name".to_string(),
IdentSource::Types => "type".to_string(), IdentSource::Types => "type".to_string(),
IdentSource::Generators => "generator name".to_string(),
IdentSource::NewName | IdentSource::Free => "identifier".to_string(), IdentSource::NewName | IdentSource::Free => "identifier".to_string(),
}, },
Expectation::Punct(c) => format!("`{c}`"), Expectation::Punct(c) => format!("`{c}`"),
+12
View File
@@ -240,6 +240,18 @@ mod tests {
); );
} }
#[test]
fn seed_generator_name_highlighted_as_function() {
// ADR-0048 D9: the `set <col> as <gen>` generator name carries the
// `Function` highlight class (via the slot's `highlight_override`),
// rendered in the shared `tok_function` colour.
let runs = run("seed Members set role as email");
assert!(
runs.iter().any(|(_, _, c)| *c == HighlightClass::Function),
"generator name `email` should be Function-highlighted: {runs:?}"
);
}
#[test] #[test]
fn unknown_command_word_classified_by_byte_shape() { fn unknown_command_word_classified_by_byte_shape() {
// Walker doesn't engage; fallback classifies as Identifier. // Walker doesn't engage; fallback classifies as Identifier.
+4
View File
@@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics(
IdentSource::Relationships IdentSource::Relationships
| IdentSource::Indexes | IdentSource::Indexes
| IdentSource::Types | IdentSource::Types
// `Generators` (the `set … as <gen>` slot, ADR-0048 D9) is a
// curated vocabulary; its unknown-name validity is handled by
// the completion-layer indicator, not this walker diagnostic.
| IdentSource::Generators
| IdentSource::Free => {} | IdentSource::Free => {}
} }
} }
+1 -1
View File
@@ -553,7 +553,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("ok.rows_seeded", &["count", "table"]), ("ok.rows_seeded", &["count", "table"]),
("ok.rows_updated", &["count"]), ("ok.rows_updated", &["count"]),
("seed.capped", &["requested"]), ("seed.capped", &["requested"]),
("seed.advisory_generic", &["columns"]), ("seed.advisory_generic", &["columns", "column", "table"]),
// ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ---- // ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ----
("client_side.auto_fill_add_serial", &["count"]), ("client_side.auto_fill_add_serial", &["count"]),
("client_side.auto_fill_add_shortid", &["count"]), ("client_side.auto_fill_add_shortid", &["count"]),
+13 -2
View File
@@ -337,6 +337,13 @@ help:
seed <T> [<count>] — fill a table with generated sample rows seed <T> [<count>] — fill a table with generated sample rows
(default 20). Existing rows are kept; (default 20). Existing rows are kept;
foreign keys draw from existing parent rows. foreign keys draw from existing parent rows.
seed <T> ... set <c> = 'v' | in ('a','b') | as <gen> | between x and y
— pin how a column is generated: a fixed
value, a pick-list, a named generator
(email, name, product, ...), or a range.
seed <T>.<col> [set ...] — fill one column across the EXISTING rows
(the follow-up to `add column`).
seed <T> ... --seed <n> — reproducible: same data for the same n.
insert: |- insert: |-
insert into <T> [(cols)] [values] (vals) — add a row insert into <T> [(cols)] [values] (vals) — add a row
update: |- update: |-
@@ -573,7 +580,7 @@ parse:
change_column: |- change_column: |-
change column [in] [table] <Table>: <Name> (<Type>) change column [in] [table] <Table>: <Name> (<Type>)
[--force-conversion | --dont-convert] [--force-conversion | --dont-convert]
seed: "seed <Table> [count]" seed: "seed <Table> [count] [set <col> = ... | in (...) | as <gen> | between x and y] | seed <Table>.<col>"
show_data: "show data <Table>" show_data: "show data <Table>"
show_table: "show table <Table>" show_table: "show table <Table>"
show_tables: "show tables" show_tables: "show tables"
@@ -988,7 +995,11 @@ db:
# generic text that look like fixed value sets. # generic text that look like fixed value sets.
seed: seed:
capped: "(of {requested} requested — ran out of distinct value combinations)" capped: "(of {requested} requested — ran out of distinct value combinations)"
advisory_generic: "{columns} filled with generic text — they look like fixed value sets." # ADR-0048 D13 (Phase 2/3 wording): name the generically-filled
# enum-ish / CHECK columns and point at the concrete repairs — the
# `set` clause on a fresh seed, or the column-fill form for the rows
# just created.
advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`."
ok: ok:
# ADR-0040: the generic `[ok] <verb> <subject>` summary line was # ADR-0040: the generic `[ok] <verb> <subject>` summary line was
+3
View File
@@ -817,6 +817,9 @@ fn ambient_hint_core_in_mode(
crate::dsl::grammar::IdentSource::Tables => "table", crate::dsl::grammar::IdentSource::Tables => "table",
crate::dsl::grammar::IdentSource::Columns => "column", crate::dsl::grammar::IdentSource::Columns => "column",
crate::dsl::grammar::IdentSource::Relationships => "relationship", crate::dsl::grammar::IdentSource::Relationships => "relationship",
// The `seed … set <col> as <gen>` curated vocabulary
// (ADR-0048 D9) flags an unknown name here.
crate::dsl::grammar::IdentSource::Generators => "generator",
// `NewName`, `Types`, `Free` are filtered out by // `NewName`, `Types`, `Free` are filtered out by
// `invalid_ident_at_cursor` (it only fires for // `invalid_ident_at_cursor` (it only fires for
// known-set sources via `completes_from_schema`), so // known-set sources via `completes_from_schema`), so
+4 -2
View File
@@ -2916,13 +2916,15 @@ async fn execute_command_typed(
.insert(table, columns, values, src) .insert(table, columns, values, src)
.await .await
.map(CommandOutcome::Insert), .map(CommandOutcome::Insert),
// ADR-0048 (SD1). // ADR-0048 (SD1/SD2 Phase 2).
Command::Seed { Command::Seed {
table, table,
target_column,
count, count,
overrides,
rng_seed, rng_seed,
} => database } => database
.seed(table, count, rng_seed, src) .seed(table, target_column, count, overrides, rng_seed, src)
.await .await
.map(CommandOutcome::Seed), .map(CommandOutcome::Seed),
Command::Update { Command::Update {
+201
View File
@@ -81,6 +81,11 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
let chosen: &String = pick(rng, values); let chosen: &String = pick(rng, values);
literal_to_value(chosen, ty) literal_to_value(chosen, ty)
} }
// The `set <col> between low and high` override (D2). Bounds are
// interpreted per the destination type; the executor has already
// validated they parse, so a defensive parse failure here falls
// back to type-based generation rather than producing junk.
Generator::Range { low, high } => range_value(low, high, ty, rng),
// Un-intercepted markers + an empty pick list → type-based. // Un-intercepted markers + an empty pick list → type-based.
Generator::PickFrom(_) Generator::PickFrom(_)
| Generator::IdentitySequential | Generator::IdentitySequential
@@ -89,6 +94,132 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
} }
} }
/// Draw one value between `low` and `high` for the `between` override
/// (D2), reading the bounds according to the destination type `ty`.
///
/// - `Int` / `Serial`: an integer in the inclusive range.
/// - `Real` / `Decimal`: a real number rendered with two decimals.
/// - `Date` / `DateTime`: a date or datetime rendered as text.
///
/// When the bounds do not parse for `ty`, or `ty` has no notion of a
/// range (text / bool / blob / shortid), the type-based generic value is
/// returned instead.
///
/// NOTE(review): the real branch rounds to two decimals after sampling,
/// so with bounds finer than 0.01 the rendered number can presumably
/// land outside `[low, high]` — confirm whether that matters to callers.
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
    let ranged = match ty {
        Type::Int | Type::Serial => parse_int_range(low, high)
            .map(|(min, max)| Value::Number(rng.random_range(min..=max).to_string())),
        Type::Real | Type::Decimal => parse_real_range(low, high).map(|(min, max)| {
            let sample = rng.random::<f64>().mul_add(max - min, min);
            Value::Number(format!("{sample:.2}"))
        }),
        Type::Date => parse_date_range(low, high)
            .map(|(min, max)| Value::Text(format_date(random_date_between(rng, min, max)))),
        Type::DateTime => parse_datetime_range(low, high)
            .map(|(min, max)| Value::Text(random_datetime_between(rng, min, max))),
        // text / bool / blob / shortid have no range meaning.
        _ => None,
    };
    ranged.unwrap_or_else(|| generic_for_type(ty, rng))
}
/// Check that `low` and `high` are usable `between` bounds for a column
/// of type `ty` (D2).
///
/// Returns `None` when both bounds parse for the type. Otherwise returns
/// a short, human-readable reason with an example of the expected
/// spelling; text, bool, blob and shortid columns are always rejected
/// because a range has no meaning for them.
#[must_use]
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
    // One exhaustive match yields both the verdict and the message that
    // goes with a failure for that type.
    let (valid, reason) = match ty {
        Type::Int | Type::Serial => (
            parse_int_range(low, high).is_some(),
            "expected two whole numbers, e.g. `between 1 and 100`",
        ),
        Type::Real | Type::Decimal => (
            parse_real_range(low, high).is_some(),
            "expected two numbers, e.g. `between 1.0 and 9.99`",
        ),
        Type::Date => (
            parse_date_range(low, high).is_some(),
            "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`",
        ),
        Type::DateTime => (
            parse_datetime_range(low, high).is_some(),
            "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`",
        ),
        Type::Text | Type::Bool | Type::Blob | Type::ShortId => (
            false,
            "a `between` range only applies to numeric and date/datetime columns",
        ),
    };
    if valid {
        None
    } else {
        Some(reason.to_string())
    }
}
/// Read both bounds as `i64` (surrounding whitespace ignored) and hand
/// them back as a `(smaller, larger)` pair; `None` when either bound is
/// not an integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    Some((first.min(second), first.max(second)))
}
/// Read both bounds as `f64` (surrounding whitespace ignored) and hand
/// them back as a `(smaller, larger)` pair; `None` when either bound
/// fails to parse or is not finite (NaN / infinity).
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let first = low.trim().parse::<f64>().ok()?;
    let second = high.trim().parse::<f64>().ok()?;
    if first.is_finite() && second.is_finite() {
        // Swap only when strictly out of order, so equal bounds keep
        // their written order.
        Some(if second < first {
            (second, first)
        } else {
            (first, second)
        })
    } else {
        None
    }
}
/// Read both bounds as `YYYY-MM-DD` dates (surrounding whitespace
/// ignored) and hand them back in chronological order; `None` when
/// either bound is not a valid date in that format.
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
    const ISO_DATE: &str = "%Y-%m-%d";
    let first = NaiveDate::parse_from_str(low.trim(), ISO_DATE).ok()?;
    let second = NaiveDate::parse_from_str(high.trim(), ISO_DATE).ok()?;
    if first <= second {
        Some((first, second))
    } else {
        Some((second, first))
    }
}
/// Parse one datetime bound, trying the `T`-separated spelling first and
/// the space-separated spelling second — the two spellings the app
/// validates (`bind_datetime` / `validate_datetime`). Surrounding
/// whitespace is ignored; `None` when neither layout matches.
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
    let text = s.trim();
    ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"]
        .into_iter()
        .find_map(|layout| chrono::NaiveDateTime::parse_from_str(text, layout).ok())
}
/// Read both bounds with `parse_one_datetime` and hand them back in
/// chronological order; `None` when either bound does not parse.
fn parse_datetime_range(
    low: &str,
    high: &str,
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
    let first = parse_one_datetime(low)?;
    let second = parse_one_datetime(high)?;
    if first <= second {
        Some((first, second))
    } else {
        Some((second, first))
    }
}
/// Pick a date uniformly from `[lo, hi]`, both ends included.
///
/// The draw is made over day numbers counted from the common era; if the
/// drawn number cannot be turned back into a date, `lo` is returned.
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
    let day_span = lo.num_days_from_ce()..=hi.num_days_from_ce();
    let drawn = rng.random_range(day_span);
    NaiveDate::from_num_days_from_ce_opt(drawn).unwrap_or(lo)
}
/// Pick a datetime uniformly from `[lo, hi]` at one-second resolution
/// and render it as `YYYY-MM-DDTHH:MM:SS`.
///
/// The bounds are put in order before drawing, so a reversed pair is
/// accepted. If the drawn timestamp cannot be converted back, `lo` is
/// rendered instead.
fn random_datetime_between(
    rng: &mut SeedRng,
    lo: chrono::NaiveDateTime,
    hi: chrono::NaiveDateTime,
) -> String {
    let lo_secs = lo.and_utc().timestamp();
    let hi_secs = hi.and_utc().timestamp();
    let (earliest, latest) = if lo_secs <= hi_secs {
        (lo_secs, hi_secs)
    } else {
        (hi_secs, lo_secs)
    };
    let secs = rng.random_range(earliest..=latest);
    let moment = match chrono::DateTime::from_timestamp(secs, 0) {
        Some(stamp) => stamp.naive_utc(),
        None => lo,
    };
    moment.format("%Y-%m-%dT%H:%M:%S").to_string()
}
/// Type-based fallback generation (D8). Never produces NULL for a /// Type-based fallback generation (D8). Never produces NULL for a
/// generatable type; `blob`/`serial`/`shortid` are handled by the /// generatable type; `blob`/`serial`/`shortid` are handled by the
/// executor (autogen / block guard) and yield NULL here only as a /// executor (autogen / block guard) and yield NULL here only as a
@@ -358,6 +489,76 @@ mod tests {
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}"); assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
} }
#[test]
fn int_range_stays_within_inclusive_bounds() {
    // 200 seeded draws from `between 10 and 20` on an int column must
    // all be numbers inside the inclusive range.
    let generator = Generator::Range {
        low: String::from("10"),
        high: String::from("20"),
    };
    let mut rng = make_rng(Some(5));
    for _ in 0..200 {
        let n = match generate_value(&generator, Type::Int, &mut rng) {
            Value::Number(text) => text.parse::<i64>().unwrap(),
            _ => panic!("int range should be a number"),
        };
        assert!((10..=20).contains(&n), "int {n} out of [10,20]");
    }
}
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
    // 200 seeded draws from `between 1.0 and 9.0` on a real column must
    // all be numbers inside the range, rendered with two decimals.
    let g = Generator::Range { low: "1.0".into(), high: "9.0".into() };
    let mut rng = make_rng(Some(5));
    for _ in 0..200 {
        let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else {
            panic!("real range should be a number")
        };
        let n: f64 = s.parse().unwrap();
        assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
        // "Cents" means exactly two fractional digits; checking only for
        // the presence of a `.` would also accept `3.14159` or `3.`.
        let fraction = s.split_once('.').map(|(_, frac)| frac);
        assert!(
            fraction.is_some_and(|frac| frac.len() == 2 && frac.bytes().all(|b| b.is_ascii_digit())),
            "real should be formatted with cents: {s}"
        );
    }
}
#[test]
fn date_range_stays_within_quoted_bounds() {
    // 200 seeded draws from a one-year date range must all be text that
    // parses as a date inside the range.
    let parse = |text: &str| NaiveDate::parse_from_str(text, "%Y-%m-%d");
    let lo = parse("2023-01-01").unwrap();
    let hi = parse("2023-12-31").unwrap();
    let generator = Generator::Range {
        low: String::from("2023-01-01"),
        high: String::from("2023-12-31"),
    };
    let mut rng = make_rng(Some(9));
    for _ in 0..200 {
        let s = match generate_value(&generator, Type::Date, &mut rng) {
            Value::Text(text) => text,
            _ => panic!("date range should be text"),
        };
        let d = parse(s.as_str()).expect("valid date");
        assert!((lo..=hi).contains(&d), "date {d} out of range");
    }
}
#[test]
fn reversed_bounds_are_tolerated() {
    // `between 20 and 10` is read as the range 10..=20.
    let reversed = Generator::Range {
        low: String::from("20"),
        high: String::from("10"),
    };
    let mut rng = make_rng(Some(1));
    let n = match generate_value(&reversed, Type::Int, &mut rng) {
        Value::Number(text) => text.parse::<i64>().unwrap(),
        _ => panic!("number"),
    };
    assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
    // Numeric / date / datetime bounds that parse are accepted.
    let accepted = [
        (Type::Int, "1", "10"),
        (Type::Real, "1.5", "9.9"),
        (Type::Date, "2023-01-01", "2024-01-01"),
        (Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00"),
    ];
    for (ty, low, high) in accepted {
        assert!(range_bounds_reason(ty, low, high).is_none());
    }

    // A non-numeric bound on a numeric column, and any range on a text
    // or bool column, are rejected with a reason.
    let rejected = [
        (Type::Int, "abc", "10"),
        (Type::Text, "a", "z"),
        (Type::Bool, "0", "1"),
    ];
    for (ty, low, high) in rejected {
        assert!(range_bounds_reason(ty, low, high).is_some());
    }
}
#[test] #[test]
fn markers_fall_back_to_type_based_generation() { fn markers_fall_back_to_type_based_generation() {
// An un-intercepted marker must not panic; it generates by type. // An un-intercepted marker must not panic; it generates by type.
+14 -3
View File
@@ -27,10 +27,12 @@
mod check; mod check;
mod generators; mod generators;
mod heuristics; mod heuristics;
mod vocabulary;
pub use check::parse_in_check_values; pub use check::parse_in_check_values;
pub use generators::generate_value; pub use generators::{generate_value, range_bounds_reason};
pub use heuristics::{choose_generator, is_enum_ish}; pub use heuristics::{choose_generator, is_enum_ish};
pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS};
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rand::{RngExt, SeedableRng}; use rand::{RngExt, SeedableRng};
@@ -162,10 +164,19 @@ pub enum Generator {
IdentitySequential, IdentitySequential,
/// FK column (D14): the executor samples an existing parent key. /// FK column (D14): the executor samples an existing parent key.
ForeignKeySample, ForeignKeySample,
// — List / fallback // — List / range (the `set` override clause, D2)
/// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an /// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
/// enum, or a future `set <col> in (…)` override. /// enum, or a `set <col> in (…)` / `= <value>` override (D2).
PickFrom(Vec<String>), PickFrom(Vec<String>),
/// Uniform value in `[low, high]` — the `set <col> between low and
/// high` override (D2). Bounds are the raw literal strings; their
/// interpretation (int / real / date / datetime) follows the
/// destination column type at generation time. The executor
/// validates type-compatibility *before* generation (a bound that
/// does not parse for the column type is a friendly error), so
/// [`generate_value`] only ever sees parseable bounds; a defensive
/// parse failure falls back to type-based generation.
Range { low: String, high: String },
/// Type-based fallback (D8) when no name heuristic matches. /// Type-based fallback (D8) when no name heuristic matches.
Generic, Generic,
} }
+149
View File
@@ -0,0 +1,149 @@
//! The curated named-generator vocabulary (ADR-0048 D9).
//!
//! This is the **single source of truth** for "what generator names can
//! a learner write after `set <col> as …`", shared by three consumers
//! (mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amd6):
//!
//! - **Tab completion** — the `seed … set <col> as ⟨here⟩` slot offers
//! these names (`src/completion.rs`).
//! - **The typing-time validity indicator (ADR-0027)** — an unknown
//! name after `as` is flagged `[ERR]` while typing.
//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`]
//! via [`generator_for_name`]; an unknown name is a friendly error.
//!
//! The list is a deliberately *curated pedagogical set* — the generators
//! a learner reaches for, not every internal [`Generator`] variant
//! (stateful markers like `ForeignKeySample` are executor-only and have
//! no name). It is lowercase + sorted (pinned by a unit test).
use crate::seed::Generator;
/// The curated generator names, lowercase and **sorted** (invariant
/// pinned by a test — completion relies on stable order and a
/// case-insensitive prefix match against these canonical spellings).
///
/// Only canonical spellings are listed here. `generator_for_name`
/// additionally accepts a few alias spellings; those are intentionally
/// absent so that completion offers exactly one spelling per generator.
/// Every entry must have a mapping in `generator_for_name` (also pinned
/// by a test).
pub const KNOWN_GENERATORS: &[&str] = &[
    "age",
    "bool",
    "city",
    "color",
    "company",
    "country",
    "date",
    "datetime",
    "email",
    "first_name",
    "job",
    "last_name",
    "name",
    "paragraph",
    "password",
    "phone",
    "price",
    "product",
    "sentence",
    "state",
    "street",
    "url",
    "username",
    "zip",
];
/// Resolve a learner-written generator name to its [`Generator`].
///
/// Matching is ASCII case-insensitive. Besides the canonical spellings
/// offered for completion, a handful of alias spellings are accepted:
/// `full_name` (for `name`), `firstname`, `lastname` / `surname`, and
/// `colour`.
///
/// Returns `None` for any other name (including the empty string); the
/// executor turns that into a friendly "unknown generator" error naming
/// the curated set.
#[must_use]
pub fn generator_for_name(name: &str) -> Option<Generator> {
    let canonical = name.to_ascii_lowercase();
    Some(match canonical.as_str() {
        // — People / accounts
        "name" | "full_name" => Generator::FullName,
        "first_name" | "firstname" => Generator::FirstName,
        "last_name" | "lastname" | "surname" => Generator::LastName,
        "email" => Generator::Email,
        "username" => Generator::Username,
        "password" => Generator::Password,
        "phone" => Generator::Phone,
        "age" => Generator::Age,
        // — Places
        "city" => Generator::City,
        "country" => Generator::Country,
        "state" => Generator::StateName,
        "street" => Generator::Street,
        "zip" => Generator::ZipCode,
        // — Business
        "company" => Generator::Company,
        "job" => Generator::JobTitle,
        "product" => Generator::ProductName,
        "price" => Generator::CurrencyAmount,
        // — Text / web
        "sentence" => Generator::Sentence,
        "paragraph" => Generator::Paragraph,
        "url" => Generator::Url,
        "color" | "colour" => Generator::HexColor,
        // — Temporal / boolean
        "date" => Generator::DateRecent,
        "datetime" => Generator::DateTimeRecent,
        "bool" => Generator::Boolean,
        // Anything else is not part of the vocabulary.
        _ => return None,
    })
}
/// Whether `partial` is a case-insensitive prefix of at least one known
/// generator name.
///
/// An empty `partial` matches every generator (it is a prefix of all) —
/// mirrors `is_known_function_prefix`. Used by the validity indicator to
/// avoid flagging a still-being-typed name.
#[must_use]
pub fn is_known_generator_prefix(partial: &str) -> bool {
let lowered = partial.to_ascii_lowercase();
KNOWN_GENERATORS.iter().any(|g| g.starts_with(&lowered))
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Pins the documented invariant of `KNOWN_GENERATORS`: sorted,
    /// free of duplicates (a duplicate would survive a plain sort
    /// comparison), and all-lowercase.
    #[test]
    fn known_generators_is_sorted_and_lowercase() {
        let mut sorted = KNOWN_GENERATORS.to_vec();
        sorted.sort_unstable();
        assert_eq!(KNOWN_GENERATORS, sorted.as_slice(), "must be sorted");
        // A sorted list with a repeated entry still equals its sorted
        // copy, so check uniqueness separately.
        sorted.dedup();
        assert_eq!(
            KNOWN_GENERATORS.len(),
            sorted.len(),
            "must not contain duplicates"
        );
        for g in KNOWN_GENERATORS {
            assert_eq!(*g, g.to_ascii_lowercase(), "must be lowercase: {g}");
        }
    }

    /// Every name offered for completion must be executable.
    #[test]
    fn every_listed_name_maps_to_a_generator() {
        for g in KNOWN_GENERATORS {
            assert!(
                generator_for_name(g).is_some(),
                "listed generator name `{g}` has no mapping"
            );
        }
    }

    /// Case-insensitivity plus every alias spelling the mapping accepts.
    #[test]
    fn mapping_is_case_insensitive_and_has_aliases() {
        assert_eq!(generator_for_name("EMAIL"), Some(Generator::Email));
        assert_eq!(generator_for_name("FirstName"), Some(Generator::FirstName));
        assert_eq!(generator_for_name("colour"), Some(Generator::HexColor));
        assert_eq!(generator_for_name("full_name"), Some(Generator::FullName));
        assert_eq!(generator_for_name("lastname"), Some(Generator::LastName));
        assert_eq!(generator_for_name("surname"), Some(Generator::LastName));
    }

    #[test]
    fn unknown_name_has_no_mapping() {
        assert_eq!(generator_for_name("bogus"), None);
        assert_eq!(generator_for_name(""), None);
    }

    #[test]
    fn prefix_check_matches_known_and_rejects_unknown() {
        assert!(is_known_generator_prefix("ema"));
        assert!(is_known_generator_prefix("EMA"));
        assert!(is_known_generator_prefix("")); // empty is a prefix of all
        assert!(!is_known_generator_prefix("zzz"));
    }
}
+2
View File
@@ -163,6 +163,7 @@ impl Theme {
HighlightClass::String => self.tok_string, HighlightClass::String => self.tok_string,
HighlightClass::Punct => self.tok_punct, HighlightClass::Punct => self.tok_punct,
HighlightClass::Flag => self.tok_flag, HighlightClass::Flag => self.tok_flag,
HighlightClass::Function => self.tok_function,
HighlightClass::Error => self.tok_error, HighlightClass::Error => self.tok_error,
} }
} }
@@ -228,6 +229,7 @@ mod tests {
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string); assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct); assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag); assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function);
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error); assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
} }
+8
View File
@@ -110,6 +110,13 @@ fn near_miss_matrix_simple_mode() {
("delete from", &["after `delete from`, expected table name", "delete from <Table>"]), ("delete from", &["after `delete from`, expected table name", "delete from <Table>"]),
("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]), ("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]),
("seed", &["after `seed`, expected table name", "seed <Table> [count]"]), ("seed", &["after `seed`, expected table name", "seed <Table> [count]"]),
// Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill.
("seed T set", &["after `seed T set`, expected column name", "seed <Table>.<col>"]),
(
"seed T set role",
&["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed <Table>.<col>"],
),
("seed T.", &["after `seed T.`, expected column name", "seed <Table>.<col>"]),
("replay", &["after `replay`, expected string literal or path", "replay <path>"]), ("replay", &["after `replay`, expected string literal or path", "replay <path>"]),
("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]), ("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]),
// advanced-only entry word typed in simple mode → "this is SQL" rail // advanced-only entry word typed in simple mode → "this is SQL" rail
@@ -540,3 +547,4 @@ fn caret_aligns_under_offending_token() {
+564 -25
View File
@@ -60,11 +60,15 @@ fn seed_parses_with_and_without_count() {
match parse_command("seed People 5").expect("`seed People 5` parses") { match parse_command("seed People 5").expect("`seed People 5` parses") {
Command::Seed { Command::Seed {
table, table,
target_column,
count, count,
overrides,
rng_seed, rng_seed,
} => { } => {
assert_eq!(table, "People"); assert_eq!(table, "People");
assert_eq!(target_column, None);
assert_eq!(count, Some(5)); assert_eq!(count, Some(5));
assert!(overrides.is_empty());
assert_eq!(rng_seed, None); assert_eq!(rng_seed, None);
} }
other => panic!("expected Command::Seed, got {other:?}"), other => panic!("expected Command::Seed, got {other:?}"),
@@ -86,6 +90,7 @@ fn seed_parses_the_reproducibility_flag() {
table, table,
count, count,
rng_seed, rng_seed,
..
} => { } => {
assert_eq!(table, "People"); assert_eq!(table, "People");
assert_eq!(count, Some(5)); assert_eq!(count, Some(5));
@@ -106,6 +111,155 @@ fn seed_parses_the_reproducibility_flag() {
} }
} }
// — Phase 2 (SD2): set-clause + column-fill parse path (ADR-0048 D2/D1) —
use rdbms_playground::dsl::command::{SeedOverride, SeedOverrideKind};
use rdbms_playground::dsl::value::Value;
/// Test helper: parse `input` and return the `(target_column, overrides)`
/// pair of the resulting `seed` command.
///
/// Panics if `input` fails to parse or parses to anything other than
/// `Command::Seed` — the builder-fold assertions below rely on that.
fn seed_overrides(input: &str) -> (Option<String>, Vec<SeedOverride>) {
    let parsed = match parse_command(input) {
        Ok(command) => command,
        Err(e) => panic!("`{input}` should parse: {e:?}"),
    };
    match parsed {
        Command::Seed {
            target_column,
            overrides,
            ..
        } => (target_column, overrides),
        other => panic!("expected Command::Seed, got {other:?}"),
    }
}
#[test]
fn seed_set_fixed_value_override_parses() {
    // `= 'v'` folds into a single Fixed override on the named column.
    let (_target, overrides) = seed_overrides("seed users 5 set status = 'active'");
    assert_eq!(overrides.len(), 1);
    let pinned = &overrides[0];
    assert_eq!(pinned.column, "status");
    let expected = SeedOverrideKind::Fixed(Value::Text("active".into()));
    assert_eq!(pinned.kind, expected);
}
#[test]
fn seed_set_pick_list_override_parses() {
    // `in (…)` folds into a PickList that keeps the written order.
    let (_target, overrides) =
        seed_overrides("seed users set role in ('admin', 'editor', 'viewer')");
    assert_eq!(overrides.len(), 1);
    let pick = &overrides[0];
    assert_eq!(pick.column, "role");
    let expected_values: Vec<Value> = ["admin", "editor", "viewer"]
        .iter()
        .map(|literal| Value::Text((*literal).into()))
        .collect();
    assert_eq!(pick.kind, SeedOverrideKind::PickList(expected_values));
}
#[test]
fn seed_set_generator_override_parses() {
    // `as <generator>` carries the generator name through as written.
    let (_target, overrides) = seed_overrides("seed users set work_addr as email");
    assert_eq!(overrides.len(), 1);
    let named = &overrides[0];
    assert_eq!(named.column, "work_addr");
    let expected = SeedOverrideKind::Generator("email".into());
    assert_eq!(named.kind, expected);
}
#[test]
fn seed_set_numeric_range_override_parses() {
    // Unquoted bounds arrive as Number values holding the literal text.
    let (_target, overrides) = seed_overrides("seed products set price between 10 and 100");
    assert_eq!(overrides.len(), 1);
    let ranged = &overrides[0];
    assert_eq!(ranged.column, "price");
    let low = Value::Number("10".into());
    let high = Value::Number("100".into());
    assert_eq!(ranged.kind, SeedOverrideKind::Range { low, high });
}
#[test]
fn seed_set_date_range_override_parses_with_quoted_dates() {
    // ADR-0048 D2 amendment: date bounds are written as quoted strings,
    // so they arrive as Text values.
    let (_target, overrides) =
        seed_overrides("seed users set signup between '2023-01-01' and '2024-12-31'");
    let low = Value::Text("2023-01-01".into());
    let high = Value::Text("2024-12-31".into());
    assert_eq!(overrides[0].kind, SeedOverrideKind::Range { low, high });
}
#[test]
fn seed_multiple_overrides_combine() {
    let input = "seed users 20 set role in ('admin', 'user'), status = 'active', signup between '2023-01-01' and '2024-12-31'";
    let (_target, overrides) = seed_overrides(input);
    assert_eq!(overrides.len(), 3, "three comma-separated overrides: {overrides:?}");

    // Columns come back in the order they were written.
    for (parsed, expected_column) in overrides.iter().zip(["role", "status", "signup"].iter()) {
        assert_eq!(parsed.column, *expected_column);
    }

    // …and each clause folds into its own override kind.
    assert!(matches!(overrides[0].kind, SeedOverrideKind::PickList(_)));
    assert!(matches!(overrides[1].kind, SeedOverrideKind::Fixed(_)));
    assert!(matches!(overrides[2].kind, SeedOverrideKind::Range { .. }));
}
#[test]
fn seed_count_is_not_confused_by_a_range_value() {
    // There is no positional count here; the number literals inside
    // `between 18 and 80` sit after `set` and must not be taken for it.
    let parsed = parse_command("seed users set age between 18 and 80").expect("parses");
    let (count, overrides) = match parsed {
        Command::Seed { count, overrides, .. } => (count, overrides),
        other => panic!("expected seed, got {other:?}"),
    };
    assert_eq!(count, None, "the count is None, not 18");
    assert_eq!(overrides.len(), 1);
}
#[test]
fn seed_set_combines_with_count_and_flag() {
    // Count, `set` clause and `--seed` flag all coexist in one command.
    let parsed = parse_command("seed users 30 set status = 'x' --seed 42").expect("parses");
    let (count, overrides, rng_seed) = match parsed {
        Command::Seed {
            count,
            overrides,
            rng_seed,
            ..
        } => (count, overrides, rng_seed),
        other => panic!("expected seed, got {other:?}"),
    };
    assert_eq!(count, Some(30));
    assert_eq!(rng_seed, Some(42));
    assert_eq!(overrides.len(), 1);
}
#[test]
fn seed_column_fill_target_parses() {
    // `<table>.<column>` sets the column-fill target and no overrides.
    let (target_column, overrides) = seed_overrides("seed users.work_addr");
    assert_eq!(target_column, Some("work_addr".to_string()));
    assert!(overrides.is_empty());
}
#[test]
fn seed_column_fill_with_set_parses() {
    // Column-fill may carry a `set` clause for the filled column.
    let (target_column, overrides) = seed_overrides("seed users.work_addr set work_addr as email");
    assert_eq!(target_column, Some("work_addr".to_string()));
    assert_eq!(overrides.len(), 1);
    let expected = SeedOverrideKind::Generator("email".into());
    assert_eq!(overrides[0].kind, expected);
}
#[test]
fn seed_bare_word_set_value_is_rejected() {
    // D2 requires values to be quoted: an unquoted word such as `active`
    // is neither a quoted string nor a number, so the typed value slot
    // refuses it and the whole command fails to parse.
    let bare = parse_command("seed users set status = active");
    assert!(
        bare.is_err(),
        "a bare-word `set` value must be rejected (quoting required, D2)"
    );
    // Quoting the same word makes the command valid.
    let quoted = parse_command("seed users set status = 'active'");
    assert!(quoted.is_ok());
}
#[test] #[test]
fn seed_populates_a_table_and_persists_rows() { fn seed_populates_a_table_and_persists_rows() {
let (project, db, _dir) = open_project_db(); let (project, db, _dir) = open_project_db();
@@ -113,7 +267,7 @@ fn seed_populates_a_table_and_persists_rows() {
create_people(&db, &rt); create_people(&db, &rt);
let result = rt let result = rt
.block_on(db.seed("People".into(), Some(7), Some(42), Some("seed People 7".into()))) .block_on(db.seed("People".into(), None, Some(7), Vec::new(), Some(42), Some("seed People 7".into())))
.expect("seed succeeds"); .expect("seed succeeds");
assert_eq!(result.produced, 7); assert_eq!(result.produced, 7);
@@ -134,7 +288,7 @@ fn seed_count_defaults_to_twenty() {
create_people(&db, &rt); create_people(&db, &rt);
let result = rt let result = rt
.block_on(db.seed("People".into(), None, Some(1), Some("seed People".into()))) .block_on(db.seed("People".into(), None, None, Vec::new(), Some(1), Some("seed People".into())))
.expect("seed succeeds"); .expect("seed succeeds");
assert_eq!(result.produced, 20, "omitted count defaults to 20"); assert_eq!(result.produced, 20, "omitted count defaults to 20");
let csv = read_csv(&project, "People").expect("People CSV exists"); let csv = read_csv(&project, "People").expect("People CSV exists");
@@ -149,9 +303,9 @@ fn seed_is_reproducible_with_a_fixed_seed() {
create_people(&db1, &rt); create_people(&db1, &rt);
create_people(&db2, &rt); create_people(&db2, &rt);
rt.block_on(db1.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) rt.block_on(db1.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into())))
.expect("seed run 1"); .expect("seed run 1");
rt.block_on(db2.seed("People".into(), Some(4), Some(123), Some("seed People 4".into()))) rt.block_on(db2.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into())))
.expect("seed run 2"); .expect("seed run 2");
let csv1 = read_csv(&p1, "People").expect("csv 1"); let csv1 = read_csv(&p1, "People").expect("csv 1");
@@ -165,7 +319,7 @@ fn seed_writes_exactly_one_history_line() {
let rt = rt(); let rt = rt();
create_people(&db, &rt); create_people(&db, &rt);
rt.block_on(db.seed("People".into(), Some(5), Some(1), Some("seed People 5".into()))) rt.block_on(db.seed("People".into(), None, Some(5), Vec::new(), Some(1), Some("seed People 5".into())))
.expect("seed succeeds"); .expect("seed succeeds");
let history = std::fs::read_to_string(project.path().join("history.log")) let history = std::fs::read_to_string(project.path().join("history.log"))
@@ -240,10 +394,10 @@ fn seed_fills_foreign_keys_from_existing_parents() {
create_users_and_orders(&db, &rt, true); create_users_and_orders(&db, &rt, true);
// 5 parents → serial ids 1..=5. // 5 parents → serial ids 1..=5.
rt.block_on(db.seed("Users".into(), Some(5), Some(1), Some("seed Users 5".into()))) rt.block_on(db.seed("Users".into(), None, Some(5), Vec::new(), Some(1), Some("seed Users 5".into())))
.expect("seed Users"); .expect("seed Users");
let res = rt let res = rt
.block_on(db.seed("Orders".into(), Some(10), Some(2), Some("seed Orders 10".into()))) .block_on(db.seed("Orders".into(), None, Some(10), Vec::new(), Some(2), Some("seed Orders 10".into())))
.expect("seed Orders"); .expect("seed Orders");
assert_eq!(res.produced, 10, "every child row must insert (valid FK)"); assert_eq!(res.produced, 10, "every child row must insert (valid FK)");
@@ -267,7 +421,7 @@ fn seed_refuses_when_a_parent_table_is_empty() {
// Users is empty — no valid FK can be fabricated. // Users is empty — no valid FK can be fabricated.
let err = rt let err = rt
.block_on(db.seed("Orders".into(), Some(3), Some(1), Some("seed Orders 3".into()))) .block_on(db.seed("Orders".into(), None, Some(3), Vec::new(), Some(1), Some("seed Orders 3".into())))
.expect_err("seed must refuse an empty parent"); .expect_err("seed must refuse an empty parent");
let msg = err.to_string(); let msg = err.to_string();
assert!(msg.contains("Users"), "error should name the empty parent: {msg}"); assert!(msg.contains("Users"), "error should name the empty parent: {msg}");
@@ -293,7 +447,7 @@ fn seed_refuses_a_not_null_blob_column() {
.expect("create Files"); .expect("create Files");
let err = rt let err = rt
.block_on(db.seed("Files".into(), Some(2), Some(1), Some("seed Files 2".into()))) .block_on(db.seed("Files".into(), None, Some(2), Vec::new(), Some(1), Some("seed Files 2".into())))
.expect_err("seed must refuse a NOT NULL blob"); .expect_err("seed must refuse a NOT NULL blob");
let msg = err.to_string(); let msg = err.to_string();
assert!( assert!(
@@ -320,7 +474,7 @@ fn seed_omits_a_nullable_blob_column() {
.expect("create Files"); .expect("create Files");
let res = rt let res = rt
.block_on(db.seed("Files".into(), Some(3), Some(1), Some("seed Files 3".into()))) .block_on(db.seed("Files".into(), None, Some(3), Vec::new(), Some(1), Some("seed Files 3".into())))
.expect("seed succeeds despite the nullable blob"); .expect("seed succeeds despite the nullable blob");
assert_eq!(res.produced, 3); assert_eq!(res.produced, 3);
let csv = read_csv(&project, "Files").expect("Files CSV"); let csv = read_csv(&project, "Files").expect("Files CSV");
@@ -354,7 +508,7 @@ fn seed_keeps_unique_columns_distinct() {
.expect("create Tags"); .expect("create Tags");
let res = rt let res = rt
.block_on(db.seed("Tags".into(), Some(8), Some(3), Some("seed Tags 8".into()))) .block_on(db.seed("Tags".into(), None, Some(8), Vec::new(), Some(3), Some("seed Tags 8".into())))
.expect("seed"); .expect("seed");
assert_eq!(res.produced, 8); assert_eq!(res.produced, 8);
@@ -383,7 +537,7 @@ fn seed_sequences_identifier_int_columns() {
.expect("create Items"); .expect("create Items");
let res = rt let res = rt
.block_on(db.seed("Items".into(), Some(5), Some(1), Some("seed Items 5".into()))) .block_on(db.seed("Items".into(), None, Some(5), Vec::new(), Some(1), Some("seed Items 5".into())))
.expect("seed"); .expect("seed");
assert_eq!(res.produced, 5); assert_eq!(res.produced, 5);
@@ -414,7 +568,7 @@ fn seed_junction_produces_distinct_combinations_and_caps() {
) )
.await .await
.expect("create parent"); .expect("create parent");
db.seed(t.into(), Some(2), Some(1), Some(format!("seed {t} 2"))) db.seed(t.into(), None, Some(2), Vec::new(), Some(1), Some(format!("seed {t} 2")))
.await .await
.expect("seed parent"); .expect("seed parent");
} }
@@ -456,7 +610,7 @@ fn seed_junction_produces_distinct_combinations_and_caps() {
// Requesting 10 caps at the 4 available distinct combinations. // Requesting 10 caps at the 4 available distinct combinations.
let res = db let res = db
.seed("J".into(), Some(10), Some(7), Some("seed J 10".into())) .seed("J".into(), None, Some(10), Vec::new(), Some(7), Some("seed J 10".into()))
.await .await
.expect("seed J"); .expect("seed J");
assert_eq!(res.produced, 4, "junction caps at available combos"); assert_eq!(res.produced, 4, "junction caps at available combos");
@@ -490,7 +644,7 @@ fn seed_draws_enum_values_from_an_in_check() {
// Every generated status must satisfy the CHECK, so all rows insert. // Every generated status must satisfy the CHECK, so all rows insert.
let res = rt let res = rt
.block_on(db.seed("Tickets".into(), Some(12), Some(2), Some("seed Tickets 12".into()))) .block_on(db.seed("Tickets".into(), None, Some(12), Vec::new(), Some(2), Some("seed Tickets 12".into())))
.expect("seed"); .expect("seed");
assert_eq!(res.produced, 12, "all rows insert — values satisfy the CHECK"); assert_eq!(res.produced, 12, "all rows insert — values satisfy the CHECK");
@@ -527,7 +681,7 @@ fn seed_advises_on_enum_ish_columns() {
.expect("create Tasks"); .expect("create Tasks");
let res = rt let res = rt
.block_on(db.seed("Tasks".into(), Some(3), Some(1), Some("seed Tasks 3".into()))) .block_on(db.seed("Tasks".into(), None, Some(3), Vec::new(), Some(1), Some("seed Tasks 3".into())))
.expect("seed"); .expect("seed");
assert!( assert!(
res.advisory_columns.contains(&"status".to_string()), res.advisory_columns.contains(&"status".to_string()),
@@ -542,7 +696,7 @@ fn seed_refuses_an_excessive_count() {
let rt = rt(); let rt = rt();
create_people(&db, &rt); create_people(&db, &rt);
let err = rt let err = rt
.block_on(db.seed("People".into(), Some(1_000_000), Some(1), Some("seed People 1000000".into()))) .block_on(db.seed("People".into(), None, Some(1_000_000), Vec::new(), Some(1), Some("seed People 1000000".into())))
.expect_err("an excessive count must be refused"); .expect_err("an excessive count must be refused");
assert!( assert!(
err.to_string().to_lowercase().contains("maximum"), err.to_string().to_lowercase().contains("maximum"),
@@ -557,7 +711,7 @@ fn seed_preview_is_capped_but_count_is_full() {
create_people(&db, &rt); create_people(&db, &rt);
let res = rt let res = rt
.block_on(db.seed("People".into(), Some(25), Some(1), Some("seed People 25".into()))) .block_on(db.seed("People".into(), None, Some(25), Vec::new(), Some(1), Some("seed People 25".into())))
.expect("seed"); .expect("seed");
assert_eq!(res.produced, 25, "the full count is produced"); assert_eq!(res.produced, 25, "the full count is produced");
assert_eq!(res.data.rows.len(), 20, "the preview is capped at 20 rows"); assert_eq!(res.data.rows.len(), 20, "the preview is capped at 20 rows");
@@ -573,6 +727,25 @@ fn seed_is_available_in_advanced_mode() {
matches!(r, Ok(Command::Seed { .. })), matches!(r, Ok(Command::Seed { .. })),
"seed must parse in advanced mode: {r:?}" "seed must parse in advanced mode: {r:?}"
); );
// The Phase 2 surfaces (set clause + column-fill) also parse in
// advanced mode — same grammar, no mode gate.
assert!(
matches!(
parse_command_in_mode("seed People 5 set status = 'active'", Mode::Advanced),
Ok(Command::Seed { .. })
),
"set clause must parse in advanced mode"
);
assert!(
matches!(
parse_command_in_mode("seed People.email set email as email", Mode::Advanced),
Ok(Command::Seed {
target_column: Some(_),
..
})
),
"column-fill must parse in advanced mode"
);
} }
// — DA-pass coverage: undo (D15), replay (D16), atomicity, zero count, // — DA-pass coverage: undo (D15), replay (D16), atomicity, zero count,
@@ -588,7 +761,7 @@ fn seed_is_one_undo_step() {
.expect("open db with undo"); .expect("open db with undo");
let rt = rt(); let rt = rt();
create_people(&db, &rt); create_people(&db, &rt);
rt.block_on(db.seed("People".into(), Some(6), Some(1), Some("seed People 6".into()))) rt.block_on(db.seed("People".into(), None, Some(6), Vec::new(), Some(1), Some("seed People 6".into())))
.expect("seed"); .expect("seed");
assert_eq!(data_row_count(&read_csv(&project, "People").unwrap()), 6); assert_eq!(data_row_count(&read_csv(&project, "People").unwrap()), 6);
@@ -598,6 +771,32 @@ fn seed_is_one_undo_step() {
assert_eq!(rows, 0, "one undo must remove every seeded row in a single step"); assert_eq!(rows, 0, "one undo must remove every seeded row in a single step");
} }
#[test]
fn seed_column_fill_is_one_undo_step() {
    // ADR-0048 D15: the bulk UPDATE behind column-fill must be undoable
    // as a single step, just like a row seed.
    let dir = tempfile::tempdir().expect("tempdir");
    let project = project::open_or_create(None, Some(dir.path())).expect("project");
    let persistence = Persistence::new(project.path().to_path_buf());
    let db = Database::open_with_persistence_and_undo(project.db_path(), persistence, true)
        .expect("open db with undo");
    let rt = rt();
    // Re-reads the persisted `status` column each time it is called.
    let statuses = || named_column_values(&read_csv(&project, "Members").unwrap(), "status");

    create_members(&db, &rt);
    run_seed(&db, &rt, "seed Members 5 --seed 1").expect("seed");

    // Pin `status` to a constant across all 5 rows…
    run_seed(&db, &rt, "seed Members.status set status = 'flagged' --seed 2")
        .expect("column-fill");
    let before = statuses();
    assert!(before.iter().all(|s| s == "flagged"), "all rows filled: {before:?}");

    // …then a single undo must restore every original value.
    rt.block_on(db.undo()).unwrap().expect("undo applied");
    let after = statuses();
    assert!(
        after.iter().all(|s| s != "flagged"),
        "one undo reverts the whole column-fill in a single step: {after:?}"
    );
    assert_eq!(after.len(), 5, "undo restores the original rows, not removes them");
}
#[test] #[test]
fn replay_reruns_a_seed_line_as_a_data_write() { fn replay_reruns_a_seed_line_as_a_data_write() {
use rdbms_playground::runtime::run_replay; use rdbms_playground::runtime::run_replay;
@@ -632,7 +831,7 @@ fn seed_rolls_back_atomically_on_a_constraint_failure() {
)) ))
.expect("create Bad"); .expect("create Bad");
let res = rt.block_on(db.seed("Bad".into(), Some(5), Some(1), Some("seed Bad 5".into()))); let res = rt.block_on(db.seed("Bad".into(), None, Some(5), Vec::new(), Some(1), Some("seed Bad 5".into())));
assert!(res.is_err(), "seed must fail when generated rows violate the CHECK"); assert!(res.is_err(), "seed must fail when generated rows violate the CHECK");
let rows = read_csv(&project, "Bad").map_or(0, |c| data_row_count(&c)); let rows = read_csv(&project, "Bad").map_or(0, |c| data_row_count(&c));
assert_eq!(rows, 0, "a failed seed must leave the table unchanged (atomic)"); assert_eq!(rows, 0, "a failed seed must leave the table unchanged (atomic)");
@@ -644,7 +843,7 @@ fn seed_zero_is_a_no_op() {
let rt = rt(); let rt = rt();
create_people(&db, &rt); create_people(&db, &rt);
let res = rt let res = rt
.block_on(db.seed("People".into(), Some(0), Some(1), Some("seed People 0".into()))) .block_on(db.seed("People".into(), None, Some(0), Vec::new(), Some(1), Some("seed People 0".into())))
.expect("seed 0 succeeds"); .expect("seed 0 succeeds");
assert_eq!(res.produced, 0); assert_eq!(res.produced, 0);
let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c));
@@ -669,7 +868,7 @@ fn seed_advises_on_a_complex_check_column() {
.expect("create Widgets"); .expect("create Widgets");
let res = rt let res = rt
.block_on(db.seed("Widgets".into(), Some(3), Some(1), Some("seed Widgets 3".into()))) .block_on(db.seed("Widgets".into(), None, Some(3), Vec::new(), Some(1), Some("seed Widgets 3".into())))
.expect("seed"); .expect("seed");
assert!( assert!(
res.advisory_columns.contains(&"label".to_string()), res.advisory_columns.contains(&"label".to_string()),
@@ -683,9 +882,9 @@ fn seed_foreign_keys_are_reproducible_with_a_fixed_seed() {
let rt = rt(); let rt = rt();
let seed_one = |db: &Database| { let seed_one = |db: &Database| {
create_users_and_orders(db, &rt, true); create_users_and_orders(db, &rt, true);
rt.block_on(db.seed("Users".into(), Some(4), Some(1), Some("seed Users 4".into()))) rt.block_on(db.seed("Users".into(), None, Some(4), Vec::new(), Some(1), Some("seed Users 4".into())))
.expect("seed users"); .expect("seed users");
rt.block_on(db.seed("Orders".into(), Some(8), Some(99), Some("seed Orders 8".into()))) rt.block_on(db.seed("Orders".into(), None, Some(8), Vec::new(), Some(99), Some("seed Orders 8".into())))
.expect("seed orders"); .expect("seed orders");
}; };
let (p1, db1, _d1) = open_project_db(); let (p1, db1, _d1) = open_project_db();
@@ -715,7 +914,7 @@ fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() {
None, None,
)) ))
.expect("create Contacts"); .expect("create Contacts");
rt.block_on(db.seed("Contacts".into(), Some(5), Some(42), Some("seed Contacts 5".into()))) rt.block_on(db.seed("Contacts".into(), None, Some(5), Vec::new(), Some(42), Some("seed Contacts 5".into())))
.expect("seed"); .expect("seed");
}; };
let (p1, db1, _d1) = open_project_db(); let (p1, db1, _d1) = open_project_db();
@@ -736,3 +935,343 @@ fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() {
assert_eq!(code.len(), 10, "shortid should be 10 chars: {code}"); assert_eq!(code.len(), 10, "shortid should be 10 chars: {code}");
} }
} }
// =================================================================
// Phase 2 (SD2) executor: set-clause overrides + column-fill,
// exercised full-stack (parse → worker) — ADR-0048 D2 / D1.
// =================================================================
/// Test helper: parse `input` and execute the resulting `seed` command
/// through `db.seed`, blocking on `rt` — grammar → builder → executor,
/// i.e. the full stack minus UI rendering.
///
/// `input` itself is passed along as the command's source line. Returns
/// whatever the executor returns; panics if `input` does not parse or is
/// not a `seed` command.
fn run_seed(
    db: &Database,
    rt: &tokio::runtime::Runtime,
    input: &str,
) -> Result<rdbms_playground::db::SeedResult, rdbms_playground::db::DbError> {
    let command = match parse_command(input) {
        Ok(command) => command,
        Err(e) => panic!("`{input}` should parse: {e:?}"),
    };
    match command {
        Command::Seed {
            table,
            target_column,
            count,
            overrides,
            rng_seed,
        } => {
            let source_line = Some(input.to_string());
            let pending = db.seed(table, target_column, count, overrides, rng_seed, source_line);
            rt.block_on(pending)
        }
        other => panic!("expected a seed command, got {other:?}"),
    }
}
/// Look up `col` in the CSV header (first line, split on `,`, cells
/// trimmed) and return that column's values across the data rows.
///
/// Panics when no header cell equals `col`.
fn named_column_values(csv: &str, col: &str) -> Vec<String> {
    let header = csv.lines().next().unwrap_or_default();
    let located = header
        .split(',')
        .enumerate()
        .find(|(_, cell)| cell.trim() == col)
        .map(|(idx, _)| idx);
    match located {
        Some(idx) => nth_column_values(csv, idx),
        None => panic!("column `{col}` not in header `{header}`"),
    }
}
/// Create the fixture table
/// `Members(id serial pk, name text, status text, role text, age int)`.
///
/// `status` and `role` have enum-ish names, so they are advisory targets
/// unless overridden; `name` and `age` are the columns the generator and
/// range override tests pin.
fn create_members(db: &Database, rt: &tokio::runtime::Runtime) {
    let columns = vec![
        ColumnSpec::new("id", Type::Serial),
        ColumnSpec::new("name", Type::Text),
        ColumnSpec::new("status", Type::Text),
        ColumnSpec::new("role", Type::Text),
        ColumnSpec::new("age", Type::Int),
    ];
    let primary_key = vec!["id".to_string()];
    let created = rt.block_on(db.create_table("Members".to_string(), columns, primary_key, None));
    created.expect("create Members");
}
#[test]
fn seed_set_fixed_value_fills_every_row() {
    let (project, db, _d) = open_project_db();
    let rt = rt();
    create_members(&db, &rt);

    run_seed(&db, &rt, "seed Members 6 set status = 'active' --seed 1").expect("seed");

    // Every one of the 6 seeded rows carries the pinned value.
    let statuses = named_column_values(&read_csv(&project, "Members").unwrap(), "status");
    assert_eq!(statuses.len(), 6);
    let all_pinned = statuses.iter().all(|s| s == "active");
    assert!(all_pinned, "every status pinned: {statuses:?}");
}
#[test]
fn seed_set_pick_list_draws_only_from_the_list() {
    let (project, db, _d) = open_project_db();
    let rt = rt();
    create_members(&db, &rt);
    run_seed(&db, &rt, "seed Members 20 set role in ('admin', 'user') --seed 2").expect("seed");
    let csv = read_csv(&project, "Members").unwrap();
    let roles = named_column_values(&csv, "role");
    // Guard against a vacuous pass: `all` over an empty column is true,
    // so first make sure the seed actually wrote the requested rows.
    assert_eq!(roles.len(), 20, "one role per seeded row: {roles:?}");
    assert!(
        roles.iter().all(|r| r == "admin" || r == "user"),
        "roles only from the list: {roles:?}"
    );
}
#[test]
fn seed_set_as_generator_forces_the_shape() {
    let (project, db, _d) = open_project_db();
    let rt = rt();
    create_members(&db, &rt);
    // Force the `name` column (a person-name heuristic) to emails.
    run_seed(&db, &rt, "seed Members 5 set name as email --seed 3").expect("seed");
    let csv = read_csv(&project, "Members").unwrap();
    let names = named_column_values(&csv, "name");
    // Guard against a vacuous pass: `all` over an empty column is true,
    // so first make sure the seed actually wrote the requested rows.
    assert_eq!(names.len(), 5, "one name per seeded row: {names:?}");
    assert!(names.iter().all(|n| n.contains('@')), "name forced to email shape: {names:?}");
}
#[test]
fn seed_set_numeric_range_stays_within_bounds() {
    let (project, db, _d) = open_project_db();
    let rt = rt();
    create_members(&db, &rt);
    run_seed(&db, &rt, "seed Members 30 set age between 30 and 40 --seed 4").expect("seed");
    let csv = read_csv(&project, "Members").unwrap();
    let ages = named_column_values(&csv, "age");
    // Guard against a vacuous pass: looping over an empty column would
    // assert nothing, so first make sure the seed wrote the requested rows.
    assert_eq!(ages.len(), 30, "one age per seeded row: {ages:?}");
    for a in ages {
        let n: i64 = a.parse().unwrap_or_else(|_| panic!("age `{a}` not an int"));
        assert!((30..=40).contains(&n), "age {n} out of [30,40]");
    }
}
/// A column with a `set` override must no longer be listed in the seed
/// result's `advisory_columns`.
#[test]
fn seed_override_drops_the_column_from_the_advisory() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    // Without an override, `status` (enum-ish) is flagged in the advisory.
    let baseline = run_seed(&db, &runtime, "seed Members 3 --seed 5").expect("seed");
    let flagged = baseline.advisory_columns.iter().any(|c| c == "status");
    assert!(
        flagged,
        "status should be advised without an override: {:?}",
        baseline.advisory_columns
    );

    // With an override on status, it must not appear in the advisory.
    let command = "seed Members 3 set status in ('a', 'b') --seed 5";
    let pinned = run_seed(&db, &runtime, command).expect("seed");
    let absent = pinned.advisory_columns.iter().all(|c| c != "status");
    assert!(
        absent,
        "overridden status must drop from advisory: {:?}",
        pinned.advisory_columns
    );
}
/// `set name as bogus` must fail with an error that says "unknown generator"
/// and names the offending generator.
#[test]
fn seed_unknown_generator_is_a_friendly_error() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    let failure = run_seed(&db, &runtime, "seed Members 3 set name as bogus").unwrap_err();
    let msg = failure.to_string();
    let names_the_generator = ["unknown generator", "bogus"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(names_the_generator, "should name the unknown generator: {msg}");
}
/// A numeric `between` range applied to a text column must be rejected with
/// an error that mentions `between`.
#[test]
fn seed_incompatible_range_is_a_friendly_error() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    // A numeric range on a text column (`name`) is rejected.
    let command = "seed Members 3 set name between 1 and 10";
    let msg = run_seed(&db, &runtime, command).unwrap_err().to_string();
    let mentions_between = msg.contains("between");
    assert!(mentions_between, "range error should mention `between`: {msg}");
}
/// Running the same `set` command with the same `--seed` against two
/// separately opened project databases must yield identical `Members` CSVs.
#[test]
fn seed_with_set_is_reproducible() {
    let (first_project, first_db, _first_guard) = open_project_db();
    let (second_project, second_db, _second_guard) = open_project_db();
    let runtime = rt();
    create_members(&first_db, &runtime);
    create_members(&second_db, &runtime);

    let command = "seed Members 10 set role in ('a', 'b', 'c'), age between 20 and 60 --seed 77";
    run_seed(&first_db, &runtime, command).expect("seed 1");
    run_seed(&second_db, &runtime, command).expect("seed 2");

    let first_csv = read_csv(&first_project, "Members").unwrap();
    let second_csv = read_csv(&second_project, "Members").unwrap();
    assert_eq!(
        first_csv, second_csv,
        "the same --seed + set clause must reproduce identical data"
    );
}
// — column-fill (ADR-0048 D1 form 2) —
/// Column-fill (`seed Members.status ...`) must rewrite the `status` of the
/// five existing rows and must not append any new rows.
#[test]
fn seed_column_fill_updates_existing_rows_without_adding() {
    let (project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    run_seed(&db, &runtime, "seed Members 5 --seed 1").expect("initial seed");
    let rows_before = data_row_count(&read_csv(&project, "Members").unwrap());
    assert_eq!(rows_before, 5);

    let fill_command = "seed Members.status set status in ('x', 'y') --seed 2";
    let fill = run_seed(&db, &runtime, fill_command).expect("column-fill");
    assert_eq!(fill.produced, 5, "column-fill touches the 5 existing rows");

    let csv = read_csv(&project, "Members").unwrap();
    assert_eq!(data_row_count(&csv), 5, "no new rows added");
    let statuses = named_column_values(&csv, "status");
    let off_list = statuses.iter().any(|s| s != "x" && s != "y");
    assert!(
        !off_list,
        "every existing row's status refilled from the list: {statuses:?}"
    );
}
/// Column-fill aimed at the `id` column must be refused with an error that
/// mentions "primary key".
#[test]
fn seed_column_fill_refuses_a_pk_target() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);
    run_seed(&db, &runtime, "seed Members 3 --seed 1").expect("seed");

    let err = run_seed(&db, &runtime, "seed Members.id").unwrap_err();
    let mentions_pk = err.to_string().contains("primary key");
    assert!(mentions_pk, "PK target refused: {err}");
}
/// Column-fill on a table with no rows must succeed and report zero
/// produced rows rather than fail.
#[test]
fn seed_column_fill_empty_table_is_a_noop() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    // No rows yet → friendly no-op, not an error.
    let command = "seed Members.status set status in ('a', 'b')";
    let outcome = run_seed(&db, &runtime, command).expect("no-op");
    assert_eq!(outcome.produced, 0, "empty table → nothing filled");
}
/// In column-fill mode a `set` clause naming a column other than the one
/// being filled must be refused with a "can only adjust" error.
#[test]
fn seed_column_fill_set_may_only_target_the_filled_column() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);
    run_seed(&db, &runtime, "seed Members 3 --seed 1").expect("seed");

    let command = "seed Members.status set role = 'x'";
    let err = run_seed(&db, &runtime, command).unwrap_err();
    let refused = err.to_string().contains("can only adjust");
    assert!(refused, "set targeting another column is refused: {err}");
}
/// Calling `Database::seed` directly with both a column target and a count
/// must fail with a "no row count" error.
#[test]
fn seed_column_fill_rejects_a_row_count() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();
    create_members(&db, &runtime);

    // `seed T.col 5` parses, but a count is meaningless for column-fill.
    let request = db.seed(
        "Members".into(),
        Some("status".into()),
        Some(5),
        Vec::new(),
        Some(1),
        Some("seed Members.status 5".into()),
    );
    let err = runtime.block_on(request).unwrap_err();
    let refused = err.to_string().contains("no row count");
    assert!(refused, "count refused: {err}");
}
/// Column-fill of the `Orders.user_id` column must re-sample it for all eight
/// existing orders, and every resulting value must fall in 1..=4 (four
/// `Users` rows are seeded beforehand).
#[test]
fn seed_column_fill_fk_target_samples_the_parent() {
    let (project, db, _d) = open_project_db();
    let rt = rt();
    create_users_and_orders(&db, &rt, true);
    run_seed(&db, &rt, "seed Users 4 --seed 1").expect("seed users");
    run_seed(&db, &rt, "seed Orders 8 --seed 2").expect("seed orders");
    // Re-fill the FK column across existing orders; every value must be a
    // valid parent key (the UPDATE would fail FK enforcement otherwise).
    let res = run_seed(&db, &rt, "seed Orders.user_id --seed 3").expect("column-fill FK");
    assert_eq!(res.produced, 8);
    let csv = read_csv(&project, "Orders").unwrap();
    let user_ids = named_column_values(&csv, "user_id");
    // An empty column would satisfy the range check vacuously, so pin the
    // count first.
    assert_eq!(user_ids.len(), 8, "one user_id per existing order: {user_ids:?}");
    for v in user_ids.iter() {
        // Report the offending value instead of a bare `unwrap` panic.
        let id: i64 = v.parse().unwrap_or_else(|_| panic!("user_id `{v}` not an int"));
        assert!((1..=4).contains(&id), "user_id {id} out of the parent key range [1,4]");
    }
}
/// Bounded overrides on a UNIQUE column: a fixed value or a pick-list shorter
/// than the row count is refused with a capacity error, while a pick-list
/// with enough values or a named generator succeeds.
#[test]
fn seed_fixed_override_on_unique_column_is_a_friendly_error() {
    // DA finding (user-chosen: friendly error). A fixed value can't fill a
    // UNIQUE column for more than one row — refuse up front rather than
    // silently capping to 1.
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();

    let mut email = ColumnSpec::new("email", Type::Text);
    email.unique = true;
    let columns = vec![ColumnSpec::new("id", Type::Serial), email];
    let creation = db.create_table("U".to_string(), columns, vec!["id".to_string()], None);
    runtime.block_on(creation).expect("create U");

    let fixed_err = run_seed(&db, &runtime, "seed U 5 set email = 'x@y.com'").unwrap_err();
    let msg = fixed_err.to_string();
    let is_capacity_error = msg.contains("UNIQUE") && msg.contains("distinct");
    assert!(
        is_capacity_error,
        "fixed value on a UNIQUE column should be a friendly capacity error: {msg}"
    );

    // A short pick-list (< count) is likewise refused...
    let short_list = "seed U 5 set email in ('a@b.c', 'd@e.f')";
    let err2 = run_seed(&db, &runtime, short_list).unwrap_err();
    assert!(err2.to_string().contains("distinct"), "short list refused: {err2}");

    // ...but a pick-list with enough distinct values succeeds.
    let full_list = "seed U 3 set email in ('a@b.c', 'd@e.f', 'g@h.i') --seed 1";
    let ok = run_seed(&db, &runtime, full_list).expect("a list >= count fills cleanly");
    assert_eq!(ok.produced, 3);

    // A generator is unbounded — also fine.
    let generated = run_seed(&db, &runtime, "seed U 4 set email as email --seed 2")
        .expect("generator fills a unique column");
    assert_eq!(generated.produced, 4);
}
/// Column-fill of a UNIQUE column with a single fixed value across several
/// existing rows must be refused with an error mentioning "UNIQUE".
#[test]
fn seed_column_fill_fixed_on_unique_column_is_a_friendly_error() {
    let (_project, db, _guard) = open_project_db();
    let runtime = rt();

    let mut email = ColumnSpec::new("email", Type::Text);
    email.unique = true;
    let columns = vec![ColumnSpec::new("id", Type::Serial), email];
    let creation = db.create_table("U".to_string(), columns, vec!["id".to_string()], None);
    runtime.block_on(creation).expect("create U");

    run_seed(&db, &runtime, "seed U 4 set email as email --seed 1").expect("seed 4 rows");

    // Filling the UNIQUE column on 4 rows with one fixed value is refused.
    let command = "seed U.email set email = 'same@x.com'";
    let err = run_seed(&db, &runtime, command).unwrap_err();
    let mentions_unique = err.to_string().contains("UNIQUE");
    assert!(
        mentions_unique,
        "column-fill of a fixed value on a UNIQUE column should refuse: {err}"
    );
}
+28
View File
@@ -477,4 +477,32 @@ fn seed_completion_and_validity() {
flag_cands.iter().any(|c| c.contains("seed")), flag_cands.iter().any(|c| c.contains("seed")),
"`--seed` should be offered as a candidate, got {flag_cands:?}" "`--seed` should be offered as a candidate, got {flag_cands:?}"
); );
// Phase 2 (ADR-0048 D2): the `set` clause is offered after the count.
assert!(
flag_cands.iter().any(|c| c == "set"),
"`set` should be offered after the count, got {flag_cands:?}"
);
// `set ` offers the active table's columns (narrowed to Customers).
let set_cands = completion_candidate_texts(&assess_at_end("seed Customers set ", &schema));
assert!(
set_cands.iter().any(|c| c == "Name") && set_cands.iter().any(|c| c == "Email"),
"`set ` should complete this table's columns, got {set_cands:?}"
);
// `set <col> as ` offers the curated generator vocabulary (D9).
let gen_cands =
completion_candidate_texts(&assess_at_end("seed Customers set Email as ", &schema));
assert!(
gen_cands.iter().any(|c| c == "email") && gen_cands.iter().any(|c| c == "product"),
"`as ` should complete generator names, got {gen_cands:?}"
);
// Column-fill (D1 form 2): `seed Customers.` offers the columns.
let fill_cands = completion_candidate_texts(&assess_at_end("seed Customers.", &schema));
assert!(
fill_cands.iter().any(|c| c == "Name"),
"`seed Customers.` should complete column names, got {fill_cands:?}"
);
} }