feat(seed): FK sampling, empty-parent error, block guard (ADR-0048 P1.3a)

do_seed fills foreign-key columns by sampling existing parent rows
(D14): sample_parent_key_tuples reads distinct parent keys, and a
compound FK reads all its child columns from one sampled parent row per
child row. An empty parent is refused with a friendly "seed the parent
first" error. The block guard (D1) refuses a NOT NULL blob column (seed
can't generate one); a nullable blob is omitted (-> NULL).

4 integration tests (valid FK references, empty-parent refusal, NOT NULL
blob refusal, nullable-blob omission). 2331 pass / 0 fail / 0 skip,
clippy all-targets clean.

Deferred to P1.3b: identifier/constraint uniqueness incl. junction
distinct-combos (D10), IN-CHECK derivation (D17), dedicated SeedResult +
capped preview (D18) + advisory (D12/D13), and the multi-row path.
This commit is contained in:
claude@clouddev1
2026-06-11 17:22:04 +00:00
parent f1e9484af3
commit 73493fa68b
2 changed files with 308 additions and 48 deletions
+157 -47
View File
@@ -8686,22 +8686,87 @@ fn count_rows(conn: &Connection, table: &str) -> Result<i64, DbError> {
/// Default row count when `seed <T>` omits the count (ADR-0048 D6).
const DEFAULT_SEED_COUNT: u64 = 20;
/// How a single column's value is produced for each seeded row.
///
/// `do_seed` builds one plan per emitted column, in the same order as
/// the column-name list, and then evaluates every plan once per row.
enum SeedColPlan {
/// Generated from the seed library (the generator is chosen once;
/// `generate_value` runs per row).
///
/// `generator` is the result of `seed::choose_generator` for the
/// column; `ty` is the column type handed to `seed::generate_value`
/// together with the generator on every row.
Generated {
generator: crate::seed::Generator,
ty: Type,
},
/// A foreign-key child column: sampled from an existing parent row
/// (ADR-0048 D14). `fk_idx` selects the FK; `pos` selects this
/// column's slot within the parent key tuple (so a compound FK's
/// child columns all read from the *same* sampled parent row).
///
/// `fk_idx` indexes both the schema's foreign-key list and the
/// pre-sampled parent tuples, which are built in the same order.
ForeignKey { fk_idx: usize, pos: usize },
}
/// Read the distinct key tuples currently stored in a parent table so
/// foreign-key columns can be seeded with real references (ADR-0048 D14).
///
/// Runs `SELECT DISTINCT <parent_columns> FROM <parent_table>`; table and
/// column identifiers are double-quoted with any embedded `"` doubled.
/// Each result row becomes one `Vec<Value>` whose slots follow
/// `parent_columns` order, so a compound FK can take all of its child
/// values from one consistent parent row.
///
/// Cell mapping: NULL and blob cells become `Value::Null`, integer and
/// real cells become `Value::Number` holding their `to_string()` text,
/// and text cells become `Value::Text` (invalid UTF-8 is replaced
/// lossily).
///
/// The result is empty when the parent has no rows; the caller is
/// responsible for turning that into the "seed the parent first" error.
///
/// # Errors
///
/// Any rusqlite failure while preparing the statement, running the
/// query, or reading a cell is converted with `DbError::from_rusqlite`.
fn sample_parent_key_tuples(
    conn: &Connection,
    parent_table: &str,
    parent_columns: &[String],
) -> Result<Vec<Vec<Value>>, DbError> {
    /// Convert one raw SQLite cell into the app's `Value`.
    fn cell_to_value(cell: rusqlite::types::ValueRef<'_>) -> Value {
        match cell {
            // FK keys are never blobs in this app, so a blob is read as NULL.
            rusqlite::types::ValueRef::Null | rusqlite::types::ValueRef::Blob(_) => Value::Null,
            rusqlite::types::ValueRef::Integer(int) => Value::Number(int.to_string()),
            rusqlite::types::ValueRef::Real(real) => Value::Number(real.to_string()),
            rusqlite::types::ValueRef::Text(bytes) => {
                Value::Text(String::from_utf8_lossy(bytes).into_owned())
            }
        }
    }

    // Wrap an identifier in double quotes, doubling any quote it contains.
    let quoted = |ident: &str| format!("\"{}\"", ident.replace('"', "\"\""));

    let select_list = parent_columns
        .iter()
        .map(|column| quoted(column))
        .collect::<Vec<_>>()
        .join(", ");
    let query = format!("SELECT DISTINCT {select_list} FROM {}", quoted(parent_table));
    let width = parent_columns.len();

    let mut statement = conn.prepare(&query).map_err(DbError::from_rusqlite)?;
    let mapped_rows = statement
        .query_map([], |row| {
            // Stops at the first unreadable cell and reports its error.
            (0..width)
                .map(|slot| row.get_ref(slot).map(cell_to_value))
                .collect::<Result<Vec<_>, _>>()
        })
        .map_err(DbError::from_rusqlite)?;

    mapped_rows
        .collect::<Result<Vec<_>, _>>()
        .map_err(DbError::from_rusqlite)
}
/// Populate a table with generated fake data (ADR-0048, SD1).
///
/// **Phase 1 walking skeleton.** Generates whole rows for every user
/// column that is not an autogen `serial`/`shortid` and not a foreign
/// key, inserting them one at a time through [`do_insert`] — which
/// reuses all the existing per-value validation, autogen autofill,
/// FK-error enrichment and persistence machinery. The whole seed is a
/// single undo step (the worker wraps the call in one `snapshot_then`)
/// and writes exactly one `history.log` line (only the first row
/// carries the `source`).
/// **Phase 1.** Generates whole rows and inserts them one at a time
/// through [`do_insert`] — reusing all the existing per-value
/// validation, autogen autofill, FK-error enrichment and persistence
/// machinery. The whole seed is a single undo step (the worker wraps
/// the call in one `snapshot_then`) and writes exactly one
/// `history.log` line (only the first row carries the `source`).
///
/// Deferred to the next phase (ADR-0048): FK sampling from parent rows
/// (D14), the efficient single-transaction multi-row path, identifier
/// uniqueness (D10), the `IN`-CHECK value derivation (D17), the
/// required-column block guard (D1), the capped auto-show preview
/// (D18), and the enum/CHECK advisory (D12/D13).
/// Foreign-key columns are filled by sampling existing parent rows
/// (D14); a compound FK reads all its child columns from one sampled
/// parent row. An empty parent is refused with a friendly error. A
/// `NOT NULL blob` column (which seed cannot generate) is refused by
/// the block guard (D1); a nullable blob is omitted (→ NULL).
///
/// Deferred: identifier/constraint uniqueness incl. junction
/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the
/// efficient single-transaction multi-row path, the capped auto-show
/// preview (D18), and the enum/CHECK advisory (D12/D13).
fn do_seed(
conn: &Connection,
persistence: Option<&Persistence>,
@@ -8711,6 +8776,7 @@ fn do_seed(
rng_seed: Option<u64>,
) -> Result<InsertResult, DbError> {
use crate::seed;
use rand::RngExt;
let canonical_table = require_canonical_table(conn, table)?;
let table = canonical_table.as_str();
@@ -8719,48 +8785,92 @@ fn do_seed(
let schema = read_schema(conn, table)?;
// FK child columns are filled by the executor in a later phase; for
// now they are omitted (left to NULL / default).
let fk_children: std::collections::HashSet<&str> = schema
.foreign_keys
.iter()
.flat_map(|fk| fk.child_columns.iter().map(String::as_str))
.collect();
// Pre-sample each FK's parent key tuples (D14); refuse if a parent
// is empty (no valid reference can be fabricated).
let mut fk_samples: Vec<Vec<Vec<Value>>> = Vec::with_capacity(schema.foreign_keys.len());
for fk in &schema.foreign_keys {
let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &fk.parent_columns)?;
if tuples.is_empty() {
return Err(DbError::Unsupported(format!(
"cannot seed `{table}`: parent table `{}` (referenced by `{}`) has no rows. \
Seed or insert into `{}` first.",
fk.parent_table,
fk.child_columns.join(", "),
fk.parent_table,
)));
}
fk_samples.push(tuples);
}
// child column → (fk index, position within the FK's column list).
let mut fk_child_pos: std::collections::HashMap<&str, (usize, usize)> =
std::collections::HashMap::new();
for (fk_idx, fk) in schema.foreign_keys.iter().enumerate() {
for (pos, child) in fk.child_columns.iter().enumerate() {
fk_child_pos.insert(child.as_str(), (fk_idx, pos));
}
}
// Columns we generate values for: every user column that is not an
// autogen serial/shortid and not an FK child.
let gen_columns: Vec<&ReadColumn> = schema
.columns
.iter()
.filter(|c| {
!matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId))
&& !fk_children.contains(c.name.as_str())
})
.collect();
let col_names: Vec<String> = gen_columns.iter().map(|c| c.name.clone()).collect();
// Build the per-column generation plan, skipping autogen and
// un-generatable columns.
let mut col_names: Vec<String> = Vec::new();
let mut plans: Vec<SeedColPlan> = Vec::new();
for c in &schema.columns {
let ty = c.user_type.unwrap_or(Type::Text);
// serial/shortid auto-fill in `do_insert`; omit them.
if matches!(ty, Type::Serial | Type::ShortId) {
continue;
}
// blob has no DSL value path: refuse if required (D1), else omit.
if matches!(ty, Type::Blob) {
if c.notnull {
return Err(DbError::Unsupported(format!(
"cannot seed `{table}`: column `{}` is `NOT NULL` but has type `blob`, \
which seed cannot generate. Add the rows another way or make it nullable.",
c.name,
)));
}
continue;
}
col_names.push(c.name.clone());
if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) {
plans.push(SeedColPlan::ForeignKey { fk_idx, pos });
} else {
let spec = seed::ColumnSpec {
name: c.name.clone(),
ty,
not_null: c.notnull,
primary_key: c.primary_key,
unique: c.unique,
is_foreign_key: false,
// `IN`-CHECK derivation is a later phase.
check_in_values: None,
};
let generator = seed::choose_generator(table, &spec);
plans.push(SeedColPlan::Generated { generator, ty });
}
}
let mut rng = seed::make_rng(rng_seed);
let mut rows_affected = 0usize;
let mut last_data: Option<DataResult> = None;
for i in 0..n {
let values: Vec<Value> = gen_columns
// One sampled parent row per FK for this row, so a compound FK's
// children stay consistent.
let fk_choice: Vec<usize> = fk_samples
.iter()
.map(|c| {
let ty = c.user_type.unwrap_or(Type::Text);
let spec = seed::ColumnSpec {
name: c.name.clone(),
ty,
not_null: c.notnull,
primary_key: c.primary_key,
unique: c.unique,
// FK children are already filtered out above.
is_foreign_key: false,
// `IN`-CHECK derivation is a later phase.
check_in_values: None,
};
let generator = seed::choose_generator(table, &spec);
seed::generate_value(&generator, ty, &mut rng)
.map(|tuples| rng.random_range(0..tuples.len()))
.collect();
let values: Vec<Value> = plans
.iter()
.map(|plan| match plan {
SeedColPlan::Generated { generator, ty } => {
seed::generate_value(generator, *ty, &mut rng)
}
SeedColPlan::ForeignKey { fk_idx, pos } => {
fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone()
}
})
.collect();