feat(seed): FK sampling, empty-parent error, block guard (ADR-0048 P1.3a)
do_seed fills foreign-key columns by sampling existing parent rows (D14): sample_parent_key_tuples reads distinct parent keys, and a compound FK reads all its child columns from one sampled parent row per child row. An empty parent is refused with a friendly "seed the parent first" error. The block guard (D1) refuses a NOT NULL blob column (seed can't generate one); a nullable blob is omitted (-> NULL). 4 integration tests (valid FK references, empty-parent refusal, NOT NULL blob refusal, nullable-blob omission). 2331 pass / 0 fail / 0 skip, clippy all-targets clean. Deferred to P1.3b: identifier/constraint uniqueness incl. junction distinct-combos (D10), IN-CHECK derivation (D17), dedicated SeedResult + capped preview (D18) + advisory (D12/D13), and the multi-row path.
This commit is contained in:
@@ -8686,22 +8686,87 @@ fn count_rows(conn: &Connection, table: &str) -> Result<i64, DbError> {
|
|||||||
/// Default row count when `seed <T>` omits the count (ADR-0048 D6).
|
/// Default row count when `seed <T>` omits the count (ADR-0048 D6).
|
||||||
const DEFAULT_SEED_COUNT: u64 = 20;
|
const DEFAULT_SEED_COUNT: u64 = 20;
|
||||||
|
|
||||||
|
/// How a single column's value is produced for each seeded row.
|
||||||
|
enum SeedColPlan {
|
||||||
|
/// Generated from the seed library (the generator is chosen once;
|
||||||
|
/// `generate_value` runs per row).
|
||||||
|
Generated {
|
||||||
|
generator: crate::seed::Generator,
|
||||||
|
ty: Type,
|
||||||
|
},
|
||||||
|
/// A foreign-key child column: sampled from an existing parent row
|
||||||
|
/// (ADR-0048 D14). `fk_idx` selects the FK; `pos` selects this
|
||||||
|
/// column's slot within the parent key tuple (so a compound FK's
|
||||||
|
/// child columns all read from the *same* sampled parent row).
|
||||||
|
ForeignKey { fk_idx: usize, pos: usize },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sample existing parent-key tuples for FK generation (ADR-0048 D14).
|
||||||
|
///
|
||||||
|
/// Returns one `Value` tuple per distinct parent row in
|
||||||
|
/// `parent_columns` order, so a compound FK's children can be filled
|
||||||
|
/// from one consistent parent row. Empty when the parent has no rows
|
||||||
|
/// (the caller turns that into the friendly "seed the parent first"
|
||||||
|
/// error).
|
||||||
|
fn sample_parent_key_tuples(
|
||||||
|
conn: &Connection,
|
||||||
|
parent_table: &str,
|
||||||
|
parent_columns: &[String],
|
||||||
|
) -> Result<Vec<Vec<Value>>, DbError> {
|
||||||
|
let cols = parent_columns
|
||||||
|
.iter()
|
||||||
|
.map(|c| format!("\"{}\"", c.replace('"', "\"\"")))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join(", ");
|
||||||
|
let sql = format!(
|
||||||
|
"SELECT DISTINCT {cols} FROM \"{}\"",
|
||||||
|
parent_table.replace('"', "\"\"")
|
||||||
|
);
|
||||||
|
let n = parent_columns.len();
|
||||||
|
let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
|
||||||
|
let tuples = stmt
|
||||||
|
.query_map([], |row| {
|
||||||
|
let mut tuple = Vec::with_capacity(n);
|
||||||
|
for i in 0..n {
|
||||||
|
let v = match row.get_ref(i)? {
|
||||||
|
rusqlite::types::ValueRef::Null => Value::Null,
|
||||||
|
rusqlite::types::ValueRef::Integer(x) => Value::Number(x.to_string()),
|
||||||
|
rusqlite::types::ValueRef::Real(x) => Value::Number(x.to_string()),
|
||||||
|
rusqlite::types::ValueRef::Text(t) => {
|
||||||
|
Value::Text(String::from_utf8_lossy(t).into_owned())
|
||||||
|
}
|
||||||
|
// FK keys are never blobs in this app; treat as NULL.
|
||||||
|
rusqlite::types::ValueRef::Blob(_) => Value::Null,
|
||||||
|
};
|
||||||
|
tuple.push(v);
|
||||||
|
}
|
||||||
|
Ok(tuple)
|
||||||
|
})
|
||||||
|
.map_err(DbError::from_rusqlite)?
|
||||||
|
.collect::<Result<Vec<_>, _>>()
|
||||||
|
.map_err(DbError::from_rusqlite)?;
|
||||||
|
Ok(tuples)
|
||||||
|
}
|
||||||
|
|
||||||
/// Populate a table with generated fake data (ADR-0048, SD1).
|
/// Populate a table with generated fake data (ADR-0048, SD1).
|
||||||
///
|
///
|
||||||
/// **Phase 1 walking skeleton.** Generates whole rows for every user
|
/// **Phase 1.** Generates whole rows and inserts them one at a time
|
||||||
/// column that is not an autogen `serial`/`shortid` and not a foreign
|
/// through [`do_insert`] — reusing all the existing per-value
|
||||||
/// key, inserting them one at a time through [`do_insert`] — which
|
/// validation, autogen autofill, FK-error enrichment and persistence
|
||||||
/// reuses all the existing per-value validation, autogen autofill,
|
/// machinery. The whole seed is a single undo step (the worker wraps
|
||||||
/// FK-error enrichment and persistence machinery. The whole seed is a
|
/// the call in one `snapshot_then`) and writes exactly one
|
||||||
/// single undo step (the worker wraps the call in one `snapshot_then`)
|
/// `history.log` line (only the first row carries the `source`).
|
||||||
/// and writes exactly one `history.log` line (only the first row
|
|
||||||
/// carries the `source`).
|
|
||||||
///
|
///
|
||||||
/// Deferred to the next phase (ADR-0048): FK sampling from parent rows
|
/// Foreign-key columns are filled by sampling existing parent rows
|
||||||
/// (D14), the efficient single-transaction multi-row path, identifier
|
/// (D14); a compound FK reads all its child columns from one sampled
|
||||||
/// uniqueness (D10), the `IN`-CHECK value derivation (D17), the
|
/// parent row. An empty parent is refused with a friendly error. A
|
||||||
/// required-column block guard (D1), the capped auto-show preview
|
/// `NOT NULL blob` column (which seed cannot generate) is refused by
|
||||||
/// (D18), and the enum/CHECK advisory (D12/D13).
|
/// the block guard (D1); a nullable blob is omitted (→ NULL).
|
||||||
|
///
|
||||||
|
/// Deferred: identifier/constraint uniqueness incl. junction
|
||||||
|
/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the
|
||||||
|
/// efficient single-transaction multi-row path, the capped auto-show
|
||||||
|
/// preview (D18), and the enum/CHECK advisory (D12/D13).
|
||||||
fn do_seed(
|
fn do_seed(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
persistence: Option<&Persistence>,
|
persistence: Option<&Persistence>,
|
||||||
@@ -8711,6 +8776,7 @@ fn do_seed(
|
|||||||
rng_seed: Option<u64>,
|
rng_seed: Option<u64>,
|
||||||
) -> Result<InsertResult, DbError> {
|
) -> Result<InsertResult, DbError> {
|
||||||
use crate::seed;
|
use crate::seed;
|
||||||
|
use rand::RngExt;
|
||||||
|
|
||||||
let canonical_table = require_canonical_table(conn, table)?;
|
let canonical_table = require_canonical_table(conn, table)?;
|
||||||
let table = canonical_table.as_str();
|
let table = canonical_table.as_str();
|
||||||
@@ -8719,48 +8785,92 @@ fn do_seed(
|
|||||||
|
|
||||||
let schema = read_schema(conn, table)?;
|
let schema = read_schema(conn, table)?;
|
||||||
|
|
||||||
// FK child columns are filled by the executor in a later phase; for
|
// Pre-sample each FK's parent key tuples (D14); refuse if a parent
|
||||||
// now they are omitted (left to NULL / default).
|
// is empty (no valid reference can be fabricated).
|
||||||
let fk_children: std::collections::HashSet<&str> = schema
|
let mut fk_samples: Vec<Vec<Vec<Value>>> = Vec::with_capacity(schema.foreign_keys.len());
|
||||||
.foreign_keys
|
for fk in &schema.foreign_keys {
|
||||||
.iter()
|
let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &fk.parent_columns)?;
|
||||||
.flat_map(|fk| fk.child_columns.iter().map(String::as_str))
|
if tuples.is_empty() {
|
||||||
.collect();
|
return Err(DbError::Unsupported(format!(
|
||||||
|
"cannot seed `{table}`: parent table `{}` (referenced by `{}`) has no rows. \
|
||||||
|
Seed or insert into `{}` first.",
|
||||||
|
fk.parent_table,
|
||||||
|
fk.child_columns.join(", "),
|
||||||
|
fk.parent_table,
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
fk_samples.push(tuples);
|
||||||
|
}
|
||||||
|
// child column → (fk index, position within the FK's column list).
|
||||||
|
let mut fk_child_pos: std::collections::HashMap<&str, (usize, usize)> =
|
||||||
|
std::collections::HashMap::new();
|
||||||
|
for (fk_idx, fk) in schema.foreign_keys.iter().enumerate() {
|
||||||
|
for (pos, child) in fk.child_columns.iter().enumerate() {
|
||||||
|
fk_child_pos.insert(child.as_str(), (fk_idx, pos));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Columns we generate values for: every user column that is not an
|
// Build the per-column generation plan, skipping autogen and
|
||||||
// autogen serial/shortid and not an FK child.
|
// un-generatable columns.
|
||||||
let gen_columns: Vec<&ReadColumn> = schema
|
let mut col_names: Vec<String> = Vec::new();
|
||||||
.columns
|
let mut plans: Vec<SeedColPlan> = Vec::new();
|
||||||
.iter()
|
for c in &schema.columns {
|
||||||
.filter(|c| {
|
let ty = c.user_type.unwrap_or(Type::Text);
|
||||||
!matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId))
|
// serial/shortid auto-fill in `do_insert`; omit them.
|
||||||
&& !fk_children.contains(c.name.as_str())
|
if matches!(ty, Type::Serial | Type::ShortId) {
|
||||||
})
|
continue;
|
||||||
.collect();
|
}
|
||||||
let col_names: Vec<String> = gen_columns.iter().map(|c| c.name.clone()).collect();
|
// blob has no DSL value path: refuse if required (D1), else omit.
|
||||||
|
if matches!(ty, Type::Blob) {
|
||||||
|
if c.notnull {
|
||||||
|
return Err(DbError::Unsupported(format!(
|
||||||
|
"cannot seed `{table}`: column `{}` is `NOT NULL` but has type `blob`, \
|
||||||
|
which seed cannot generate. Add the rows another way or make it nullable.",
|
||||||
|
c.name,
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
col_names.push(c.name.clone());
|
||||||
|
if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) {
|
||||||
|
plans.push(SeedColPlan::ForeignKey { fk_idx, pos });
|
||||||
|
} else {
|
||||||
|
let spec = seed::ColumnSpec {
|
||||||
|
name: c.name.clone(),
|
||||||
|
ty,
|
||||||
|
not_null: c.notnull,
|
||||||
|
primary_key: c.primary_key,
|
||||||
|
unique: c.unique,
|
||||||
|
is_foreign_key: false,
|
||||||
|
// `IN`-CHECK derivation is a later phase.
|
||||||
|
check_in_values: None,
|
||||||
|
};
|
||||||
|
let generator = seed::choose_generator(table, &spec);
|
||||||
|
plans.push(SeedColPlan::Generated { generator, ty });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut rng = seed::make_rng(rng_seed);
|
let mut rng = seed::make_rng(rng_seed);
|
||||||
let mut rows_affected = 0usize;
|
let mut rows_affected = 0usize;
|
||||||
let mut last_data: Option<DataResult> = None;
|
let mut last_data: Option<DataResult> = None;
|
||||||
|
|
||||||
for i in 0..n {
|
for i in 0..n {
|
||||||
let values: Vec<Value> = gen_columns
|
// One sampled parent row per FK for this row, so a compound FK's
|
||||||
|
// children stay consistent.
|
||||||
|
let fk_choice: Vec<usize> = fk_samples
|
||||||
.iter()
|
.iter()
|
||||||
.map(|c| {
|
.map(|tuples| rng.random_range(0..tuples.len()))
|
||||||
let ty = c.user_type.unwrap_or(Type::Text);
|
.collect();
|
||||||
let spec = seed::ColumnSpec {
|
|
||||||
name: c.name.clone(),
|
let values: Vec<Value> = plans
|
||||||
ty,
|
.iter()
|
||||||
not_null: c.notnull,
|
.map(|plan| match plan {
|
||||||
primary_key: c.primary_key,
|
SeedColPlan::Generated { generator, ty } => {
|
||||||
unique: c.unique,
|
seed::generate_value(generator, *ty, &mut rng)
|
||||||
// FK children are already filtered out above.
|
}
|
||||||
is_foreign_key: false,
|
SeedColPlan::ForeignKey { fk_idx, pos } => {
|
||||||
// `IN`-CHECK derivation is a later phase.
|
fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone()
|
||||||
check_in_values: None,
|
}
|
||||||
};
|
|
||||||
let generator = seed::choose_generator(table, &spec);
|
|
||||||
seed::generate_value(&generator, ty, &mut rng)
|
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
|
|||||||
+151
-1
@@ -5,7 +5,7 @@
|
|||||||
//! line for the whole command (ADR-0048 D15 / U3).
|
//! line for the whole command (ADR-0048 D15 / U3).
|
||||||
|
|
||||||
use rdbms_playground::db::Database;
|
use rdbms_playground::db::Database;
|
||||||
use rdbms_playground::dsl::{ColumnSpec, Command, Type, parse_command};
|
use rdbms_playground::dsl::{ColumnSpec, Command, ReferentialAction, Type, parse_command};
|
||||||
use rdbms_playground::persistence::Persistence;
|
use rdbms_playground::persistence::Persistence;
|
||||||
use rdbms_playground::project;
|
use rdbms_playground::project;
|
||||||
|
|
||||||
@@ -148,3 +148,153 @@ fn seed_writes_exactly_one_history_line() {
|
|||||||
"a seed of 5 rows must write exactly one history line:\n{history}"
|
"a seed of 5 rows must write exactly one history line:\n{history}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// — FK sampling, empty-parent error, block guard (ADR-0048 D14 / D1) —
|
||||||
|
|
||||||
|
/// `Users(id serial pk, name text)` + `Orders(id serial pk, user_id
|
||||||
|
/// int, total decimal)` with `Orders.user_id -> Users.id`.
|
||||||
|
fn create_users_and_orders(db: &Database, rt: &tokio::runtime::Runtime, add_fk: bool) {
|
||||||
|
rt.block_on(async {
|
||||||
|
db.create_table(
|
||||||
|
"Users".to_string(),
|
||||||
|
vec![
|
||||||
|
ColumnSpec::new("id", Type::Serial),
|
||||||
|
ColumnSpec::new("name", Type::Text),
|
||||||
|
],
|
||||||
|
vec!["id".to_string()],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("create Users");
|
||||||
|
db.create_table(
|
||||||
|
"Orders".to_string(),
|
||||||
|
vec![
|
||||||
|
ColumnSpec::new("id", Type::Serial),
|
||||||
|
ColumnSpec::new("user_id", Type::Int),
|
||||||
|
ColumnSpec::new("total", Type::Decimal),
|
||||||
|
],
|
||||||
|
vec!["id".to_string()],
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("create Orders");
|
||||||
|
if add_fk {
|
||||||
|
db.add_relationship(
|
||||||
|
None,
|
||||||
|
"Users".to_string(),
|
||||||
|
vec!["id".to_string()],
|
||||||
|
"Orders".to_string(),
|
||||||
|
vec!["user_id".to_string()],
|
||||||
|
ReferentialAction::NoAction,
|
||||||
|
ReferentialAction::NoAction,
|
||||||
|
false,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.expect("add Orders->Users FK");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts the `user_id` field (comma-separated field index 1 of
/// `Orders(id, user_id, total)`) from every data row of `csv`.
///
/// Blank lines are ignored and the first non-blank line is treated as
/// the header and skipped. A row with fewer than two fields yields an
/// empty string.
fn order_user_ids(csv: &str) -> Vec<String> {
    csv.lines()
        .filter(|line| !line.trim().is_empty())
        .skip(1) // header
        .map(|row| row.split(',').nth(1).map(str::to_owned).unwrap_or_default())
        .collect()
}
|
||||||
|
|
||||||
|
/// Seeding a child table after its parent must insert every requested
/// row, and each `user_id` must be one of the parent ids.
#[test]
fn seed_fills_foreign_keys_from_existing_parents() {
    let (project, db, _dir) = open_project_db();
    let rt = rt();
    create_users_and_orders(&db, &rt, true);

    // Seed 5 parents first; their serial ids are expected to be 1..=5.
    let seed_users = db.seed("Users".into(), Some(5), Some(1), Some("seed Users 5".into()));
    rt.block_on(seed_users).expect("seed Users");

    let seed_orders = db.seed("Orders".into(), Some(10), Some(2), Some("seed Orders 10".into()));
    let res = rt.block_on(seed_orders).expect("seed Orders");
    assert_eq!(res.rows_affected, 10, "every child row must insert (valid FK)");

    let csv = read_csv(&project, "Orders").expect("Orders CSV");
    let user_ids = order_user_ids(&csv);
    assert_eq!(user_ids.len(), 10);

    let valid: std::collections::HashSet<String> =
        (1..=5).map(|id: i32| id.to_string()).collect();
    user_ids.iter().for_each(|uid| {
        assert!(
            valid.contains(uid),
            "user_id `{uid}` does not reference an existing parent:\n{csv}"
        );
    });
}
|
||||||
|
|
||||||
|
/// Seeding a child whose parent table has no rows must fail with an
/// error that names the parent and hints at the fix.
#[test]
fn seed_refuses_when_a_parent_table_is_empty() {
    let (_project, db, _dir) = open_project_db();
    let rt = rt();
    create_users_and_orders(&db, &rt, true);

    // Users was never populated, so there is nothing to reference.
    let attempt = db.seed("Orders".into(), Some(3), Some(1), Some("seed Orders 3".into()));
    let msg = rt
        .block_on(attempt)
        .expect_err("seed must refuse an empty parent")
        .to_string();

    assert!(msg.contains("Users"), "error should name the empty parent: {msg}");

    let lower = msg.to_lowercase();
    let explains_fix = lower.contains("no rows") || lower.contains("first");
    assert!(explains_fix, "error should explain how to fix it: {msg}");
}
|
||||||
|
|
||||||
|
/// A `NOT NULL` blob column cannot be generated, so seeding must fail
/// with an error that names both the column and the `blob` type.
#[test]
fn seed_refuses_a_not_null_blob_column() {
    let (_project, db, _dir) = open_project_db();
    let rt = rt();

    let payload = {
        let mut col = ColumnSpec::new("payload", Type::Blob);
        col.not_null = true;
        col
    };
    let columns = vec![ColumnSpec::new("id", Type::Serial), payload];
    rt.block_on(db.create_table(
        "Files".to_string(),
        columns,
        vec!["id".to_string()],
        None,
    ))
    .expect("create Files");

    let attempt = db.seed("Files".into(), Some(2), Some(1), Some("seed Files 2".into()));
    let msg = rt
        .block_on(attempt)
        .expect_err("seed must refuse a NOT NULL blob")
        .to_string();

    let names_column = msg.contains("payload");
    let names_type = msg.to_lowercase().contains("blob");
    assert!(
        names_column && names_type,
        "error should name the un-generatable blob column: {msg}"
    );
}
|
||||||
|
|
||||||
|
/// A nullable blob column must not block seeding: the seed succeeds and
/// writes the requested number of rows.
#[test]
fn seed_omits_a_nullable_blob_column() {
    let (project, db, _dir) = open_project_db();
    let rt = rt();

    let columns = vec![
        ColumnSpec::new("id", Type::Serial),
        ColumnSpec::new("name", Type::Text),
        // Nullable blob: left out of the generated row (→ NULL).
        ColumnSpec::new("payload", Type::Blob),
    ];
    rt.block_on(db.create_table(
        "Files".to_string(),
        columns,
        vec!["id".to_string()],
        None,
    ))
    .expect("create Files");

    let seeding = db.seed("Files".into(), Some(3), Some(1), Some("seed Files 3".into()));
    let res = rt
        .block_on(seeding)
        .expect("seed succeeds despite the nullable blob");
    assert_eq!(res.rows_affected, 3);

    let csv = read_csv(&project, "Files").expect("Files CSV");
    assert_eq!(data_row_count(&csv), 3);
}
|
||||||
|
|||||||
Reference in New Issue
Block a user