feat(seed): uniqueness, junction distinct-combos, IN-CHECK (ADR-0048 P1.3b)
do_seed now enforces value uniqueness and derives enum values:
- Uniqueness groups (D10): the user-fillable PK, compound UNIQUE
constraints, and single-column UNIQUE / identifier columns stay
distinct across the batch and against existing rows (retry per row).
Junction distinct-combos fall out of PK-tuple uniqueness and cap at
the available parent combinations (logged when capped; the
user-facing note arrives with the advisory in P1.3c).
- Identifier-int columns get a monotonic sequence past MAX(col) (D10),
so they never collide.
- IN-CHECK derivation (D17): a simple `col IN ('a','b')` CHECK becomes
the value source via the new, unit-tested seed::parse_in_check_values,
so the enum-as-CHECK pattern just works.
8 parser unit tests + 4 integration tests (unique column, identifier
sequencing, junction cap, IN-check enum). 2343 pass / 0 fail / 0 skip,
clippy all-targets clean.
Deferred to P1.3c: dedicated SeedResult + capped preview (D18) + the
enum/CHECK advisory incl. the cap note (D12/D13); P1.3d: multi-row path.
This commit is contained in:
@@ -8701,6 +8701,34 @@ enum SeedColPlan {
|
||||
ForeignKey { fk_idx: usize, pos: usize },
|
||||
}
|
||||
|
||||
/// Collision key for a positional list of seeded values, used to keep
|
||||
/// uniqueness groups (PK tuple, UNIQUE columns) distinct (ADR-0048 D10).
|
||||
/// `\u{1}` separates fields; `\u{0}` marks NULL.
|
||||
fn seed_value_list_key(values: &[Value]) -> String {
|
||||
let mut key = String::new();
|
||||
for v in values {
|
||||
match v {
|
||||
Value::Number(s) | Value::Text(s) => key.push_str(s),
|
||||
Value::Bool(b) => key.push(if *b { 'T' } else { 'F' }),
|
||||
Value::Null => key.push('\u{0}'),
|
||||
}
|
||||
key.push('\u{1}');
|
||||
}
|
||||
key
|
||||
}
|
||||
|
||||
/// `COALESCE(MAX(col), 0)` — the base for sequencing identifier-int
|
||||
/// columns (ADR-0048 D10) so generated ids continue past existing rows.
|
||||
fn seed_max_int(conn: &Connection, table: &str, column: &str) -> Result<i64, DbError> {
|
||||
let sql = format!(
|
||||
"SELECT COALESCE(MAX(\"{}\"), 0) FROM \"{}\"",
|
||||
column.replace('"', "\"\""),
|
||||
table.replace('"', "\"\"")
|
||||
);
|
||||
conn.query_row(&sql, [], |r| r.get::<_, i64>(0))
|
||||
.map_err(DbError::from_rusqlite)
|
||||
}
|
||||
|
||||
/// Sample existing parent-key tuples for FK generation (ADR-0048 D14).
|
||||
///
|
||||
/// Returns one `Value` tuple per distinct parent row in
|
||||
@@ -8835,6 +8863,12 @@ fn do_seed(
|
||||
if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) {
|
||||
plans.push(SeedColPlan::ForeignKey { fk_idx, pos });
|
||||
} else {
|
||||
// A simple `col IN ('a','b')` CHECK becomes the value source
|
||||
// (D17) so the enum-as-CHECK pattern just works.
|
||||
let check_in_values = c
|
||||
.check
|
||||
.as_deref()
|
||||
.and_then(|chk| seed::parse_in_check_values(chk, &c.name));
|
||||
let spec = seed::ColumnSpec {
|
||||
name: c.name.clone(),
|
||||
ty,
|
||||
@@ -8842,44 +8876,170 @@ fn do_seed(
|
||||
primary_key: c.primary_key,
|
||||
unique: c.unique,
|
||||
is_foreign_key: false,
|
||||
// `IN`-CHECK derivation is a later phase.
|
||||
check_in_values: None,
|
||||
check_in_values,
|
||||
};
|
||||
let generator = seed::choose_generator(table, &spec);
|
||||
plans.push(SeedColPlan::Generated { generator, ty });
|
||||
}
|
||||
}
|
||||
|
||||
// Uniqueness groups (ADR-0048 D10): value tuples that must stay
|
||||
// distinct across the batch and against existing rows — the
|
||||
// user-fillable PK (so junction distinct-combos fall out of this),
|
||||
// each compound UNIQUE constraint, and each single-column UNIQUE or
|
||||
// identifier-named column. Each group is a list of indices into
|
||||
// `col_names` / `plans`.
|
||||
let col_index: std::collections::HashMap<&str, usize> = col_names
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, name)| (name.as_str(), i))
|
||||
.collect();
|
||||
let project_group = |cols: &[String]| -> Vec<usize> {
|
||||
cols.iter()
|
||||
.filter_map(|c| col_index.get(c.as_str()).copied())
|
||||
.collect()
|
||||
};
|
||||
let mut unique_groups: Vec<Vec<usize>> = Vec::new();
|
||||
let pk_group = project_group(&schema.primary_key);
|
||||
if !pk_group.is_empty() {
|
||||
unique_groups.push(pk_group);
|
||||
}
|
||||
for uc in &schema.unique_constraints {
|
||||
let g = project_group(uc);
|
||||
if !g.is_empty() {
|
||||
unique_groups.push(g);
|
||||
}
|
||||
}
|
||||
for (i, name) in col_names.iter().enumerate() {
|
||||
let unique_col = schema
|
||||
.columns
|
||||
.iter()
|
||||
.find(|c| &c.name == name)
|
||||
.is_some_and(|c| c.unique);
|
||||
let is_identifier = matches!(
|
||||
&plans[i],
|
||||
SeedColPlan::Generated {
|
||||
generator: crate::seed::Generator::IdentitySequential,
|
||||
..
|
||||
}
|
||||
);
|
||||
if unique_col || is_identifier {
|
||||
unique_groups.push(vec![i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Sequence base for identifier-int columns (D10): start past the
|
||||
// current MAX so generated ids continue cleanly.
|
||||
let mut seq_base: std::collections::HashMap<usize, i64> = std::collections::HashMap::new();
|
||||
for (i, plan) in plans.iter().enumerate() {
|
||||
if let SeedColPlan::Generated { generator, ty } = plan
|
||||
&& matches!(generator, crate::seed::Generator::IdentitySequential)
|
||||
&& matches!(ty, Type::Int)
|
||||
{
|
||||
seq_base.insert(i, seed_max_int(conn, table, &col_names[i])?);
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-load each group's existing tuples so generation never
|
||||
// collides with rows already present.
|
||||
let mut used: Vec<std::collections::HashSet<String>> =
|
||||
vec![std::collections::HashSet::new(); unique_groups.len()];
|
||||
for (gi, group) in unique_groups.iter().enumerate() {
|
||||
let cols: Vec<String> = group.iter().map(|&i| col_names[i].clone()).collect();
|
||||
for tuple in sample_parent_key_tuples(conn, table, &cols)? {
|
||||
used[gi].insert(seed_value_list_key(&tuple));
|
||||
}
|
||||
}
|
||||
|
||||
// Retry cap per row: when the unique space is exhausted (e.g. a
|
||||
// junction requested more rows than there are parent combinations),
|
||||
// stop and cap rather than spin (D14).
|
||||
const MAX_ATTEMPTS: u32 = 200;
|
||||
|
||||
let mut rng = seed::make_rng(rng_seed);
|
||||
let mut rows_affected = 0usize;
|
||||
let mut last_data: Option<DataResult> = None;
|
||||
let mut accepted: u64 = 0;
|
||||
let mut capped = false;
|
||||
|
||||
for i in 0..n {
|
||||
// One sampled parent row per FK for this row, so a compound FK's
|
||||
// children stay consistent.
|
||||
let fk_choice: Vec<usize> = fk_samples
|
||||
.iter()
|
||||
.map(|tuples| rng.random_range(0..tuples.len()))
|
||||
.collect();
|
||||
while accepted < n {
|
||||
let mut attempt = 0u32;
|
||||
let inserted = loop {
|
||||
// One sampled parent row per FK for this attempt, so a
|
||||
// compound FK's children stay consistent.
|
||||
let fk_choice: Vec<usize> = fk_samples
|
||||
.iter()
|
||||
.map(|tuples| rng.random_range(0..tuples.len()))
|
||||
.collect();
|
||||
let values: Vec<Value> = plans
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, plan)| match plan {
|
||||
SeedColPlan::ForeignKey { fk_idx, pos } => {
|
||||
fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone()
|
||||
}
|
||||
SeedColPlan::Generated { generator, ty }
|
||||
if matches!(generator, crate::seed::Generator::IdentitySequential)
|
||||
&& matches!(ty, Type::Int) =>
|
||||
{
|
||||
// Monotonic past existing rows → inherently unique.
|
||||
Value::Number((seq_base[&i] + accepted as i64 + 1).to_string())
|
||||
}
|
||||
SeedColPlan::Generated { generator, ty } => {
|
||||
seed::generate_value(generator, *ty, &mut rng)
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
let values: Vec<Value> = plans
|
||||
.iter()
|
||||
.map(|plan| match plan {
|
||||
SeedColPlan::Generated { generator, ty } => {
|
||||
seed::generate_value(generator, *ty, &mut rng)
|
||||
let keys: Vec<String> = unique_groups
|
||||
.iter()
|
||||
.map(|group| {
|
||||
let projected: Vec<Value> =
|
||||
group.iter().map(|&i| values[i].clone()).collect();
|
||||
seed_value_list_key(&projected)
|
||||
})
|
||||
.collect();
|
||||
if keys.iter().enumerate().any(|(gi, k)| used[gi].contains(k)) {
|
||||
attempt += 1;
|
||||
if attempt >= MAX_ATTEMPTS {
|
||||
capped = true;
|
||||
break None;
|
||||
}
|
||||
SeedColPlan::ForeignKey { fk_idx, pos } => {
|
||||
fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
continue;
|
||||
}
|
||||
for (gi, k) in keys.into_iter().enumerate() {
|
||||
used[gi].insert(k);
|
||||
}
|
||||
// Only the first inserted row carries the `source`, so the
|
||||
// whole seed writes exactly one `history.log` line.
|
||||
let row_source = if accepted == 0 { source } else { None };
|
||||
break Some(do_insert(
|
||||
conn,
|
||||
persistence,
|
||||
row_source,
|
||||
table,
|
||||
Some(&col_names),
|
||||
&values,
|
||||
)?);
|
||||
};
|
||||
match inserted {
|
||||
Some(result) => {
|
||||
rows_affected += result.rows_affected;
|
||||
last_data = Some(result.data);
|
||||
accepted += 1;
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
// Only the first row carries the `source`, so the whole seed
|
||||
// writes exactly one `history.log` line.
|
||||
let row_source = if i == 0 { source } else { None };
|
||||
let result = do_insert(conn, persistence, row_source, table, Some(&col_names), &values)?;
|
||||
rows_affected += result.rows_affected;
|
||||
last_data = Some(result.data);
|
||||
if capped {
|
||||
warn!(
|
||||
table = %table,
|
||||
requested = n,
|
||||
produced = accepted,
|
||||
"seed capped: ran out of distinct unique-value combinations before the \
|
||||
requested count (user-facing note arrives with the advisory in P1.3c)"
|
||||
);
|
||||
}
|
||||
|
||||
Ok(InsertResult {
|
||||
|
||||
Reference in New Issue
Block a user