feat(seed): uniqueness, junction distinct-combos, IN-CHECK (ADR-0048 P1.3b)

do_seed now enforces value uniqueness and derives enum values:
- Uniqueness groups (D10): the user-fillable PK, compound UNIQUE
  constraints, and single-column UNIQUE / identifier columns stay
  distinct across the batch and against existing rows (retry per row).
  Junction distinct-combos fall out of PK-tuple uniqueness and cap at
  the available parent combinations (logged when capped; the
  user-facing note arrives with the advisory in P1.3c).
- Identifier-int columns get a monotonic sequence past MAX(col) (D10),
  so they never collide.
- IN-CHECK derivation (D17): a simple `col IN ('a','b')` CHECK becomes
  the value source via the new, unit-tested seed::parse_in_check_values,
  so the enum-as-CHECK pattern just works.

8 parser unit tests + 4 integration tests (unique column, identifier
sequencing, junction cap, IN-check enum). 2343 pass / 0 fail / 0 skip,
clippy all-targets clean.

Deferred to P1.3c: dedicated SeedResult + capped preview (D18) + the
enum/CHECK advisory incl. the cap note (D12/D13); P1.3d: multi-row path.
This commit is contained in:
claude@clouddev1
2026-06-11 18:50:05 +00:00
parent 73493fa68b
commit 9c135010ba
4 changed files with 555 additions and 25 deletions
+175
View File
@@ -298,3 +298,178 @@ fn seed_omits_a_nullable_blob_column() {
let csv = read_csv(&project, "Files").expect("Files CSV");
assert_eq!(data_row_count(&csv), 3);
}
// — uniqueness, junction distinct-combos, IN-CHECK (D10 / D14 / D17) —
/// Collects field number `n` (zero-based, comma-delimited) from every data
/// row of `csv`.
///
/// Blank lines are dropped first, then the first remaining line is skipped
/// as the header. A row with fewer than `n + 1` fields contributes an empty
/// string. Fields are whitespace-trimmed. No quoting is handled: this is a
/// plain split on `,`, so it is only valid for values without commas.
fn nth_column_values(csv: &str, n: usize) -> Vec<String> {
    let data_rows = csv
        .lines()
        .filter(|line| !line.trim().is_empty())
        .skip(1);

    let mut values = Vec::new();
    for row in data_rows {
        // A missing field falls back to "" rather than panicking.
        let field = row.split(',').nth(n).unwrap_or("");
        values.push(field.trim().to_owned());
    }
    values
}
/// Seeding a table whose `label` column is flagged `unique` must report 8
/// affected rows and leave no repeated label in the exported CSV.
#[test]
fn seed_keeps_unique_columns_distinct() {
    let (project, db, _dir) = open_project_db();
    let runtime = rt();

    // Build the UNIQUE text column up front so the table definition reads flat.
    let unique_label = {
        let mut spec = ColumnSpec::new("label", Type::Text);
        spec.unique = true;
        spec
    };
    let columns = vec![ColumnSpec::new("id", Type::Serial), unique_label];
    runtime
        .block_on(db.create_table(
            "Tags".to_string(),
            columns,
            vec!["id".to_string()],
            None,
        ))
        .expect("create Tags");

    let outcome = runtime
        .block_on(db.seed(
            "Tags".into(),
            Some(8),
            Some(3),
            Some("seed Tags 8".into()),
        ))
        .expect("seed");
    assert_eq!(outcome.rows_affected, 8);

    let csv = read_csv(&project, "Tags").expect("Tags CSV");
    // Column 1 is `label` (column 0 is `id`).
    let labels = nth_column_values(&csv, 1);
    let unique_labels = labels
        .iter()
        .map(String::as_str)
        .collect::<std::collections::HashSet<&str>>();
    assert_eq!(
        unique_labels.len(),
        labels.len(),
        "UNIQUE column has duplicates:\n{csv}"
    );
}
/// Seeding a table with an identifier-named int column (`code`) must report
/// 5 affected rows, and every exported `code` must parse as an integer and
/// be distinct from the others.
#[test]
fn seed_sequences_identifier_int_columns() {
    let (project, db, _dir) = open_project_db();
    let runtime = rt();

    // `code` is an identifier-named int (D10) but carries no constraint —
    // its uniqueness comes from the identifier rule alone.
    let columns = vec![
        ColumnSpec::new("id", Type::Serial),
        ColumnSpec::new("code", Type::Int),
        ColumnSpec::new("name", Type::Text),
    ];
    runtime
        .block_on(db.create_table(
            "Items".to_string(),
            columns,
            vec!["id".to_string()],
            None,
        ))
        .expect("create Items");

    let outcome = runtime
        .block_on(db.seed(
            "Items".into(),
            Some(5),
            Some(1),
            Some("seed Items 5".into()),
        ))
        .expect("seed");
    assert_eq!(outcome.rows_affected, 5);

    let csv = read_csv(&project, "Items").expect("Items CSV");
    // Parse column 1 (`code`) and track the distinct values in one pass.
    let mut codes: Vec<i64> = Vec::new();
    let mut seen = std::collections::HashSet::new();
    for raw in nth_column_values(&csv, 1) {
        let code: i64 = raw.parse().expect("code is an int");
        seen.insert(code);
        codes.push(code);
    }
    assert_eq!(seen.len(), 5, "identifier ints must be unique: {codes:?}");
}
/// A junction table `J` with a compound PK over two FK columns, whose
/// parents hold 2 rows each, must cap a 10-row seed request at 4 rows, and
/// the exported rows must all differ from one another.
#[test]
fn seed_junction_produces_distinct_combinations_and_caps() {
    let (project, db, _dir) = open_project_db();
    let runtime = rt();

    runtime.block_on(async {
        // Two parents with 2 rows each give 2x2 = 4 possible (a, b) pairs.
        for t in ["P1", "P2"] {
            let columns = vec![
                ColumnSpec::new("id", Type::Serial),
                ColumnSpec::new("name", Type::Text),
            ];
            db.create_table(t.to_string(), columns, vec!["id".to_string()], None)
                .await
                .expect("create parent");

            let message = format!("seed {t} 2");
            db.seed(t.into(), Some(2), Some(1), Some(message))
                .await
                .expect("seed parent");
        }

        // The junction's compound PK spans both of its FK columns.
        let junction_columns = vec![
            ColumnSpec::new("a", Type::Int),
            ColumnSpec::new("b", Type::Int),
        ];
        db.create_table(
            "J".to_string(),
            junction_columns,
            vec!["a".to_string(), "b".to_string()],
            None,
        )
        .await
        .expect("create J");

        // Wire each junction column to its parent's `id`, in the same order
        // as before: P1 -> J.a first, then P2 -> J.b.
        let links = [("P1", "a", "fk a"), ("P2", "b", "fk b")];
        for (parent, fk_column, failure) in links {
            db.add_relationship(
                None,
                parent.into(),
                vec!["id".into()],
                "J".into(),
                vec![fk_column.into()],
                ReferentialAction::NoAction,
                ReferentialAction::NoAction,
                false,
                None,
            )
            .await
            .expect(failure);
        }

        // Asking for 10 rows caps at the 4 available distinct combinations.
        let outcome = db
            .seed("J".into(), Some(10), Some(7), Some("seed J 10".into()))
            .await
            .expect("seed J");
        assert_eq!(outcome.rows_affected, 4, "junction caps at available combos");
    });

    let csv = read_csv(&project, "J").expect("J CSV");
    // Compare whole data rows: drop blank lines, then the header.
    let rows: Vec<&str> = csv
        .lines()
        .filter(|line| !line.trim().is_empty())
        .skip(1)
        .collect();
    let unique_rows = rows
        .iter()
        .copied()
        .collect::<std::collections::HashSet<&str>>();
    assert_eq!(
        unique_rows.len(),
        rows.len(),
        "junction rows must be distinct:\n{csv}"
    );
}
/// A text column guarded by `status IN ('active', 'closed')` must seed all
/// 12 requested rows, and every exported status must be one of those two
/// literals.
#[test]
fn seed_draws_enum_values_from_an_in_check() {
    // The only values the CHECK below admits.
    const ALLOWED: [&str; 2] = ["active", "closed"];

    let (project, db, _dir) = open_project_db();
    let runtime = rt();

    let status = {
        let mut spec = ColumnSpec::new("status", Type::Text);
        spec.check_sql = Some("status IN ('active', 'closed')".to_string());
        spec
    };
    let columns = vec![ColumnSpec::new("id", Type::Serial), status];
    runtime
        .block_on(db.create_table(
            "Tickets".to_string(),
            columns,
            vec!["id".to_string()],
            None,
        ))
        .expect("create Tickets");

    // Each generated status has to satisfy the CHECK, so no row is rejected.
    let outcome = runtime
        .block_on(db.seed(
            "Tickets".into(),
            Some(12),
            Some(2),
            Some("seed Tickets 12".into()),
        ))
        .expect("seed");
    assert_eq!(outcome.rows_affected, 12, "all rows insert — values satisfy the CHECK");

    let csv = read_csv(&project, "Tickets").expect("Tickets CSV");
    // Column 1 is `status` (column 0 is `id`).
    for v in nth_column_values(&csv, 1) {
        assert!(
            ALLOWED.contains(&v.as_str()),
            "status `{v}` was not drawn from the IN check:\n{csv}"
        );
    }
}