feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
Build the two SD2 surfaces Phase 1 deferred:
- `set` override clause (D2): comma-separated per-column pins —
`= 'v'` (fixed), `in ('a','b')` (pick-list), `as <generator>`
(named), `between x and y` (range; numeric and quoted dates).
Type-aware via the typed `current_column_value` slot; an override
drops its column from the generic-fill advisory (D13). Folded from
the flat matched path (build_seed_overrides) and applied to the
per-column plan (apply_seed_overrides).
- `<table>.<column>` column-fill (D1 form 2): an UPDATE over existing
rows. Refuses PK/autogen targets, empty-table no-op, FK-samples the
parent, collision-free for UNIQUE/identifier targets, one undo step;
`set` may only adjust the filled column.
Supporting work: KNOWN_GENERATORS vocabulary + generator_for_name
(src/seed/vocabulary.rs, D9); a range Generator + range_bounds_reason;
IdentSource::Generators and HighlightClass::Function; completion of the
generator vocabulary after `as` and the set/.col column slots; the
typing-time validity indicator for an unknown generator; help,
parse-error pedagogy rows, and the D13 advisory's Phase-2/3 wording.
A bounded override (fixed value / too-short pick-list) on a
single-column-UNIQUE target is a friendly error rather than a silent
uniqueness cap (post-implementation /runda finding, user-chosen).
Dates in the range form are quoted (no date-literal token exists);
ADR-0048 D2 amended accordingly. Both modes (D5); reproducible (D4).
This commit is contained in:
+10
-1
@@ -2097,9 +2097,14 @@ impl App {
|
||||
self.note_system(line);
|
||||
}
|
||||
if !result.advisory_columns.is_empty() {
|
||||
// `column` (the first advised column) seeds the concrete
|
||||
// repair examples (D13 Phase 2/3 wording); `columns` lists
|
||||
// them all.
|
||||
self.push_category_three_prose(crate::t!(
|
||||
"seed.advisory_generic",
|
||||
columns = result.advisory_columns.join(", ")
|
||||
columns = result.advisory_columns.join(", "),
|
||||
column = result.advisory_columns[0],
|
||||
table = result.table
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -6268,7 +6273,9 @@ mod tests {
|
||||
app.update(AppEvent::DslSeedSucceeded {
|
||||
command: Command::Seed {
|
||||
table: "users".to_string(),
|
||||
target_column: None,
|
||||
count: Some(20),
|
||||
overrides: Vec::new(),
|
||||
rng_seed: None,
|
||||
},
|
||||
result: crate::db::SeedResult {
|
||||
@@ -6304,7 +6311,9 @@ mod tests {
|
||||
app.update(AppEvent::DslSeedSucceeded {
|
||||
command: Command::Seed {
|
||||
table: "J".to_string(),
|
||||
target_column: None,
|
||||
count: Some(10),
|
||||
overrides: Vec::new(),
|
||||
rng_seed: None,
|
||||
},
|
||||
result: crate::db::SeedResult {
|
||||
|
||||
+83
-1
@@ -120,7 +120,13 @@ impl SchemaCache {
|
||||
IdentSource::Columns => &self.columns,
|
||||
IdentSource::Relationships => &self.relationships,
|
||||
IdentSource::Indexes => &self.indexes,
|
||||
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[],
|
||||
// Curated / invented sources never come from the schema
|
||||
// cache — `Generators` candidates are supplied separately
|
||||
// from the `seed` vocabulary (ADR-0048 D9).
|
||||
IdentSource::NewName
|
||||
| IdentSource::Types
|
||||
| IdentSource::Generators
|
||||
| IdentSource::Free => &[],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,6 +715,22 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
// Source 1.9: fake-data generator names (ADR-0048 D9). At the
|
||||
// `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
|
||||
// curated vocabulary is offered so a learner can discover `email` /
|
||||
// `product` / … by Tab. Same `Function` kind / `tok_function` colour
|
||||
// as SQL functions (no new theme colour — ADR-0048 §Grammar).
|
||||
let has_generator_slot = expected
|
||||
.iter()
|
||||
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
|
||||
if has_generator_slot {
|
||||
functions.extend(
|
||||
crate::seed::KNOWN_GENERATORS
|
||||
.iter()
|
||||
.filter(|g| matches_prefix(g))
|
||||
.map(|g| (*g).to_string()),
|
||||
);
|
||||
}
|
||||
|
||||
// Source 2: schema identifiers — accumulated across every
|
||||
// matching schema-listable `Ident { source }` expectation.
|
||||
@@ -1200,6 +1222,24 @@ pub fn invalid_ident_at_cursor_in_mode(
|
||||
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
|
||||
return None;
|
||||
}
|
||||
// ADR-0048 D9: the `seed … set <col> as <gen>` slot is a curated
|
||||
// vocabulary (`IdentSource::Generators`), not a schema source, so the
|
||||
// schema-column check below would never see it. A partial that
|
||||
// prefix-matches a known generator is an in-progress name; anything
|
||||
// else is an unknown generator → flag it `[ERR]` while typing.
|
||||
let has_generator_slot = expected
|
||||
.iter()
|
||||
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
|
||||
if has_generator_slot {
|
||||
if crate::seed::is_known_generator_prefix(partial) {
|
||||
return None;
|
||||
}
|
||||
return Some(InvalidIdent {
|
||||
range: (start, cursor),
|
||||
found: partial.to_string(),
|
||||
source: IdentSource::Generators,
|
||||
});
|
||||
}
|
||||
// Find every schema-listable source in the expected list.
|
||||
let sources: Vec<IdentSource> = expected
|
||||
.iter()
|
||||
@@ -2606,6 +2646,48 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn invalid_ident_fires_for_unknown_generator_after_as() {
    // ADR-0048 D9: typing a name that is not in the generator vocabulary
    // at the `set <col> as <gen>` slot must raise the `[ERR]` indicator.
    let schema = two_table_schema();
    let typed = "seed a set name as bogus";
    let cursor = typed.len();

    let flagged =
        invalid_ident_at_cursor(typed, cursor, &schema).expect("unknown generator must flag");

    assert_eq!(flagged.found, "bogus");
    assert_eq!(flagged.source, IdentSource::Generators);
}
|
||||
|
||||
#[test]
fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
    // ADR-0048: an unknown column at the `set <col>` slot and the
    // `<table>.<col>` column-fill slot is flagged like any other
    // column slot (both are `IdentSource::Columns`).
    //
    // The cursor is placed at the end of each input by deriving it from
    // the input itself, so editing an input string cannot silently leave
    // a stale hard-coded offset behind.
    let cache = two_table_schema(); // table `a`; columns id, name

    let set_input = "seed a set xyz";
    let set_in = invalid_ident_at_cursor(set_input, set_input.len(), &cache)
        .expect("unknown column in `set` must flag");
    assert_eq!(set_in.found, "xyz");
    assert_eq!(set_in.source, IdentSource::Columns);

    let fill_input = "seed a.xyz";
    let fill = invalid_ident_at_cursor(fill_input, fill_input.len(), &cache)
        .expect("unknown column in column-fill must flag");
    assert_eq!(fill.source, IdentSource::Columns);
}
|
||||
|
||||
#[test]
fn invalid_ident_does_not_fire_for_generator_prefix() {
    // A prefix of a known generator is an in-progress name, not a typo.
    // The cursor sits at the end of each input; it is derived from the
    // input length rather than hard-coded so the two cannot drift apart.
    let cache = two_table_schema();

    let partial = "seed a set name as ema";
    assert!(
        invalid_ident_at_cursor(partial, partial.len(), &cache).is_none(),
        "`ema` prefixes `email` — must not flag",
    );

    let complete = "seed a set name as email";
    assert!(
        invalid_ident_at_cursor(complete, complete.len(), &cache).is_none(),
        "`email` is a known generator — must not flag",
    );
}
|
||||
|
||||
fn two_table_schema() -> SchemaCache {
|
||||
use crate::dsl::types::Type;
|
||||
let mut s = SchemaCache::default();
|
||||
|
||||
@@ -33,7 +33,8 @@ use tracing::{debug, info, warn};
|
||||
use crate::dsl::action::ReferentialAction;
|
||||
use crate::dsl::command::{
|
||||
ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector,
|
||||
Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey,
|
||||
Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind,
|
||||
SqlForeignKey,
|
||||
};
|
||||
use crate::dsl::ColumnSpec;
|
||||
use crate::dsl::shortid;
|
||||
@@ -723,7 +724,9 @@ enum Request {
|
||||
/// snapshot wraps the whole seed via `snapshot_then`.
|
||||
Seed {
|
||||
table: String,
|
||||
target_column: Option<String>,
|
||||
count: Option<u64>,
|
||||
overrides: Vec<SeedOverride>,
|
||||
rng_seed: Option<u64>,
|
||||
source: Option<String>,
|
||||
reply: oneshot::Sender<Result<SeedResult, DbError>>,
|
||||
@@ -1517,18 +1520,22 @@ impl Database {
|
||||
recv.await.map_err(|_| DbError::WorkerGone)?
|
||||
}
|
||||
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1).
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
|
||||
pub async fn seed(
|
||||
&self,
|
||||
table: String,
|
||||
target_column: Option<String>,
|
||||
count: Option<u64>,
|
||||
overrides: Vec<SeedOverride>,
|
||||
rng_seed: Option<u64>,
|
||||
source: Option<String>,
|
||||
) -> Result<SeedResult, DbError> {
|
||||
let (reply, recv) = oneshot::channel();
|
||||
self.send(Request::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
source,
|
||||
reply,
|
||||
@@ -2694,7 +2701,9 @@ fn handle_request(
|
||||
}
|
||||
Request::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
source,
|
||||
reply,
|
||||
@@ -2706,7 +2715,9 @@ fn handle_request(
|
||||
persistence,
|
||||
source.as_deref(),
|
||||
&table,
|
||||
target_column.as_deref(),
|
||||
count,
|
||||
&overrides,
|
||||
rng_seed,
|
||||
));
|
||||
}
|
||||
@@ -2938,7 +2949,10 @@ fn do_list_names_for(
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()),
|
||||
IdentSource::NewName
|
||||
| IdentSource::Types
|
||||
| IdentSource::Generators
|
||||
| IdentSource::Free => Ok(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8808,14 +8822,13 @@ fn sample_parent_key_tuples(
|
||||
Ok(tuples)
|
||||
}
|
||||
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1).
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
|
||||
///
|
||||
/// **Phase 1.** Generates whole rows and inserts them one at a time
|
||||
/// through [`do_insert`] — reusing all the existing per-value
|
||||
/// validation, autogen autofill, FK-error enrichment and persistence
|
||||
/// machinery. The whole seed is a single undo step (the worker wraps
|
||||
/// the call in one `snapshot_then`) and writes exactly one
|
||||
/// `history.log` line (only the first row carries the `source`).
|
||||
/// Generates whole rows and inserts them in one transaction, reusing the
|
||||
/// per-value validation, autogen autofill, FK-error enrichment and
|
||||
/// persistence machinery via [`insert_one_row`]. The whole seed is a
|
||||
/// single undo step (the worker wraps the call in one `snapshot_then`)
|
||||
/// and writes exactly one `history.log` line.
|
||||
///
|
||||
/// Foreign-key columns are filled by sampling existing parent rows
|
||||
/// (D14); a compound FK reads all its child columns from one sampled
|
||||
@@ -8823,16 +8836,20 @@ fn sample_parent_key_tuples(
|
||||
/// `NOT NULL blob` column (which seed cannot generate) is refused by
|
||||
/// the block guard (D1); a nullable blob is omitted (→ NULL).
|
||||
///
|
||||
/// Deferred: identifier/constraint uniqueness incl. junction
|
||||
/// distinct-combos (D10), the `IN`-CHECK value derivation (D17), the
|
||||
/// efficient single-transaction multi-row path, the capped auto-show
|
||||
/// preview (D18), and the enum/CHECK advisory (D12/D13).
|
||||
/// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to
|
||||
/// [`do_seed_column_fill`] (fill one column across existing rows, D1
|
||||
/// form 2). `overrides` carries the `set <col> …` clause (D2): per-column
|
||||
/// pins that replace the heuristic generator and drop the column from the
|
||||
/// generic-fill advisory (D13).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn do_seed(
|
||||
conn: &Connection,
|
||||
persistence: Option<&Persistence>,
|
||||
source: Option<&str>,
|
||||
table: &str,
|
||||
target_column: Option<&str>,
|
||||
count: Option<u64>,
|
||||
overrides: &[SeedOverride],
|
||||
rng_seed: Option<u64>,
|
||||
) -> Result<SeedResult, DbError> {
|
||||
use crate::seed;
|
||||
@@ -8840,6 +8857,14 @@ fn do_seed(
|
||||
|
||||
let canonical_table = require_canonical_table(conn, table)?;
|
||||
let table = canonical_table.as_str();
|
||||
|
||||
// Column-fill (D1 form 2) is a distinct UPDATE path.
|
||||
if let Some(col) = target_column {
|
||||
return do_seed_column_fill(
|
||||
conn, persistence, source, table, col, count, overrides, rng_seed,
|
||||
);
|
||||
}
|
||||
|
||||
let n = count.unwrap_or(DEFAULT_SEED_COUNT);
|
||||
debug!(table = %table, count = n, "seed");
|
||||
if n > MAX_SEED_COUNT {
|
||||
@@ -8937,6 +8962,17 @@ fn do_seed(
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the `set <col> …` overrides (D2): each replaces the named
|
||||
// column's plan with the pinned generator and removes it from the
|
||||
// generic-fill advisory (the user chose its values deliberately,
|
||||
// D13). An override that names a non-fillable column is a friendly
|
||||
// error; a bounded value source (fixed / pick-list) that can't supply
|
||||
// enough distinct values for a single-column-UNIQUE target is refused
|
||||
// up front rather than silently capped (DA finding). FK / type binding
|
||||
// still apply — a value that violates a constraint surfaces through the
|
||||
// existing FK-error guard.
|
||||
apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?;
|
||||
|
||||
// Uniqueness groups (ADR-0048 D10): value tuples that must stay
|
||||
// distinct across the batch and against existing rows — the
|
||||
// user-fillable PK (so junction distinct-combos fall out of this),
|
||||
@@ -9131,6 +9167,434 @@ fn do_seed(
|
||||
})
|
||||
}
|
||||
|
||||
/// Apply the `set <col> …` overrides (ADR-0048 D2) to the per-column
|
||||
/// generation plan. Each override replaces the named column's plan and
|
||||
/// drops it from the generic-fill advisory (D13 — the user chose those
|
||||
/// values). An override naming a column that is not in the fillable set
|
||||
/// (unknown, or an auto-generated `serial`) is a friendly error.
|
||||
fn apply_seed_overrides(
|
||||
schema: &ReadSchema,
|
||||
overrides: &[SeedOverride],
|
||||
row_count: u64,
|
||||
col_names: &[String],
|
||||
plans: &mut [SeedColPlan],
|
||||
advisory_columns: &mut Vec<String>,
|
||||
) -> Result<(), DbError> {
|
||||
for ov in overrides {
|
||||
let Some(idx) = col_names
|
||||
.iter()
|
||||
.position(|c| c.eq_ignore_ascii_case(&ov.column))
|
||||
else {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot apply `set {col} …`: `{col}` is not a fillable column of this \
|
||||
table (it is unknown, or an auto-generated column).",
|
||||
col = ov.column,
|
||||
)));
|
||||
};
|
||||
let ty = schema
|
||||
.columns
|
||||
.iter()
|
||||
.find(|c| c.name.eq_ignore_ascii_case(&ov.column))
|
||||
.and_then(|c| c.user_type)
|
||||
.unwrap_or(Type::Text);
|
||||
seed_override_capacity_guard(schema, &ov.column, &ov.kind, row_count)?;
|
||||
plans[idx] = seed_override_plan(&ov.kind, ty, &ov.column)?;
|
||||
advisory_columns.retain(|c| !c.eq_ignore_ascii_case(&ov.column));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Refuse up front when a **bounded** override (a fixed value or a
|
||||
/// pick-list) cannot supply enough *distinct* values to fill a
|
||||
/// single-column-UNIQUE target across `row_count` rows — otherwise the
|
||||
/// uniqueness machinery would silently cap the run to the achievable
|
||||
/// count (DA finding; the ADR left this interaction open and the user
|
||||
/// chose a friendly error). Generators and ranges are treated as
|
||||
/// effectively unbounded sources here; if one does exhaust, the existing
|
||||
/// distinct-combination cap (D14) still applies.
|
||||
fn seed_override_capacity_guard(
|
||||
schema: &ReadSchema,
|
||||
column: &str,
|
||||
kind: &SeedOverrideKind,
|
||||
row_count: u64,
|
||||
) -> Result<(), DbError> {
|
||||
let distinct = match kind {
|
||||
SeedOverrideKind::Fixed(_) => 1,
|
||||
SeedOverrideKind::PickList(values) => {
|
||||
let mut set = std::collections::HashSet::new();
|
||||
for v in values {
|
||||
set.insert(seed_override_literal(v, column)?);
|
||||
}
|
||||
set.len()
|
||||
}
|
||||
// Unbounded-enough sources — leave to the cap if they exhaust.
|
||||
SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()),
|
||||
};
|
||||
if distinct as u64 >= row_count.max(1) {
|
||||
return Ok(());
|
||||
}
|
||||
// Single-column uniqueness only: a compound UNIQUE / compound PK can
|
||||
// still be satisfied by varying the *other* columns, so a pinned
|
||||
// value there does not force a cap.
|
||||
let single_unique = schema
|
||||
.columns
|
||||
.iter()
|
||||
.find(|c| c.name.eq_ignore_ascii_case(column))
|
||||
.is_some_and(|c| c.unique)
|
||||
|| (schema.primary_key.len() == 1
|
||||
&& schema.primary_key[0].eq_ignore_ascii_case(column));
|
||||
if single_unique {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \
|
||||
value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \
|
||||
of at least {row_count} values."
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Turn one `set` override into the `SeedColPlan` that produces its
|
||||
/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the
|
||||
/// literal(s); `Generator` resolves the curated name (unknown → friendly
|
||||
/// error); `Range` validates its bounds against the column type *before*
|
||||
/// generation (an incompatible bound → friendly error).
|
||||
fn seed_override_plan(
|
||||
kind: &SeedOverrideKind,
|
||||
ty: Type,
|
||||
column: &str,
|
||||
) -> Result<SeedColPlan, DbError> {
|
||||
use crate::seed::Generator;
|
||||
let generator = match kind {
|
||||
SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]),
|
||||
SeedOverrideKind::PickList(vs) => {
|
||||
let lits = vs
|
||||
.iter()
|
||||
.map(|v| seed_override_literal(v, column))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
Generator::PickFrom(lits)
|
||||
}
|
||||
SeedOverrideKind::Generator(name) => {
|
||||
crate::seed::generator_for_name(name).ok_or_else(|| {
|
||||
DbError::Unsupported(format!(
|
||||
"unknown generator `{name}` in `set {column} as {name}`. \
|
||||
Known generators: {}.",
|
||||
crate::seed::KNOWN_GENERATORS.join(", "),
|
||||
))
|
||||
})?
|
||||
}
|
||||
SeedOverrideKind::Range { low, high } => {
|
||||
let lo = seed_override_literal(low, column)?;
|
||||
let hi = seed_override_literal(high, column)?;
|
||||
if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot apply `set {column} between …`: {reason}."
|
||||
)));
|
||||
}
|
||||
Generator::Range { low: lo, high: hi }
|
||||
}
|
||||
};
|
||||
Ok(SeedColPlan::Generated { generator, ty })
|
||||
}
|
||||
|
||||
/// Extract the literal string an override value contributes to a
|
||||
/// `PickFrom` / `Range` (re-typed per column by `generate_value`). A
|
||||
/// `null` override is refused — seed always fills a value (NULL
|
||||
/// injection is out of scope, ADR-0048 Out-of-scope).
|
||||
fn seed_override_literal(value: &Value, column: &str) -> Result<String, DbError> {
|
||||
match value {
|
||||
Value::Number(s) | Value::Text(s) => Ok(s.clone()),
|
||||
Value::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()),
|
||||
Value::Null => Err(DbError::Unsupported(format!(
|
||||
"`set {column} = null` is not supported — seed always fills a value."
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's
|
||||
/// **existing** rows (an UPDATE), the natural follow-up to `add column`.
|
||||
///
|
||||
/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets;
|
||||
/// an empty table is a friendly no-op. The `set` clause may only adjust
|
||||
/// the column being filled (the rest of the per-column heuristics do not
|
||||
/// apply — there is exactly one column). A UNIQUE / identifier target
|
||||
/// gets collision-free values (generated distinct from *every* existing
|
||||
/// value in the column, so no row-by-row UPDATE can transiently collide);
|
||||
/// an FK target samples an existing parent key (D14). The whole fill is
|
||||
/// one transaction → one undo step (D15), persisted once (commit-db-last).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn do_seed_column_fill(
|
||||
conn: &Connection,
|
||||
persistence: Option<&Persistence>,
|
||||
source: Option<&str>,
|
||||
table: &str,
|
||||
column: &str,
|
||||
count: Option<u64>,
|
||||
overrides: &[SeedOverride],
|
||||
rng_seed: Option<u64>,
|
||||
) -> Result<SeedResult, DbError> {
|
||||
use crate::seed;
|
||||
use rand::RngExt;
|
||||
|
||||
debug!(table = %table, column = %column, "seed column-fill");
|
||||
|
||||
// A row count is meaningless when filling existing rows (D1 form 2).
|
||||
if count.is_some() {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"`seed {table}.{column}` fills existing rows, so it takes no row count \
|
||||
(drop the number)."
|
||||
)));
|
||||
}
|
||||
|
||||
let schema = read_schema(conn, table)?;
|
||||
let col = schema
|
||||
.columns
|
||||
.iter()
|
||||
.find(|c| c.name.eq_ignore_ascii_case(column))
|
||||
.ok_or_else(|| {
|
||||
DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column."))
|
||||
})?;
|
||||
let canonical_col = col.name.clone();
|
||||
let ty = col.user_type.unwrap_or(Type::Text);
|
||||
|
||||
// Refuse identity / auto-generated / un-generatable targets (D1).
|
||||
if col.primary_key {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot fill `{table}.{canonical_col}`: it is part of the primary key — \
|
||||
you don't fill an identity column."
|
||||
)));
|
||||
}
|
||||
if matches!(ty, Type::Serial | Type::ShortId) {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \
|
||||
values automatically.",
|
||||
ty.keyword(),
|
||||
)));
|
||||
}
|
||||
if matches!(ty, Type::Blob) {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values."
|
||||
)));
|
||||
}
|
||||
|
||||
// The `set` clause may only adjust the filled column (user decision).
|
||||
for ov in overrides {
|
||||
if !ov.column.eq_ignore_ascii_case(&canonical_col) {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"in `seed {table}.{canonical_col}`, `set` can only adjust \
|
||||
`{canonical_col}` (the column being filled), not `{}`.",
|
||||
ov.column,
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
// Existing rowids in a deterministic order (D4 reproducibility).
|
||||
let rowids: Vec<i64> = {
|
||||
let sql = format!(
|
||||
"SELECT rowid FROM \"{}\" ORDER BY rowid",
|
||||
table.replace('"', "\"\"")
|
||||
);
|
||||
let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
|
||||
stmt.query_map([], |r| r.get::<_, i64>(0))
|
||||
.map_err(DbError::from_rusqlite)?
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(DbError::from_rusqlite)?
|
||||
};
|
||||
|
||||
// Empty table → friendly no-op (D1).
|
||||
if rowids.is_empty() {
|
||||
return Ok(SeedResult {
|
||||
table: table.to_string(),
|
||||
requested: 0,
|
||||
produced: 0,
|
||||
data: DataResult {
|
||||
table_name: table.to_string(),
|
||||
columns: Vec::new(),
|
||||
column_types: Vec::new(),
|
||||
rows: Vec::new(),
|
||||
},
|
||||
advisory_columns: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// FK target → sample an existing parent key column (D14).
|
||||
let fk_sample: Option<Vec<Value>> = {
|
||||
let fk = schema.foreign_keys.iter().find(|fk| {
|
||||
fk.child_columns
|
||||
.iter()
|
||||
.any(|c| c.eq_ignore_ascii_case(&canonical_col))
|
||||
});
|
||||
match fk {
|
||||
Some(fk) => {
|
||||
// Single-column position within the FK (column-fill targets
|
||||
// one column; a compound FK filled one column at a time is
|
||||
// unusual but we sample that column's parent values).
|
||||
let pos = fk
|
||||
.child_columns
|
||||
.iter()
|
||||
.position(|c| c.eq_ignore_ascii_case(&canonical_col))
|
||||
.unwrap_or(0);
|
||||
let parent_col = fk.parent_columns.get(pos).cloned().unwrap_or_default();
|
||||
let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?;
|
||||
if tuples.is_empty() {
|
||||
return Err(DbError::Unsupported(format!(
|
||||
"cannot fill `{table}.{canonical_col}`: parent table `{}` has no \
|
||||
rows to reference. Seed or insert into `{}` first.",
|
||||
fk.parent_table, fk.parent_table,
|
||||
)));
|
||||
}
|
||||
Some(tuples.into_iter().map(|mut t| t.remove(0)).collect())
|
||||
}
|
||||
None => None,
|
||||
}
|
||||
};
|
||||
|
||||
// The value source: an override (if present) else the heuristic.
|
||||
let mut advisory_columns: Vec<String> = Vec::new();
|
||||
let plan: SeedColPlan = if let Some(ov) = overrides
|
||||
.iter()
|
||||
.find(|o| o.column.eq_ignore_ascii_case(&canonical_col))
|
||||
{
|
||||
// Same capacity guard as whole-row: a bounded override that can't
|
||||
// give enough distinct values for a UNIQUE column across the
|
||||
// existing rows is refused up front, not silently capped.
|
||||
seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?;
|
||||
seed_override_plan(&ov.kind, ty, &canonical_col)?
|
||||
} else if fk_sample.is_some() {
|
||||
SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 }
|
||||
} else if matches!(ty, Type::ShortId) {
|
||||
SeedColPlan::ShortId // unreachable (refused above), kept for totality
|
||||
} else {
|
||||
let check_in_values = col
|
||||
.check
|
||||
.as_deref()
|
||||
.and_then(|chk| seed::parse_in_check_values(chk, &canonical_col));
|
||||
let spec = seed::ColumnSpec {
|
||||
name: canonical_col.clone(),
|
||||
ty,
|
||||
not_null: col.notnull,
|
||||
primary_key: col.primary_key,
|
||||
unique: col.unique,
|
||||
is_foreign_key: false,
|
||||
check_in_values,
|
||||
};
|
||||
let generator = seed::choose_generator(table, &spec);
|
||||
if matches!(generator, crate::seed::Generator::Generic)
|
||||
&& (seed::is_enum_ish(&canonical_col)
|
||||
|| (col.check.is_some() && spec.check_in_values.is_none()))
|
||||
{
|
||||
advisory_columns.push(canonical_col.clone());
|
||||
}
|
||||
SeedColPlan::Generated { generator, ty }
|
||||
};
|
||||
|
||||
// Collision-free generation for UNIQUE / identifier targets: seed the
|
||||
// used-set with EVERY existing value of the column so a generated
|
||||
// value never matches a not-yet-updated row (no transient UNIQUE
|
||||
// violation) nor a value already assigned this batch (ADR-0048 D10).
|
||||
let enforce_unique = col.unique
|
||||
|| matches!(
|
||||
&plan,
|
||||
SeedColPlan::Generated {
|
||||
generator: crate::seed::Generator::IdentitySequential,
|
||||
..
|
||||
}
|
||||
);
|
||||
let mut used: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
if enforce_unique {
|
||||
for tuple in
|
||||
sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))?
|
||||
{
|
||||
used.insert(seed_value_list_key(&tuple));
|
||||
}
|
||||
}
|
||||
let seq_base = if matches!(
|
||||
&plan,
|
||||
SeedColPlan::Generated {
|
||||
generator: crate::seed::Generator::IdentitySequential,
|
||||
..
|
||||
}
|
||||
) && matches!(ty, Type::Int)
|
||||
{
|
||||
Some(seed_max_int(conn, table, &canonical_col)?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
const MAX_ATTEMPTS: u32 = 200;
|
||||
let mut rng = seed::make_rng(rng_seed);
|
||||
let tx = conn
|
||||
.unchecked_transaction()
|
||||
.map_err(DbError::from_rusqlite)?;
|
||||
|
||||
let update_sql = format!(
|
||||
"UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2",
|
||||
table.replace('"', "\"\""),
|
||||
canonical_col.replace('"', "\"\""),
|
||||
);
|
||||
let mut produced: u64 = 0;
|
||||
for (offset, rowid) in rowids.iter().enumerate() {
|
||||
let mut attempt = 0u32;
|
||||
let value = loop {
|
||||
let v = match &plan {
|
||||
SeedColPlan::ForeignKey { .. } => {
|
||||
let samples = fk_sample.as_ref().expect("fk plan implies samples");
|
||||
samples[rng.random_range(0..samples.len())].clone()
|
||||
}
|
||||
SeedColPlan::ShortId => {
|
||||
Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
|
||||
}
|
||||
SeedColPlan::Generated { generator, ty }
|
||||
if matches!(generator, crate::seed::Generator::IdentitySequential)
|
||||
&& matches!(ty, Type::Int) =>
|
||||
{
|
||||
Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string())
|
||||
}
|
||||
SeedColPlan::Generated { generator, ty } => {
|
||||
seed::generate_value(generator, *ty, &mut rng)
|
||||
}
|
||||
};
|
||||
if enforce_unique {
|
||||
let key = seed_value_list_key(std::slice::from_ref(&v));
|
||||
if used.contains(&key) {
|
||||
attempt += 1;
|
||||
if attempt >= MAX_ATTEMPTS {
|
||||
break v; // give up on distinctness; DB may reject
|
||||
}
|
||||
continue;
|
||||
}
|
||||
used.insert(key);
|
||||
}
|
||||
break v;
|
||||
};
|
||||
let bound = impl_value_for(&schema, &canonical_col, &value)?;
|
||||
let params: Vec<rusqlite::types::Value> =
|
||||
vec![bound_to_sqlite_value(&bound), rusqlite::types::Value::Integer(*rowid)];
|
||||
execute_with_fk_enrichment(conn, table, &update_sql, ¶ms)?;
|
||||
produced += 1;
|
||||
let _ = offset;
|
||||
}
|
||||
|
||||
let changes = Changes {
|
||||
schema_dirty: false,
|
||||
rewritten_tables: vec![table.to_string()],
|
||||
..Changes::default()
|
||||
};
|
||||
finalize_persistence(conn, persistence, source, &changes)?;
|
||||
tx.commit().map_err(DbError::from_rusqlite)?;
|
||||
|
||||
// Preview the first capped rows (D18).
|
||||
let preview: Vec<i64> = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect();
|
||||
let data = query_rows_by_rowid(conn, table, &preview)?;
|
||||
|
||||
Ok(SeedResult {
|
||||
table: table.to_string(),
|
||||
requested: produced,
|
||||
produced,
|
||||
data,
|
||||
advisory_columns,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build and execute a single-row `INSERT` — column resolution, value
|
||||
/// binding, `serial`/`shortid` autofill, and the FK-enriched execute —
|
||||
/// returning `(rows_affected, new rowid)`.
|
||||
|
||||
+46
-5
@@ -402,14 +402,23 @@ pub enum Command {
|
||||
filter: Option<Expr>,
|
||||
limit: Option<u64>,
|
||||
},
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1).
|
||||
/// `count` defaults to 20 when omitted; `rng_seed` (from a future
|
||||
/// `--seed <n>` flag) makes generation reproducible. Phase 1 is
|
||||
/// whole-row generation; the `set` override clause and the
|
||||
/// `<table>.<column>` column-fill form arrive in later phases.
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
|
||||
/// `count` defaults to 20 when omitted; `rng_seed` (from the
|
||||
/// `--seed <n>` flag) makes generation reproducible.
|
||||
///
|
||||
/// Phase 2 surfaces (ADR-0048 D1/D2):
|
||||
/// - `target_column` is `Some` for the **column-fill** form
|
||||
/// `seed <table>.<column>` — fill one column across the table's
|
||||
/// *existing* rows (an UPDATE), rather than generating new rows.
|
||||
/// - `overrides` carries the `set <col> …` clause: per-column pins
|
||||
/// that take precedence over the heuristic generator (D2).
|
||||
Seed {
|
||||
table: String,
|
||||
/// `Some(col)` → column-fill mode (UPDATE existing rows);
|
||||
/// `None` → whole-row generation (INSERT new rows).
|
||||
target_column: Option<String>,
|
||||
count: Option<u64>,
|
||||
overrides: Vec<SeedOverride>,
|
||||
rng_seed: Option<u64>,
|
||||
},
|
||||
/// Replay a sequence of DSL commands from a file. Each line
|
||||
@@ -647,6 +656,38 @@ impl RowFilter {
|
||||
}
|
||||
}
|
||||
|
||||
/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
|
||||
///
|
||||
/// The user can pin a column's generated values to a constant, a
|
||||
/// pick-list, an explicit named generator, or a range — overriding the
|
||||
/// per-column heuristic the executor would otherwise pick. `column` is
|
||||
/// the user-typed column name (validated against the table at execution,
|
||||
/// like every other column slot).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct SeedOverride {
|
||||
pub column: String,
|
||||
pub kind: SeedOverrideKind,
|
||||
}
|
||||
|
||||
/// The four `set` override forms (ADR-0048 D2).
|
||||
///
|
||||
/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
|
||||
/// dates are quoted text per the D2 amendment); the `Generator` name is
|
||||
/// a raw string validated at execution because `src/dsl` cannot depend
|
||||
/// on `src/seed` (the curated vocabulary lives there).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SeedOverrideKind {
|
||||
/// `set status = 'pending'` — every row gets the constant.
|
||||
Fixed(Value),
|
||||
/// `set role in ('admin', 'editor')` — uniform pick from the list.
|
||||
PickList(Vec<Value>),
|
||||
/// `set work_addr as email` — force the named generator (D9).
|
||||
Generator(String),
|
||||
/// `set price between 10 and 100` — uniform in `[low, high]`;
|
||||
/// numeric or (quoted) date bounds per the destination column type.
|
||||
Range { low: Value, high: Value },
|
||||
}
|
||||
|
||||
/// A complex WHERE expression (ADR-0026 §4).
|
||||
///
|
||||
/// Built by `grammar::expr::build_expr` from the flat
|
||||
|
||||
+276
-11
@@ -24,7 +24,9 @@
|
||||
//! later swap that capture for the same typed slots used here, adding
|
||||
//! live hints/highlighting.
|
||||
|
||||
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
|
||||
use crate::dsl::command::{
|
||||
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
|
||||
};
|
||||
use crate::dsl::grammar::{
|
||||
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
|
||||
shared::{
|
||||
@@ -426,7 +428,9 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
|
||||
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
|
||||
|
||||
// =================================================================
|
||||
// seed — `seed <T> [<count>]` (ADR-0048, SD1)
|
||||
// seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
|
||||
// column-fill)
|
||||
// =================================================================
|
||||
|
||||
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
|
||||
@@ -444,11 +448,127 @@ const SEED_FLAG_NODES: &[Node] = &[
|
||||
},
|
||||
];
|
||||
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
|
||||
|
||||
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
|
||||
// form 2) ----------------------------------------------------
|
||||
//
|
||||
// `seed users.email …` fills one column across existing rows. The
|
||||
// table ident stops at `.` (idents are alnum/underscore), so an
|
||||
// `Optional(Seq['.', column])` after the table cleanly discriminates:
|
||||
// when the next token is not `.`, the `Punct('.')` first-child
|
||||
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
|
||||
// column propagates as the user mid-typing `seed users.` (driver
|
||||
// `walk_optional` semantics). The column resolves against
|
||||
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
|
||||
const SEED_TARGET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_target_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
|
||||
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
|
||||
|
||||
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
|
||||
//
|
||||
// Each override pins one column's generation. The column slot
|
||||
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
|
||||
// same `current_column_value` dispatch `update … set` uses) narrow to
|
||||
// the column's type — so list/range/fixed values get the column's
|
||||
// typed slot (quoted text, unquoted number, quoted date) and a
|
||||
// type-mismatched literal is flagged. The four tails each start with a
|
||||
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
|
||||
// discriminates cleanly (no Optional-first branch).
|
||||
|
||||
/// The `set <col>` column slot. Distinct role from `update`'s
|
||||
/// `update_set_column` and the expression `expr_column`.
|
||||
const SEED_SET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_set_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: true,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `as <generator>` — the curated generator-name vocabulary (D9),
|
||||
/// highlighted in the `tok_function` colour. The slot is structural
|
||||
/// (any identifier matches); the name is validated at execution and
|
||||
/// flagged live by the validity indicator.
|
||||
const SEED_GENERATOR: Node = Node::Ident {
|
||||
source: IdentSource::Generators,
|
||||
role: "seed_generator",
|
||||
validator: None,
|
||||
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `= <value>` — a fixed constant for every row.
|
||||
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
|
||||
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
|
||||
const SEED_OV_IN_VALUES: Node = Node::Repeated {
|
||||
inner: &PER_COLUMN_VALUE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_OV_IN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("in")),
|
||||
Node::Punct('('),
|
||||
SEED_OV_IN_VALUES,
|
||||
Node::Punct(')'),
|
||||
];
|
||||
/// `between <value> and <value>` — uniform in the (typed) range.
|
||||
const SEED_OV_BETWEEN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("between")),
|
||||
PER_COLUMN_VALUE,
|
||||
Node::Word(Word::keyword("and")),
|
||||
PER_COLUMN_VALUE,
|
||||
];
|
||||
/// `as <generator>` — force a named generator.
|
||||
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
|
||||
|
||||
const SEED_OV_TAIL_CHOICES: &[Node] = &[
|
||||
Node::Seq(SEED_OV_FIXED_NODES),
|
||||
Node::Seq(SEED_OV_IN_NODES),
|
||||
Node::Seq(SEED_OV_BETWEEN_NODES),
|
||||
Node::Seq(SEED_OV_AS_NODES),
|
||||
];
|
||||
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
|
||||
|
||||
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
|
||||
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
|
||||
const SEED_OVERRIDES: Node = Node::Repeated {
|
||||
inner: &SEED_OVERRIDE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_SET_CLAUSE_NODES: &[Node] =
|
||||
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
|
||||
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
|
||||
|
||||
const SEED_NODES: &[Node] = &[
|
||||
// `writes_table` so a future `set <col>=…` clause's column slots
|
||||
// can resolve against this table.
|
||||
// `writes_table` so the `.column` target, the `set <col>=…`
|
||||
// clause's column slots, and the typed value slots all resolve
|
||||
// against this table.
|
||||
TABLE_NAME_WRITES,
|
||||
SEED_DOT_COLUMN,
|
||||
Node::Optional(&SEED_COUNT),
|
||||
Node::Optional(&SEED_SET_CLAUSE),
|
||||
Node::Optional(&SEED_FLAG),
|
||||
];
|
||||
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
|
||||
@@ -736,16 +856,29 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a `seed <T> [<count>] [--seed <n>]` command (ADR-0048). The
|
||||
/// `--seed` flag's value is the `NumberLit` right after the flag; the
|
||||
/// positional count is the `NumberLit` *before* the flag (or the only
|
||||
/// one when no flag is present).
|
||||
/// Build a `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
/// command (ADR-0048, SD1 + SD2 Phase 2).
|
||||
///
|
||||
/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
|
||||
/// ident, present only for the `seed <T>.<col>` form.
|
||||
/// - The positional `count` is the `NumberLit` that precedes both the
|
||||
/// `set` keyword and the `--seed` flag — bounding it that way keeps a
|
||||
/// `set age between 18 and 80` value (also a `NumberLit`) from being
|
||||
/// mistaken for the count.
|
||||
/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
|
||||
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
|
||||
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||||
let table = require_ident(path, "table_name")?;
|
||||
let target_column = ident_text(path, "seed_target_column").map(str::to_string);
|
||||
|
||||
let flag_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Flag("seed")));
|
||||
let set_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Word("set")));
|
||||
|
||||
let rng_seed = flag_idx
|
||||
.and_then(|fi| path.items.get(fi + 1))
|
||||
@@ -753,23 +886,155 @@ fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationEr
|
||||
.map(|i| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
// The count is bounded to before the `set` clause and the flag, so a
|
||||
// numeric value inside `set` (e.g. `between 18 and 80`) is never read
|
||||
// as the count.
|
||||
let count_boundary = [set_idx, flag_idx]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.min()
|
||||
.unwrap_or(path.items.len());
|
||||
let count = path
|
||||
.items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(idx, i)| {
|
||||
matches!(i.kind, MatchedKind::NumberLit) && flag_idx.is_none_or(|fi| *idx < fi)
|
||||
})
|
||||
.find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary)
|
||||
.map(|(_, i)| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
let overrides = build_seed_overrides(path, set_idx, flag_idx)?;
|
||||
|
||||
Ok(Command::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
})
|
||||
}
|
||||
|
||||
/// Fold the flat `set`-clause terminals into [`SeedOverride`]s
|
||||
/// (ADR-0048 D2). The clause region runs from just after `Word("set")`
|
||||
/// to the `--seed` flag (or the path end). Each override begins at a
|
||||
/// `seed_set_column` ident; the token right after it selects the form
|
||||
/// (`=` / `in` / `between` / `as`). Top-level comma separators between
|
||||
/// overrides are skipped (the `in (...)` form consumes its own inner
|
||||
/// commas up to `)`).
|
||||
fn build_seed_overrides(
|
||||
path: &MatchedPath,
|
||||
set_idx: Option<usize>,
|
||||
flag_idx: Option<usize>,
|
||||
) -> Result<Vec<SeedOverride>, ValidationError> {
|
||||
let Some(set_idx) = set_idx else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
let end = flag_idx.unwrap_or(path.items.len());
|
||||
let region = &path.items[set_idx + 1..end];
|
||||
|
||||
let mut overrides = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < region.len() {
|
||||
// The next override starts at its column ident; skip the
|
||||
// top-level comma separators (and any stray token) between them.
|
||||
let MatchedKind::Ident {
|
||||
role: "seed_set_column",
|
||||
..
|
||||
} = ®ion[i].kind
|
||||
else {
|
||||
i += 1;
|
||||
continue;
|
||||
};
|
||||
let column = region[i].text.clone();
|
||||
i += 1;
|
||||
let kind = parse_seed_override_tail(region, &mut i, &column)?;
|
||||
overrides.push(SeedOverride { column, kind });
|
||||
}
|
||||
Ok(overrides)
|
||||
}
|
||||
|
||||
/// Parse one override tail starting at `region[*i]` (just past the
|
||||
/// column ident), advancing `*i` past the consumed tokens.
|
||||
fn parse_seed_override_tail(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<SeedOverrideKind, ValidationError> {
|
||||
let head = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
match &head.kind {
|
||||
MatchedKind::Punct('=') => {
|
||||
*i += 1;
|
||||
let value = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Fixed(value))
|
||||
}
|
||||
MatchedKind::Word("in") => {
|
||||
*i += 1; // `in`
|
||||
// `(`
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) {
|
||||
*i += 1;
|
||||
}
|
||||
let mut values = Vec::new();
|
||||
while let Some(item) = region.get(*i) {
|
||||
match &item.kind {
|
||||
MatchedKind::Punct(')') => {
|
||||
*i += 1;
|
||||
break;
|
||||
}
|
||||
MatchedKind::Punct(',') => {
|
||||
*i += 1;
|
||||
}
|
||||
_ => values.push(seed_take_value(region, i, column)?),
|
||||
}
|
||||
}
|
||||
Ok(SeedOverrideKind::PickList(values))
|
||||
}
|
||||
MatchedKind::Word("between") => {
|
||||
*i += 1; // `between`
|
||||
let low = seed_take_value(region, i, column)?;
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) {
|
||||
*i += 1;
|
||||
}
|
||||
let high = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Range { low, high })
|
||||
}
|
||||
MatchedKind::Word("as") => {
|
||||
*i += 1; // `as`
|
||||
let gen_item = region
|
||||
.get(*i)
|
||||
.filter(|t| matches!(t.kind, MatchedKind::Ident { role: "seed_generator", .. }))
|
||||
.ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(SeedOverrideKind::Generator(gen_item.text.clone()))
|
||||
}
|
||||
_ => Err(seed_set_error(column)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Take one value literal at `region[*i]`, advancing past it.
|
||||
///
|
||||
/// The grammar's typed value slots only ever match value literals (a
|
||||
/// bare unquoted word fails to match the slot and is rejected *before*
|
||||
/// this fold runs — D2's quoting requirement enforced structurally), so
|
||||
/// a non-literal here can only mean a grammar/builder drift bug; the
|
||||
/// `Err` is a drift guard (mirrors `expr::build_expr`).
|
||||
fn seed_take_value(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<Value, ValidationError> {
|
||||
let item = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// Drift-guard error for the `set`-clause fold (see `seed_take_value`).
|
||||
fn seed_set_error(column: &str) -> ValidationError {
|
||||
ValidationError {
|
||||
message_key: "parse.error_wrapper",
|
||||
args: vec![("detail", format!("malformed `set` clause for `{column}`"))],
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
|
||||
text.parse::<u64>().map_err(|_| ValidationError {
|
||||
message_key: "parse.custom.bind_type_mismatch",
|
||||
|
||||
@@ -57,6 +57,12 @@ pub enum HighlightClass {
|
||||
String,
|
||||
Punct,
|
||||
Flag,
|
||||
/// A curated function-vocabulary name — the `seed … set <col> as
|
||||
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
|
||||
/// the existing `tok_function` colour (ADR-0022 Amд6 blue — no new
|
||||
/// theme colour), assigned via a generator slot's
|
||||
/// `highlight_override`, not by byte shape.
|
||||
Function,
|
||||
Error,
|
||||
}
|
||||
|
||||
@@ -86,6 +92,14 @@ pub enum IdentSource {
|
||||
/// content validator on column-type slots; not user-listable
|
||||
/// from the schema.
|
||||
Types,
|
||||
/// Closed, curated set of fake-data generator names (ADR-0048
|
||||
/// D9) — the `seed … set <col> as <generator>` slot. Like
|
||||
/// `Types`, not user-listable from the schema; the vocabulary
|
||||
/// lives in `src/seed` and the completion engine offers it. The
|
||||
/// grammar slot is purely structural (matches any identifier);
|
||||
/// an unknown name is flagged live (validity) and rejected at
|
||||
/// execution.
|
||||
Generators,
|
||||
/// Any identifier shape; used by synthetic catch-all branches
|
||||
/// (e.g., the unknown-value branch of `mode <value>`).
|
||||
Free,
|
||||
@@ -117,6 +131,7 @@ impl IdentSource {
|
||||
Self::Relationships => "relationship name",
|
||||
Self::Indexes => "index name",
|
||||
Self::Types => "type",
|
||||
Self::Generators => "generator name",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +149,7 @@ impl IdentSource {
|
||||
"relationship name" => Some(Self::Relationships),
|
||||
"index name" => Some(Self::Indexes),
|
||||
"type" => Some(Self::Types),
|
||||
"generator name" => Some(Self::Generators),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String {
|
||||
IdentSource::Relationships => "relationship name".to_string(),
|
||||
IdentSource::Indexes => "index name".to_string(),
|
||||
IdentSource::Types => "type".to_string(),
|
||||
IdentSource::Generators => "generator name".to_string(),
|
||||
IdentSource::NewName | IdentSource::Free => "identifier".to_string(),
|
||||
},
|
||||
Expectation::Punct(c) => format!("`{c}`"),
|
||||
|
||||
@@ -240,6 +240,18 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn seed_generator_name_highlighted_as_function() {
    // ADR-0048 D9: the `set <col> as <gen>` generator name carries the
    // `Function` highlight class (via the slot's `highlight_override`),
    // rendered in the shared `tok_function` colour.
    let runs = run("seed Members set role as email");
    assert!(
        runs.iter().any(|(_, _, c)| *c == HighlightClass::Function),
        "generator name `email` should be Function-highlighted: {runs:?}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn unknown_command_word_classified_by_byte_shape() {
|
||||
// Walker doesn't engage; fallback classifies as Identifier.
|
||||
|
||||
@@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics(
|
||||
IdentSource::Relationships
|
||||
| IdentSource::Indexes
|
||||
| IdentSource::Types
|
||||
// `Generators` (the `set … as <gen>` slot, ADR-0048 D9) is a
|
||||
// curated vocabulary; its unknown-name validity is handled by
|
||||
// the completion-layer indicator, not this walker diagnostic.
|
||||
| IdentSource::Generators
|
||||
| IdentSource::Free => {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -553,7 +553,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
||||
("ok.rows_seeded", &["count", "table"]),
|
||||
("ok.rows_updated", &["count"]),
|
||||
("seed.capped", &["requested"]),
|
||||
("seed.advisory_generic", &["columns"]),
|
||||
("seed.advisory_generic", &["columns", "column", "table"]),
|
||||
// ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ----
|
||||
("client_side.auto_fill_add_serial", &["count"]),
|
||||
("client_side.auto_fill_add_shortid", &["count"]),
|
||||
|
||||
@@ -337,6 +337,13 @@ help:
|
||||
seed <T> [<count>] — fill a table with generated sample rows
|
||||
(default 20). Existing rows are kept;
|
||||
foreign keys draw from existing parent rows.
|
||||
seed <T> ... set <c> = 'v' | in ('a','b') | as <gen> | between x and y
|
||||
— pin how a column is generated: a fixed
|
||||
value, a pick-list, a named generator
|
||||
(email, name, product, ...), or a range.
|
||||
seed <T>.<col> [set ...] — fill one column across the EXISTING rows
|
||||
(the follow-up to `add column`).
|
||||
seed <T> ... --seed <n> — reproducible: same data for the same n.
|
||||
insert: |-
|
||||
insert into <T> [(cols)] [values] (vals) — add a row
|
||||
update: |-
|
||||
@@ -573,7 +580,7 @@ parse:
|
||||
change_column: |-
|
||||
change column [in] [table] <Table>: <Name> (<Type>)
|
||||
[--force-conversion | --dont-convert]
|
||||
seed: "seed <Table> [count]"
|
||||
seed: "seed <Table> [count] [set <col> = ... | in (...) | as <gen> | between x and y] | seed <Table>.<col>"
|
||||
show_data: "show data <Table>"
|
||||
show_table: "show table <Table>"
|
||||
show_tables: "show tables"
|
||||
@@ -988,7 +995,11 @@ db:
|
||||
# generic text that look like fixed value sets.
|
||||
seed:
|
||||
capped: "(of {requested} requested — ran out of distinct value combinations)"
|
||||
advisory_generic: "{columns} filled with generic text — they look like fixed value sets."
|
||||
# ADR-0048 D13 (Phase 2/3 wording): name the generically-filled
|
||||
# enum-ish / CHECK columns and point at the concrete repairs — the
|
||||
# `set` clause on a fresh seed, or the column-fill form for the rows
|
||||
# just created.
|
||||
advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`."
|
||||
|
||||
ok:
|
||||
# ADR-0040: the generic `[ok] <verb> <subject>` summary line was
|
||||
|
||||
@@ -817,6 +817,9 @@ fn ambient_hint_core_in_mode(
|
||||
crate::dsl::grammar::IdentSource::Tables => "table",
|
||||
crate::dsl::grammar::IdentSource::Columns => "column",
|
||||
crate::dsl::grammar::IdentSource::Relationships => "relationship",
|
||||
// The `seed … set <col> as <gen>` curated vocabulary
|
||||
// (ADR-0048 D9) flags an unknown name here.
|
||||
crate::dsl::grammar::IdentSource::Generators => "generator",
|
||||
// `NewName`, `Types`, `Free` are filtered out by
|
||||
// `invalid_ident_at_cursor` (it only fires for
|
||||
// known-set sources via `completes_from_schema`), so
|
||||
|
||||
+4
-2
@@ -2916,13 +2916,15 @@ async fn execute_command_typed(
|
||||
.insert(table, columns, values, src)
|
||||
.await
|
||||
.map(CommandOutcome::Insert),
|
||||
// ADR-0048 (SD1).
|
||||
// ADR-0048 (SD1/SD2 Phase 2).
|
||||
Command::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
} => database
|
||||
.seed(table, count, rng_seed, src)
|
||||
.seed(table, target_column, count, overrides, rng_seed, src)
|
||||
.await
|
||||
.map(CommandOutcome::Seed),
|
||||
Command::Update {
|
||||
|
||||
@@ -81,6 +81,11 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
|
||||
let chosen: &String = pick(rng, values);
|
||||
literal_to_value(chosen, ty)
|
||||
}
|
||||
// The `set <col> between low and high` override (D2). Bounds are
|
||||
// interpreted per the destination type; the executor has already
|
||||
// validated they parse, so a defensive parse failure here falls
|
||||
// back to type-based generation rather than producing junk.
|
||||
Generator::Range { low, high } => range_value(low, high, ty, rng),
|
||||
// Un-intercepted markers + an empty pick list → type-based.
|
||||
Generator::PickFrom(_)
|
||||
| Generator::IdentitySequential
|
||||
@@ -89,6 +94,132 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
|
||||
}
|
||||
}
|
||||
|
||||
/// Uniform value in `[low, high]` for the `between` override (D2).
|
||||
///
|
||||
/// Bounds are interpreted by destination type. Returns the type-based
|
||||
/// fallback for a bound that does not parse or a type that has no range
|
||||
/// meaning — the executor pre-validates, so this is defensive only.
|
||||
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
|
||||
match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high)
|
||||
.map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string()))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high)
|
||||
.map(|(lo, hi)| {
|
||||
let v = rng.random::<f64>().mul_add(hi - lo, lo);
|
||||
Value::Number(format!("{v:.2}"))
|
||||
})
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Date => parse_date_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi))))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::DateTime => parse_datetime_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi)))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
_ => generic_for_type(ty, rng),
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate that `low`/`high` parse as bounds for `ty`.
|
||||
///
|
||||
/// The `between` override (D2) is checked by the executor *before*
|
||||
/// generation. Returns a short human reason on failure (the executor
|
||||
/// wraps it in a friendly error naming the column), `None` when valid.
|
||||
#[must_use]
|
||||
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
|
||||
let ok = match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high).is_some(),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high).is_some(),
|
||||
Type::Date => parse_date_range(low, high).is_some(),
|
||||
Type::DateTime => parse_datetime_range(low, high).is_some(),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => false,
|
||||
};
|
||||
if ok {
|
||||
return None;
|
||||
}
|
||||
Some(match ty {
|
||||
Type::Int | Type::Serial => "expected two whole numbers, e.g. `between 1 and 100`".to_string(),
|
||||
Type::Real | Type::Decimal => "expected two numbers, e.g. `between 1.0 and 9.99`".to_string(),
|
||||
Type::Date => "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`".to_string(),
|
||||
Type::DateTime => {
|
||||
"expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`"
|
||||
.to_string()
|
||||
}
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => {
|
||||
"a `between` range only applies to numeric and date/datetime columns".to_string()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse and order an integer range; `None` if either bound is not an
/// integer.
///
/// Surrounding whitespace is ignored, and reversed bounds are swapped so
/// the returned pair is always `(min, max)`.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let lo: i64 = low.trim().parse().ok()?;
    let hi: i64 = high.trim().parse().ok()?;
    Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
}
|
||||
|
||||
/// Parse and order a floating-point range; `None` if either bound is
/// not a number or is not finite (`inf` / `NaN` are rejected).
///
/// Surrounding whitespace is ignored, and reversed bounds are swapped so
/// the returned pair is always `(min, max)`.
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let lo: f64 = low.trim().parse().ok()?;
    let hi: f64 = high.trim().parse().ok()?;
    // `f64::from_str` accepts "inf" and "NaN"; neither is a usable bound.
    if !lo.is_finite() || !hi.is_finite() {
        return None;
    }
    Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
}
|
||||
|
||||
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
|
||||
let lo = NaiveDate::parse_from_str(low.trim(), "%Y-%m-%d").ok()?;
|
||||
let hi = NaiveDate::parse_from_str(high.trim(), "%Y-%m-%d").ok()?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Accept both the `T`-separated and space-separated datetime spellings
|
||||
/// the app validates (`bind_datetime` / `validate_datetime`).
|
||||
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
|
||||
let t = s.trim();
|
||||
chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%dT%H:%M:%S")
|
||||
.or_else(|_| chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%d %H:%M:%S"))
|
||||
.ok()
|
||||
}
|
||||
|
||||
fn parse_datetime_range(
|
||||
low: &str,
|
||||
high: &str,
|
||||
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
|
||||
let lo = parse_one_datetime(low)?;
|
||||
let hi = parse_one_datetime(high)?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Uniform date in `[lo, hi]` (inclusive).
|
||||
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
|
||||
let lo_ce = lo.num_days_from_ce();
|
||||
let hi_ce = hi.num_days_from_ce();
|
||||
let day = rng.random_range(lo_ce..=hi_ce);
|
||||
NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo)
|
||||
}
|
||||
|
||||
/// Uniform datetime in `[lo, hi]`, rendered `YYYY-MM-DDTHH:MM:SS`.
|
||||
fn random_datetime_between(
|
||||
rng: &mut SeedRng,
|
||||
lo: chrono::NaiveDateTime,
|
||||
hi: chrono::NaiveDateTime,
|
||||
) -> String {
|
||||
let lo_s = lo.and_utc().timestamp();
|
||||
let hi_s = hi.and_utc().timestamp();
|
||||
let secs = if lo_s <= hi_s {
|
||||
rng.random_range(lo_s..=hi_s)
|
||||
} else {
|
||||
rng.random_range(hi_s..=lo_s)
|
||||
};
|
||||
let dt = chrono::DateTime::from_timestamp(secs, 0)
|
||||
.map_or(lo, |d| d.naive_utc());
|
||||
dt.format("%Y-%m-%dT%H:%M:%S").to_string()
|
||||
}
|
||||
|
||||
/// Type-based fallback generation (D8). Never produces NULL for a
|
||||
/// generatable type; `blob`/`serial`/`shortid` are handled by the
|
||||
/// executor (autogen / block guard) and yield NULL here only as a
|
||||
@@ -358,6 +489,76 @@ mod tests {
|
||||
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
|
||||
}
|
||||
|
||||
#[test]
fn int_range_stays_within_inclusive_bounds() {
    // Every generated integer must land inside the inclusive [10, 20].
    let g = Generator::Range { low: "10".into(), high: "20".into() };
    let mut rng = make_rng(Some(5));
    for _ in 0..200 {
        let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
            panic!("int range should be a number")
        };
        let n: i64 = s.parse().unwrap();
        assert!((10..=20).contains(&n), "int {n} out of [10,20]");
    }
}
|
||||
|
||||
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
    // Real ranges stay inside the bounds and are rendered with decimals.
    let g = Generator::Range { low: "1.0".into(), high: "9.0".into() };
    let mut rng = make_rng(Some(5));
    for _ in 0..200 {
        let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else {
            panic!("real range should be a number")
        };
        let n: f64 = s.parse().unwrap();
        assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
        assert!(s.contains('.'), "real should be formatted with cents: {s}");
    }
}
|
||||
|
||||
#[test]
fn date_range_stays_within_quoted_bounds() {
    // Date bounds arrive as (quoted) text; results must parse as dates
    // inside the inclusive range.
    let g = Generator::Range {
        low: "2023-01-01".into(),
        high: "2023-12-31".into(),
    };
    let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap();
    let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap();
    let mut rng = make_rng(Some(9));
    for _ in 0..200 {
        let Value::Text(s) = generate_value(&g, Type::Date, &mut rng) else {
            panic!("date range should be text")
        };
        let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid date");
        assert!(d >= lo && d <= hi, "date {d} out of range");
    }
}
|
||||
|
||||
#[test]
fn reversed_bounds_are_tolerated() {
    // `between 20 and 10` is treated as the range [10, 20].
    let g = Generator::Range { low: "20".into(), high: "10".into() };
    let mut rng = make_rng(Some(1));
    let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
        panic!("number")
    };
    let n: i64 = s.parse().unwrap();
    assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
|
||||
|
||||
/// `range_bounds_reason` returns `None` for bounds that suit the column
/// type and `Some(..)` for bounds (or column types) that do not.
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
    // Numeric / date / datetime columns with well-formed bounds are accepted.
    let accepted = [
        (Type::Int, "1", "10"),
        (Type::Real, "1.5", "9.9"),
        (Type::Date, "2023-01-01", "2024-01-01"),
        (Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00"),
    ];
    for (column_type, low, high) in accepted {
        assert!(
            range_bounds_reason(column_type, low, high).is_none(),
            "bounds {low}..{high} should be accepted"
        );
    }

    let rejected = [
        // Non-numeric bound on a numeric column.
        (Type::Int, "abc", "10"),
        // A range on a text column is meaningless.
        (Type::Text, "a", "z"),
        (Type::Bool, "0", "1"),
    ];
    for (column_type, low, high) in rejected {
        assert!(
            range_bounds_reason(column_type, low, high).is_some(),
            "bounds {low}..{high} should be rejected"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn markers_fall_back_to_type_based_generation() {
|
||||
// An un-intercepted marker must not panic; it generates by type.
|
||||
|
||||
+14
-3
@@ -27,10 +27,12 @@
|
||||
mod check;
|
||||
mod generators;
|
||||
mod heuristics;
|
||||
mod vocabulary;
|
||||
|
||||
pub use check::parse_in_check_values;
|
||||
pub use generators::generate_value;
|
||||
pub use generators::{generate_value, range_bounds_reason};
|
||||
pub use heuristics::{choose_generator, is_enum_ish};
|
||||
pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS};
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{RngExt, SeedableRng};
|
||||
@@ -162,10 +164,19 @@ pub enum Generator {
|
||||
IdentitySequential,
|
||||
/// FK column (D14): the executor samples an existing parent key.
|
||||
ForeignKeySample,
|
||||
// — List / fallback —
|
||||
// — List / range (the `set` override clause, D2) —
|
||||
/// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
|
||||
/// enum, or a future `set <col> in (…)` override.
|
||||
/// enum, or a `set <col> in (…)` / `= <value>` override (D2).
|
||||
PickFrom(Vec<String>),
|
||||
/// Uniform value in `[low, high]` — the `set <col> between low and
|
||||
/// high` override (D2). Bounds are the raw literal strings; their
|
||||
/// interpretation (int / real / date / datetime) follows the
|
||||
/// destination column type at generation time. The executor
|
||||
/// validates type-compatibility *before* generation (a bound that
|
||||
/// does not parse for the column type is a friendly error), so
|
||||
/// [`generate_value`] only ever sees parseable bounds; a defensive
|
||||
/// parse failure falls back to type-based generation.
|
||||
Range { low: String, high: String },
|
||||
/// Type-based fallback (D8) when no name heuristic matches.
|
||||
Generic,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
//! The curated named-generator vocabulary (ADR-0048 D9).
|
||||
//!
|
||||
//! This is the **single source of truth** for "what generator names can
|
||||
//! a learner write after `set <col> as …`", shared by three consumers
|
||||
//! (mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amд6):
|
||||
//!
|
||||
//! - **Tab completion** — the `seed … set <col> as ⟨here⟩` slot offers
|
||||
//! these names (`src/completion.rs`).
|
||||
//! - **The typing-time validity indicator (ADR-0027)** — an unknown
|
||||
//! name after `as` is flagged `[ERR]` while typing.
|
||||
//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`]
|
||||
//! via [`generator_for_name`]; an unknown name is a friendly error.
|
||||
//!
|
||||
//! The list is a deliberately *curated pedagogical set* — the generators
|
||||
//! a learner reaches for, not every internal [`Generator`] variant
|
||||
//! (stateful markers like `ForeignKeySample` are executor-only and have
|
||||
//! no name). It is lowercase + sorted (pinned by a unit test).
|
||||
|
||||
use crate::seed::Generator;
|
||||
|
||||
/// Canonical spellings of the named generators.
///
/// Two invariants hold for this list and are checked by a unit test:
/// every entry is lowercase, and the entries appear in sorted order.
/// Completion depends on the stable ordering and on case-insensitive
/// prefix matching against exactly these spellings.
pub const KNOWN_GENERATORS: &[&str] = &[
    "age",
    "bool",
    "city",
    "color",
    "company",
    "country",
    "date",
    "datetime",
    "email",
    "first_name",
    "job",
    "last_name",
    "name",
    "paragraph",
    "password",
    "phone",
    "price",
    "product",
    "sentence",
    "state",
    "street",
    "url",
    "username",
    "zip",
];
|
||||
|
||||
/// Map a generator name (case-insensitive) to its [`Generator`].
|
||||
///
|
||||
/// `None` for an unrecognised name — the executor turns that into a
|
||||
/// friendly "unknown generator" error naming the curated set. A couple
|
||||
/// of common spelling variants (`firstname`, `lastname`, `colour`,
|
||||
/// `full_name`) are accepted as aliases even though only the canonical
|
||||
/// spelling is offered for completion.
|
||||
#[must_use]
|
||||
pub fn generator_for_name(name: &str) -> Option<Generator> {
|
||||
let n = name.to_ascii_lowercase();
|
||||
let g = match n.as_str() {
|
||||
"name" | "full_name" => Generator::FullName,
|
||||
"first_name" | "firstname" => Generator::FirstName,
|
||||
"last_name" | "lastname" | "surname" => Generator::LastName,
|
||||
"email" => Generator::Email,
|
||||
"username" => Generator::Username,
|
||||
"password" => Generator::Password,
|
||||
"phone" => Generator::Phone,
|
||||
"city" => Generator::City,
|
||||
"country" => Generator::Country,
|
||||
"state" => Generator::StateName,
|
||||
"street" => Generator::Street,
|
||||
"zip" => Generator::ZipCode,
|
||||
"company" => Generator::Company,
|
||||
"job" => Generator::JobTitle,
|
||||
"product" => Generator::ProductName,
|
||||
"sentence" => Generator::Sentence,
|
||||
"paragraph" => Generator::Paragraph,
|
||||
"url" => Generator::Url,
|
||||
"color" | "colour" => Generator::HexColor,
|
||||
"price" => Generator::CurrencyAmount,
|
||||
"age" => Generator::Age,
|
||||
"date" => Generator::DateRecent,
|
||||
"datetime" => Generator::DateTimeRecent,
|
||||
"bool" => Generator::Boolean,
|
||||
_ => return None,
|
||||
};
|
||||
Some(g)
|
||||
}
|
||||
|
||||
/// Whether `partial` is a case-insensitive prefix of at least one known
|
||||
/// generator name.
|
||||
///
|
||||
/// An empty `partial` matches every generator (it is a prefix of all) —
|
||||
/// mirrors `is_known_function_prefix`. Used by the validity indicator to
|
||||
/// avoid flagging a still-being-typed name.
|
||||
#[must_use]
|
||||
pub fn is_known_generator_prefix(partial: &str) -> bool {
|
||||
let lowered = partial.to_ascii_lowercase();
|
||||
KNOWN_GENERATORS.iter().any(|g| g.starts_with(&lowered))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// The vocabulary must already be in sorted order and spelled in lowercase.
    #[test]
    fn known_generators_is_sorted_and_lowercase() {
        let expected_order = {
            let mut names = KNOWN_GENERATORS.to_vec();
            names.sort_unstable();
            names
        };
        assert_eq!(KNOWN_GENERATORS, expected_order.as_slice(), "must be sorted");

        for name in KNOWN_GENERATORS.iter().copied() {
            assert_eq!(name, name.to_ascii_lowercase(), "must be lowercase: {name}");
        }
    }

    /// Every name offered to the learner must resolve to a generator.
    #[test]
    fn every_listed_name_maps_to_a_generator() {
        for &name in KNOWN_GENERATORS {
            let mapped = generator_for_name(name);
            assert!(
                mapped.is_some(),
                "listed generator name `{name}` has no mapping"
            );
        }
    }

    /// Lookup ignores case and accepts the documented alias spellings.
    #[test]
    fn mapping_is_case_insensitive_and_has_aliases() {
        let cases = [
            ("EMAIL", Generator::Email),
            ("FirstName", Generator::FirstName),
            ("colour", Generator::HexColor),
            ("full_name", Generator::FullName),
        ];
        for (spelling, expected) in cases {
            assert_eq!(generator_for_name(spelling), Some(expected));
        }
    }

    /// Unrecognised names, including the empty string, resolve to nothing.
    #[test]
    fn unknown_name_has_no_mapping() {
        for spelling in ["bogus", ""] {
            assert_eq!(generator_for_name(spelling), None);
        }
    }

    /// Prefix matching is case-insensitive; the empty prefix matches everything.
    #[test]
    fn prefix_check_matches_known_and_rejects_unknown() {
        // The empty string is a prefix of all names.
        for partial in ["ema", "EMA", ""] {
            assert!(is_known_generator_prefix(partial));
        }
        assert!(!is_known_generator_prefix("zzz"));
    }
}
|
||||
@@ -163,6 +163,7 @@ impl Theme {
|
||||
HighlightClass::String => self.tok_string,
|
||||
HighlightClass::Punct => self.tok_punct,
|
||||
HighlightClass::Flag => self.tok_flag,
|
||||
HighlightClass::Function => self.tok_function,
|
||||
HighlightClass::Error => self.tok_error,
|
||||
}
|
||||
}
|
||||
@@ -228,6 +229,7 @@ mod tests {
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user