feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
Build the two SD2 surfaces Phase 1 deferred:
- `set` override clause (D2): comma-separated per-column pins —
`= 'v'` (fixed), `in ('a','b')` (pick-list), `as <generator>`
(named), `between x and y` (range; numeric and quoted dates).
Values are type-aware via the typed `current_column_value` slot; an
override drops its column from the generic-fill advisory (D13).
Overrides are folded from the flat matched path (build_seed_overrides)
and applied to the per-column plan (apply_seed_overrides).
- `<table>.<column>` column-fill (D1 form 2): an UPDATE over existing
rows. It refuses PK/autogen targets, is a no-op on an empty table,
samples the parent for FK targets, stays collision-free for
UNIQUE/identifier targets, and is a single undo step; `set` may only
adjust the filled column.
Supporting work: KNOWN_GENERATORS vocabulary + generator_for_name
(src/seed/vocabulary.rs, D9); a range Generator + range_bounds_reason;
IdentSource::Generators and HighlightClass::Function; completion of the
generator vocabulary after `as` and the set/.col column slots; the
typing-time validity indicator for an unknown generator; help,
parse-error pedagogy rows, and the D13 advisory's Phase-2/3 wording.
A bounded override (fixed value / too-short pick-list) on a
single-column-UNIQUE target is a friendly error rather than a silent
uniqueness cap (post-implementation /runda finding, user-chosen).
Dates in the range form are quoted (no date-literal token exists);
ADR-0048 D2 amended accordingly. Both modes (D5); reproducible (D4).
This commit is contained in:
+276
-11
@@ -24,7 +24,9 @@
|
||||
//! later swap that capture for the same typed slots used here, adding
|
||||
//! live hints/highlighting.
|
||||
|
||||
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
|
||||
use crate::dsl::command::{
|
||||
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
|
||||
};
|
||||
use crate::dsl::grammar::{
|
||||
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
|
||||
shared::{
|
||||
@@ -426,7 +428,9 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
|
||||
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
|
||||
|
||||
// =================================================================
|
||||
// seed — `seed <T> [<count>]` (ADR-0048, SD1)
|
||||
// seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
|
||||
// column-fill)
|
||||
// =================================================================
|
||||
|
||||
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
|
||||
@@ -444,11 +448,127 @@ const SEED_FLAG_NODES: &[Node] = &[
|
||||
},
|
||||
];
|
||||
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
|
||||
|
||||
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
|
||||
// form 2) ----------------------------------------------------
|
||||
//
|
||||
// `seed users.email …` fills one column across existing rows. The
|
||||
// table ident stops at `.` (idents are alnum/underscore), so an
|
||||
// `Optional(Seq['.', column])` after the table cleanly discriminates:
|
||||
// when the next token is not `.`, the `Punct('.')` first-child
|
||||
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
|
||||
// column propagates as the user mid-typing `seed users.` (driver
|
||||
// `walk_optional` semantics). The column resolves against
|
||||
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
|
||||
const SEED_TARGET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_target_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
|
||||
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
|
||||
|
||||
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
|
||||
//
|
||||
// Each override pins one column's generation. The column slot
|
||||
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
|
||||
// same `current_column_value` dispatch `update … set` uses) narrow to
|
||||
// the column's type — so list/range/fixed values get the column's
|
||||
// typed slot (quoted text, unquoted number, quoted date) and a
|
||||
// type-mismatched literal is flagged. The four tails each start with a
|
||||
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
|
||||
// discriminates cleanly (no Optional-first branch).
|
||||
|
||||
/// The `set <col>` column slot. Distinct role from `update`'s
|
||||
/// `update_set_column` and the expression `expr_column`.
|
||||
const SEED_SET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_set_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: true,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `as <generator>` — the curated generator-name vocabulary (D9),
|
||||
/// highlighted in the `tok_function` colour. The slot is structural
|
||||
/// (any identifier matches); the name is validated at execution and
|
||||
/// flagged live by the validity indicator.
|
||||
const SEED_GENERATOR: Node = Node::Ident {
|
||||
source: IdentSource::Generators,
|
||||
role: "seed_generator",
|
||||
validator: None,
|
||||
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `= <value>` — a fixed constant for every row.
|
||||
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
|
||||
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
|
||||
const SEED_OV_IN_VALUES: Node = Node::Repeated {
|
||||
inner: &PER_COLUMN_VALUE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_OV_IN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("in")),
|
||||
Node::Punct('('),
|
||||
SEED_OV_IN_VALUES,
|
||||
Node::Punct(')'),
|
||||
];
|
||||
/// `between <value> and <value>` — uniform in the (typed) range.
|
||||
const SEED_OV_BETWEEN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("between")),
|
||||
PER_COLUMN_VALUE,
|
||||
Node::Word(Word::keyword("and")),
|
||||
PER_COLUMN_VALUE,
|
||||
];
|
||||
/// `as <generator>` — force a named generator.
|
||||
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
|
||||
|
||||
const SEED_OV_TAIL_CHOICES: &[Node] = &[
|
||||
Node::Seq(SEED_OV_FIXED_NODES),
|
||||
Node::Seq(SEED_OV_IN_NODES),
|
||||
Node::Seq(SEED_OV_BETWEEN_NODES),
|
||||
Node::Seq(SEED_OV_AS_NODES),
|
||||
];
|
||||
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
|
||||
|
||||
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
|
||||
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
|
||||
const SEED_OVERRIDES: Node = Node::Repeated {
|
||||
inner: &SEED_OVERRIDE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_SET_CLAUSE_NODES: &[Node] =
|
||||
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
|
||||
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
|
||||
|
||||
const SEED_NODES: &[Node] = &[
|
||||
// `writes_table` so a future `set <col>=…` clause's column slots
|
||||
// can resolve against this table.
|
||||
// `writes_table` so the `.column` target, the `set <col>=…`
|
||||
// clause's column slots, and the typed value slots all resolve
|
||||
// against this table.
|
||||
TABLE_NAME_WRITES,
|
||||
SEED_DOT_COLUMN,
|
||||
Node::Optional(&SEED_COUNT),
|
||||
Node::Optional(&SEED_SET_CLAUSE),
|
||||
Node::Optional(&SEED_FLAG),
|
||||
];
|
||||
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
|
||||
@@ -736,16 +856,29 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a `seed <T> [<count>] [--seed <n>]` command (ADR-0048). The
|
||||
/// `--seed` flag's value is the `NumberLit` right after the flag; the
|
||||
/// positional count is the `NumberLit` *before* the flag (or the only
|
||||
/// one when no flag is present).
|
||||
/// Build a `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
/// command (ADR-0048, SD1 + SD2 Phase 2).
|
||||
///
|
||||
/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
|
||||
/// ident, present only for the `seed <T>.<col>` form.
|
||||
/// - The positional `count` is the `NumberLit` that precedes both the
|
||||
/// `set` keyword and the `--seed` flag — bounding it that way keeps a
|
||||
/// `set age between 18 and 80` value (also a `NumberLit`) from being
|
||||
/// mistaken for the count.
|
||||
/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
|
||||
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
|
||||
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||||
let table = require_ident(path, "table_name")?;
|
||||
let target_column = ident_text(path, "seed_target_column").map(str::to_string);
|
||||
|
||||
let flag_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Flag("seed")));
|
||||
let set_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Word("set")));
|
||||
|
||||
let rng_seed = flag_idx
|
||||
.and_then(|fi| path.items.get(fi + 1))
|
||||
@@ -753,23 +886,155 @@ fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationEr
|
||||
.map(|i| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
// The count is bounded to before the `set` clause and the flag, so a
|
||||
// numeric value inside `set` (e.g. `between 18 and 80`) is never read
|
||||
// as the count.
|
||||
let count_boundary = [set_idx, flag_idx]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.min()
|
||||
.unwrap_or(path.items.len());
|
||||
let count = path
|
||||
.items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(idx, i)| {
|
||||
matches!(i.kind, MatchedKind::NumberLit) && flag_idx.is_none_or(|fi| *idx < fi)
|
||||
})
|
||||
.find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary)
|
||||
.map(|(_, i)| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
let overrides = build_seed_overrides(path, set_idx, flag_idx)?;
|
||||
|
||||
Ok(Command::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
})
|
||||
}
|
||||
|
||||
/// Fold the flat `set`-clause terminals into [`SeedOverride`]s
|
||||
/// (ADR-0048 D2). The clause region runs from just after `Word("set")`
|
||||
/// to the `--seed` flag (or the path end). Each override begins at a
|
||||
/// `seed_set_column` ident; the token right after it selects the form
|
||||
/// (`=` / `in` / `between` / `as`). Top-level comma separators between
|
||||
/// overrides are skipped (the `in (...)` form consumes its own inner
|
||||
/// commas up to `)`).
|
||||
fn build_seed_overrides(
|
||||
path: &MatchedPath,
|
||||
set_idx: Option<usize>,
|
||||
flag_idx: Option<usize>,
|
||||
) -> Result<Vec<SeedOverride>, ValidationError> {
|
||||
let Some(set_idx) = set_idx else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
let end = flag_idx.unwrap_or(path.items.len());
|
||||
let region = &path.items[set_idx + 1..end];
|
||||
|
||||
let mut overrides = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < region.len() {
|
||||
// The next override starts at its column ident; skip the
|
||||
// top-level comma separators (and any stray token) between them.
|
||||
let MatchedKind::Ident {
|
||||
role: "seed_set_column",
|
||||
..
|
||||
} = ®ion[i].kind
|
||||
else {
|
||||
i += 1;
|
||||
continue;
|
||||
};
|
||||
let column = region[i].text.clone();
|
||||
i += 1;
|
||||
let kind = parse_seed_override_tail(region, &mut i, &column)?;
|
||||
overrides.push(SeedOverride { column, kind });
|
||||
}
|
||||
Ok(overrides)
|
||||
}
|
||||
|
||||
/// Parse one override tail starting at `region[*i]` (just past the
|
||||
/// column ident), advancing `*i` past the consumed tokens.
|
||||
fn parse_seed_override_tail(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<SeedOverrideKind, ValidationError> {
|
||||
let head = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
match &head.kind {
|
||||
MatchedKind::Punct('=') => {
|
||||
*i += 1;
|
||||
let value = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Fixed(value))
|
||||
}
|
||||
MatchedKind::Word("in") => {
|
||||
*i += 1; // `in`
|
||||
// `(`
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) {
|
||||
*i += 1;
|
||||
}
|
||||
let mut values = Vec::new();
|
||||
while let Some(item) = region.get(*i) {
|
||||
match &item.kind {
|
||||
MatchedKind::Punct(')') => {
|
||||
*i += 1;
|
||||
break;
|
||||
}
|
||||
MatchedKind::Punct(',') => {
|
||||
*i += 1;
|
||||
}
|
||||
_ => values.push(seed_take_value(region, i, column)?),
|
||||
}
|
||||
}
|
||||
Ok(SeedOverrideKind::PickList(values))
|
||||
}
|
||||
MatchedKind::Word("between") => {
|
||||
*i += 1; // `between`
|
||||
let low = seed_take_value(region, i, column)?;
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) {
|
||||
*i += 1;
|
||||
}
|
||||
let high = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Range { low, high })
|
||||
}
|
||||
MatchedKind::Word("as") => {
|
||||
*i += 1; // `as`
|
||||
let gen_item = region
|
||||
.get(*i)
|
||||
.filter(|t| matches!(t.kind, MatchedKind::Ident { role: "seed_generator", .. }))
|
||||
.ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(SeedOverrideKind::Generator(gen_item.text.clone()))
|
||||
}
|
||||
_ => Err(seed_set_error(column)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Take one value literal at `region[*i]`, advancing past it.
|
||||
///
|
||||
/// The grammar's typed value slots only ever match value literals (a
|
||||
/// bare unquoted word fails to match the slot and is rejected *before*
|
||||
/// this fold runs — D2's quoting requirement enforced structurally), so
|
||||
/// a non-literal here can only mean a grammar/builder drift bug; the
|
||||
/// `Err` is a drift guard (mirrors `expr::build_expr`).
|
||||
fn seed_take_value(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<Value, ValidationError> {
|
||||
let item = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// Drift-guard error for the `set`-clause fold (see `seed_take_value`).
|
||||
fn seed_set_error(column: &str) -> ValidationError {
|
||||
ValidationError {
|
||||
message_key: "parse.error_wrapper",
|
||||
args: vec![("detail", format!("malformed `set` clause for `{column}`"))],
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
|
||||
text.parse::<u64>().map_err(|_| ValidationError {
|
||||
message_key: "parse.custom.bind_type_mismatch",
|
||||
|
||||
Reference in New Issue
Block a user