//! Data command nodes (ADR-0024 §migration Phase D). //! //! Five commands at four entry words: `show` (show data / //! show table), `insert`, `update`, `delete`. The walker route //! owns these end-to-end. //! //! Schema awareness (ADR-0024 §Phase D): the DSL value slots are //! wired to `DynamicSubgrammar(column_value_list)` / //! `current_column_value` (see `INSERT_VALUES_LIST`, //! `insert_first_paren`, `PER_COLUMN_VALUE`), so the schema reference //! that flows through `parse_command` unfolds a typed slot per column: //! numeric-shape mismatch is caught at parse (`int`/`decimal`/`bool` //! slots in `shared.rs`) and the full semantic type (`date` / `shortid` //! format) is validated at bind time. So the simple-mode DSL gives data //! values per-column feedback end-to-end. //! //! The advanced-mode SQL DML surface (`build_sql_insert` / //! `build_sql_update` below) is a separate path: it executes the //! validated statement verbatim (ADR-0030 §4) and is NOT yet wired to //! the typed slots. ADR-0036 closes the resulting value-feedback gap //! without a grammar change by *capturing* each literal value position //! at parse (`capture_literal_rows` / `capture_set_literals`) and //! validating it against the column type in the worker — Phase 3 will //! later swap that capture for the same typed slots used here, adding //! live hints/highlighting. 
use crate::dsl::command::{
    Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
};
use crate::dsl::grammar::{
    CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
    shared::{
        FALLBACK_VALUE_LIST, column_value_list, count_tuple_values, current_column_value,
        insert_target_columns,
    },
    sql_delete, sql_insert, sql_select, sql_update,
};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::value::Value;
use crate::dsl::walker::outcome::{MatchedItem, MatchedKind, MatchedPath};

// =================================================================
// Building blocks
// =================================================================

/// Table-name slot that completes from the known tables but sets none
/// of the `writes_*` flags (used by `show table`).
const TABLE_NAME_EXISTING: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables at the table-source slot
    // (ADR-0030 §6 — "every table-source slot"), matching the SQL
    // grammar's `reject_internal_table`. Without this, simple-mode DSL
    // data commands could read/write the internal metadata tables
    // even though advanced-mode SQL rejects them (ADR-0033
    // Amendment 3 / `/runda` finding B).
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Table-name slot variant that populates
/// `WalkContext::current_table_columns` (ADR-0024 §Phase D).
/// Used by `insert into …` so the inner value list can
/// dispatch typed slots per column.
const TABLE_NAME_INSERT: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
    // finding B) — see `TABLE_NAME_EXISTING`.
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: true,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

// =================================================================
// show — `show (data|table) <name>`
// =================================================================

/// `data <table> [where …] [limit <n>]` — the tail of `show data`.
const SHOW_DATA_NODES: &[Node] = &[
    Node::Word(Word::keyword("data")),
    // `writes_table` so the optional `where` expression's
    // column slots resolve against this table for completion.
    TABLE_NAME_WRITES,
    Node::Optional(&WHERE_CLAUSE),
    Node::Optional(&LIMIT_CLAUSE),
];
const SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);

/// `table <table>` — the tail of `show table`.
const SHOW_TABLE_NODES: &[Node] = &[
    Node::Word(Word::keyword("table")),
    TABLE_NAME_EXISTING,
];
const SHOW_TABLE: Node = Node::Seq(SHOW_TABLE_NODES);

// `show tables` / `show relationships` / `show indexes` — the
// list-all forms (V5). Each is a single keyword with no argument;
// the executor lists every item of the kind. Distinct keyword
// tokens (`tables` ≠ `table`), so Choice ordering is irrelevant.
const SHOW_TABLES: Node = Node::Word(Word::keyword("tables"));
const SHOW_RELATIONSHIPS: Node = Node::Word(Word::keyword("relationships"));
const SHOW_INDEXES: Node = Node::Word(Word::keyword("indexes"));

// `show relationship <name>` / `show index <name>` — singular
// per-item detail (V5a). The name slot reuses the existing
// completion sources (relationship / index names). Distinct
// keyword tokens from the plurals (`relationship` ≠
// `relationships`), so Choice ordering is irrelevant.
/// Name slot of `show relationship <name>`; completes from the known
/// relationship names and carries no validator.
const SHOW_RELATIONSHIP_NAME: Node = Node::Ident {
    source: IdentSource::Relationships,
    role: "relationship_name",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
const SHOW_RELATIONSHIP_NODES: &[Node] = &[
    Node::Word(Word::keyword("relationship")),
    SHOW_RELATIONSHIP_NAME,
];
const SHOW_RELATIONSHIP: Node = Node::Seq(SHOW_RELATIONSHIP_NODES);

/// Name slot of `show index <name>`; completes from the known index
/// names and carries no validator.
const SHOW_INDEX_NAME: Node = Node::Ident {
    source: IdentSource::Indexes,
    role: "index_name",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
const SHOW_INDEX_NODES: &[Node] = &[Node::Word(Word::keyword("index")), SHOW_INDEX_NAME];
const SHOW_INDEX: Node = Node::Seq(SHOW_INDEX_NODES);

/// Every sub-form reachable after the `show` entry word.
const SHOW_CHOICES: &[Node] = &[
    SHOW_DATA,
    SHOW_TABLE,
    SHOW_TABLES,
    SHOW_RELATIONSHIPS,
    SHOW_INDEXES,
    SHOW_RELATIONSHIP,
    SHOW_INDEX,
];
const SHOW_SHAPE: Node = Node::Choice(SHOW_CHOICES);

// =================================================================
// insert — `insert into <T> (<col>,<col>,…) values (<val>,<val>,…)`  (Form A)
//        | `insert into <T> values (<val>,…)`                         (Form B)
//        | `insert into <T> (<val>,…)`                                (Form C)
// =================================================================
//
// Forms A (with column list) and C (bare value list) both start
// with `(`. The walker's "first commit wins" Choice semantics
// can't pick between them after the `(` matches, so the first
// paren's contents are resolved by a `Node::Lookahead` factory
// (`insert_first_paren`): it peeks the first token to decide.
//
// - First token is a value literal (number / string /
//   null / true / false) → Form C → the typed `column_value_list`
//   (same dispatch contract as Form B — ADR-0024 §Phase D Form-C
//   type-awareness). Form C values are now type-checked at parse
//   time, not only at bind time.
// - Otherwise (column-name identifier, or an empty paren) → // Form A → a repeated column-name list. The idents write // `WalkContext::user_listed_columns` so the trailing // `values (…)` slots mirror the user's selection. /// Form A's column-name slot. `static` (not `const`) so the /// `insert_first_paren` factory can take a `&'static` reference /// to it when building the repeated list at walk time. static FORM_A_COLUMN: Node = Node::Ident { source: IdentSource::Columns, role: "insert_first_item", validator: None, highlight_override: None, writes_table: false, writes_column: false, writes_user_listed_column: true, writes_table_alias: false, writes_cte_name: false, writes_projection_alias: false, }; static INSERT_COMMA: Node = Node::Punct(','); /// First-paren resolver (ADR-0024 §Phase D Form-C type-awareness). /// Peeks the first token after `(` to route to Form A's /// column-name list or Form C's typed value list. fn insert_first_paren(ctx: &WalkContext, source: &str, pos: usize) -> Node { if first_paren_item_is_value_literal(source, pos) { // Form C — bare value list. Arity-gated exactly like Form B's // `values (…)`: a correct-count tuple gets the typed per-column // slots; a wrong-count tuple routes to the type-blind fallback // so it still matches and the arity diagnostic fires (issue #17). dsl_insert_value_list(ctx, source, pos) } else { // Form A (or Form A in progress / empty paren). Node::Repeated { inner: &FORM_A_COLUMN, separator: Some(&INSERT_COMMA), min: 1, } } } /// True when the first token after the insert `(` is a /// value literal — the signal that the paren is a Form C value /// list rather than a Form A column-name list. An empty paren /// or an identifier-shaped token (a column name) returns false. 
fn first_paren_item_is_value_literal(source: &str, pos: usize) -> bool { use crate::dsl::walker::lex_helpers::{ consume_ident, consume_number_literal, consume_string_literal, skip_whitespace, }; let p = skip_whitespace(source, pos); if p >= source.len() { return false; // empty paren — treat as Form A } if consume_string_literal(source, p).is_some() { return true; } if consume_number_literal(source, p).is_some() { return true; } if let Some((s, e)) = consume_ident(source, p) { let word = &source[s..e]; // `null` / `true` / `false` are value literals; any // other identifier is a column name (Form A). return word.eq_ignore_ascii_case("null") || word.eq_ignore_ascii_case("true") || word.eq_ignore_ascii_case("false"); } false // punctuation (e.g. `)`) — treat as Form A } const INSERT_PAREN_LIST: Node = Node::Lookahead(insert_first_paren); /// Insert value-list arity gate (issue #17) — the simple-mode DSL /// counterpart of the advanced grammar's `tuple_value_list` /// (`sql_insert.rs`). Routes a correct-arity tuple to the typed /// per-column slots ([`column_value_list`]) and a wrong-arity tuple to /// the type-blind [`FALLBACK_VALUE_LIST`], so the wrong-count tuple /// still structurally matches and the per-tuple arity diagnostic /// (ADR-0033 §8.1, made mode-aware for issue #17) fires its friendly /// message instead of a bare "expected `,`/`)`". /// /// Target arity comes from [`insert_target_columns`] — the same source /// `column_value_list` uses, so gate and slots never disagree. `None` /// (schemaless / unknown table / all-auto-generated) → fallback: either /// we can't gate (schemaless) or the all-auto case wants the tuple to /// match so the diagnostic can explain it. /// /// **Simple-mode only.** The fallback routing is what lets a wrong-count /// tuple structurally match (so the diagnostic fires); that is a /// simple-mode behaviour. 
In advanced mode the DSL insert node must stay /// strict — otherwise a non-SQL shape like Form C (`insert into T /// (1, 2)`, no `values`) would spuriously match here and be accepted in /// advanced mode, where SQL requires `values` and the dedicated SQL /// grammar (`sql_insert.rs`) owns inserts. Keeping advanced strict /// preserves the pre-#17 advanced behaviour exactly (issue #17). fn dsl_insert_value_list(ctx: &WalkContext, source: &str, pos: usize) -> Node { if ctx.mode != crate::mode::Mode::Simple { return Node::DynamicSubgrammar(column_value_list); } let Some(cols) = insert_target_columns(ctx) else { return FALLBACK_VALUE_LIST; }; let (count, closed) = count_tuple_values(source, pos); let arity_ok = if closed { count == cols.len() } else { count <= cols.len() }; if arity_ok { Node::DynamicSubgrammar(column_value_list) } else { FALLBACK_VALUE_LIST } } /// Schema-aware value list, arity-gated (issue #17): a correct-count /// tuple unfolds to a `Seq` of typed slots per column (`int_slot`, /// `text_slot`, …); a wrong-count tuple or a schemaless walk falls back /// to the type-blind `Repeated(VALUE_LITERAL, ',', 1)` shape (ADR-0024 /// §Phase D §column_value_list). 
const INSERT_VALUES_LIST: Node = Node::Lookahead(dsl_insert_value_list);

/// `values ( … )` as it appears after a Form A column list.
const INSERT_OPTIONAL_VALUES_NODES: &[Node] = &[
    Node::Word(Word::keyword("values")),
    Node::Punct('('),
    INSERT_VALUES_LIST,
    Node::Punct(')'),
];
const INSERT_OPTIONAL_VALUES: Node = Node::Optional(&Node::Seq(INSERT_OPTIONAL_VALUES_NODES));

/// Forms A and C: a leading `( … )` whose contents are resolved by
/// `INSERT_PAREN_LIST`, optionally followed by `values ( … )`.
const INSERT_PAREN_FIRST_NODES: &[Node] = &[
    Node::Punct('('),
    INSERT_PAREN_LIST,
    Node::Punct(')'),
    INSERT_OPTIONAL_VALUES,
];
const INSERT_PAREN_FIRST: Node = Node::Seq(INSERT_PAREN_FIRST_NODES);

/// Form B: `values ( … )` directly after the table name.
const INSERT_VALUES_KEYWORD_FIRST_NODES: &[Node] = &[
    Node::Word(Word::keyword("values")),
    Node::Punct('('),
    INSERT_VALUES_LIST,
    Node::Punct(')'),
];
const INSERT_VALUES_KEYWORD_FIRST: Node = Node::Seq(INSERT_VALUES_KEYWORD_FIRST_NODES);

const INSERT_AFTER_TABLE_CHOICES: &[Node] = &[INSERT_VALUES_KEYWORD_FIRST, INSERT_PAREN_FIRST];
const INSERT_AFTER_TABLE: Node = Node::Choice(INSERT_AFTER_TABLE_CHOICES);

/// `into <table> …` — everything after the `insert` entry word.
const INSERT_NODES: &[Node] = &[
    Node::Word(Word::keyword("into")),
    TABLE_NAME_INSERT,
    INSERT_AFTER_TABLE,
];
const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES);

// =================================================================
// update — `update <T> set <col>=<val>[, <col>=<val>] (where … | --all-rows)`
// =================================================================

/// Table-name slot that populates `current_table_columns` so
/// the inner `set <col>=<val>` / `where <col>=<val>` slots
/// can resolve column types (Phase D).
const TABLE_NAME_WRITES: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
    // finding B) — see `TABLE_NAME_EXISTING`. Shared by `update`,
    // `delete`, and `show data`, so all three reject the internal
    // metadata tables, matching the SQL grammar.
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: true,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Column-name slot in `set col = …` — resolves the column's
/// type into `current_column` so the value slot dispatches per
/// column type (Phase D).
const SET_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "update_set_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: true,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Value slot resolved at walk time from
/// `WalkContext::current_column`. Falls back to the schemaless
/// value-literal choice when no current_column is bound.
const PER_COLUMN_VALUE: Node = Node::DynamicSubgrammar(current_column_value);

/// One `<col> = <val>` assignment.
const UPDATE_ASSIGNMENT_NODES: &[Node] = &[
    SET_COLUMN,
    Node::Punct('='),
    PER_COLUMN_VALUE,
];
const UPDATE_ASSIGNMENT: Node = Node::Seq(UPDATE_ASSIGNMENT_NODES);

/// One or more comma-separated assignments.
const UPDATE_ASSIGNMENTS: Node = Node::Repeated {
    inner: &UPDATE_ASSIGNMENT,
    separator: Some(&Node::Punct(',')),
    min: 1,
};

/// `where <expr>` — the complex WHERE-expression fragment
/// (ADR-0026). The grammar tier is defined once in
/// `grammar::expr` and reached here through `Subgrammar`.
const WHERE_CLAUSE_NODES: &[Node] = &[
    Node::Word(Word::keyword("where")),
    Node::Subgrammar(&expr::OR_EXPR),
];
const WHERE_CLAUSE: Node = Node::Seq(WHERE_CLAUSE_NODES);

/// Row filter of `update` / `delete`: either a `where` clause or the
/// explicit `--all-rows` flag.
const FILTER_CHOICES: &[Node] = &[WHERE_CLAUSE, Node::Flag("all-rows")];
const FILTER_CLAUSE: Node = Node::Choice(FILTER_CHOICES);

/// `limit <n>` — `<n>` is a non-negative integer; the
/// validator rejects fractional / negative literals at parse
/// time (ADR-0026 §5).
fn validate_limit_count(value: &str) -> Result<(), ValidationError> { if value.parse::().is_ok() { Ok(()) } else { Err(ValidationError { message_key: "parse.custom.bind_type_mismatch", args: vec![ ("found", value.to_string()), ("expected", "non-negative integer".to_string()), ], }) } } const LIMIT_VALIDATOR: NumberValidator = validate_limit_count; /// `limit ` clause, optional on `show data` (ADR-0026 §5). const LIMIT_CLAUSE_NODES: &[Node] = &[ Node::Word(Word::keyword("limit")), Node::NumberLit { validator: Some(LIMIT_VALIDATOR), }, ]; const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); // ================================================================= // seed — `seed [.] [] [set ] [--seed ]` // (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause / // column-fill) // ================================================================= /// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a /// non-negative integer). const SEED_COUNT: Node = Node::NumberLit { validator: Some(LIMIT_VALIDATOR), }; /// Issue #26: the row count is a bare positional number, so it produces /// no Tab candidate and was invisible in the hint panel at /// `seed ▮` (only `set` / `--seed` showed). Wrapping it in /// `IntroProse` advertises it (and the other options) in prose; the /// skipped-optional carry (`surviving_intro_hint`) makes the hint reach /// the resolver despite the trailing optionals. Tab still cycles the /// keyword candidates. const SEED_COUNT_HINTED: Node = Node::Hinted { mode: crate::dsl::grammar::HintMode::IntroProse("hint.seed_count"), inner: &SEED_COUNT, }; /// `--seed ` — a reproducible-generation flag carrying a numeric /// seed (ADR-0048 D4). The only flag in the DSL that takes a value; /// `build_seed` reads the number immediately after the flag. 
const SEED_FLAG_NODES: &[Node] = &[
    Node::Flag("seed"),
    Node::NumberLit {
        validator: Some(LIMIT_VALIDATOR),
    },
];
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);

// --- column-fill target: the optional `.<column>` (ADR-0048 D1
// form 2) ----------------------------------------------------
//
// `seed users.email …` fills one column across existing rows. The
// table ident stops at `.` (idents are alnum/underscore), so an
// `Optional(Seq['.', column])` after the table cleanly discriminates:
// when the next token is not `.`, the `Punct('.')` first-child
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
// column propagates as the user mid-typing `seed users.` (driver
// `walk_optional` semantics). The column resolves against
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
const SEED_TARGET_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "seed_target_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));

// --- the `set <override>[, <override>…]` clause (ADR-0048 D2) --------
//
// Each override pins one column's generation. The column slot
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
// same `current_column_value` dispatch `update … set` uses) narrow to
// the column's type — so list/range/fixed values get the column's
// typed slot (quoted text, unquoted number, quoted date) and a
// type-mismatched literal is flagged. The four tails each start with a
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
// discriminates cleanly (no Optional-first branch).

/// The `set <column>` column slot. Distinct role from `update`'s
/// `update_set_column` and the expression `expr_column`.
const SEED_SET_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "seed_set_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: true,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// `as <generator>` — the curated generator-name vocabulary (D9),
/// highlighted in the `tok_function` colour. The slot is structural
/// (any identifier matches); the name is validated at execution and
/// flagged live by the validity indicator.
const SEED_GENERATOR: Node = Node::Ident {
    source: IdentSource::Generators,
    role: "seed_generator",
    validator: None,
    highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// `= <value>` — a fixed constant for every row.
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];

/// `in ( <value> [, <value>]* )` — uniform pick from the list.
const SEED_OV_IN_VALUES: Node = Node::Repeated {
    inner: &PER_COLUMN_VALUE,
    separator: Some(&Node::Punct(',')),
    min: 1,
};
const SEED_OV_IN_NODES: &[Node] = &[
    Node::Word(Word::keyword("in")),
    Node::Punct('('),
    SEED_OV_IN_VALUES,
    Node::Punct(')'),
];

/// `between <low> and <high>` — uniform in the (typed) range.
const SEED_OV_BETWEEN_NODES: &[Node] = &[
    Node::Word(Word::keyword("between")),
    PER_COLUMN_VALUE,
    Node::Word(Word::keyword("and")),
    PER_COLUMN_VALUE,
];

/// `as <generator>` — force a named generator.
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];

/// The four override tails; each begins with a different token
/// (`=` / `in` / `between` / `as`).
const SEED_OV_TAIL_CHOICES: &[Node] = &[
    Node::Seq(SEED_OV_FIXED_NODES),
    Node::Seq(SEED_OV_IN_NODES),
    Node::Seq(SEED_OV_BETWEEN_NODES),
    Node::Seq(SEED_OV_AS_NODES),
];
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);

/// One override: a column followed by one of the four tails.
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);

/// One or more comma-separated overrides.
const SEED_OVERRIDES: Node = Node::Repeated {
    inner: &SEED_OVERRIDE,
    separator: Some(&Node::Punct(',')),
    min: 1,
};
const SEED_SET_CLAUSE_NODES: &[Node] = &[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);

/// Everything after the `seed` entry word: table, optional `.column`,
/// optional count, optional `set` clause, optional `--seed` flag.
const SEED_NODES: &[Node] = &[
    // `writes_table` so the `.column` target, the `set <col>=…`
    // clause's column slots, and the typed value slots all resolve
    // against this table.
    TABLE_NAME_WRITES,
    SEED_DOT_COLUMN,
    Node::Optional(&SEED_COUNT_HINTED),
    Node::Optional(&SEED_SET_CLAUSE),
    Node::Optional(&SEED_FLAG),
];
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);

/// Everything after the `update` entry word. The filter is mandatory:
/// either a `where` clause or `--all-rows`.
const UPDATE_NODES: &[Node] = &[
    TABLE_NAME_WRITES,
    Node::Word(Word::keyword("set")),
    UPDATE_ASSIGNMENTS,
    FILTER_CLAUSE,
];
const UPDATE_SHAPE: Node = Node::Seq(UPDATE_NODES);

// =================================================================
// delete — `delete from <T> (where … | --all-rows)`
// =================================================================

/// Everything after the `delete` entry word.
const DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("from")),
    TABLE_NAME_WRITES,
    FILTER_CLAUSE,
];
const DELETE_SHAPE: Node = Node::Seq(DELETE_NODES);

// =================================================================
// explain — `explain (show data … | update … | delete from …)`
// =================================================================
//
// ADR-0028 §1: `explain` is a top-level command whose shape is a
// `Choice` over the three explainable query commands.
// The inner query grammars are *referenced* through `Subgrammar`, not
// duplicated — so an explained command is parsed, completed,
// hinted and highlighted exactly as it is on its own.
//
// `Subgrammar` needs a `&'static Node`; `SHOW_DATA` /
// `UPDATE_SHAPE` / `DELETE_SHAPE` are `const` (and cannot be
// referenced as `&'static`). These three thin `static` wrappers
// over the existing `_NODES` slices give the references without
// any churn to the standalone command shapes. `explain show`
// references `EXPLAIN_SHOW_DATA` directly (not the `show`
// command's `data | table` choice) — `explain` covers `show
// data` only (ADR-0028 §1).
static EXPLAIN_SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
static EXPLAIN_UPDATE: Node = Node::Seq(UPDATE_NODES);
static EXPLAIN_DELETE: Node = Node::Seq(DELETE_NODES);

/// `explain show data …`
const EXPLAIN_SHOW_NODES: &[Node] = &[
    Node::Word(Word::keyword("show")),
    Node::Subgrammar(&EXPLAIN_SHOW_DATA),
];
/// `explain update …`
const EXPLAIN_UPDATE_NODES: &[Node] = &[
    Node::Word(Word::keyword("update")),
    Node::Subgrammar(&EXPLAIN_UPDATE),
];
/// `explain delete from …`
const EXPLAIN_DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("delete")),
    Node::Subgrammar(&EXPLAIN_DELETE),
];
const EXPLAIN_CHOICES: &[Node] = &[
    Node::Seq(EXPLAIN_SHOW_NODES),
    Node::Seq(EXPLAIN_UPDATE_NODES),
    Node::Seq(EXPLAIN_DELETE_NODES),
];
const EXPLAIN_SHAPE: Node = Node::Choice(EXPLAIN_CHOICES);

// --- explain over advanced-mode SQL (ADR-0039) -------------------
//
// The SQL inner mirrors the DSL inner above, but wraps the SQL
// command shapes (the same nodes the standalone `SELECT` / `WITH` /
// `SQL_*` commands use). This shape backs a *second* `explain`
// CommandNode (`EXPLAIN_SQL`, registered `Advanced`); the registry's
// shared-entry-word dispatch tries it first in advanced mode and
// falls back to the `Simple` DSL `EXPLAIN` when a branch can't match
// (e.g. `explain show data …`, or a DSL-only `--all-rows`). `select`
// and `with` are SQL-only, so they only ever resolve here.
/// `explain select …`
const EXPLAIN_SELECT_NODES: &[Node] = &[
    Node::Word(Word::keyword("select")),
    Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
];
/// `explain with …`
const EXPLAIN_WITH_NODES: &[Node] = &[
    Node::Word(Word::keyword("with")),
    Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
];
/// `explain insert …` (SQL form)
const EXPLAIN_SQL_INSERT_NODES: &[Node] = &[
    Node::Word(Word::keyword("insert")),
    Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
];
/// `explain update …` (SQL form)
const EXPLAIN_SQL_UPDATE_NODES: &[Node] = &[
    Node::Word(Word::keyword("update")),
    Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE),
];
/// `explain delete …` (SQL form)
const EXPLAIN_SQL_DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("delete")),
    Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE),
];
const EXPLAIN_SQL_CHOICES: &[Node] = &[
    Node::Seq(EXPLAIN_SELECT_NODES),
    Node::Seq(EXPLAIN_WITH_NODES),
    Node::Seq(EXPLAIN_SQL_INSERT_NODES),
    Node::Seq(EXPLAIN_SQL_UPDATE_NODES),
    Node::Seq(EXPLAIN_SQL_DELETE_NODES),
];
const EXPLAIN_SQL_SHAPE: Node = Node::Choice(EXPLAIN_SQL_CHOICES);

// =================================================================
// select — SQL `SELECT` (advanced mode; ADR-0030 §6, ADR-0031)
// =================================================================
//
// Phase 1's single-table `SELECT`: a projection, a `FROM` table,
// and optional `WHERE` / `ORDER BY` / `LIMIT`. The projection,
// `WHERE` and `ORDER BY` expression slots reference the SQL
// expression grammar (ADR-0031) through `Subgrammar`, so SQL gets
// the same completion / highlighting / hints as the DSL for free.
//
// Advanced mode only — the walker's mode gate (ADR-0030 §2,
// `grammar::is_advanced_only`) refuses `select` in simple mode
// with the "this is SQL" hint, so this grammar is never reached
// there.
//
// `JOIN`s, `GROUP BY` / `HAVING`, subqueries, `UNION`, CTEs, and
// `OFFSET` are ADR-0030 Phase 2 ("`SELECT` — full"); implicit
// column aliasing (`select a x`) and qualified `t.*` are out of
// Phase 1 (see the inline notes).
// SQL expression slot — `Node::Subgrammar(&sql_expr::SQL_OR_EXPR)` // is inlined at each use site to avoid a Rust const-evaluation // cycle through the sql_expr ⇄ sql_select recursion (see the // matching note in sql_select.rs). // Phase 1's local `SELECT_*` grammar nodes have been retired in // favour of `sql_select::SQL_SELECT_TAIL` (ADR-0032 sub-phase // 2c). The shape definition that `data::SELECT` references now // lives in the dedicated `sql_select` module — including the // `reject_internal_table` validator, the `LIMIT` count // validator, and the projection / FROM / WHERE / ORDER BY // machinery. The full §1 grammar (JOIN, GROUP BY, HAVING, // set-ops, qualified refs, subqueries, CTEs) is admitted as a // natural superset. // ================================================================= // AST builders // ================================================================= fn ident_text<'a>(path: &'a MatchedPath, role: &str) -> Option<&'a str> { path.items.iter().find_map(|i| match &i.kind { MatchedKind::Ident { role: r, .. } if *r == role => Some(i.text.as_str()), _ => None, }) } fn require_ident(path: &MatchedPath, role: &'static str) -> Result { ident_text(path, role) .map(str::to_string) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", format!("missing {role}"))], }) } /// Convert a `MatchedItem` whose kind is one of the `value_literal` /// variants (Word("null"|"true"|"false"), NumberLit, StringLit) to /// a `Value`. Returns None for non-value items. /// /// `pub(crate)` so `grammar::ddl` can reuse it when collecting a /// `default ` column constraint (ADR-0029). 
pub(crate) fn item_to_value(item: &MatchedItem) -> Option { match &item.kind { MatchedKind::Word("null") => Some(Value::Null), MatchedKind::Word("true") => Some(Value::Bool(true)), MatchedKind::Word("false") => Some(Value::Bool(false)), MatchedKind::NumberLit => Some(Value::Number(item.text.clone())), MatchedKind::StringLit => Some(Value::Text(item.text.clone())), _ => None, } } fn build_show(path: &MatchedPath, _source: &str) -> Result { let sub = path .items .iter() .filter_map(|i| match &i.kind { MatchedKind::Word(w) => Some(*w), _ => None, }) .nth(1); match sub { Some("data") => build_show_data(path, _source), // `name` is resolved only for the forms that carry one; the // list-all forms (`tables` / `relationships` / `indexes`) // have no table argument. Some("table") => Ok(Command::ShowTable { name: require_ident(path, "table_name")?, }), Some("tables") => Ok(Command::ShowList { kind: ShowListKind::Tables, name: None, }), Some("relationships") => Ok(Command::ShowList { kind: ShowListKind::Relationships, name: None, }), Some("indexes") => Ok(Command::ShowList { kind: ShowListKind::Indexes, name: None, }), // V5a singular per-item detail — carry the named item. Some("relationship") => Ok(Command::ShowList { kind: ShowListKind::Relationships, name: Some(require_ident(path, "relationship_name")?), }), Some("index") => Ok(Command::ShowList { kind: ShowListKind::Indexes, name: Some(require_ident(path, "index_name")?), }), _ => Err(ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "unknown show subcommand".to_string())], }), } } /// Build a `show data` command from a matched path. Role-based /// (no positional `nth` lookups), so it serves both the /// standalone `show data` entry word and the `explain show /// data …` wrapper, where the entry-word offset shifts. 
fn build_show_data(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    Ok(Command::ShowData {
        name: require_ident(path, "table_name")?,
        filter: build_show_filter(path)?,
        limit: build_show_limit(path)?,
    })
}

/// The optional `where <expr>` of a `show data`. The expression
/// terminals run from just past `Word("where")` to the start of
/// the `limit` clause (or the end of the path) — neither the
/// `limit` keyword nor any expression keyword collide, so the
/// slice is exact.
///
/// Returns `Ok(None)` when the path has no `where` keyword.
// NOTE(review): the generic arguments of this return type were lost
// from this copy of the file; `Option<Expr>` is restored on the
// assumption that `expr::build_expr` yields the imported `Expr` —
// confirm against `Command::ShowData::filter`.
fn build_show_filter(path: &MatchedPath) -> Result<Option<Expr>, ValidationError> {
    let Some(where_idx) = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Word("where")))
    else {
        return Ok(None);
    };
    let end = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Word("limit")))
        .unwrap_or(path.items.len());
    Ok(Some(expr::build_expr(&path.items[where_idx + 1..end])?))
}

/// The optional `limit <n>` of a `show data`. The grammar's
/// `LIMIT_VALIDATOR` already constrained `<n>` to a
/// non-negative integer, so the parse here cannot realistically
/// fail.
///
/// Returns `Ok(None)` when the path has no `limit` keyword.
// NOTE(review): the integer type was lost from this copy of the file
// (both in the return type and in `parse::()`); `u64` is restored to
// match `validate_limit_count` — confirm against
// `Command::ShowData::limit`.
fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError> {
    let Some(limit_idx) = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Word("limit")))
    else {
        return Ok(None);
    };
    // The count is the item immediately after the `limit` keyword.
    let count = path
        .items
        .get(limit_idx + 1)
        .ok_or_else(|| ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing limit count".to_string())],
        })?;
    count
        .text
        .parse::<u64>()
        .map(Some)
        .map_err(|_| ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", count.text.clone()),
                ("expected", "non-negative integer".to_string()),
            ],
        })
}

/// Build a `seed <table>[.<column>] [<count>] [set <overrides>] [--seed <n>]`
/// command (ADR-0048, SD1 + SD2 Phase 2).
///
/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
///   ident, present only for the `seed <table>.<column>` form.
/// - The positional `count` is the `NumberLit` that precedes both the
///   `set` keyword and the `--seed` flag — bounding it that way keeps a
///   `set age between 18 and 80` value (also a `NumberLit`) from being
///   mistaken for the count.
/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let table = require_ident(path, "table_name")?;
    let target_column = ident_text(path, "seed_target_column").map(str::to_string);

    let flag_idx = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Flag("seed")));
    let set_idx = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Word("set")));

    // The RNG seed is read only when the item right after the flag is a
    // number literal; otherwise it stays `None`.
    let rng_seed = flag_idx
        .and_then(|fi| path.items.get(fi + 1))
        .filter(|i| matches!(i.kind, MatchedKind::NumberLit))
        .map(|i| parse_seed_u64(&i.text))
        .transpose()?;

    // The count is bounded to before the `set` clause and the flag, so a
    // numeric value inside `set` (e.g. `between 18 and 80`) is never read
    // as the count.
    let count_boundary = [set_idx, flag_idx]
        .into_iter()
        .flatten()
        .min()
        .unwrap_or(path.items.len());
    let count = path
        .items
        .iter()
        .enumerate()
        .find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary)
        .map(|(_, i)| parse_seed_u64(&i.text))
        .transpose()?;

    let overrides = build_seed_overrides(path, set_idx, flag_idx)?;

    Ok(Command::Seed {
        table,
        target_column,
        count,
        overrides,
        rng_seed,
    })
}

/// Fold the flat `set`-clause terminals into [`SeedOverride`]s
/// (ADR-0048 D2). The clause region runs from just after `Word("set")`
/// to the `--seed` flag (or the path end). Each override begins at a
/// `seed_set_column` ident; the token right after it selects the form
/// (`=` / `in` / `between` / `as`). Top-level comma separators between
/// overrides are skipped (the `in (...)` form consumes its own inner
/// commas up to `)`).
fn build_seed_overrides( path: &MatchedPath, set_idx: Option, flag_idx: Option, ) -> Result, ValidationError> { let Some(set_idx) = set_idx else { return Ok(Vec::new()); }; let end = flag_idx.unwrap_or(path.items.len()); let region = &path.items[set_idx + 1..end]; let mut overrides = Vec::new(); let mut i = 0; while i < region.len() { // The next override starts at its column ident; skip the // top-level comma separators (and any stray token) between them. let MatchedKind::Ident { role: "seed_set_column", .. } = ®ion[i].kind else { i += 1; continue; }; let column = region[i].text.clone(); i += 1; let kind = parse_seed_override_tail(region, &mut i, &column)?; overrides.push(SeedOverride { column, kind }); } Ok(overrides) } /// Parse one override tail starting at `region[*i]` (just past the /// column ident), advancing `*i` past the consumed tokens. fn parse_seed_override_tail( region: &[MatchedItem], i: &mut usize, column: &str, ) -> Result { let head = region.get(*i).ok_or_else(|| seed_set_error(column))?; match &head.kind { MatchedKind::Punct('=') => { *i += 1; let value = seed_take_value(region, i, column)?; Ok(SeedOverrideKind::Fixed(value)) } MatchedKind::Word("in") => { *i += 1; // `in` // `(` if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) { *i += 1; } let mut values = Vec::new(); while let Some(item) = region.get(*i) { match &item.kind { MatchedKind::Punct(')') => { *i += 1; break; } MatchedKind::Punct(',') => { *i += 1; } _ => values.push(seed_take_value(region, i, column)?), } } Ok(SeedOverrideKind::PickList(values)) } MatchedKind::Word("between") => { *i += 1; // `between` let low = seed_take_value(region, i, column)?; if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) { *i += 1; } let high = seed_take_value(region, i, column)?; Ok(SeedOverrideKind::Range { low, high }) } MatchedKind::Word("as") => { *i += 1; // `as` let gen_item = region .get(*i) .filter(|t| matches!(t.kind, MatchedKind::Ident { role: 
"seed_generator", .. })) .ok_or_else(|| seed_set_error(column))?; *i += 1; Ok(SeedOverrideKind::Generator(gen_item.text.clone())) } _ => Err(seed_set_error(column)), } } /// Take one value literal at `region[*i]`, advancing past it. /// /// The grammar's typed value slots only ever match value literals (a /// bare unquoted word fails to match the slot and is rejected *before* /// this fold runs — D2's quoting requirement enforced structurally), so /// a non-literal here can only mean a grammar/builder drift bug; the /// `Err` is a drift guard (mirrors `expr::build_expr`). fn seed_take_value( region: &[MatchedItem], i: &mut usize, column: &str, ) -> Result { let item = region.get(*i).ok_or_else(|| seed_set_error(column))?; let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?; *i += 1; Ok(value) } /// Drift-guard error for the `set`-clause fold (see `seed_take_value`). fn seed_set_error(column: &str) -> ValidationError { ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", format!("malformed `set` clause for `{column}`"))], } } fn parse_seed_u64(text: &str) -> Result { text.parse::().map_err(|_| ValidationError { message_key: "parse.custom.bind_type_mismatch", args: vec![ ("found", text.to_string()), ("expected", "non-negative integer".to_string()), ], }) } fn build_insert(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; // Locate the second `values` keyword (the first is the // command word `insert`'s sibling — but `insert` isn't a // matched Word here since it's the entry word and the // entry-word push uses the literal "insert"; only later // `values` matches as Word("values")). // // Strategy: walk the path. After the table name: // - If we see Word("values") next (Form B), the next // parenthesized values are the value list. // - If we see Punct('('), the first paren's content was // either column names (Form A) or values (Form C). 
// If a Word("values") follows the closing paren, it's // Form A. // // Easier discriminator: collect all matched keyword words; // count occurrences of "values". let saw_values = path .items .iter() .any(|i| matches!(i.kind, MatchedKind::Word("values"))); // Find the index of the table_name match — the first paren // afterwards starts the parsed list. let table_idx = path .items .iter() .position(|i| matches!(&i.kind, MatchedKind::Ident { role: "table_name", .. })) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing table".to_string())], })?; // Form B (values keyword right after table): no column list, // values come from the single paren-bounded list. let first_token_after_table = path.items.get(table_idx + 1); let form_b = matches!( first_token_after_table.map(|i| &i.kind), Some(MatchedKind::Word("values")) ); if form_b { // Form B: the only value run is between the only `(` … `)`. let values = collect_values_in_parens(path, table_idx + 1)?; return Ok(Command::Insert { table, columns: None, values, }); } // Form A or C: the first paren after the table is a Choice // of either column-idents or value-literals. let first_paren_idx = path .items .iter() .enumerate() .skip(table_idx + 1) .find(|(_, i)| matches!(i.kind, MatchedKind::Punct('('))) .map(|(idx, _)| idx) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing `(`".to_string())], })?; if saw_values { // Form A: first paren = column names; second paren = values. // The Repeated inside the first paren tagged matched idents // with role "insert_first_item". let columns: Vec = path .items .iter() .filter_map(|i| match &i.kind { MatchedKind::Ident { role: "insert_first_item", .. 
} => Some(i.text.clone()), _ => None, }) .collect(); if columns.is_empty() { return Err(ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "expected column names in `insert into T (…)`".to_string())], }); } // Find the `values` keyword and the next `(` — the values // run starts after that `(`. let values_idx = path .items .iter() .enumerate() .skip(first_paren_idx) .find(|(_, i)| matches!(i.kind, MatchedKind::Word("values"))) .map(|(i, _)| i) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing `values` keyword".to_string())], })?; let values = collect_values_in_parens(path, values_idx + 1)?; Ok(Command::Insert { table, columns: Some(columns), values, }) } else { // Form C: the first paren contained the value list. The // Repeated tagged the matched values via their natural // MatchedKind (Word/NumberLit/StringLit); collect them. // // Form-A-without-`values` recovery: the shared // INSERT_PAREN_ITEM choice accepts both VALUE_LITERAL // and Ident{Columns} so that Form A can resolve // column-name items inside its `( cols )` list. When the // user types `insert into T (col)` (column-shaped item, // no `values` keyword), the grammar walks to a complete // match but the user almost certainly meant Form A and // forgot the `values (...)` suffix. Reject here with a // ValidationError — the walker classifies validation // errors as `at_eof: true`, so the input renderer // surfaces this as IncompleteAtEof (mid-typing) rather // than dispatching a logically-broken Form C insert with // an empty value list. let user_listed_columns: Vec = path .items .iter() .filter_map(|i| match &i.kind { MatchedKind::Ident { role: "insert_first_item", .. 
} => Some(i.text.clone()), _ => None, }) .collect(); if !user_listed_columns.is_empty() { return Err(ValidationError { message_key: "parse.custom.insert_form_a_missing_values", args: vec![("columns", user_listed_columns.join(", "))], }); } let values = collect_values_in_parens(path, first_paren_idx)?; Ok(Command::Insert { table, columns: None, values, }) } } /// Collect Value items inside the next `(…)` block at or after /// `start_idx`. Stops at the matching `)`. fn collect_values_in_parens( path: &MatchedPath, start_idx: usize, ) -> Result, ValidationError> { let mut out = Vec::new(); let mut inside = false; for item in path.items.iter().skip(start_idx) { match &item.kind { MatchedKind::Punct('(') => inside = true, MatchedKind::Punct(')') if inside => return Ok(out), _ if inside => { if let Some(v) = item_to_value(item) { out.push(v); } } _ => {} } } if out.is_empty() && !inside { return Err(ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing `(`".to_string())], }); } Ok(out) } fn build_update(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; let assignments = collect_assignments(path)?; let filter = collect_filter(path)?; Ok(Command::Update { table, assignments, filter, }) } fn collect_assignments( path: &MatchedPath, ) -> Result, ValidationError> { let mut out = Vec::new(); let mut iter = path.items.iter(); while let Some(item) = iter.next() { if matches!( item.kind, MatchedKind::Ident { role: "update_set_column", .. } ) { let column = item.text.clone(); // Skip the `=` punct. for next in iter.by_ref() { if matches!(next.kind, MatchedKind::Punct('=')) { break; } } // Next item is the value. 
let value_item = iter.next().ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing assignment value".to_string())], })?; let value = item_to_value(value_item).ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "expected value literal".to_string())], })?; out.push((column, value)); } } Ok(out) } fn collect_filter(path: &MatchedPath) -> Result { if path .items .iter() .any(|i| matches!(i.kind, MatchedKind::Flag("all-rows"))) { return Ok(RowFilter::AllRows); } let where_idx = path .items .iter() .position(|i| matches!(&i.kind, MatchedKind::Word("where"))) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing where or --all-rows".to_string())], })?; // `where` is the last clause of update / delete, so every // terminal after it belongs to the expression. Ok(RowFilter::Where(expr::build_expr( &path.items[where_idx + 1..], )?)) } fn build_delete(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; let filter = collect_filter(path)?; Ok(Command::Delete { table, filter }) } /// Build `Command::Explain` (ADR-0028 §1). The matched-word /// sequence is `[explain, show|update|delete, …]` — the entry /// word `explain` is at index 0, the inner command's lead word /// at index 1. The inner command is built by the same builder /// it uses standalone (`build_show_data` / `build_update` / /// `build_delete`), all of which are role-based and so are /// indifferent to the entry-word offset the `explain` prefix /// introduces. 
fn build_explain(path: &MatchedPath, _source: &str) -> Result { let inner_word = path .items .iter() .filter_map(|i| match &i.kind { MatchedKind::Word(w) => Some(*w), _ => None, }) .nth(1); let inner = match inner_word { Some("show") => build_show_data(path, _source)?, Some("update") => build_update(path, _source)?, Some("delete") => build_delete(path, _source)?, _ => { return Err(ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "unknown explain target".to_string())], }); } }; Ok(Command::Explain { query: Box::new(inner), }) } /// Build `Command::Explain` over an advanced-mode SQL inner /// (ADR-0039). The inner SQL text is sliced from `source` starting /// at the inner entry keyword's span, so the carried SQL excludes /// the `explain` prefix — `EXPLAIN QUERY PLAN` runs over the inner /// statement, not the wrapper. The SQL builders extract their /// metadata (target table, etc.) from `path` by role, which is /// offset-independent, so passing the whole explain `path` is safe; /// only the SQL *text* needs the prefix stripped. fn build_explain_sql(path: &MatchedPath, source: &str) -> Result { // Words in the path: [0] is the `explain` entry word, [1] is the // inner entry keyword (select / with / insert / update / delete). 
let inner_item = path .items .iter() .filter(|i| matches!(i.kind, MatchedKind::Word(_))) .nth(1) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing explain target".to_string())], })?; let inner_word = match &inner_item.kind { MatchedKind::Word(w) => *w, _ => unreachable!("filtered to Word above"), }; let inner_source = source[inner_item.span.0..].trim(); let inner = match inner_word { "select" | "with" => build_select(path, inner_source)?, "insert" => build_sql_insert(path, inner_source)?, "update" => build_sql_update(path, inner_source)?, "delete" => build_sql_delete(path, inner_source)?, _ => { return Err(ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "unknown explain target".to_string())], }); } }; Ok(Command::Explain { query: Box::new(inner), }) } // ================================================================= // replay — `replay ` | `replay ''` // ================================================================= // // Phase E (ADR-0024 §migration). The chumsky-side // `try_parse_replay_with_bare_path` source-slice helper is // retired here: walker BarePath consumes the unquoted form // (terminating at whitespace per the path-bearing UX change), // and StringLit consumes the quoted form. Paths with spaces // must use the quoted form — same UX that `import` / `export` // adopted in Phase A. 
const REPLAY_PATH_CHOICES: &[Node] = &[Node::StringLit, Node::BarePath]; const REPLAY_PATH: Node = Node::Choice(REPLAY_PATH_CHOICES); fn build_replay(path: &MatchedPath, _source: &str) -> Result { let payload = path .items .iter() .find_map(|i| match &i.kind { MatchedKind::StringLit | MatchedKind::BarePath => Some(i.text.clone()), _ => None, }) .ok_or_else(|| ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", "missing path".to_string())], })?; Ok(Command::Replay { path: payload }) } /// `Command::Select` carries the validated SQL text verbatim /// (ADR-0030 §4/§6, ADR-0031 §2): a `SELECT` builds no AST — the /// walk has confirmed it is in the supported subset, and the /// worker runs the statement as text. `source` is the full /// submitted line; on a `Match` outcome the `SELECT` shape /// consumed all of it. fn build_select(_path: &MatchedPath, source: &str) -> Result { Ok(Command::Select { sql: source.trim().to_string(), }) } /// Build `Command::SqlInsert` from a validated SQL `INSERT` /// (ADR-0033 §1). Extracts the target table from the matched path /// so the worker re-persists the right CSV. `insert` is now the /// real (shared) entry word, so the validated `source` runs /// verbatim — like `build_select` (sub-phase 3j). fn build_sql_insert(path: &MatchedPath, source: &str) -> Result { let target_table = path .items .iter() .find_map(|item| match item.kind { MatchedKind::Ident { role: "insert_target_table", .. } => Some(item.text.clone()), _ => None, }) .unwrap_or_default(); // The user's explicit `(col, …)` list, in order (empty when the // form omits it). Sub-phase 3d reads this to decide which // `shortid` columns were left for the worker to auto-fill. let listed_columns: Vec = path .items .iter() .filter_map(|item| match item.kind { MatchedKind::Ident { role: "insert_column", .. 
} => Some(item.text.clone()), _ => None, }) .collect(); // The row source is the `VALUES` / `SELECT` / `WITH` clause — // from that keyword up to (but not including) any trailing // clause: `ON CONFLICT …` (3h) or `RETURNING …` (3g), whichever // comes first, else the trailing `;` / end. Boundaries are // located by *Word token* in the path (not a text scan), so a // string literal like `values ('select')` can't be mistaken for // a keyword. Excluding the trailing clauses keeps the row source // independently preparable for `shortid` auto-fill (`VALUES … // ON CONFLICT …` / `VALUES … RETURNING …` are not valid // standalone statements), and the auto-fill rewrite re-appends // the trailing tail verbatim (see `do_sql_insert`). // // `ON CONFLICT`'s `on` is located via the unambiguous `conflict` // keyword that immediately follows it — a JOIN's `on` inside a // SELECT row source has no following `conflict`, so it is not // mistaken for a clause boundary. let on_conflict_start = path .items .windows(2) .find(|w| { matches!(w[0].kind, MatchedKind::Word("on")) && matches!(w[1].kind, MatchedKind::Word("conflict")) }) .map(|w| w[0].span.0); let returning_start = path .items .iter() .find(|item| matches!(item.kind, MatchedKind::Word("returning"))) .map(|item| item.span.0); let tail_start = [on_conflict_start, returning_start] .into_iter() .flatten() .min(); let row_source = path .items .iter() .find(|item| { matches!(item.kind, MatchedKind::Word("values" | "select" | "with")) }) .map(|item| { let end = tail_start.unwrap_or(source.len()); source[item.span.0..end] .trim() .trim_end_matches(';') .trim() .to_string() }) .unwrap_or_default(); // The entry word is the real `insert` keyword (sub-phase 3j), // so the validated line runs verbatim (grammar-as-text, // ADR-0030 §4) — no keyword reconstruction. let sql = source.trim().to_string(); // Capture literal values per `VALUES` row for app-level type // validation + error enrichment (ADR-0036 Phase 1). 
Only for a // `VALUES` source (a `SELECT`/`WITH` source has no `values` keyword, // so this stays empty). Bounded to the row-source region by the same // `tail_start` the row_source slice used. let values_start = path .items .iter() .find(|i| matches!(i.kind, MatchedKind::Word("values"))) .map(|i| i.span.0); let literal_rows = values_start.map_or_else(Vec::new, |vs| { capture_literal_rows(path, vs, tail_start.unwrap_or(source.len())) }); Ok(Command::SqlInsert { sql, target_table, listed_columns, row_source, returning: path_has_returning(path), literal_rows, }) } /// Capture the literal values of each `VALUES` tuple from the matched /// path (ADR-0036 Phase 1). Each position is `Some(Value)` for a bare /// literal (incl. a signed number — the leading sign is folded into the /// number) and `None` for an expression position (a `func(x)`, `a+1`, /// subquery, column ref — nothing static to validate). Works purely from /// the tokens the walker already matched (no reparse); rows and positions /// are delimited by tuple parens and depth-1 commas. `values_start` is the /// byte offset of the `values` keyword; only items in `[values_start, /// tail_end)` are considered (so any trailing `ON CONFLICT`/`RETURNING` /// clause is excluded). 
fn capture_literal_rows( path: &MatchedPath, values_start: usize, tail_end: usize, ) -> Vec>> { let mut rows: Vec>> = Vec::new(); let mut depth: i32 = 0; let mut cur_row: Vec> = Vec::new(); let mut pos: Vec<&MatchedItem> = Vec::new(); for item in &path.items { if item.span.0 < values_start || item.span.0 >= tail_end { continue; } match &item.kind { MatchedKind::Word("values") => {} MatchedKind::Punct('(') => { depth += 1; if depth == 1 { cur_row = Vec::new(); pos.clear(); } else { pos.push(item); } } MatchedKind::Punct(')') => { if depth == 1 { cur_row.push(classify_value_position(&pos)); pos.clear(); rows.push(std::mem::take(&mut cur_row)); } else if depth > 1 { pos.push(item); } depth -= 1; } MatchedKind::Punct(',') if depth == 1 => { cur_row.push(classify_value_position(&pos)); pos.clear(); } _ if depth >= 1 => pos.push(item), _ => {} } } rows } /// Classify one `VALUES` position's matched tokens into `Some(Value)` (a /// bare literal) or `None` (an expression). A single literal token, or a /// sign followed by a number, is a literal; anything else is an /// expression (ADR-0036 §1). fn classify_value_position(tokens: &[&MatchedItem]) -> Option { match tokens { [one] => item_to_value(one), [sign, num] if matches!(sign.kind, MatchedKind::Punct('-') | MatchedKind::Punct('+')) && matches!(num.kind, MatchedKind::NumberLit) => { let text = if matches!(sign.kind, MatchedKind::Punct('-')) { format!("-{}", num.text) } else { num.text.clone() }; Some(Value::Number(text)) } _ => None, } } /// Whether the matched path contains a `RETURNING` clause /// (ADR-0033 §5, sub-phase 3g). Located by the `returning` *Word /// token* in the path — path-based, so a string literal can't be /// mistaken for the keyword (mirrors `build_sql_insert`'s /// row-source detection). 
fn path_has_returning(path: &MatchedPath) -> bool {
    path.items
        .iter()
        .find(|item| matches!(item.kind, MatchedKind::Word("returning")))
        .is_some()
}

/// Build `Command::SqlUpdate` from a validated SQL `UPDATE`
/// (ADR-0033 §2). Extracts the target table from the matched path
/// so the worker re-persists the right CSV. `update` is now the
/// real (shared) entry word, so the validated `source` runs
/// verbatim (sub-phase 3j).
fn build_sql_update(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    // The UPDATE target is the first `table_name` ident (it
    // precedes any table referenced inside a SET / WHERE subquery).
    // An absent ident yields an empty table name.
    let target_table = path
        .items
        .iter()
        .find(|item| {
            matches!(
                item.kind,
                MatchedKind::Ident {
                    role: "table_name",
                    ..
                }
            )
        })
        .map(|item| item.text.clone())
        .unwrap_or_default();

    // Capture the literal RHS of each top-level `SET col = <expr>`
    // assignment for app-level type validation + error enrichment
    // (ADR-0036 Phase 2). Purely from the matched tokens — no reparse.
    let set_literals = capture_set_literals(path);

    Ok(Command::SqlUpdate {
        sql: source.trim().to_string(),
        target_table,
        returning: path_has_returning(path),
        set_literals,
    })
}

/// Capture the literal RHS of each top-level `SET col = <expr>`
/// assignment from the matched path (ADR-0036 Phase 2). Returns
/// `(col, Some(Value))` for a bare-literal RHS (incl. a signed number)
/// and `(col, None)` for an expression RHS (arithmetic, function call,
/// scalar subquery, column ref — nothing static to validate). Works
/// purely from the tokens the walker already matched (no reparse).
///
/// Boundaries: the assignment LHS is the `update_set_column` ident (a
/// role only ever emitted at the top level of an assignment — expression
/// column refs carry `sql_expr_ident` / `sql_expr_qualified_ref`, so they
/// are never confused with it). A *depth-0* comma separates assignments;
/// a *depth-0* `where` / `returning` keyword (or `;` / end of path) ends
/// the SET list.
Parens raise the depth so a comma, `where`, or `=` /// inside a function call or scalar subquery on the RHS is never mistaken /// for an assignment / clause boundary or the assignment operator. fn capture_set_literals(path: &MatchedPath) -> Vec<(String, Option)> { let mut out: Vec<(String, Option)> = Vec::new(); let mut after_set = false; let mut depth: i32 = 0; // The assignment currently being accumulated: its column name, its // RHS tokens so far, and whether the assignment `=` has been consumed. let mut cur_col: Option = None; let mut cur_rhs: Vec<&MatchedItem> = Vec::new(); let mut seen_eq = false; // Finalise the pending assignment (if any) into `out`. fn flush( col: &mut Option, rhs: &mut Vec<&MatchedItem>, out: &mut Vec<(String, Option)>, ) { if let Some(c) = col.take() { out.push((c, classify_value_position(rhs))); } rhs.clear(); } for item in &path.items { if !after_set { // Scan only the SET list — skip everything up to (and // including) the `set` keyword. The first `update_set_column` // appears after it. if matches!(item.kind, MatchedKind::Word("set")) { after_set = true; } continue; } // A depth-0 `where` / `returning` / `;` ends the SET list. if depth == 0 && matches!( item.kind, MatchedKind::Word("where" | "returning") | MatchedKind::Punct(';') ) { break; } match &item.kind { MatchedKind::Punct('(') => { depth += 1; if cur_col.is_some() && seen_eq { cur_rhs.push(item); } } MatchedKind::Punct(')') => { depth -= 1; if cur_col.is_some() && seen_eq { cur_rhs.push(item); } } MatchedKind::Ident { role: "update_set_column", .. } if depth == 0 => { // A new assignment begins — finalise the previous one. flush(&mut cur_col, &mut cur_rhs, &mut out); cur_col = Some(item.text.clone()); seen_eq = false; } MatchedKind::Punct(',') if depth == 0 => { // Assignment separator — finalise the current assignment; // the next `update_set_column` starts the following one. 
flush(&mut cur_col, &mut cur_rhs, &mut out); } MatchedKind::Punct('=') if depth == 0 && !seen_eq && cur_col.is_some() => { // The assignment operator — consumed, not part of the RHS. seen_eq = true; } _ => { if cur_col.is_some() && seen_eq { cur_rhs.push(item); } } } } // Finalise the last assignment (ended by `where`/`returning`/`;`/EOF). flush(&mut cur_col, &mut cur_rhs, &mut out); out } /// Build `Command::SqlDelete` from a validated SQL `DELETE` /// (ADR-0033 §1/§7). Extracts the target table from the matched /// path so the worker re-persists the right CSV and snapshots the /// right inbound children for cascade diffing. No WHERE clause is /// captured — the worker executes the verbatim SQL and never /// inspects the predicate (Amendment 2). `delete` is now the real /// (shared) entry word, so the validated `source` runs verbatim /// (sub-phase 3j). fn build_sql_delete(path: &MatchedPath, source: &str) -> Result { // The DELETE target is the first `table_name` ident (it precedes // any table referenced inside a WHERE subquery). let target_table = path .items .iter() .find_map(|item| match item.kind { MatchedKind::Ident { role: "table_name", .. 
} => Some(item.text.clone()), _ => None, }) .unwrap_or_default(); let sql = source.trim().to_string(); Ok(Command::SqlDelete { sql, target_table, returning: path_has_returning(path), }) } // ================================================================= // CommandNodes // ================================================================= pub static SHOW: CommandNode = CommandNode { entry: Word::keyword("show"), shape: SHOW_SHAPE, ast_builder: build_show, help_id: Some("data.show"), hint_ids: &[], usage_ids: &[ "parse.usage.show_data", "parse.usage.show_table", "parse.usage.show_tables", "parse.usage.show_relationships", "parse.usage.show_indexes", "parse.usage.show_relationship", "parse.usage.show_index", ],}; pub static SEED: CommandNode = CommandNode { entry: Word::keyword("seed"), shape: SEED_SHAPE, ast_builder: build_seed, help_id: Some("data.seed"), hint_ids: &[], usage_ids: &["parse.usage.seed"], }; pub static INSERT: CommandNode = CommandNode { entry: Word::keyword("insert"), shape: INSERT_SHAPE, ast_builder: build_insert, help_id: Some("data.insert"), // ADR-0053 Phase-B exemplar. 
hint_ids: &["insert"], usage_ids: &["parse.usage.insert"],}; pub static UPDATE: CommandNode = CommandNode { entry: Word::keyword("update"), shape: UPDATE_SHAPE, ast_builder: build_update, help_id: Some("data.update"), hint_ids: &[], usage_ids: &["parse.usage.update"],}; pub static DELETE: CommandNode = CommandNode { entry: Word::keyword("delete"), shape: DELETE_SHAPE, ast_builder: build_delete, help_id: Some("data.delete"), hint_ids: &[], usage_ids: &["parse.usage.delete"],}; pub static REPLAY: CommandNode = CommandNode { entry: Word::keyword("replay"), shape: REPLAY_PATH, ast_builder: build_replay, help_id: Some("data.replay"), hint_ids: &[], usage_ids: &["parse.usage.replay"],}; pub static EXPLAIN: CommandNode = CommandNode { entry: Word::keyword("explain"), shape: EXPLAIN_SHAPE, ast_builder: build_explain, help_id: Some("data.explain"), hint_ids: &[], usage_ids: &["parse.usage.explain"],}; /// `explain` over advanced-mode SQL (ADR-0039). /// /// The `Advanced` node of the shared `explain` entry word. Pairs with /// the `Simple` DSL [`EXPLAIN`] node above: in advanced mode the /// dispatcher tries this SQL node first and falls back to the DSL node /// when no SQL branch matches (`explain show data …`, or a DSL-only /// `--all-rows`); in simple mode only the DSL node is reachable. pub static EXPLAIN_SQL: CommandNode = CommandNode { entry: Word::keyword("explain"), shape: EXPLAIN_SQL_SHAPE, ast_builder: build_explain_sql, // No `help_id` / `usage_ids` — this is the `Advanced` half of the // shared `explain` entry word, so it defers to the `Simple` // `EXPLAIN` node's help/usage (which now covers the SQL forms // too). Mirrors the `SQL_INSERT`/`SQL_UPDATE`/`SQL_DELETE` // precedent; otherwise `note_help` would print `explain` twice. help_id: None, hint_ids: &[], usage_ids: &[],}; /// SQL `SELECT` (ADR-0030 §6, ADR-0031, ADR-0032). /// /// Advanced mode only — gated by `grammar::is_advanced_only`. 
/// The shape is the post-`SELECT` portion of a top-level
/// statement; the registry's entry-word dispatch consumes the
/// leading `SELECT` keyword before the shape walks (sub-phase
/// 2c migration). `help_id` is `None` until the `help sql`
/// page lands (ADR-0030 Phase 6).
pub static SELECT: CommandNode = CommandNode {
    entry: Word::keyword("select"),
    shape: Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
    ast_builder: build_select,
    help_id: None,
    hint_ids: &[],
    usage_ids: &["parse.usage.select"],
};

/// `WITH …` top-level statement (ADR-0032 §4 / sub-phase 2c).
///
/// Advanced mode only. Dispatched separately from `SELECT` so
/// the registry's entry-word dispatch routes `with` and
/// `select` to the right shapes; both reach the same
/// `Command::Select` AST since execution is grammar-as-text
/// (ADR-0030 §6, ADR-0031 §2).
pub static WITH: CommandNode = CommandNode {
    entry: Word::keyword("with"),
    shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
    ast_builder: build_select,
    help_id: None,
    hint_ids: &[],
    usage_ids: &["parse.usage.with"],
};

/// SQL `INSERT` — the `Advanced`-category node of the shared
/// `insert` entry word (ADR-0033 §2, Amendment 1, sub-phase 3j).
///
/// `insert` is a shared entry word: this `Advanced` SQL node and
/// the `Simple` DSL [`INSERT`] node both register under `insert`.
/// In Advanced mode the dispatcher (`walker::walk` / `decide`)
/// tries this SQL node first and falls back to the DSL node when
/// the SQL shape does not match; in Simple mode only the DSL node
/// is reachable (Amendment 3 — command identity is the mode-rooted
/// grammar-path outcome).
pub static SQL_INSERT: CommandNode = CommandNode {
    entry: Word::keyword("insert"),
    shape: Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
    ast_builder: build_sql_insert,
    help_id: None,
    hint_ids: &[],
    usage_ids: &[],
};

/// SQL `UPDATE` — the `Advanced` node of the shared `update` word.
///
/// ADR-0033 §2 / Amendment 1, sub-phase 3j.
Pairs with the `Simple` /// DSL [`UPDATE`] node; dispatch is SQL-first / DSL-fallback in /// Advanced mode, DSL-only in Simple. pub static SQL_UPDATE: CommandNode = CommandNode { entry: Word::keyword("update"), shape: Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE), ast_builder: build_sql_update, help_id: None, hint_ids: &[], usage_ids: &[], }; /// SQL `DELETE` — the `Advanced` node of the shared `delete` word. /// /// ADR-0033 §2 / Amendment 1, sub-phase 3j. Pairs with the `Simple` /// DSL [`DELETE`] node; dispatch is SQL-first / DSL-fallback in /// Advanced mode, DSL-only in Simple. In Advanced mode `delete from t /// --all-rows` falls back to the DSL node (the SQL shape has no /// `--all-rows`). pub static SQL_DELETE: CommandNode = CommandNode { entry: Word::keyword("delete"), shape: Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE), ast_builder: build_sql_delete, help_id: None, hint_ids: &[], usage_ids: &[], }; // ================================================================= // Tests — `explain` grammar (ADR-0028 §1) // ================================================================= #[cfg(test)] mod explain_tests { use super::Command; use crate::dsl::parser::parse_command; /// Parse `input` in **simple** mode and unwrap the /// `Command::Explain` wrapper, returning the inner command. /// These cover the DSL-explain wrapping (ADR-0028); the /// advanced-mode SQL wrapping (ADR-0039) is covered by /// `explain_inner_adv` below. (`parse_command` defaults to /// advanced, where `explain update`/`delete` now route to the /// SQL path — so DSL-explain tests pin the mode explicitly.) 
fn explain_inner(input: &str) -> Command { match crate::dsl::parser::parse_command_in_mode(input, crate::mode::Mode::Simple) .expect("explain should parse") { Command::Explain { query } => *query, other => panic!("expected Command::Explain, got {other:?}"), } } #[test] fn explain_show_data_wraps_a_show_data() { assert!(matches!( explain_inner("explain show data Customers"), Command::ShowData { .. } )); } #[test] fn explain_show_data_carries_where_and_limit_through() { match explain_inner("explain show data Customers where id = 1 limit 5") { Command::ShowData { name, filter, limit } => { assert_eq!(name, "Customers"); assert!(filter.is_some(), "where clause should survive"); assert_eq!(limit, Some(5)); } other => panic!("expected ShowData, got {other:?}"), } } #[test] fn explain_update_wraps_an_update() { assert!(matches!( explain_inner("explain update Customers set Name='Bo' where id=1"), Command::Update { .. } )); } #[test] fn explain_delete_wraps_a_delete() { assert!(matches!( explain_inner("explain delete from Customers where id=1"), Command::Delete { .. } )); } #[test] fn explain_of_an_incomplete_update_is_a_parse_error() { // A bare `update` still needs its `where` / `--all-rows` // (ADR-0028 §1: `explain` of an incomplete command is the // same parse error the command alone would be). Simple mode: // in advanced mode a where-less SQL UPDATE is valid (ADR-0039). assert!( crate::dsl::parser::parse_command_in_mode( "explain update Customers set Name='Bo'", crate::mode::Mode::Simple, ) .is_err() ); } #[test] fn explain_does_not_cover_show_table() { // `explain` covers `show data` only (ADR-0028 §1). 
assert!(parse_command("explain show table Customers").is_err()); } #[test] fn bare_explain_is_a_parse_error() { assert!(parse_command("explain").is_err()); assert!(parse_command("explain show").is_err()); } // ---- ADR-0039: explain over advanced-mode SQL -------------- use crate::dsl::parser::parse_command_in_mode; use crate::mode::Mode; /// Advanced-mode counterpart of `explain_inner`. fn explain_inner_adv(input: &str) -> Command { match parse_command_in_mode(input, Mode::Advanced) .expect("advanced explain should parse") { Command::Explain { query } => *query, other => panic!("expected Command::Explain, got {other:?}"), } } #[test] fn explain_select_wraps_a_select_with_clean_sql() { // The carried SQL must NOT include the `explain` prefix // (ADR-0039) — `EXPLAIN QUERY PLAN` runs over the inner SQL. match explain_inner_adv("explain select * from Customers") { Command::Select { sql } => assert_eq!(sql, "select * from Customers"), other => panic!("expected Select, got {other:?}"), } } #[test] fn explain_with_cte_wraps_a_select() { match explain_inner_adv( "explain with recent as (select * from Orders) select * from recent", ) { Command::Select { sql } => { assert!(sql.starts_with("with recent"), "clean inner sql: {sql}"); } other => panic!("expected Select, got {other:?}"), } } #[test] fn explain_sql_insert_wraps_a_sql_insert() { match explain_inner_adv("explain insert into Customers values (1, 'Bo')") { Command::SqlInsert { sql, target_table, .. } => { assert_eq!(target_table, "Customers"); assert_eq!(sql, "insert into Customers values (1, 'Bo')"); } other => panic!("expected SqlInsert, got {other:?}"), } } #[test] fn explain_sql_update_wraps_a_sql_update_with_clean_sql() { match explain_inner_adv("explain update Customers set Name = 'Bo' where id = 1") { Command::SqlUpdate { sql, target_table, .. 
} => { assert_eq!(target_table, "Customers"); assert_eq!(sql, "update Customers set Name = 'Bo' where id = 1"); } other => panic!("expected SqlUpdate, got {other:?}"), } } #[test] fn explain_sql_delete_wraps_a_sql_delete() { match explain_inner_adv("explain delete from Customers where id = 1") { Command::SqlDelete { sql, target_table, .. } => { assert_eq!(target_table, "Customers"); assert_eq!(sql, "delete from Customers where id = 1"); } other => panic!("expected SqlDelete, got {other:?}"), } } #[test] fn explain_update_with_all_rows_flag_falls_back_to_dsl_in_advanced() { // `--all-rows` is DSL-only; the SQL update shape can't // consume it, so the explain inner falls back to the DSL // `Update` node — mirroring the top-level shared-word // dispatch (ADR-0033). assert!(matches!( explain_inner_adv("explain update Customers set Name = 'Bo' --all-rows"), Command::Update { .. } )); } #[test] fn explain_show_data_still_uses_dsl_in_advanced() { // `show data` has no SQL form; advanced `explain show data` // falls back to the DSL inner. assert!(matches!( explain_inner_adv("explain show data Customers"), Command::ShowData { .. } )); } #[test] fn explain_select_is_rejected_in_simple_mode() { // `select` is advanced-only, so `explain select` has no // simple-mode form. assert!(parse_command_in_mode("explain select * from Customers", Mode::Simple).is_err()); } #[test] fn explain_does_not_cover_ddl() { // EXPLAIN QUERY PLAN applies to DML/queries only (ADR-0039 // out of scope); there is no SQL DDL branch under explain. assert!(parse_command_in_mode( "explain create table T (id int)", Mode::Advanced, ) .is_err()); } #[test] fn advanced_explain_completion_offers_the_sql_verbs() { // After `explain ` in advanced mode the candidate list is the // union across both `explain` CommandNodes: the SQL verbs // (select/with/insert/update/delete) plus the DSL `show` // (ADR-0039). The shared-entry-word completion already // aggregates, so there is no UX gap. 
use crate::completion::candidates_at_cursor_in_mode; let schema = crate::completion::SchemaCache::default(); let input = "explain "; let completion = candidates_at_cursor_in_mode(input, input.len(), &schema, Mode::Advanced) .expect("explain offers candidates"); let names: Vec<&str> = completion .candidates .iter() .map(|c| c.text.as_str()) .collect(); for verb in ["select", "with", "insert", "update", "delete", "show"] { assert!(names.contains(&verb), "expected `{verb}` in {names:?}"); } } }