Files
rdbms-playground/src/dsl/grammar/data.rs
T
claude@clouddev1 1d898adf00 feat: V5a show relationship/index <name> detail views
Fold the singular per-item forms into Command::ShowList { kind,
name: Option<String> } (name: Some = one item). Two grammar
branches reuse the relationship/index completion sources; worker
do_show_one renders a labelled detail block or a friendly
"No ... named X." line, reusing the V5 render path. Help +
parse-usage entries, two ADR-0042 near-miss rows, 5 integration
tests. Mark V5a [x] — V5's [<name>] clause now complete.
2026-06-07 14:04:00 +00:00

1806 lines
68 KiB
Rust

//! Data command nodes (ADR-0024 §migration Phase D).
//!
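//! The core is five commands at four entry words: `show` (show data /
//! show table), `insert`, `update`, `delete`. This file also defines
//! the `show` list-all forms (`show tables` / `show relationships` /
//! `show indexes`, V5), the per-item detail forms (`show relationship
//! <name>` / `show index <name>`, V5a), and the `explain` wrapper
//! shapes (ADR-0028 / ADR-0039). The walker route owns these
//! end-to-end.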
//!
//! Schema awareness (ADR-0024 §Phase D): the DSL value slots are
//! wired to `DynamicSubgrammar(column_value_list)` /
//! `current_column_value` (see `INSERT_VALUES_LIST`,
//! `insert_first_paren`, `PER_COLUMN_VALUE`), so the schema reference
//! that flows through `parse_command` unfolds a typed slot per column:
//! numeric-shape mismatch is caught at parse (`int`/`decimal`/`bool`
//! slots in `shared.rs`) and the full semantic type (`date` / `shortid`
//! format) is validated at bind time. So the simple-mode DSL gives data
//! values per-column feedback end-to-end.
//!
//! The advanced-mode SQL DML surface (`build_sql_insert` /
//! `build_sql_update` below) is a separate path: it executes the
//! validated statement verbatim (ADR-0030 §4) and is NOT yet wired to
//! the typed slots. ADR-0036 closes the resulting value-feedback gap
//! without a grammar change by *capturing* each literal value position
//! at parse (`capture_literal_rows` / `capture_set_literals`) and
//! validating it against the column type in the worker — Phase 3 will
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{
FALLBACK_VALUE_LIST, column_value_list, count_tuple_values,
current_column_value, insert_target_columns,
},
sql_delete, sql_insert, sql_select, sql_update,
};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::value::Value;
use crate::dsl::walker::outcome::{MatchedItem, MatchedKind, MatchedPath};
// =================================================================
// Building blocks
// =================================================================
/// Table-name slot for a command that only needs to *name* a table.
/// Names come from `IdentSource::Tables`; every `writes_*` flag is
/// `false`, so matching this slot records nothing in the walk context.
/// In this file it is used by `show table <T>` (`SHOW_TABLE_NODES`).
const TABLE_NAME_EXISTING: Node = Node::Ident {
source: IdentSource::Tables,
// Reject `__rdbms_*` internal tables at the table-source slot
// (ADR-0030 §6 — "every table-source slot"), matching the SQL
// grammar's `reject_internal_table`. Without this, simple-mode DSL
// data commands could read/write the internal metadata tables
// even though advanced-mode SQL rejects them (ADR-0033
// Amendment 3 / `/runda` finding B).
role: "table_name",
validator: Some(sql_select::reject_internal_table),
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// Table-name slot variant that populates
/// `WalkContext::current_table_columns` (ADR-0024 §Phase D).
/// Used by `insert into <T> …` so the inner value list can
/// dispatch typed slots per column.
///
/// NOTE(review): this literal is field-for-field identical to
/// `TABLE_NAME_WRITES` (same source, role, validator and `writes_*`
/// flags); the two could share one definition — confirm nothing relies
/// on them being distinct constants.
const TABLE_NAME_INSERT: Node = Node::Ident {
source: IdentSource::Tables,
// Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
// finding B) — see `TABLE_NAME_EXISTING`.
role: "table_name",
validator: Some(sql_select::reject_internal_table),
highlight_override: None,
// The only flag set: the matched table becomes the walk's current table.
writes_table: true,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
// =================================================================
// show — `show (data|table) <T>`
// =================================================================
/// Tail of `show data` after the `show` entry word:
/// `data <T> [where <expr>] [limit <n>]`. The same slice backs
/// `EXPLAIN_SHOW_DATA`, so `explain show data …` parses identically.
const SHOW_DATA_NODES: &[Node] = &[
Node::Word(Word::keyword("data")),
// `writes_table` so the optional `where` expression's
// column slots resolve against this table for completion.
TABLE_NAME_WRITES,
Node::Optional(&WHERE_CLAUSE),
Node::Optional(&LIMIT_CLAUSE),
];
/// The `show data …` branch of `SHOW_CHOICES`.
const SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
/// Tail of `show table`: the `table` keyword followed by a table name.
/// Uses `TABLE_NAME_EXISTING` (no context writes) because nothing after
/// the name needs the table's columns.
const SHOW_TABLE_NODES: &[Node] = &[
Node::Word(Word::keyword("table")),
TABLE_NAME_EXISTING,
];
/// The `show table <T>` branch of `SHOW_CHOICES`.
const SHOW_TABLE: Node = Node::Seq(SHOW_TABLE_NODES);
// `show tables` / `show relationships` / `show indexes` — the
// list-all forms (V5). Each is a single keyword with no argument;
// the executor lists every item of the kind. Distinct keyword
// tokens (`tables` ≠ `table`), so Choice ordering is irrelevant.
/// `show tables` — list-all form; built as `ShowList { kind: Tables, name: None }`.
const SHOW_TABLES: Node = Node::Word(Word::keyword("tables"));
/// `show relationships` — list-all form; built as `ShowList { kind: Relationships, name: None }`.
const SHOW_RELATIONSHIPS: Node = Node::Word(Word::keyword("relationships"));
/// `show indexes` — list-all form; built as `ShowList { kind: Indexes, name: None }`.
const SHOW_INDEXES: Node = Node::Word(Word::keyword("indexes"));
// `show relationship <name>` / `show index <name>` — singular
// per-item detail (V5a). The name slot reuses the existing
// completion sources (relationship / index names). Distinct
// keyword tokens from the plurals (`relationship` ≠
// `relationships`), so Choice ordering is irrelevant.
/// Name slot of `show relationship <name>`. `build_show` reads it back
/// through the `"relationship_name"` role. No validator and no context
/// writes.
const SHOW_RELATIONSHIP_NAME: Node = Node::Ident {
source: IdentSource::Relationships,
role: "relationship_name",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// Tail of `show relationship <name>` after the `show` entry word.
const SHOW_RELATIONSHIP_NODES: &[Node] = &[
Node::Word(Word::keyword("relationship")),
SHOW_RELATIONSHIP_NAME,
];
/// The `show relationship <name>` branch of `SHOW_CHOICES`.
const SHOW_RELATIONSHIP: Node = Node::Seq(SHOW_RELATIONSHIP_NODES);
/// Name slot of `show index <name>`. `build_show` reads it back through
/// the `"index_name"` role. No validator and no context writes.
const SHOW_INDEX_NAME: Node = Node::Ident {
source: IdentSource::Indexes,
role: "index_name",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// Tail of `show index <name>` after the `show` entry word.
const SHOW_INDEX_NODES: &[Node] =
&[Node::Word(Word::keyword("index")), SHOW_INDEX_NAME];
/// The `show index <name>` branch of `SHOW_CHOICES`.
const SHOW_INDEX: Node = Node::Seq(SHOW_INDEX_NODES);
/// Every form accepted after the `show` entry word. Each branch starts
/// with a distinct keyword (`data`, `table`, `tables`, `relationships`,
/// `indexes`, `relationship`, `index`), which is the word `build_show`
/// dispatches on.
const SHOW_CHOICES: &[Node] = &[
SHOW_DATA,
SHOW_TABLE,
SHOW_TABLES,
SHOW_RELATIONSHIPS,
SHOW_INDEXES,
SHOW_RELATIONSHIP,
SHOW_INDEX,
];
/// Shape of the `show` command: a choice over `SHOW_CHOICES`.
/// NOTE(review): the `CommandNode` registration that consumes this is
/// outside the visible part of the file — confirm there.
const SHOW_SHAPE: Node = Node::Choice(SHOW_CHOICES);
// =================================================================
// insert — `insert into <T> (<a>,<b>,…) values (<v>,<v>,…)`
// | `insert into <T> values (<v>,…)`
// | `insert into <T> (<v>,…)`
// =================================================================
//
// Forms A (with column list) and C (bare value list) both start
// with `(`. The walker's "first commit wins" Choice semantics
// can't pick between them after the `(` matches, so the first
// paren's contents are resolved by a `Node::Lookahead` factory
// (`insert_first_paren`): it peeks the first token to decide.
//
// - First token is a value literal (number / string /
// null / true / false) → Form C → the typed `column_value_list`
// (same dispatch contract as Form B — ADR-0024 §Phase D Form-C
// type-awareness). Form C values are now type-checked at parse
// time, not only at bind time.
// - Otherwise (column-name identifier, or an empty paren) →
// Form A → a repeated column-name list. The idents write
// `WalkContext::user_listed_columns` so the trailing
// `values (…)` slots mirror the user's selection.
/// Form A's column-name slot. `static` (not `const`) so the
/// `insert_first_paren` factory can take a `&'static` reference
/// to it when building the repeated list at walk time.
///
/// Matched names carry the role `"insert_first_item"`, which is how
/// `build_insert` recovers the user's column list.
static FORM_A_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "insert_first_item",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
// Records each listed column so the trailing `values (…)` slots can
// mirror the user's selection.
writes_user_listed_column: true,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `,` separator between Form A column names. `static` for the same
/// reason as `FORM_A_COLUMN`: `insert_first_paren` borrows it as
/// `&INSERT_COMMA` when it builds the `Repeated` node.
static INSERT_COMMA: Node = Node::Punct(',');
/// Lookahead factory for the contents of the first `(` … `)` after
/// `insert into <T>` (ADR-0024 §Phase D Form-C type-awareness).
///
/// Decides between the two shapes that can follow the opening paren by
/// peeking at the first token at `pos`:
///
/// - anything that is not a value literal (a column name, an empty or
///   still-being-typed paren) is Form A, a comma-separated list of
///   column names with at least one entry;
/// - a value literal is Form C, a bare value list, which is handed to
///   [`dsl_insert_value_list`] so it is arity-gated exactly like Form
///   B's `values (…)` (issue #17).
fn insert_first_paren(ctx: &WalkContext, source: &str, pos: usize) -> Node {
    if !first_paren_item_is_value_literal(source, pos) {
        // Form A (or Form A in progress / empty paren).
        return Node::Repeated {
            inner: &FORM_A_COLUMN,
            separator: Some(&INSERT_COMMA),
            min: 1,
        };
    }
    // Form C: a correct-count tuple gets typed per-column slots; a
    // wrong-count tuple falls back to the type-blind list so the arity
    // diagnostic can fire.
    dsl_insert_value_list(ctx, source, pos)
}
/// Reports whether the first token at or after `pos` (leading
/// whitespace skipped) is a value literal: a string literal, a number
/// literal, or one of the words `null` / `true` / `false` compared
/// ASCII-case-insensitively.
///
/// A `true` result marks the insert paren as a Form C value list.
/// Everything else returns `false` and is treated as Form A: end of
/// input, an empty paren, punctuation such as `)`, or any other
/// identifier (a column name).
fn first_paren_item_is_value_literal(source: &str, pos: usize) -> bool {
    use crate::dsl::walker::lex_helpers::{
        consume_ident, consume_number_literal, consume_string_literal,
        skip_whitespace,
    };
    // Identifier-shaped tokens that are nevertheless value literals.
    const KEYWORD_LITERALS: [&str; 3] = ["null", "true", "false"];

    let start = skip_whitespace(source, pos);
    if start >= source.len() {
        // Nothing left to peek at — treat as Form A.
        return false;
    }
    // String is probed before number, as in the lexing order above.
    if consume_string_literal(source, start).is_some()
        || consume_number_literal(source, start).is_some()
    {
        return true;
    }
    match consume_ident(source, start) {
        Some((from, to)) => {
            let word = &source[from..to];
            KEYWORD_LITERALS
                .iter()
                .any(|literal| word.eq_ignore_ascii_case(literal))
        }
        // Punctuation (e.g. `)`) — treat as Form A.
        None => false,
    }
}
/// Contents of the first `(` … `)` after `insert into <T>`. Resolved at
/// walk time by `insert_first_paren`, which picks Form A's column-name
/// list or Form C's value list from the first token.
const INSERT_PAREN_LIST: Node = Node::Lookahead(insert_first_paren);
/// Arity gate for an insert value list (issue #17) — the simple-mode
/// DSL counterpart of the advanced grammar's `tuple_value_list`
/// (`sql_insert.rs`).
///
/// Chooses between two shapes for the tuple starting at `pos`:
///
/// - the typed per-column slots ([`column_value_list`]) when the tuple's
///   value count fits the target columns;
/// - the type-blind [`FALLBACK_VALUE_LIST`] otherwise, so a wrong-count
///   tuple still matches structurally and the per-tuple arity diagnostic
///   (ADR-0033 §8.1) can report a friendly message instead of a bare
///   "expected `,`/`)`".
///
/// "Fits" means an exact count match for a closed tuple, and "not yet
/// too many" for a tuple that is still open (being typed).
///
/// The target arity comes from [`insert_target_columns`], the same
/// source `column_value_list` uses, so gate and slots cannot disagree.
/// When it yields `None` (schemaless / unknown table /
/// all-auto-generated) the fallback is used.
///
/// **Simple-mode only.** In any other mode the typed list is returned
/// unconditionally: the DSL insert node must stay strict there so that a
/// non-SQL shape such as Form C (`insert into T (1, 2)`, no `values`)
/// is not accepted where the SQL grammar (`sql_insert.rs`) owns inserts.
/// This keeps the pre-#17 advanced behaviour unchanged.
fn dsl_insert_value_list(ctx: &WalkContext, source: &str, pos: usize) -> Node {
    if ctx.mode != crate::mode::Mode::Simple {
        return Node::DynamicSubgrammar(column_value_list);
    }
    let target_arity = match insert_target_columns(ctx) {
        Some(cols) => cols.len(),
        None => return FALLBACK_VALUE_LIST,
    };
    let (seen, closed) = count_tuple_values(source, pos);
    // Closed tuple: must match exactly. Open tuple: must not overshoot.
    let fits = (closed && seen == target_arity) || (!closed && seen <= target_arity);
    if !fits {
        return FALLBACK_VALUE_LIST;
    }
    Node::DynamicSubgrammar(column_value_list)
}
/// Schema-aware value list, arity-gated (issue #17): a correct-count
/// tuple unfolds to a `Seq` of typed slots per column (`int_slot`,
/// `text_slot`, …); a wrong-count tuple or a schemaless walk falls back
/// to the type-blind `Repeated(VALUE_LITERAL, ',', 1)` shape (ADR-0024
/// §Phase D §column_value_list).
const INSERT_VALUES_LIST: Node = Node::Lookahead(dsl_insert_value_list);
/// `values ( <value list> )` as it appears *after* a first paren (the
/// Form A suffix).
///
/// NOTE(review): identical, element for element, to
/// `INSERT_VALUES_KEYWORD_FIRST_NODES` below; the two slices could be
/// one.
const INSERT_OPTIONAL_VALUES_NODES: &[Node] = &[
Node::Word(Word::keyword("values")),
Node::Punct('('),
INSERT_VALUES_LIST,
Node::Punct(')'),
];
/// Optional trailing `values (…)`. It is optional because Form C ends
/// after its single paren, while Form A continues with `values (…)`.
const INSERT_OPTIONAL_VALUES: Node = Node::Optional(&Node::Seq(INSERT_OPTIONAL_VALUES_NODES));
/// Forms A and C: `( <columns or values> ) [values ( <values> )]`.
const INSERT_PAREN_FIRST_NODES: &[Node] = &[
Node::Punct('('),
INSERT_PAREN_LIST,
Node::Punct(')'),
INSERT_OPTIONAL_VALUES,
];
const INSERT_PAREN_FIRST: Node = Node::Seq(INSERT_PAREN_FIRST_NODES);
/// Form B: `values ( <values> )` directly after the table name.
const INSERT_VALUES_KEYWORD_FIRST_NODES: &[Node] = &[
Node::Word(Word::keyword("values")),
Node::Punct('('),
INSERT_VALUES_LIST,
Node::Punct(')'),
];
const INSERT_VALUES_KEYWORD_FIRST: Node = Node::Seq(INSERT_VALUES_KEYWORD_FIRST_NODES);
/// What may follow `insert into <T>`: Form B (starts with `values`) or
/// Forms A/C (start with `(`). The leading tokens differ, so the
/// branches cannot both commit.
const INSERT_AFTER_TABLE_CHOICES: &[Node] =
&[INSERT_VALUES_KEYWORD_FIRST, INSERT_PAREN_FIRST];
const INSERT_AFTER_TABLE: Node = Node::Choice(INSERT_AFTER_TABLE_CHOICES);
/// Tail of `insert` after the entry word: `into <T> <form A | B | C>`.
const INSERT_NODES: &[Node] = &[
Node::Word(Word::keyword("into")),
TABLE_NAME_INSERT,
INSERT_AFTER_TABLE,
];
/// Shape of the `insert` command.
const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES);
// =================================================================
// update — `update <T> set <col>=<v>[, <col>=<v>] (where … | --all-rows)`
// =================================================================
/// Table-name slot that populates `current_table_columns` so
/// the inner `set <col>=<value>` / `where <col>=<value>` slots
/// can resolve column types (Phase D).
///
/// NOTE(review): field-for-field identical to `TABLE_NAME_INSERT`;
/// the two constants differ only in name.
const TABLE_NAME_WRITES: Node = Node::Ident {
source: IdentSource::Tables,
// Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
// finding B) — see `TABLE_NAME_EXISTING`. Shared by `update`,
// `delete`, and `show data`, so all three reject the internal
// metadata tables, matching the SQL grammar.
role: "table_name",
validator: Some(sql_select::reject_internal_table),
highlight_override: None,
// The only flag set: the matched table becomes the walk's current table.
writes_table: true,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// Column-name slot in `set col = …` — resolves the column's
/// type into `current_column` so the value slot dispatches per
/// column type (Phase D).
///
/// Matched names carry the role `"update_set_column"`, which
/// `collect_assignments` uses to find the start of each assignment.
const SET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "update_set_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: true,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// Value slot resolved at walk time from
/// `WalkContext::current_column`. Falls back to the schemaless
/// value-literal choice when no current_column is bound.
const PER_COLUMN_VALUE: Node = Node::DynamicSubgrammar(current_column_value);
/// One assignment: `<col> = <value>`. The column slot comes first so
/// `current_column` is set before the value slot is resolved.
const UPDATE_ASSIGNMENT_NODES: &[Node] = &[
SET_COLUMN,
Node::Punct('='),
PER_COLUMN_VALUE,
];
const UPDATE_ASSIGNMENT: Node = Node::Seq(UPDATE_ASSIGNMENT_NODES);
/// One or more comma-separated assignments (`min: 1`).
const UPDATE_ASSIGNMENTS: Node = Node::Repeated {
inner: &UPDATE_ASSIGNMENT,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// `where <expr>` — the complex WHERE-expression fragment
/// (ADR-0026). The grammar tier is defined once in
/// `grammar::expr` and reached here through `Subgrammar`.
const WHERE_CLAUSE_NODES: &[Node] = &[
Node::Word(Word::keyword("where")),
Node::Subgrammar(&expr::OR_EXPR),
];
/// The `where <expr>` clause as a single node. It is optional on
/// `show data` and one of the two alternatives of `FILTER_CLAUSE`.
const WHERE_CLAUSE: Node = Node::Seq(WHERE_CLAUSE_NODES);
/// The two row filters `update` / `delete` accept: `where <expr>` or
/// the `all-rows` flag (written `--all-rows` in the command syntax).
const FILTER_CHOICES: &[Node] = &[WHERE_CLAUSE, Node::Flag("all-rows")];
/// Row filter of `update` / `delete`. Both shapes include it directly
/// rather than wrapping it in `Optional`, so one of the two
/// alternatives must be present.
const FILTER_CLAUSE: Node = Node::Choice(FILTER_CHOICES);
/// Number-literal validator for the count in `limit <n>` (ADR-0026 §5).
///
/// Accepts `value` when it parses as a `u64`, so fractional and
/// negative literals are rejected at parse time.
///
/// # Errors
///
/// Returns a `parse.custom.bind_type_mismatch` [`ValidationError`]
/// carrying the offending text as `found` and
/// `"non-negative integer"` as `expected`.
fn validate_limit_count(value: &str) -> Result<(), ValidationError> {
    match value.parse::<u64>() {
        Ok(_) => Ok(()),
        Err(_) => Err(ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", value.to_owned()),
                ("expected", String::from("non-negative integer")),
            ],
        }),
    }
}
/// `validate_limit_count` typed as a `NumberValidator`, so it can be
/// stored in the `NumberLit` node below.
const LIMIT_VALIDATOR: NumberValidator = validate_limit_count;
/// `limit <n>` clause, optional on `show data` (ADR-0026 §5).
const LIMIT_CLAUSE_NODES: &[Node] = &[
Node::Word(Word::keyword("limit")),
// The count: a number literal checked by `validate_limit_count`.
Node::NumberLit {
validator: Some(LIMIT_VALIDATOR),
},
];
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
/// Tail of `update` after the entry word:
/// `<T> set <col>=<v>[, …] (where <expr> | --all-rows)`.
/// The same slice backs `EXPLAIN_UPDATE`.
const UPDATE_NODES: &[Node] = &[
TABLE_NAME_WRITES,
Node::Word(Word::keyword("set")),
UPDATE_ASSIGNMENTS,
FILTER_CLAUSE,
];
/// Shape of the `update` command.
const UPDATE_SHAPE: Node = Node::Seq(UPDATE_NODES);
// =================================================================
// delete — `delete from <T> (where … | --all-rows)`
// =================================================================
/// Tail of `delete` after the entry word:
/// `from <T> (where <expr> | --all-rows)`.
/// The same slice backs `EXPLAIN_DELETE`.
const DELETE_NODES: &[Node] = &[
Node::Word(Word::keyword("from")),
TABLE_NAME_WRITES,
FILTER_CLAUSE,
];
/// Shape of the `delete` command.
const DELETE_SHAPE: Node = Node::Seq(DELETE_NODES);
// =================================================================
// explain — `explain (show data … | update … | delete from …)`
// =================================================================
//
// ADR-0028 §1: `explain` is a top-level command whose shape is a
// `Choice` over the three explainable query commands. The inner
// query grammars are *referenced* through `Subgrammar`, not
// duplicated — so an explained command is parsed, completed,
// hinted and highlighted exactly as it is on its own.
//
// `Subgrammar` needs a `&'static Node`; `SHOW_DATA` /
// `UPDATE_SHAPE` / `DELETE_SHAPE` are `const` (and cannot be
// referenced as `&'static`). These three thin `static` wrappers
// over the existing `_NODES` slices give the references without
// any churn to the standalone command shapes. `explain show`
// references `EXPLAIN_SHOW_DATA` directly (not the `show`
// command's `data | table` choice) — `explain` covers `show
// data` only (ADR-0028 §1).
/// `static` twin of `SHOW_DATA`, built over the same `SHOW_DATA_NODES`
/// slice so `Subgrammar` can hold a `&'static` reference to it.
static EXPLAIN_SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
/// `static` twin of `UPDATE_SHAPE` (same `UPDATE_NODES` slice).
static EXPLAIN_UPDATE: Node = Node::Seq(UPDATE_NODES);
/// `static` twin of `DELETE_SHAPE` (same `DELETE_NODES` slice).
static EXPLAIN_DELETE: Node = Node::Seq(DELETE_NODES);
/// `explain show data …` — the `show` keyword, then the `show data`
/// tail only (not the full `show` choice).
const EXPLAIN_SHOW_NODES: &[Node] = &[
Node::Word(Word::keyword("show")),
Node::Subgrammar(&EXPLAIN_SHOW_DATA),
];
/// `explain update …`.
const EXPLAIN_UPDATE_NODES: &[Node] = &[
Node::Word(Word::keyword("update")),
Node::Subgrammar(&EXPLAIN_UPDATE),
];
/// `explain delete from …`.
const EXPLAIN_DELETE_NODES: &[Node] = &[
Node::Word(Word::keyword("delete")),
Node::Subgrammar(&EXPLAIN_DELETE),
];
/// The three explainable DSL commands. Each branch starts with a
/// distinct keyword.
const EXPLAIN_CHOICES: &[Node] = &[
Node::Seq(EXPLAIN_SHOW_NODES),
Node::Seq(EXPLAIN_UPDATE_NODES),
Node::Seq(EXPLAIN_DELETE_NODES),
];
/// Shape of the DSL `explain` command (ADR-0028 §1).
const EXPLAIN_SHAPE: Node = Node::Choice(EXPLAIN_CHOICES);
// --- explain over advanced-mode SQL (ADR-0039) -------------------
//
// The SQL inner mirrors the DSL inner above, but wraps the SQL
// command shapes (the same nodes the standalone `SELECT` / `WITH` /
// `SQL_*` commands use). This shape backs a *second* `explain`
// CommandNode (`EXPLAIN_SQL`, registered `Advanced`); the registry's
// shared-entry-word dispatch tries it first in advanced mode and
// falls back to the `Simple` DSL `EXPLAIN` when a branch can't match
// (e.g. `explain show data …`, or a DSL-only `--all-rows`). `select`
// and `with` are SQL-only, so they only ever resolve here.
/// `explain select …` — the `select` keyword, then the SQL select tail.
const EXPLAIN_SELECT_NODES: &[Node] = &[
Node::Word(Word::keyword("select")),
Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
];
/// `explain with …` — the `with` keyword, then the SQL `WITH` tail.
const EXPLAIN_WITH_NODES: &[Node] = &[
Node::Word(Word::keyword("with")),
Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
];
/// `explain insert …` over the SQL insert shape.
const EXPLAIN_SQL_INSERT_NODES: &[Node] = &[
Node::Word(Word::keyword("insert")),
Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
];
/// `explain update …` over the SQL update shape.
const EXPLAIN_SQL_UPDATE_NODES: &[Node] = &[
Node::Word(Word::keyword("update")),
Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE),
];
/// `explain delete …` over the SQL delete shape.
const EXPLAIN_SQL_DELETE_NODES: &[Node] = &[
Node::Word(Word::keyword("delete")),
Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE),
];
/// The five explainable SQL statements. Each branch starts with a
/// distinct keyword.
const EXPLAIN_SQL_CHOICES: &[Node] = &[
Node::Seq(EXPLAIN_SELECT_NODES),
Node::Seq(EXPLAIN_WITH_NODES),
Node::Seq(EXPLAIN_SQL_INSERT_NODES),
Node::Seq(EXPLAIN_SQL_UPDATE_NODES),
Node::Seq(EXPLAIN_SQL_DELETE_NODES),
];
/// Shape of the advanced-mode `explain` command (ADR-0039).
const EXPLAIN_SQL_SHAPE: Node = Node::Choice(EXPLAIN_SQL_CHOICES);
// =================================================================
// select — SQL `SELECT` (advanced mode; ADR-0030 §6, ADR-0031)
// =================================================================
//
// Phase 1's single-table `SELECT`: a projection, a `FROM` table,
// and optional `WHERE` / `ORDER BY` / `LIMIT`. The projection,
// `WHERE` and `ORDER BY` expression slots reference the SQL
// expression grammar (ADR-0031) through `Subgrammar`, so SQL gets
// the same completion / highlighting / hints as the DSL for free.
//
// Advanced mode only — the walker's mode gate (ADR-0030 §2,
// `grammar::is_advanced_only`) refuses `select` in simple mode
// with the "this is SQL" hint, so this grammar is never reached
// there.
//
// `JOIN`s, `GROUP BY` / `HAVING`, subqueries, `UNION`, CTEs, and
// `OFFSET` are ADR-0030 Phase 2 ("`SELECT` — full"); implicit
// column aliasing (`select a x`) and qualified `t.*` are out of
// Phase 1 (see the inline notes).
// SQL expression slot — `Node::Subgrammar(&sql_expr::SQL_OR_EXPR)`
// is inlined at each use site to avoid a Rust const-evaluation
// cycle through the sql_expr ⇄ sql_select recursion (see the
// matching note in sql_select.rs).
// Phase 1's local `SELECT_*` grammar nodes have been retired in
// favour of `sql_select::SQL_SELECT_TAIL` (ADR-0032 sub-phase
// 2c). The shape definition that `data::SELECT` references now
// lives in the dedicated `sql_select` module — including the
// `reject_internal_table` validator, the `LIMIT` count
// validator, and the projection / FROM / WHERE / ORDER BY
// machinery. The full §1 grammar (JOIN, GROUP BY, HAVING,
// set-ops, qualified refs, subqueries, CTEs) is admitted as a
// natural superset.
// =================================================================
// AST builders
// =================================================================
/// Returns the text of the first matched identifier in `path` whose
/// role equals `role`, or `None` when no identifier carries that role.
/// The returned slice borrows from `path`.
fn ident_text<'a>(path: &'a MatchedPath, role: &str) -> Option<&'a str> {
    path.items
        .iter()
        .find(|item| matches!(&item.kind, MatchedKind::Ident { role: r, .. } if *r == role))
        .map(|item| item.text.as_str())
}
/// Owned-text variant of [`ident_text`] for identifiers the builder
/// cannot proceed without.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` [`ValidationError`] whose `detail`
/// is `missing <role>` when `path` has no identifier with that role.
fn require_ident(path: &MatchedPath, role: &'static str) -> Result<String, ValidationError> {
    match ident_text(path, role) {
        Some(text) => Ok(text.to_owned()),
        None => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", format!("missing {role}"))],
        }),
    }
}
/// Maps a matched value-literal terminal to its [`Value`]:
///
/// - the words `null` / `true` / `false` become `Value::Null` /
///   `Value::Bool(true)` / `Value::Bool(false)`;
/// - a number literal becomes `Value::Number` holding the matched text;
/// - a string literal becomes `Value::Text` holding the matched text.
///
/// Any other item (other words, punctuation, identifiers, …) yields
/// `None`.
///
/// `pub(crate)` so `grammar::ddl` can reuse it when collecting a
/// `default <literal>` column constraint (ADR-0029).
pub(crate) fn item_to_value(item: &MatchedItem) -> Option<Value> {
    let value = match &item.kind {
        MatchedKind::Word(word) => match *word {
            "null" => Value::Null,
            "true" => Value::Bool(true),
            "false" => Value::Bool(false),
            _ => return None,
        },
        MatchedKind::NumberLit => Value::Number(item.text.clone()),
        MatchedKind::StringLit => Value::Text(item.text.clone()),
        _ => return None,
    };
    Some(value)
}
fn build_show(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
let sub = path
.items
.iter()
.filter_map(|i| match &i.kind {
MatchedKind::Word(w) => Some(*w),
_ => None,
})
.nth(1);
match sub {
Some("data") => build_show_data(path, _source),
// `name` is resolved only for the forms that carry one; the
// list-all forms (`tables` / `relationships` / `indexes`)
// have no table argument.
Some("table") => Ok(Command::ShowTable {
name: require_ident(path, "table_name")?,
}),
Some("tables") => Ok(Command::ShowList {
kind: ShowListKind::Tables,
name: None,
}),
Some("relationships") => Ok(Command::ShowList {
kind: ShowListKind::Relationships,
name: None,
}),
Some("indexes") => Ok(Command::ShowList {
kind: ShowListKind::Indexes,
name: None,
}),
// V5a singular per-item detail — carry the named item.
Some("relationship") => Ok(Command::ShowList {
kind: ShowListKind::Relationships,
name: Some(require_ident(path, "relationship_name")?),
}),
Some("index") => Ok(Command::ShowList {
kind: ShowListKind::Indexes,
name: Some(require_ident(path, "index_name")?),
}),
_ => Err(ValidationError {
message_key: "parse.error_wrapper",
args: vec![("detail", "unknown show subcommand".to_string())],
}),
}
}
/// Assembles `Command::ShowData` from a matched path: the table name,
/// the optional `where` filter and the optional `limit`.
///
/// Every lookup is role- or keyword-based rather than positional, so
/// the same builder serves both the standalone `show data` command and
/// the `explain show data …` wrapper, where the entry-word offset
/// shifts.
fn build_show_data(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let name = require_ident(path, "table_name")?;
    let filter = build_show_filter(path)?;
    let limit = build_show_limit(path)?;
    Ok(Command::ShowData {
        name,
        filter,
        limit,
    })
}
/// Extracts the optional `where <expr>` of a `show data`.
///
/// Returns `Ok(None)` when the path has no `where` keyword. Otherwise
/// the expression is built from the terminals strictly after `where`,
/// up to (not including) the `limit` keyword, or to the end of the path
/// when there is no `limit`. Neither keyword collides with an
/// expression keyword, so the slice is exact.
fn build_show_filter(path: &MatchedPath) -> Result<Option<Expr>, ValidationError> {
    let items = &path.items;
    // Index of the first occurrence of a given keyword, if any.
    let keyword_at = |keyword: &str| {
        items
            .iter()
            .position(|item| matches!(&item.kind, MatchedKind::Word(w) if *w == keyword))
    };
    match keyword_at("where") {
        None => Ok(None),
        Some(where_idx) => {
            let end = keyword_at("limit").unwrap_or(items.len());
            Ok(Some(expr::build_expr(&items[where_idx + 1..end])?))
        }
    }
}
/// Extracts the optional `limit <n>` of a `show data`.
///
/// Returns `Ok(None)` when the path has no `limit` keyword; otherwise
/// parses the item right after it as a `u64`. The grammar's
/// `LIMIT_VALIDATOR` has already constrained `<n>` to a non-negative
/// integer, so the error paths are not expected to fire in practice.
///
/// # Errors
///
/// - `parse.error_wrapper` (`missing limit count`) when `limit` is the
///   last item;
/// - `parse.custom.bind_type_mismatch` when the count does not parse as
///   a `u64`.
fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError> {
    // Position the iterator on the first `limit` keyword.
    let mut from_limit = path
        .items
        .iter()
        .skip_while(|item| !matches!(&item.kind, MatchedKind::Word("limit")));
    if from_limit.next().is_none() {
        return Ok(None);
    }
    let Some(count) = from_limit.next() else {
        return Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing limit count".to_string())],
        });
    };
    match count.text.parse::<u64>() {
        Ok(n) => Ok(Some(n)),
        Err(_) => Err(ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", count.text.clone()),
                ("expected", "non-negative integer".to_string()),
            ],
        }),
    }
}
/// Builds `Command::Insert` from a matched `insert into <T> …` path.
///
/// The grammar admits three surface forms, told apart here from the
/// matched terminals:
///
/// - **Form B** — `insert into T values (v, …)`: the item right after
///   the table name is the `values` keyword. No column list.
/// - **Form A** — `insert into T (a, b) values (v, …)`: a first paren of
///   column names (tagged with role `insert_first_item`), then
///   `values (…)`.
/// - **Form C** — `insert into T (v, …)`: a single paren holding the
///   values, with no `values` keyword anywhere.
///
/// # Errors
///
/// - `parse.error_wrapper` for structural gaps (`missing table`,
///   ``missing `(` ``, ``missing `values` keyword``) and for a `values`
///   keyword following a first paren that held no column names;
/// - `parse.custom.insert_form_a_missing_values` when the first paren
///   held column names but no `values (…)` followed.
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    // All structural failures share the same wrapper key.
    let wrapper_error = |detail: &str| ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", detail.to_string())],
    };

    let table = require_ident(path, "table_name")?;

    // Form A is the only paren-first form that contains a `values`
    // keyword, so its presence anywhere separates A from C below.
    let saw_values = path
        .items
        .iter()
        .any(|i| matches!(i.kind, MatchedKind::Word("values")));

    // Everything of interest comes after the table-name match.
    let table_idx = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Ident { role: "table_name", .. }))
        .ok_or_else(|| wrapper_error("missing table"))?;

    // Form B: `values` directly after the table — no column list, and
    // the values come from the single paren-bounded list.
    let is_form_b = matches!(
        path.items.get(table_idx + 1).map(|i| &i.kind),
        Some(MatchedKind::Word("values"))
    );
    if is_form_b {
        let values = collect_values_in_parens(path, table_idx + 1)?;
        return Ok(Command::Insert {
            table,
            columns: None,
            values,
        });
    }

    // Form A or C: both open with a paren whose contents the grammar
    // resolved as either column names or value literals.
    let first_paren_idx = path
        .items
        .iter()
        .enumerate()
        .skip(table_idx + 1)
        .find_map(|(idx, i)| matches!(i.kind, MatchedKind::Punct('(')).then_some(idx))
        .ok_or_else(|| wrapper_error("missing `(`"))?;

    // Column names the user listed in the first paren (empty when the
    // paren held values instead). Both forms need this, so it is
    // collected once.
    let listed_columns: Vec<String> = path
        .items
        .iter()
        .filter_map(|i| match &i.kind {
            MatchedKind::Ident {
                role: "insert_first_item",
                ..
            } => Some(i.text.clone()),
            _ => None,
        })
        .collect();

    if saw_values {
        // Form A: first paren = column names; second paren = values.
        if listed_columns.is_empty() {
            return Err(wrapper_error(
                "expected column names in `insert into T (…)`",
            ));
        }
        // The value run starts at the `(` following the `values` keyword.
        let values_idx = path
            .items
            .iter()
            .enumerate()
            .skip(first_paren_idx)
            .find_map(|(idx, i)| matches!(i.kind, MatchedKind::Word("values")).then_some(idx))
            .ok_or_else(|| wrapper_error("missing `values` keyword"))?;
        let values = collect_values_in_parens(path, values_idx + 1)?;
        Ok(Command::Insert {
            table,
            columns: Some(listed_columns),
            values,
        })
    } else {
        // Form C: the first paren contained the value list.
        //
        // Form-A-without-`values` recovery: `insert into T (col)` walks
        // to a complete match (the first paren accepts column names),
        // but the user almost certainly meant Form A and has not typed
        // the `values (...)` suffix yet. Reject with a ValidationError —
        // the walker classifies validation errors as `at_eof: true`, so
        // the input renderer surfaces this as IncompleteAtEof
        // (mid-typing) rather than dispatching a logically-broken
        // insert with an empty value list.
        if !listed_columns.is_empty() {
            return Err(ValidationError {
                message_key: "parse.custom.insert_form_a_missing_values",
                args: vec![("columns", listed_columns.join(", "))],
            });
        }
        let values = collect_values_in_parens(path, first_paren_idx)?;
        Ok(Command::Insert {
            table,
            columns: None,
            values,
        })
    }
}
/// Collect Value items inside the next `(…)` block at or after
/// `start_idx`. Stops at the matching `)`.
fn collect_values_in_parens(
path: &MatchedPath,
start_idx: usize,
) -> Result<Vec<Value>, ValidationError> {
let mut out = Vec::new();
let mut inside = false;
for item in path.items.iter().skip(start_idx) {
match &item.kind {
MatchedKind::Punct('(') => inside = true,
MatchedKind::Punct(')') if inside => return Ok(out),
_ if inside => {
if let Some(v) = item_to_value(item) {
out.push(v);
}
}
_ => {}
}
}
if out.is_empty() && !inside {
return Err(ValidationError {
message_key: "parse.error_wrapper",
args: vec![("detail", "missing `(`".to_string())],
});
}
Ok(out)
}
/// Builds `Command::Update` from a matched `update …` path: the table
/// name (role `table_name`), the `set` assignments and the row filter.
/// Each part is looked up by role or keyword, so the builder does not
/// depend on where in the path the command starts.
fn build_update(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    Ok(Command::Update {
        table: require_ident(path, "table_name")?,
        assignments: collect_assignments(path)?,
        filter: collect_filter(path)?,
    })
}
/// Collects the `<col> = <value>` pairs of an `update … set` clause, in
/// source order.
///
/// Each assignment starts at an identifier with role
/// `update_set_column`; the scan then skips forward past the next `=`
/// and converts the item immediately after it with [`item_to_value`].
///
/// # Errors
///
/// Returns `parse.error_wrapper` with detail `missing assignment value`
/// when the path ends before a value, or `expected value literal` when
/// the item after `=` is not a value literal.
fn collect_assignments(
    path: &MatchedPath,
) -> Result<Vec<(String, Value)>, ValidationError> {
    let mut items = path.items.iter();
    let mut assignments = Vec::new();
    // `find` consumes the iterator up to and including its match, so
    // each step below resumes where the previous one stopped.
    while let Some(column_item) = items.find(|item| {
        matches!(
            item.kind,
            MatchedKind::Ident {
                role: "update_set_column",
                ..
            }
        )
    }) {
        // Skip through the `=` punct; the item after it is the value.
        let _ = items.find(|item| matches!(item.kind, MatchedKind::Punct('=')));
        let Some(value_item) = items.next() else {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "missing assignment value".to_string())],
            });
        };
        let Some(value) = item_to_value(value_item) else {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "expected value literal".to_string())],
            });
        };
        assignments.push((column_item.text.clone(), value));
    }
    Ok(assignments)
}
/// Derive the row filter of a DSL `update` / `delete`.
///
/// An `all-rows` flag anywhere in the path wins and yields
/// `RowFilter::AllRows`. Otherwise every item after the `where`
/// keyword is handed to `expr::build_expr` — `where` is the last
/// clause of both commands, so the rest of the path is the expression.
///
/// # Errors
/// `parse.error_wrapper` with detail `missing where or --all-rows`
/// when neither is present; expression-building errors propagate.
fn collect_filter(path: &MatchedPath) -> Result<RowFilter, ValidationError> {
    let items = &path.items;
    let wants_all_rows = items
        .iter()
        .any(|i| matches!(i.kind, MatchedKind::Flag("all-rows")));
    if wants_all_rows {
        return Ok(RowFilter::AllRows);
    }
    match items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Word("where")))
    {
        Some(where_at) => Ok(RowFilter::Where(expr::build_expr(
            &items[where_at + 1..],
        )?)),
        None => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing where or --all-rows".to_string())],
        }),
    }
}
/// Build the DSL `Command::Delete` from a matched path: the target
/// table (`table_name` ident) plus the row filter (`where …` or
/// `--all-rows`). The source text is not needed.
fn build_delete(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    // Table first, then filter — the first failing extraction is the
    // error returned.
    Ok(Command::Delete {
        table: require_ident(path, "table_name")?,
        filter: collect_filter(path)?,
    })
}
/// Build the DSL `Command::Explain` wrapper (ADR-0028 §1).
///
/// The matched words read `[explain, show|update|delete, …]`, so the
/// second `Word` in the path names the wrapped command. That command
/// is produced by the very builder it uses standalone
/// (`build_show_data` / `build_update` / `build_delete`); those
/// builders locate their pieces by role, so the extra leading
/// `explain` word does not disturb them.
///
/// # Errors
/// `parse.error_wrapper` with detail `unknown explain target` when the
/// second word is absent or not one of the three targets; errors from
/// the inner builder propagate unchanged.
fn build_explain(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let target = path
        .items
        .iter()
        .filter_map(|i| {
            if let MatchedKind::Word(w) = &i.kind {
                Some(*w)
            } else {
                None
            }
        })
        .nth(1);
    let wrapped = match target {
        Some("show") => build_show_data(path, _source),
        Some("update") => build_update(path, _source),
        Some("delete") => build_delete(path, _source),
        _ => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "unknown explain target".to_string())],
        }),
    }?;
    Ok(Command::Explain {
        query: Box::new(wrapped),
    })
}
/// Build `Command::Explain` over an advanced-mode SQL inner
/// (ADR-0039).
///
/// The second `Word` in the path is the inner entry keyword
/// (`select` / `with` / `insert` / `update` / `delete`); the first is
/// `explain` itself.
///
/// The inner builder is run against the *full* `source`. The spans
/// carried by `path` are byte offsets into that full line, and
/// `build_sql_insert` slices `source` by them (for the row source) and
/// bounds its literal capture by `source.len()` — so handing it a
/// pre-stripped string would shift every offset by the length of the
/// `explain` prefix, yielding a wrong row source, truncated literal
/// rows, or an out-of-range slice.
///
/// Only the verbatim SQL *text* must exclude the prefix (`EXPLAIN QUERY
/// PLAN` runs over the inner statement, not the wrapper), so after
/// building, the command's `sql` field is overwritten with the slice
/// of `source` that starts at the inner keyword.
///
/// # Errors
/// `parse.error_wrapper` with detail `missing explain target` when the
/// path has no second word, or `unknown explain target` when that word
/// is not a supported SQL verb; inner builder errors propagate.
fn build_explain_sql(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    let target_error = |detail: &str| ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", detail.to_string())],
    };
    // Pair each word with its item so the span and the keyword text
    // come from one lookup.
    let (inner_item, inner_word) = path
        .items
        .iter()
        .filter_map(|item| match &item.kind {
            MatchedKind::Word(word) => Some((item, *word)),
            _ => None,
        })
        .nth(1)
        .ok_or_else(|| target_error("missing explain target"))?;
    let inner_sql = source[inner_item.span.0..].trim();
    // Build against the full source so path spans and text agree.
    let mut inner = match inner_word {
        "select" | "with" => build_select(path, source)?,
        "insert" => build_sql_insert(path, source)?,
        "update" => build_sql_update(path, source)?,
        "delete" => build_sql_delete(path, source)?,
        _ => return Err(target_error("unknown explain target")),
    };
    // Strip the `explain` prefix from the carried SQL text only.
    match &mut inner {
        Command::Select { sql, .. }
        | Command::SqlInsert { sql, .. }
        | Command::SqlUpdate { sql, .. }
        | Command::SqlDelete { sql, .. } => *sql = inner_sql.to_string(),
        // The builders called above produce only the four variants
        // handled here.
        _ => {}
    }
    Ok(Command::Explain {
        query: Box::new(inner),
    })
}
// =================================================================
// replay — `replay <bare-path>` | `replay '<path>'`
// =================================================================
//
// Phase E (ADR-0024 §migration). The chumsky-side
// `try_parse_replay_with_bare_path` source-slice helper is
// retired here: walker BarePath consumes the unquoted form
// (terminating at whitespace per the path-bearing UX change),
// and StringLit consumes the quoted form. Paths with spaces
// must use the quoted form — same UX that `import` / `export`
// adopted in Phase A.
/// The two accepted spellings of the `replay` path argument: a quoted
/// string literal or an unquoted bare path.
const REPLAY_PATH_CHOICES: &[Node] = &[Node::StringLit, Node::BarePath];
/// Shape of the `replay` command after its entry word: exactly one of
/// [`REPLAY_PATH_CHOICES`].
const REPLAY_PATH: Node = Node::Choice(REPLAY_PATH_CHOICES);
/// Build `Command::Replay` from the first string-literal or bare-path
/// item in the matched path; that item's text becomes the replay path.
///
/// # Errors
/// `parse.error_wrapper` with detail `missing path` when the path
/// holds neither kind of item.
fn build_replay(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let path_item = path
        .items
        .iter()
        .find(|i| matches!(i.kind, MatchedKind::StringLit | MatchedKind::BarePath));
    match path_item {
        Some(found) => Ok(Command::Replay {
            path: found.text.clone(),
        }),
        None => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing path".to_string())],
        }),
    }
}
/// Build `Command::Select`, which carries the validated SQL text
/// verbatim (ADR-0030 §4/§6, ADR-0031 §2).
///
/// No AST is built for a `SELECT`: the walk has already confirmed the
/// statement is in the supported subset, and the worker runs it as
/// text. `source` is the full submitted line (on a `Match` outcome the
/// `SELECT` shape consumed all of it); it is stored with surrounding
/// whitespace trimmed.
fn build_select(_path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    let sql = source.trim().to_owned();
    Ok(Command::Select { sql })
}
/// Build `Command::SqlInsert` from a validated SQL `INSERT`
/// (ADR-0033 §1).
///
/// `insert` is the real (shared) entry word, so the validated `source`
/// runs verbatim — like `build_select` (sub-phase 3j). Alongside the
/// SQL text the command carries metadata read from the matched path:
///
/// * `target_table` — the `insert_target_table` ident, so the worker
///   re-persists the right CSV (empty if the path has none).
/// * `listed_columns` — the explicit `(col, …)` list in order.
/// * `row_source` — the `VALUES` / `SELECT` / `WITH` clause text
///   without any trailing `ON CONFLICT` / `RETURNING` clause.
/// * `returning` — whether a `RETURNING` clause is present.
/// * `literal_rows` — per-row literal values of a `VALUES` source
///   (ADR-0036 Phase 1); empty for a `SELECT` / `WITH` source.
///
/// `path` spans are treated as byte offsets into `source`. The row
/// source is taken with a checked slice, so a span that does not fit
/// `source` produces an empty row source instead of a panic.
fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    let target_table = path
        .items
        .iter()
        .find_map(|item| match item.kind {
            MatchedKind::Ident {
                role: "insert_target_table",
                ..
            } => Some(item.text.clone()),
            _ => None,
        })
        .unwrap_or_default();
    // The user's explicit `(col, …)` list, in order (empty when the
    // form omits it). Sub-phase 3d reads this to decide which
    // `shortid` columns were left for the worker to auto-fill.
    let listed_columns: Vec<String> = path
        .items
        .iter()
        .filter_map(|item| match item.kind {
            MatchedKind::Ident {
                role: "insert_column",
                ..
            } => Some(item.text.clone()),
            _ => None,
        })
        .collect();
    // The row source is the `VALUES` / `SELECT` / `WITH` clause —
    // from that keyword up to (but not including) any trailing
    // clause: `ON CONFLICT …` (3h) or `RETURNING …` (3g), whichever
    // comes first, else the trailing `;` / end. Boundaries are
    // located by *Word token* in the path (not a text scan), so a
    // string literal like `values ('select')` can't be mistaken for
    // a keyword. Excluding the trailing clauses keeps the row source
    // independently preparable for `shortid` auto-fill (`VALUES …
    // ON CONFLICT …` / `VALUES … RETURNING …` are not valid
    // standalone statements), and the auto-fill rewrite re-appends
    // the trailing tail verbatim (see `do_sql_insert`).
    //
    // `ON CONFLICT`'s `on` is located via the unambiguous `conflict`
    // keyword that immediately follows it — a JOIN's `on` inside a
    // SELECT row source has no following `conflict`, so it is not
    // mistaken for a clause boundary.
    let on_conflict_start = path
        .items
        .windows(2)
        .find(|w| {
            matches!(w[0].kind, MatchedKind::Word("on"))
                && matches!(w[1].kind, MatchedKind::Word("conflict"))
        })
        .map(|w| w[0].span.0);
    let returning_start = path
        .items
        .iter()
        .find(|item| matches!(item.kind, MatchedKind::Word("returning")))
        .map(|item| item.span.0);
    let tail_start = [on_conflict_start, returning_start]
        .into_iter()
        .flatten()
        .min();
    // End of the row-source region: the first trailing clause, else
    // the end of the line.
    let tail_end = tail_start.unwrap_or(source.len());
    // The keyword that opens the row source. Both the text slice and
    // the literal capture below hang off this one item, so they can
    // never disagree about which clause is the row source.
    let row_source_item = path.items.iter().find(|item| {
        matches!(item.kind, MatchedKind::Word("values" | "select" | "with"))
    });
    let row_source = row_source_item
        // Checked slice: `None` (→ empty) if the span is out of range,
        // inverted, or not on a char boundary of `source`.
        .and_then(|item| source.get(item.span.0..tail_end))
        .map(|text| text.trim().trim_end_matches(';').trim().to_string())
        .unwrap_or_default();
    // The entry word is the real `insert` keyword (sub-phase 3j),
    // so the validated line runs verbatim (grammar-as-text,
    // ADR-0030 §4) — no keyword reconstruction.
    let sql = source.trim().to_string();
    // Capture literal values per `VALUES` row for app-level type
    // validation + error enrichment (ADR-0036 Phase 1). Only when the
    // row source itself opens with `values`: a `values` word that
    // appears later, inside a `SELECT` / `WITH` source, is not a list
    // of rows for the target table and must not be captured. Bounded
    // to the row-source region by the same `tail_end` as `row_source`.
    let literal_rows = match row_source_item {
        Some(item) if matches!(item.kind, MatchedKind::Word("values")) => {
            capture_literal_rows(path, item.span.0, tail_end)
        }
        _ => Vec::new(),
    };
    Ok(Command::SqlInsert {
        sql,
        target_table,
        listed_columns,
        row_source,
        returning: path_has_returning(path),
        literal_rows,
    })
}
/// Extract, per `VALUES` tuple, the literal value at each position
/// (ADR-0036 Phase 1).
///
/// A position is `Some(Value)` when it is a bare literal — including a
/// signed number, whose sign is folded into the number — and `None`
/// when it is an expression (`func(x)`, `a+1`, a subquery, a column
/// ref: nothing static to validate). Only tokens the walker already
/// matched are used; nothing is reparsed.
///
/// Tuples are delimited by depth-1 parentheses and positions by
/// depth-1 commas; deeper parens and their contents are part of the
/// current position. Only items whose start offset lies in
/// `[values_start, tail_end)` are considered, where `values_start` is
/// the byte offset of the `values` keyword — so a trailing
/// `ON CONFLICT` / `RETURNING` clause is excluded.
fn capture_literal_rows(
    path: &MatchedPath,
    values_start: usize,
    tail_end: usize,
) -> Vec<Vec<Option<Value>>> {
    let mut captured: Vec<Vec<Option<Value>>> = Vec::new();
    let mut row: Vec<Option<Value>> = Vec::new();
    // Tokens of the position currently being read.
    let mut tokens: Vec<&MatchedItem> = Vec::new();
    let mut nesting: i32 = 0;
    let region = values_start..tail_end;
    let in_region = path
        .items
        .iter()
        .filter(|item| region.contains(&item.span.0));
    for item in in_region {
        match &item.kind {
            // The `values` keyword itself never belongs to a position.
            MatchedKind::Word("values") => {}
            MatchedKind::Punct('(') => {
                nesting += 1;
                if nesting == 1 {
                    // A new tuple opens.
                    row = Vec::new();
                    tokens.clear();
                } else {
                    tokens.push(item);
                }
            }
            MatchedKind::Punct(')') => {
                if nesting == 1 {
                    // The tuple closes: finish its last position and
                    // emit the row.
                    row.push(classify_value_position(&tokens));
                    tokens.clear();
                    captured.push(std::mem::take(&mut row));
                } else if nesting > 1 {
                    tokens.push(item);
                }
                nesting -= 1;
            }
            MatchedKind::Punct(',') if nesting == 1 => {
                // Position separator inside the tuple.
                row.push(classify_value_position(&tokens));
                tokens.clear();
            }
            _ => {
                if nesting >= 1 {
                    tokens.push(item);
                }
            }
        }
    }
    captured
}
/// Decide whether the tokens of one value position form a bare literal
/// (`Some(Value)`) or an expression (`None`) (ADR-0036 §1).
///
/// * One token: whatever `item_to_value` makes of it.
/// * A `-` or `+` followed by a number literal: a number; `-` is kept
///   as a prefix of the text, `+` is dropped.
/// * Anything else (including no tokens at all): an expression.
fn classify_value_position(tokens: &[&MatchedItem]) -> Option<Value> {
    match tokens {
        [single] => item_to_value(single),
        [sign, number] if matches!(number.kind, MatchedKind::NumberLit) => match sign.kind {
            MatchedKind::Punct('-') => Some(Value::Number(format!("-{}", number.text))),
            MatchedKind::Punct('+') => Some(Value::Number(number.text.clone())),
            _ => None,
        },
        _ => None,
    }
}
/// Report whether the matched path holds a `RETURNING` clause
/// (ADR-0033 §5, sub-phase 3g).
///
/// The check looks for a `returning` *Word token* in the path rather
/// than scanning text, so the word inside a string literal is never
/// taken for the keyword (the same approach `build_sql_insert` uses to
/// find its row source).
fn path_has_returning(path: &MatchedPath) -> bool {
    path.items
        .iter()
        .map(|item| &item.kind)
        .any(|kind| matches!(kind, MatchedKind::Word("returning")))
}
/// Build `Command::SqlUpdate` from a validated SQL `UPDATE`
/// (ADR-0033 §2).
///
/// `update` is the real (shared) entry word, so the validated `source`
/// runs verbatim (sub-phase 3j). The target table is read from the
/// matched path so the worker re-persists the right CSV, and the
/// literal right-hand sides of the top-level `SET` assignments are
/// captured for app-level type validation and error enrichment
/// (ADR-0036 Phase 2) — from matched tokens only, without a reparse.
fn build_sql_update(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    // The first `table_name` ident is the UPDATE target; tables named
    // inside a SET / WHERE subquery come after it.
    let target_table = path
        .items
        .iter()
        .find(|item| {
            matches!(
                item.kind,
                MatchedKind::Ident {
                    role: "table_name",
                    ..
                }
            )
        })
        .map(|item| item.text.clone())
        .unwrap_or_default();
    Ok(Command::SqlUpdate {
        sql: source.trim().to_string(),
        target_table,
        returning: path_has_returning(path),
        set_literals: capture_set_literals(path),
    })
}
/// Extract the right-hand side of every top-level `SET col = …`
/// assignment from the matched path (ADR-0036 Phase 2).
///
/// Each assignment yields `(col, Some(Value))` when its RHS is a bare
/// literal (a signed number included) and `(col, None)` when it is an
/// expression (arithmetic, function call, scalar subquery, column ref
/// — nothing static to validate). Only tokens the walker already
/// matched are used; nothing is reparsed.
///
/// Boundaries: an assignment starts at an `update_set_column` ident —
/// a role only emitted for the LHS of a top-level assignment
/// (expression column refs carry `sql_expr_ident` /
/// `sql_expr_qualified_ref`). A depth-0 comma separates assignments,
/// and a depth-0 `where` / `returning` / `;` (or the end of the path)
/// closes the SET list. Parentheses raise the depth, so a comma,
/// `where`, or `=` inside a function call or scalar subquery is never
/// read as a boundary or as the assignment operator.
fn capture_set_literals(path: &MatchedPath) -> Vec<(String, Option<Value>)> {
    // Move the pending assignment (if there is one) into `done` and
    // reset the RHS token buffer.
    fn finish(
        column: &mut Option<String>,
        tokens: &mut Vec<&MatchedItem>,
        done: &mut Vec<(String, Option<Value>)>,
    ) {
        if let Some(name) = column.take() {
            done.push((name, classify_value_position(tokens)));
        }
        tokens.clear();
    }

    let mut captured: Vec<(String, Option<Value>)> = Vec::new();
    // State of the assignment being read: its column, the RHS tokens
    // gathered so far, and whether its `=` has been passed.
    let mut column: Option<String> = None;
    let mut rhs: Vec<&MatchedItem> = Vec::new();
    let mut eq_consumed = false;
    let mut nesting: i32 = 0;

    // Everything up to and including the first `set` keyword is
    // outside the SET list.
    let set_list = path
        .items
        .iter()
        .skip_while(|item| !matches!(item.kind, MatchedKind::Word("set")))
        .skip(1);
    for item in set_list {
        let ends_set_list = matches!(
            item.kind,
            MatchedKind::Word("where" | "returning") | MatchedKind::Punct(';')
        );
        if nesting == 0 && ends_set_list {
            break;
        }
        match &item.kind {
            MatchedKind::Ident {
                role: "update_set_column",
                ..
            } if nesting == 0 => {
                // A new assignment starts; close out the previous one.
                finish(&mut column, &mut rhs, &mut captured);
                column = Some(item.text.clone());
                eq_consumed = false;
            }
            MatchedKind::Punct(',') if nesting == 0 => {
                // Separator: the next `update_set_column` opens the
                // following assignment.
                finish(&mut column, &mut rhs, &mut captured);
            }
            MatchedKind::Punct('=') if nesting == 0 && !eq_consumed && column.is_some() => {
                // The assignment operator is consumed, not recorded.
                eq_consumed = true;
            }
            other => {
                // Parens adjust the depth and, like every remaining
                // token, belong to the RHS once `=` has been passed.
                match other {
                    MatchedKind::Punct('(') => nesting += 1,
                    MatchedKind::Punct(')') => nesting -= 1,
                    _ => {}
                }
                if column.is_some() && eq_consumed {
                    rhs.push(item);
                }
            }
        }
    }
    // The last assignment is closed by `where` / `returning` / `;` or
    // by running out of items.
    finish(&mut column, &mut rhs, &mut captured);
    captured
}
/// Build `Command::SqlDelete` from a validated SQL `DELETE`
/// (ADR-0033 §1/§7). Extracts the target table from the matched
/// path so the worker re-persists the right CSV and snapshots the
/// right inbound children for cascade diffing. No WHERE clause is
/// captured — the worker executes the verbatim SQL and never
/// inspects the predicate (Amendment 2). `delete` is now the real
/// (shared) entry word, so the validated `source` runs verbatim
/// (sub-phase 3j).
fn build_sql_delete(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
// The DELETE target is the first `table_name` ident (it precedes
// any table referenced inside a WHERE subquery).
let target_table = path
.items
.iter()
.find_map(|item| match item.kind {
MatchedKind::Ident {
role: "table_name", ..
} => Some(item.text.clone()),
_ => None,
})
.unwrap_or_default();
let sql = source.trim().to_string();
Ok(Command::SqlDelete {
sql,
target_table,
returning: path_has_returning(path),
})
}
// =================================================================
// CommandNodes
// =================================================================
/// DSL `show` command node: entry word `show`, shape `SHOW_SHAPE`,
/// built by `build_show`. Its usage entries cover the data / table
/// forms, the list forms (tables, relationships, indexes) and the
/// single-item relationship / index forms.
pub static SHOW: CommandNode = CommandNode {
    entry: Word::keyword("show"),
    shape: SHOW_SHAPE,
    ast_builder: build_show,
    help_id: Some("data.show"),
    usage_ids: &[
        "parse.usage.show_data",
        "parse.usage.show_table",
        "parse.usage.show_tables",
        "parse.usage.show_relationships",
        "parse.usage.show_indexes",
        "parse.usage.show_relationship",
        "parse.usage.show_index",
    ],
};
/// DSL `insert` command node: entry word `insert`, shape
/// `INSERT_SHAPE`, built by `build_insert`. Shares its entry word with
/// the SQL [`SQL_INSERT`] node.
pub static INSERT: CommandNode = CommandNode {
    entry: Word::keyword("insert"),
    shape: INSERT_SHAPE,
    ast_builder: build_insert,
    help_id: Some("data.insert"),
    usage_ids: &["parse.usage.insert"],
};
/// DSL `update` command node: entry word `update`, shape
/// `UPDATE_SHAPE`, built by `build_update`. Shares its entry word with
/// the SQL [`SQL_UPDATE`] node.
pub static UPDATE: CommandNode = CommandNode {
    entry: Word::keyword("update"),
    shape: UPDATE_SHAPE,
    ast_builder: build_update,
    help_id: Some("data.update"),
    usage_ids: &["parse.usage.update"],
};
/// DSL `delete` command node: entry word `delete`, shape
/// `DELETE_SHAPE`, built by `build_delete`. Shares its entry word with
/// the SQL [`SQL_DELETE`] node.
pub static DELETE: CommandNode = CommandNode {
    entry: Word::keyword("delete"),
    shape: DELETE_SHAPE,
    ast_builder: build_delete,
    help_id: Some("data.delete"),
    usage_ids: &["parse.usage.delete"],
};
/// `replay` command node: entry word `replay`, followed by a path in
/// either quoted or bare form (`REPLAY_PATH`), built by
/// `build_replay`.
pub static REPLAY: CommandNode = CommandNode {
    entry: Word::keyword("replay"),
    shape: REPLAY_PATH,
    ast_builder: build_replay,
    help_id: Some("data.replay"),
    usage_ids: &["parse.usage.replay"],
};
/// DSL `explain` command node: entry word `explain`, shape
/// `EXPLAIN_SHAPE`, built by `build_explain`. Shares its entry word
/// with the SQL [`EXPLAIN_SQL`] node, which defers to this node's
/// help and usage entries.
pub static EXPLAIN: CommandNode = CommandNode {
    entry: Word::keyword("explain"),
    shape: EXPLAIN_SHAPE,
    ast_builder: build_explain,
    help_id: Some("data.explain"),
    usage_ids: &["parse.usage.explain"],
};
/// `explain` over advanced-mode SQL (ADR-0039).
///
/// This is the `Advanced` node of the shared `explain` entry word and
/// the counterpart of the `Simple` DSL [`EXPLAIN`] node. In advanced
/// mode the dispatcher tries this SQL node first and falls back to the
/// DSL node when no SQL branch matches (`explain show data …`, or a
/// DSL-only `--all-rows`); in simple mode only the DSL node is
/// reachable.
pub static EXPLAIN_SQL: CommandNode = CommandNode {
    entry: Word::keyword("explain"),
    shape: EXPLAIN_SQL_SHAPE,
    ast_builder: build_explain_sql,
    // Deliberately no `help_id` / `usage_ids`: as the `Advanced` half
    // of the shared `explain` word, this node defers to the `Simple`
    // `EXPLAIN` node's help/usage (which now covers the SQL forms
    // too). Same precedent as `SQL_INSERT` / `SQL_UPDATE` /
    // `SQL_DELETE`; otherwise `note_help` would print `explain` twice.
    help_id: None,
    usage_ids: &[],
};
/// SQL `SELECT` (ADR-0030 §6, ADR-0031, ADR-0032).
///
/// Advanced mode only — gated by `grammar::is_advanced_only`. The
/// registry's entry-word dispatch consumes the leading `SELECT`
/// keyword before the shape walks (sub-phase 2c migration), so the
/// shape describes only the post-`SELECT` portion of a top-level
/// statement. `help_id` stays `None` until the `help sql` page lands
/// (ADR-0030 Phase 6).
pub static SELECT: CommandNode = CommandNode {
    entry: Word::keyword("select"),
    shape: Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
    ast_builder: build_select,
    help_id: None,
    usage_ids: &["parse.usage.select"],
};
/// `WITH …` top-level statement (ADR-0032 §4 / sub-phase 2c).
///
/// Advanced mode only. `with` is dispatched separately from `select`
/// so the registry's entry-word dispatch routes each word to its own
/// shape. Both use `build_select` and so produce the same
/// `Command::Select` AST, since execution is grammar-as-text
/// (ADR-0030 §6, ADR-0031 §2).
pub static WITH: CommandNode = CommandNode {
    entry: Word::keyword("with"),
    shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
    ast_builder: build_select,
    help_id: None,
    usage_ids: &["parse.usage.with"],
};
/// SQL `INSERT` — the `Advanced`-category node of the shared
/// `insert` entry word (ADR-0033 §2, Amendment 1, sub-phase 3j).
///
/// `insert` is a shared entry word: this `Advanced` SQL node and
/// the `Simple` DSL [`INSERT`] node both register under `insert`.
/// In Advanced mode the dispatcher (`walker::walk` / `decide`)
/// tries this SQL node first and falls back to the DSL node when
/// the SQL shape does not match; in Simple mode only the DSL node
/// is reachable (Amendment 3 — command identity is the mode-rooted
/// grammar-path outcome).
pub static SQL_INSERT: CommandNode = CommandNode {
    entry: Word::keyword("insert"),
    shape: Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
    ast_builder: build_sql_insert,
    // No help/usage entries of its own: the `Simple` [`INSERT`] node
    // registered under the same entry word carries them.
    help_id: None,
    usage_ids: &[],
};
/// SQL `UPDATE` — the `Advanced` node of the shared `update` word.
///
/// ADR-0033 §2 / Amendment 1, sub-phase 3j. Pairs with the `Simple`
/// DSL [`UPDATE`] node; dispatch is SQL-first / DSL-fallback in
/// Advanced mode, DSL-only in Simple.
pub static SQL_UPDATE: CommandNode = CommandNode {
    entry: Word::keyword("update"),
    shape: Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE),
    ast_builder: build_sql_update,
    // No help/usage entries of its own: the `Simple` [`UPDATE`] node
    // registered under the same entry word carries them.
    help_id: None,
    usage_ids: &[],
};
/// SQL `DELETE` — the `Advanced` node of the shared `delete` word.
///
/// ADR-0033 §2 / Amendment 1, sub-phase 3j. Pairs with the `Simple`
/// DSL [`DELETE`] node; dispatch is SQL-first / DSL-fallback in
/// Advanced mode, DSL-only in Simple. In Advanced mode `delete from t
/// --all-rows` falls back to the DSL node (the SQL shape has no
/// `--all-rows`).
pub static SQL_DELETE: CommandNode = CommandNode {
    entry: Word::keyword("delete"),
    shape: Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE),
    ast_builder: build_sql_delete,
    // No help/usage entries of its own: the `Simple` [`DELETE`] node
    // registered under the same entry word carries them.
    help_id: None,
    usage_ids: &[],
};
// =================================================================
// Tests — `explain` grammar (ADR-0028 §1)
// =================================================================
#[cfg(test)]
mod explain_tests {
    use super::Command;
    use crate::dsl::parser::{parse_command, parse_command_in_mode};
    use crate::mode::Mode;

    /// Strip the `Command::Explain` wrapper and hand back the inner
    /// command; panics on any other command.
    fn unwrap_explain(parsed: Command) -> Command {
        match parsed {
            Command::Explain { query } => *query,
            other => panic!("expected Command::Explain, got {other:?}"),
        }
    }

    /// Parse `input` in **simple** mode and return the command wrapped
    /// by `explain`. These cover the DSL-explain wrapping (ADR-0028);
    /// the advanced-mode SQL wrapping (ADR-0039) goes through
    /// `explain_inner_adv`. The mode is pinned explicitly because
    /// `parse_command` defaults to advanced, where
    /// `explain update`/`delete` now route to the SQL path.
    fn explain_inner(input: &str) -> Command {
        unwrap_explain(parse_command_in_mode(input, Mode::Simple).expect("explain should parse"))
    }

    /// Advanced-mode counterpart of `explain_inner`.
    fn explain_inner_adv(input: &str) -> Command {
        unwrap_explain(
            parse_command_in_mode(input, Mode::Advanced).expect("advanced explain should parse"),
        )
    }

    // ---- ADR-0028: explain over the simple-mode DSL ------------

    #[test]
    fn explain_show_data_wraps_a_show_data() {
        let inner = explain_inner("explain show data Customers");
        assert!(matches!(inner, Command::ShowData { .. }));
    }

    #[test]
    fn explain_show_data_carries_where_and_limit_through() {
        let inner = explain_inner("explain show data Customers where id = 1 limit 5");
        match inner {
            Command::ShowData {
                name,
                filter,
                limit,
            } => {
                assert_eq!(name, "Customers");
                assert!(filter.is_some(), "where clause should survive");
                assert_eq!(limit, Some(5));
            }
            other => panic!("expected ShowData, got {other:?}"),
        }
    }

    #[test]
    fn explain_update_wraps_an_update() {
        let inner = explain_inner("explain update Customers set Name='Bo' where id=1");
        assert!(matches!(inner, Command::Update { .. }));
    }

    #[test]
    fn explain_delete_wraps_a_delete() {
        let inner = explain_inner("explain delete from Customers where id=1");
        assert!(matches!(inner, Command::Delete { .. }));
    }

    #[test]
    fn explain_of_an_incomplete_update_is_a_parse_error() {
        // A bare `update` still needs its `where` / `--all-rows`
        // (ADR-0028 §1: `explain` of an incomplete command is the same
        // parse error the command alone would be). Simple mode is
        // pinned: in advanced mode a where-less SQL UPDATE is valid
        // (ADR-0039).
        let parsed = parse_command_in_mode("explain update Customers set Name='Bo'", Mode::Simple);
        assert!(parsed.is_err());
    }

    #[test]
    fn explain_does_not_cover_show_table() {
        // `explain` covers `show data` only (ADR-0028 §1).
        assert!(parse_command("explain show table Customers").is_err());
    }

    #[test]
    fn bare_explain_is_a_parse_error() {
        assert!(parse_command("explain").is_err());
        assert!(parse_command("explain show").is_err());
    }

    // ---- ADR-0039: explain over advanced-mode SQL --------------

    #[test]
    fn explain_select_wraps_a_select_with_clean_sql() {
        // The carried SQL must NOT include the `explain` prefix
        // (ADR-0039) — `EXPLAIN QUERY PLAN` runs over the inner SQL.
        match explain_inner_adv("explain select * from Customers") {
            Command::Select { sql } => assert_eq!(sql, "select * from Customers"),
            other => panic!("expected Select, got {other:?}"),
        }
    }

    #[test]
    fn explain_with_cte_wraps_a_select() {
        let inner =
            explain_inner_adv("explain with recent as (select * from Orders) select * from recent");
        match inner {
            Command::Select { sql } => {
                assert!(sql.starts_with("with recent"), "clean inner sql: {sql}");
            }
            other => panic!("expected Select, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_insert_wraps_a_sql_insert() {
        match explain_inner_adv("explain insert into Customers values (1, 'Bo')") {
            Command::SqlInsert {
                sql, target_table, ..
            } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "insert into Customers values (1, 'Bo')");
            }
            other => panic!("expected SqlInsert, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_update_wraps_a_sql_update_with_clean_sql() {
        match explain_inner_adv("explain update Customers set Name = 'Bo' where id = 1") {
            Command::SqlUpdate {
                sql, target_table, ..
            } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "update Customers set Name = 'Bo' where id = 1");
            }
            other => panic!("expected SqlUpdate, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_delete_wraps_a_sql_delete() {
        match explain_inner_adv("explain delete from Customers where id = 1") {
            Command::SqlDelete {
                sql, target_table, ..
            } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "delete from Customers where id = 1");
            }
            other => panic!("expected SqlDelete, got {other:?}"),
        }
    }

    #[test]
    fn explain_update_with_all_rows_flag_falls_back_to_dsl_in_advanced() {
        // `--all-rows` is DSL-only; the SQL update shape can't consume
        // it, so the explain inner falls back to the DSL `Update` node
        // — mirroring the top-level shared-word dispatch (ADR-0033).
        let inner = explain_inner_adv("explain update Customers set Name = 'Bo' --all-rows");
        assert!(matches!(inner, Command::Update { .. }));
    }

    #[test]
    fn explain_show_data_still_uses_dsl_in_advanced() {
        // `show data` has no SQL form; advanced `explain show data`
        // falls back to the DSL inner.
        let inner = explain_inner_adv("explain show data Customers");
        assert!(matches!(inner, Command::ShowData { .. }));
    }

    #[test]
    fn explain_select_is_rejected_in_simple_mode() {
        // `select` is advanced-only, so `explain select` has no
        // simple-mode form.
        let parsed = parse_command_in_mode("explain select * from Customers", Mode::Simple);
        assert!(parsed.is_err());
    }

    #[test]
    fn explain_does_not_cover_ddl() {
        // EXPLAIN QUERY PLAN applies to DML/queries only (ADR-0039 out
        // of scope); there is no SQL DDL branch under explain.
        let parsed = parse_command_in_mode("explain create table T (id int)", Mode::Advanced);
        assert!(parsed.is_err());
    }

    #[test]
    fn advanced_explain_completion_offers_the_sql_verbs() {
        // After `explain ` in advanced mode the candidate list is the
        // union across both `explain` CommandNodes: the SQL verbs
        // (select/with/insert/update/delete) plus the DSL `show`
        // (ADR-0039). The shared-entry-word completion already
        // aggregates, so there is no UX gap.
        use crate::completion::candidates_at_cursor_in_mode;
        let schema = crate::completion::SchemaCache::default();
        let input = "explain ";
        let completion = candidates_at_cursor_in_mode(input, input.len(), &schema, Mode::Advanced)
            .expect("explain offers candidates");
        let names: Vec<&str> = completion
            .candidates
            .iter()
            .map(|c| c.text.as_str())
            .collect();
        for verb in ["select", "with", "insert", "update", "delete", "show"] {
            assert!(names.contains(&verb), "expected `{verb}` in {names:?}");
        }
    }
}