78ad476d24
When an INSERT's column list omits one or more shortid columns, the worker now fills them. Command::SqlInsert gains listed_columns and row_source, captured in build_sql_insert from the matched path (the row source is located by the first values/select/with Word token, so a string literal like 'select' can't be mistaken for the keyword). do_sql_insert calls plan_shortid_autofill, which — per the user-confirmed Option B — materialises the row source by running it as a query, generates a distinct shortid per row via the existing generate_shortid_batch (deduped against stored values), and reconstructs a parameterised multi-row INSERT over the listed columns plus the omitted shortid columns. Uniform for VALUES and INSERT…SELECT, and handles multiple omitted shortids in one row (each gets its own batch). No explicit list, no omitted shortid, or a zero-row source → execute verbatim (the 3b path). serial stays engine-filled via rowid. history.log keeps the original line, never the rewrite (§11). Tests: VALUES single/multi-row distinct; explicit override honoured; INSERT…SELECT distinct fills; combined serial(engine) + shortid(worker); two shortids (PK + non-PK) both fill; one provided + one omitted; compound-PK shortid member; mixed-case column name (ADR-0009 DA gate); original-source-in-history on the rewrite path. Still behind the dev `sqlinsert` entry word (3j). 1503 green, clippy clean.
1093 lines
39 KiB
Rust
1093 lines
39 KiB
Rust
//! Data command nodes (ADR-0024 §migration Phase D).
//!
//! Five commands at four entry words: `show` (show data /
//! show table), `insert`, `update`, `delete`. The walker route
//! owns these end-to-end. (This summary predates later additions:
//! the file also carries the shapes and builders for `explain`,
//! `replay` and SQL `select`.)
//!
//! Phase D scope deviation note: ADR-0024's Phase D describes
//! "full schema awareness" via `DynamicSubgrammar
//! (column_value_list)` that unfolds typed slots per column. This
//! milestone lands the data commands at functional parity with
//! the existing chumsky parser — value slots accept any
//! literal regardless of column type, with type validation
//! happening at bind time (matching today's behaviour). The
//! `DynamicSubgrammar` machinery and schema-cache plumbing are
//! deferred to a follow-up refinement; the trie shape is
//! ready to consume them when the schema reference flows
//! through `parse_command`.
|
||
|
||
use crate::dsl::command::{Command, Expr, RowFilter};
|
||
use crate::dsl::grammar::{
|
||
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
|
||
shared::{column_value_list, current_column_value},
|
||
sql_insert, sql_select,
|
||
};
|
||
use crate::dsl::walker::context::WalkContext;
|
||
use crate::dsl::value::Value;
|
||
use crate::dsl::walker::outcome::{MatchedItem, MatchedKind, MatchedPath};
|
||
|
||
// =================================================================
|
||
// Building blocks
|
||
// =================================================================
|
||
|
||
const TABLE_NAME_EXISTING: Node = Node::Ident {
|
||
source: IdentSource::Tables,
|
||
role: "table_name",
|
||
validator: None,
|
||
highlight_override: None,
|
||
writes_table: false,
|
||
writes_column: false,
|
||
writes_user_listed_column: false,
|
||
writes_table_alias: false,
|
||
writes_cte_name: false,
|
||
writes_projection_alias: false,
|
||
};
|
||
|
||
/// Table-name slot variant that populates
|
||
/// `WalkContext::current_table_columns` (ADR-0024 §Phase D).
|
||
/// Used by `insert into <T> …` so the inner value list can
|
||
/// dispatch typed slots per column.
|
||
const TABLE_NAME_INSERT: Node = Node::Ident {
|
||
source: IdentSource::Tables,
|
||
role: "table_name",
|
||
validator: None,
|
||
highlight_override: None,
|
||
writes_table: true,
|
||
writes_column: false,
|
||
writes_user_listed_column: false,
|
||
writes_table_alias: false,
|
||
writes_cte_name: false,
|
||
writes_projection_alias: false,
|
||
};
|
||
|
||
// =================================================================
|
||
// show — `show (data|table) <T>`
|
||
// =================================================================
|
||
|
||
const SHOW_DATA_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("data")),
|
||
// `writes_table` so the optional `where` expression's
|
||
// column slots resolve against this table for completion.
|
||
TABLE_NAME_WRITES,
|
||
Node::Optional(&WHERE_CLAUSE),
|
||
Node::Optional(&LIMIT_CLAUSE),
|
||
];
|
||
const SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
|
||
|
||
const SHOW_TABLE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("table")),
|
||
TABLE_NAME_EXISTING,
|
||
];
|
||
const SHOW_TABLE: Node = Node::Seq(SHOW_TABLE_NODES);
|
||
|
||
const SHOW_CHOICES: &[Node] = &[SHOW_DATA, SHOW_TABLE];
|
||
const SHOW_SHAPE: Node = Node::Choice(SHOW_CHOICES);
|
||
|
||
// =================================================================
|
||
// insert — `insert into <T> (<a>,<b>,…) values (<v>,<v>,…)`
|
||
// | `insert into <T> values (<v>,…)`
|
||
// | `insert into <T> (<v>,…)`
|
||
// =================================================================
|
||
//
|
||
// Forms A (with column list) and C (bare value list) both start
|
||
// with `(`. The walker's "first commit wins" Choice semantics
|
||
// can't pick between them after the `(` matches, so the first
|
||
// paren's contents are resolved by a `Node::Lookahead` factory
|
||
// (`insert_first_paren`): it peeks the first token to decide.
|
||
//
|
||
// - First token is a value literal (number / string /
|
||
// null / true / false) → Form C → the typed `column_value_list`
|
||
// (same dispatch contract as Form B — ADR-0024 §Phase D Form-C
|
||
// type-awareness). Form C values are now type-checked at parse
|
||
// time, not only at bind time.
|
||
// - Otherwise (column-name identifier, or an empty paren) →
|
||
// Form A → a repeated column-name list. The idents write
|
||
// `WalkContext::user_listed_columns` so the trailing
|
||
// `values (…)` slots mirror the user's selection.
|
||
|
||
/// Form A's column-name slot. `static` (not `const`) so the
|
||
/// `insert_first_paren` factory can take a `&'static` reference
|
||
/// to it when building the repeated list at walk time.
|
||
static FORM_A_COLUMN: Node = Node::Ident {
|
||
source: IdentSource::Columns,
|
||
role: "insert_first_item",
|
||
validator: None,
|
||
highlight_override: None,
|
||
writes_table: false,
|
||
writes_column: false,
|
||
writes_user_listed_column: true,
|
||
writes_table_alias: false,
|
||
writes_cte_name: false,
|
||
writes_projection_alias: false,
|
||
};
|
||
static INSERT_COMMA: Node = Node::Punct(',');
|
||
|
||
/// First-paren resolver (ADR-0024 §Phase D Form-C type-awareness).
|
||
/// Peeks the first token after `(` to route to Form A's
|
||
/// column-name list or Form C's typed value list.
|
||
fn insert_first_paren(_ctx: &WalkContext, source: &str, pos: usize) -> Node {
|
||
if first_paren_item_is_value_literal(source, pos) {
|
||
// Form C — bare value list. `column_value_list` with no
|
||
// user-listed columns dispatches per non-auto-generated
|
||
// column, exactly as Form B does.
|
||
Node::DynamicSubgrammar(column_value_list)
|
||
} else {
|
||
// Form A (or Form A in progress / empty paren).
|
||
Node::Repeated {
|
||
inner: &FORM_A_COLUMN,
|
||
separator: Some(&INSERT_COMMA),
|
||
min: 1,
|
||
}
|
||
}
|
||
}
|
||
|
||
/// True when the first token after the insert `(` is a
|
||
/// value literal — the signal that the paren is a Form C value
|
||
/// list rather than a Form A column-name list. An empty paren
|
||
/// or an identifier-shaped token (a column name) returns false.
|
||
fn first_paren_item_is_value_literal(source: &str, pos: usize) -> bool {
|
||
use crate::dsl::walker::lex_helpers::{
|
||
consume_ident, consume_number_literal, consume_string_literal,
|
||
skip_whitespace,
|
||
};
|
||
let p = skip_whitespace(source, pos);
|
||
if p >= source.len() {
|
||
return false; // empty paren — treat as Form A
|
||
}
|
||
if consume_string_literal(source, p).is_some() {
|
||
return true;
|
||
}
|
||
if consume_number_literal(source, p).is_some() {
|
||
return true;
|
||
}
|
||
if let Some((s, e)) = consume_ident(source, p) {
|
||
let word = &source[s..e];
|
||
// `null` / `true` / `false` are value literals; any
|
||
// other identifier is a column name (Form A).
|
||
return word.eq_ignore_ascii_case("null")
|
||
|| word.eq_ignore_ascii_case("true")
|
||
|| word.eq_ignore_ascii_case("false");
|
||
}
|
||
false // punctuation (e.g. `)`) — treat as Form A
|
||
}
|
||
|
||
const INSERT_PAREN_LIST: Node = Node::Lookahead(insert_first_paren);
|
||
|
||
/// Schema-aware value list: when the walker has a populated
|
||
/// `current_table_columns`, unfolds to a `Seq` of typed slots
|
||
/// per column (`int_slot`, `text_slot`, …). When schemaless,
|
||
/// falls back to the pre-Phase-D `Repeated(VALUE_LITERAL, ',', 1)`
|
||
/// shape (ADR-0024 §Phase D §column_value_list).
|
||
const INSERT_VALUES_LIST: Node = Node::DynamicSubgrammar(column_value_list);
|
||
|
||
const INSERT_OPTIONAL_VALUES_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("values")),
|
||
Node::Punct('('),
|
||
INSERT_VALUES_LIST,
|
||
Node::Punct(')'),
|
||
];
|
||
const INSERT_OPTIONAL_VALUES: Node = Node::Optional(&Node::Seq(INSERT_OPTIONAL_VALUES_NODES));
|
||
|
||
const INSERT_PAREN_FIRST_NODES: &[Node] = &[
|
||
Node::Punct('('),
|
||
INSERT_PAREN_LIST,
|
||
Node::Punct(')'),
|
||
INSERT_OPTIONAL_VALUES,
|
||
];
|
||
const INSERT_PAREN_FIRST: Node = Node::Seq(INSERT_PAREN_FIRST_NODES);
|
||
|
||
const INSERT_VALUES_KEYWORD_FIRST_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("values")),
|
||
Node::Punct('('),
|
||
INSERT_VALUES_LIST,
|
||
Node::Punct(')'),
|
||
];
|
||
const INSERT_VALUES_KEYWORD_FIRST: Node = Node::Seq(INSERT_VALUES_KEYWORD_FIRST_NODES);
|
||
|
||
const INSERT_AFTER_TABLE_CHOICES: &[Node] =
|
||
&[INSERT_VALUES_KEYWORD_FIRST, INSERT_PAREN_FIRST];
|
||
const INSERT_AFTER_TABLE: Node = Node::Choice(INSERT_AFTER_TABLE_CHOICES);
|
||
|
||
const INSERT_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("into")),
|
||
TABLE_NAME_INSERT,
|
||
INSERT_AFTER_TABLE,
|
||
];
|
||
const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES);
|
||
|
||
// =================================================================
|
||
// update — `update <T> set <col>=<v>[, <col>=<v>] (where … | --all-rows)`
|
||
// =================================================================
|
||
|
||
/// Table-name slot that populates `current_table_columns` so
|
||
/// the inner `set <col>=<value>` / `where <col>=<value>` slots
|
||
/// can resolve column types (Phase D).
|
||
const TABLE_NAME_WRITES: Node = Node::Ident {
|
||
source: IdentSource::Tables,
|
||
role: "table_name",
|
||
validator: None,
|
||
highlight_override: None,
|
||
writes_table: true,
|
||
writes_column: false,
|
||
writes_user_listed_column: false,
|
||
writes_table_alias: false,
|
||
writes_cte_name: false,
|
||
writes_projection_alias: false,
|
||
};
|
||
|
||
/// Column-name slot in `set col = …` — resolves the column's
|
||
/// type into `current_column` so the value slot dispatches per
|
||
/// column type (Phase D).
|
||
const SET_COLUMN: Node = Node::Ident {
|
||
source: IdentSource::Columns,
|
||
role: "update_set_column",
|
||
validator: None,
|
||
highlight_override: None,
|
||
writes_table: false,
|
||
writes_column: true,
|
||
writes_user_listed_column: false,
|
||
writes_table_alias: false,
|
||
writes_cte_name: false,
|
||
writes_projection_alias: false,
|
||
};
|
||
|
||
/// Value slot resolved at walk time from
|
||
/// `WalkContext::current_column`. Falls back to the schemaless
|
||
/// value-literal choice when no current_column is bound.
|
||
const PER_COLUMN_VALUE: Node = Node::DynamicSubgrammar(current_column_value);
|
||
|
||
const UPDATE_ASSIGNMENT_NODES: &[Node] = &[
|
||
SET_COLUMN,
|
||
Node::Punct('='),
|
||
PER_COLUMN_VALUE,
|
||
];
|
||
const UPDATE_ASSIGNMENT: Node = Node::Seq(UPDATE_ASSIGNMENT_NODES);
|
||
const UPDATE_ASSIGNMENTS: Node = Node::Repeated {
|
||
inner: &UPDATE_ASSIGNMENT,
|
||
separator: Some(&Node::Punct(',')),
|
||
min: 1,
|
||
};
|
||
|
||
/// `where <expr>` — the complex WHERE-expression fragment
|
||
/// (ADR-0026). The grammar tier is defined once in
|
||
/// `grammar::expr` and reached here through `Subgrammar`.
|
||
const WHERE_CLAUSE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("where")),
|
||
Node::Subgrammar(&expr::OR_EXPR),
|
||
];
|
||
const WHERE_CLAUSE: Node = Node::Seq(WHERE_CLAUSE_NODES);
|
||
|
||
const FILTER_CHOICES: &[Node] = &[WHERE_CLAUSE, Node::Flag("all-rows")];
|
||
const FILTER_CLAUSE: Node = Node::Choice(FILTER_CHOICES);
|
||
|
||
/// `limit <n>` — `<n>` is a non-negative integer; the
|
||
/// validator rejects fractional / negative literals at parse
|
||
/// time (ADR-0026 §5).
|
||
fn validate_limit_count(value: &str) -> Result<(), ValidationError> {
|
||
if value.parse::<u64>().is_ok() {
|
||
Ok(())
|
||
} else {
|
||
Err(ValidationError {
|
||
message_key: "parse.custom.bind_type_mismatch",
|
||
args: vec![
|
||
("found", value.to_string()),
|
||
("expected", "non-negative integer".to_string()),
|
||
],
|
||
})
|
||
}
|
||
}
|
||
const LIMIT_VALIDATOR: NumberValidator = validate_limit_count;
|
||
|
||
/// `limit <n>` clause, optional on `show data` (ADR-0026 §5).
|
||
const LIMIT_CLAUSE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("limit")),
|
||
Node::NumberLit {
|
||
validator: Some(LIMIT_VALIDATOR),
|
||
},
|
||
];
|
||
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
|
||
|
||
const UPDATE_NODES: &[Node] = &[
|
||
TABLE_NAME_WRITES,
|
||
Node::Word(Word::keyword("set")),
|
||
UPDATE_ASSIGNMENTS,
|
||
FILTER_CLAUSE,
|
||
];
|
||
const UPDATE_SHAPE: Node = Node::Seq(UPDATE_NODES);
|
||
|
||
// =================================================================
|
||
// delete — `delete from <T> (where … | --all-rows)`
|
||
// =================================================================
|
||
|
||
const DELETE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("from")),
|
||
TABLE_NAME_WRITES,
|
||
FILTER_CLAUSE,
|
||
];
|
||
const DELETE_SHAPE: Node = Node::Seq(DELETE_NODES);
|
||
|
||
// =================================================================
|
||
// explain — `explain (show data … | update … | delete from …)`
|
||
// =================================================================
|
||
//
|
||
// ADR-0028 §1: `explain` is a top-level command whose shape is a
|
||
// `Choice` over the three explainable query commands. The inner
|
||
// query grammars are *referenced* through `Subgrammar`, not
|
||
// duplicated — so an explained command is parsed, completed,
|
||
// hinted and highlighted exactly as it is on its own.
|
||
//
|
||
// `Subgrammar` needs a `&'static Node`; `SHOW_DATA` /
|
||
// `UPDATE_SHAPE` / `DELETE_SHAPE` are `const` (and cannot be
|
||
// referenced as `&'static`). These three thin `static` wrappers
|
||
// over the existing `_NODES` slices give the references without
|
||
// any churn to the standalone command shapes. `explain show`
|
||
// references `EXPLAIN_SHOW_DATA` directly (not the `show`
|
||
// command's `data | table` choice) — `explain` covers `show
|
||
// data` only (ADR-0028 §1).
|
||
|
||
static EXPLAIN_SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
|
||
static EXPLAIN_UPDATE: Node = Node::Seq(UPDATE_NODES);
|
||
static EXPLAIN_DELETE: Node = Node::Seq(DELETE_NODES);
|
||
|
||
const EXPLAIN_SHOW_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("show")),
|
||
Node::Subgrammar(&EXPLAIN_SHOW_DATA),
|
||
];
|
||
const EXPLAIN_UPDATE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("update")),
|
||
Node::Subgrammar(&EXPLAIN_UPDATE),
|
||
];
|
||
const EXPLAIN_DELETE_NODES: &[Node] = &[
|
||
Node::Word(Word::keyword("delete")),
|
||
Node::Subgrammar(&EXPLAIN_DELETE),
|
||
];
|
||
const EXPLAIN_CHOICES: &[Node] = &[
|
||
Node::Seq(EXPLAIN_SHOW_NODES),
|
||
Node::Seq(EXPLAIN_UPDATE_NODES),
|
||
Node::Seq(EXPLAIN_DELETE_NODES),
|
||
];
|
||
const EXPLAIN_SHAPE: Node = Node::Choice(EXPLAIN_CHOICES);
|
||
|
||
// =================================================================
|
||
// select — SQL `SELECT` (advanced mode; ADR-0030 §6, ADR-0031)
|
||
// =================================================================
|
||
//
|
||
// Phase 1's single-table `SELECT`: a projection, a `FROM` table,
|
||
// and optional `WHERE` / `ORDER BY` / `LIMIT`. The projection,
|
||
// `WHERE` and `ORDER BY` expression slots reference the SQL
|
||
// expression grammar (ADR-0031) through `Subgrammar`, so SQL gets
|
||
// the same completion / highlighting / hints as the DSL for free.
|
||
//
|
||
// Advanced mode only — the walker's mode gate (ADR-0030 §2,
|
||
// `grammar::is_advanced_only`) refuses `select` in simple mode
|
||
// with the "this is SQL" hint, so this grammar is never reached
|
||
// there.
|
||
//
|
||
// `JOIN`s, `GROUP BY` / `HAVING`, subqueries, `UNION`, CTEs, and
|
||
// `OFFSET` are ADR-0030 Phase 2 ("`SELECT` — full"); implicit
|
||
// column aliasing (`select a x`) and qualified `t.*` are out of
|
||
// Phase 1 (see the inline notes).
|
||
|
||
// SQL expression slot — `Node::Subgrammar(&sql_expr::SQL_OR_EXPR)`
|
||
// is inlined at each use site to avoid a Rust const-evaluation
|
||
// cycle through the sql_expr ⇄ sql_select recursion (see the
|
||
// matching note in sql_select.rs).
|
||
|
||
// Phase 1's local `SELECT_*` grammar nodes have been retired in
|
||
// favour of `sql_select::SQL_SELECT_TAIL` (ADR-0032 sub-phase
|
||
// 2c). The shape definition that `data::SELECT` references now
|
||
// lives in the dedicated `sql_select` module — including the
|
||
// `reject_internal_table` validator, the `LIMIT` count
|
||
// validator, and the projection / FROM / WHERE / ORDER BY
|
||
// machinery. The full §1 grammar (JOIN, GROUP BY, HAVING,
|
||
// set-ops, qualified refs, subqueries, CTEs) is admitted as a
|
||
// natural superset.
|
||
|
||
// =================================================================
|
||
// AST builders
|
||
// =================================================================
|
||
|
||
fn ident_text<'a>(path: &'a MatchedPath, role: &str) -> Option<&'a str> {
|
||
path.items.iter().find_map(|i| match &i.kind {
|
||
MatchedKind::Ident { role: r, .. } if *r == role => Some(i.text.as_str()),
|
||
_ => None,
|
||
})
|
||
}
|
||
|
||
fn require_ident(path: &MatchedPath, role: &'static str) -> Result<String, ValidationError> {
|
||
ident_text(path, role)
|
||
.map(str::to_string)
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", format!("missing {role}"))],
|
||
})
|
||
}
|
||
|
||
/// Convert a `MatchedItem` whose kind is one of the `value_literal`
|
||
/// variants (Word("null"|"true"|"false"), NumberLit, StringLit) to
|
||
/// a `Value`. Returns None for non-value items.
|
||
///
|
||
/// `pub(crate)` so `grammar::ddl` can reuse it when collecting a
|
||
/// `default <literal>` column constraint (ADR-0029).
|
||
pub(crate) fn item_to_value(item: &MatchedItem) -> Option<Value> {
|
||
match &item.kind {
|
||
MatchedKind::Word("null") => Some(Value::Null),
|
||
MatchedKind::Word("true") => Some(Value::Bool(true)),
|
||
MatchedKind::Word("false") => Some(Value::Bool(false)),
|
||
MatchedKind::NumberLit => Some(Value::Number(item.text.clone())),
|
||
MatchedKind::StringLit => Some(Value::Text(item.text.clone())),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn build_show(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let sub = path
|
||
.items
|
||
.iter()
|
||
.filter_map(|i| match &i.kind {
|
||
MatchedKind::Word(w) => Some(*w),
|
||
_ => None,
|
||
})
|
||
.nth(1);
|
||
let name = require_ident(path, "table_name")?;
|
||
match sub {
|
||
Some("data") => build_show_data(path, _source),
|
||
Some("table") => Ok(Command::ShowTable { name }),
|
||
_ => Err(ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "unknown show subcommand".to_string())],
|
||
}),
|
||
}
|
||
}
|
||
|
||
/// Build a `show data` command from a matched path. Role-based
|
||
/// (no positional `nth` lookups), so it serves both the
|
||
/// standalone `show data` entry word and the `explain show
|
||
/// data …` wrapper, where the entry-word offset shifts.
|
||
fn build_show_data(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
Ok(Command::ShowData {
|
||
name: require_ident(path, "table_name")?,
|
||
filter: build_show_filter(path)?,
|
||
limit: build_show_limit(path)?,
|
||
})
|
||
}
|
||
|
||
/// The optional `where <expr>` of a `show data`. The expression
|
||
/// terminals run from just past `Word("where")` to the start of
|
||
/// the `limit` clause (or the end of the path) — neither the
|
||
/// `limit` keyword nor any expression keyword collide, so the
|
||
/// slice is exact.
|
||
fn build_show_filter(path: &MatchedPath) -> Result<Option<Expr>, ValidationError> {
|
||
let Some(where_idx) = path
|
||
.items
|
||
.iter()
|
||
.position(|i| matches!(&i.kind, MatchedKind::Word("where")))
|
||
else {
|
||
return Ok(None);
|
||
};
|
||
let end = path
|
||
.items
|
||
.iter()
|
||
.position(|i| matches!(&i.kind, MatchedKind::Word("limit")))
|
||
.unwrap_or(path.items.len());
|
||
Ok(Some(expr::build_expr(&path.items[where_idx + 1..end])?))
|
||
}
|
||
|
||
/// The optional `limit <n>` of a `show data`. The grammar's
|
||
/// `LIMIT_VALIDATOR` already constrained `<n>` to a
|
||
/// non-negative integer, so the parse here cannot realistically
|
||
/// fail.
|
||
fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError> {
|
||
let Some(limit_idx) = path
|
||
.items
|
||
.iter()
|
||
.position(|i| matches!(&i.kind, MatchedKind::Word("limit")))
|
||
else {
|
||
return Ok(None);
|
||
};
|
||
let count = path
|
||
.items
|
||
.get(limit_idx + 1)
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing limit count".to_string())],
|
||
})?;
|
||
count
|
||
.text
|
||
.parse::<u64>()
|
||
.map(Some)
|
||
.map_err(|_| ValidationError {
|
||
message_key: "parse.custom.bind_type_mismatch",
|
||
args: vec![
|
||
("found", count.text.clone()),
|
||
("expected", "non-negative integer".to_string()),
|
||
],
|
||
})
|
||
}
|
||
|
||
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let table = require_ident(path, "table_name")?;
|
||
|
||
// Locate the second `values` keyword (the first is the
|
||
// command word `insert`'s sibling — but `insert` isn't a
|
||
// matched Word here since it's the entry word and the
|
||
// entry-word push uses the literal "insert"; only later
|
||
// `values` matches as Word("values")).
|
||
//
|
||
// Strategy: walk the path. After the table name:
|
||
// - If we see Word("values") next (Form B), the next
|
||
// parenthesized values are the value list.
|
||
// - If we see Punct('('), the first paren's content was
|
||
// either column names (Form A) or values (Form C).
|
||
// If a Word("values") follows the closing paren, it's
|
||
// Form A.
|
||
//
|
||
// Easier discriminator: collect all matched keyword words;
|
||
// count occurrences of "values".
|
||
let saw_values = path
|
||
.items
|
||
.iter()
|
||
.any(|i| matches!(i.kind, MatchedKind::Word("values")));
|
||
|
||
// Find the index of the table_name match — the first paren
|
||
// afterwards starts the parsed list.
|
||
let table_idx = path
|
||
.items
|
||
.iter()
|
||
.position(|i| matches!(&i.kind, MatchedKind::Ident { role: "table_name", .. }))
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing table".to_string())],
|
||
})?;
|
||
|
||
// Form B (values keyword right after table): no column list,
|
||
// values come from the single paren-bounded list.
|
||
let first_token_after_table = path.items.get(table_idx + 1);
|
||
let form_b = matches!(
|
||
first_token_after_table.map(|i| &i.kind),
|
||
Some(MatchedKind::Word("values"))
|
||
);
|
||
|
||
if form_b {
|
||
// Form B: the only value run is between the only `(` … `)`.
|
||
let values = collect_values_in_parens(path, table_idx + 1)?;
|
||
return Ok(Command::Insert {
|
||
table,
|
||
columns: None,
|
||
values,
|
||
});
|
||
}
|
||
|
||
// Form A or C: the first paren after the table is a Choice
|
||
// of either column-idents or value-literals.
|
||
let first_paren_idx = path
|
||
.items
|
||
.iter()
|
||
.enumerate()
|
||
.skip(table_idx + 1)
|
||
.find(|(_, i)| matches!(i.kind, MatchedKind::Punct('(')))
|
||
.map(|(idx, _)| idx)
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing `(`".to_string())],
|
||
})?;
|
||
|
||
if saw_values {
|
||
// Form A: first paren = column names; second paren = values.
|
||
// The Repeated inside the first paren tagged matched idents
|
||
// with role "insert_first_item".
|
||
let columns: Vec<String> = path
|
||
.items
|
||
.iter()
|
||
.filter_map(|i| match &i.kind {
|
||
MatchedKind::Ident {
|
||
role: "insert_first_item",
|
||
..
|
||
} => Some(i.text.clone()),
|
||
_ => None,
|
||
})
|
||
.collect();
|
||
if columns.is_empty() {
|
||
return Err(ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "expected column names in `insert into T (…)`".to_string())],
|
||
});
|
||
}
|
||
// Find the `values` keyword and the next `(` — the values
|
||
// run starts after that `(`.
|
||
let values_idx = path
|
||
.items
|
||
.iter()
|
||
.enumerate()
|
||
.skip(first_paren_idx)
|
||
.find(|(_, i)| matches!(i.kind, MatchedKind::Word("values")))
|
||
.map(|(i, _)| i)
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing `values` keyword".to_string())],
|
||
})?;
|
||
let values = collect_values_in_parens(path, values_idx + 1)?;
|
||
Ok(Command::Insert {
|
||
table,
|
||
columns: Some(columns),
|
||
values,
|
||
})
|
||
} else {
|
||
// Form C: the first paren contained the value list. The
|
||
// Repeated tagged the matched values via their natural
|
||
// MatchedKind (Word/NumberLit/StringLit); collect them.
|
||
//
|
||
// Form-A-without-`values` recovery: the shared
|
||
// INSERT_PAREN_ITEM choice accepts both VALUE_LITERAL
|
||
// and Ident{Columns} so that Form A can resolve
|
||
// column-name items inside its `( cols )` list. When the
|
||
// user types `insert into T (col)` (column-shaped item,
|
||
// no `values` keyword), the grammar walks to a complete
|
||
// match but the user almost certainly meant Form A and
|
||
// forgot the `values (...)` suffix. Reject here with a
|
||
// ValidationError — the walker classifies validation
|
||
// errors as `at_eof: true`, so the input renderer
|
||
// surfaces this as IncompleteAtEof (mid-typing) rather
|
||
// than dispatching a logically-broken Form C insert with
|
||
// an empty value list.
|
||
let user_listed_columns: Vec<String> = path
|
||
.items
|
||
.iter()
|
||
.filter_map(|i| match &i.kind {
|
||
MatchedKind::Ident {
|
||
role: "insert_first_item",
|
||
..
|
||
} => Some(i.text.clone()),
|
||
_ => None,
|
||
})
|
||
.collect();
|
||
if !user_listed_columns.is_empty() {
|
||
return Err(ValidationError {
|
||
message_key: "parse.custom.insert_form_a_missing_values",
|
||
args: vec![("columns", user_listed_columns.join(", "))],
|
||
});
|
||
}
|
||
let values = collect_values_in_parens(path, first_paren_idx)?;
|
||
Ok(Command::Insert {
|
||
table,
|
||
columns: None,
|
||
values,
|
||
})
|
||
}
|
||
}
|
||
|
||
/// Collect Value items inside the next `(…)` block at or after
|
||
/// `start_idx`. Stops at the matching `)`.
|
||
fn collect_values_in_parens(
|
||
path: &MatchedPath,
|
||
start_idx: usize,
|
||
) -> Result<Vec<Value>, ValidationError> {
|
||
let mut out = Vec::new();
|
||
let mut inside = false;
|
||
for item in path.items.iter().skip(start_idx) {
|
||
match &item.kind {
|
||
MatchedKind::Punct('(') => inside = true,
|
||
MatchedKind::Punct(')') if inside => return Ok(out),
|
||
_ if inside => {
|
||
if let Some(v) = item_to_value(item) {
|
||
out.push(v);
|
||
}
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
if out.is_empty() && !inside {
|
||
return Err(ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing `(`".to_string())],
|
||
});
|
||
}
|
||
Ok(out)
|
||
}
|
||
|
||
fn build_update(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let table = require_ident(path, "table_name")?;
|
||
let assignments = collect_assignments(path)?;
|
||
let filter = collect_filter(path)?;
|
||
Ok(Command::Update {
|
||
table,
|
||
assignments,
|
||
filter,
|
||
})
|
||
}
|
||
|
||
fn collect_assignments(
|
||
path: &MatchedPath,
|
||
) -> Result<Vec<(String, Value)>, ValidationError> {
|
||
let mut out = Vec::new();
|
||
let mut iter = path.items.iter();
|
||
while let Some(item) = iter.next() {
|
||
if matches!(
|
||
item.kind,
|
||
MatchedKind::Ident {
|
||
role: "update_set_column",
|
||
..
|
||
}
|
||
) {
|
||
let column = item.text.clone();
|
||
// Skip the `=` punct.
|
||
for next in iter.by_ref() {
|
||
if matches!(next.kind, MatchedKind::Punct('=')) {
|
||
break;
|
||
}
|
||
}
|
||
// Next item is the value.
|
||
let value_item = iter.next().ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing assignment value".to_string())],
|
||
})?;
|
||
let value = item_to_value(value_item).ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "expected value literal".to_string())],
|
||
})?;
|
||
out.push((column, value));
|
||
}
|
||
}
|
||
Ok(out)
|
||
}
|
||
|
||
fn collect_filter(path: &MatchedPath) -> Result<RowFilter, ValidationError> {
|
||
if path
|
||
.items
|
||
.iter()
|
||
.any(|i| matches!(i.kind, MatchedKind::Flag("all-rows")))
|
||
{
|
||
return Ok(RowFilter::AllRows);
|
||
}
|
||
let where_idx = path
|
||
.items
|
||
.iter()
|
||
.position(|i| matches!(&i.kind, MatchedKind::Word("where")))
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing where or --all-rows".to_string())],
|
||
})?;
|
||
// `where` is the last clause of update / delete, so every
|
||
// terminal after it belongs to the expression.
|
||
Ok(RowFilter::Where(expr::build_expr(
|
||
&path.items[where_idx + 1..],
|
||
)?))
|
||
}
|
||
|
||
fn build_delete(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let table = require_ident(path, "table_name")?;
|
||
let filter = collect_filter(path)?;
|
||
Ok(Command::Delete { table, filter })
|
||
}
|
||
|
||
/// Build `Command::Explain` (ADR-0028 §1). The matched-word
|
||
/// sequence is `[explain, show|update|delete, …]` — the entry
|
||
/// word `explain` is at index 0, the inner command's lead word
|
||
/// at index 1. The inner command is built by the same builder
|
||
/// it uses standalone (`build_show_data` / `build_update` /
|
||
/// `build_delete`), all of which are role-based and so are
|
||
/// indifferent to the entry-word offset the `explain` prefix
|
||
/// introduces.
|
||
fn build_explain(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let inner_word = path
|
||
.items
|
||
.iter()
|
||
.filter_map(|i| match &i.kind {
|
||
MatchedKind::Word(w) => Some(*w),
|
||
_ => None,
|
||
})
|
||
.nth(1);
|
||
let inner = match inner_word {
|
||
Some("show") => build_show_data(path, _source)?,
|
||
Some("update") => build_update(path, _source)?,
|
||
Some("delete") => build_delete(path, _source)?,
|
||
_ => {
|
||
return Err(ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "unknown explain target".to_string())],
|
||
});
|
||
}
|
||
};
|
||
Ok(Command::Explain {
|
||
query: Box::new(inner),
|
||
})
|
||
}
|
||
|
||
// =================================================================
|
||
// replay — `replay <bare-path>` | `replay '<path>'`
|
||
// =================================================================
|
||
//
|
||
// Phase E (ADR-0024 §migration). The chumsky-side
|
||
// `try_parse_replay_with_bare_path` source-slice helper is
|
||
// retired here: walker BarePath consumes the unquoted form
|
||
// (terminating at whitespace per the path-bearing UX change),
|
||
// and StringLit consumes the quoted form. Paths with spaces
|
||
// must use the quoted form — same UX that `import` / `export`
|
||
// adopted in Phase A.
|
||
|
||
const REPLAY_PATH_CHOICES: &[Node] = &[Node::StringLit, Node::BarePath];
|
||
const REPLAY_PATH: Node = Node::Choice(REPLAY_PATH_CHOICES);
|
||
|
||
fn build_replay(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||
let payload = path
|
||
.items
|
||
.iter()
|
||
.find_map(|i| match &i.kind {
|
||
MatchedKind::StringLit | MatchedKind::BarePath => Some(i.text.clone()),
|
||
_ => None,
|
||
})
|
||
.ok_or_else(|| ValidationError {
|
||
message_key: "parse.error_wrapper",
|
||
args: vec![("detail", "missing path".to_string())],
|
||
})?;
|
||
Ok(Command::Replay { path: payload })
|
||
}
|
||
|
||
/// `Command::Select` carries the validated SQL text verbatim
|
||
/// (ADR-0030 §4/§6, ADR-0031 §2): a `SELECT` builds no AST — the
|
||
/// walk has confirmed it is in the supported subset, and the
|
||
/// worker runs the statement as text. `source` is the full
|
||
/// submitted line; on a `Match` outcome the `SELECT` shape
|
||
/// consumed all of it.
|
||
fn build_select(_path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
|
||
Ok(Command::Select {
|
||
sql: source.trim().to_string(),
|
||
})
|
||
}
|
||
|
||
/// Build `Command::SqlInsert` from a validated SQL `INSERT`
|
||
/// (ADR-0033 §1, sub-phase 3b). Extracts the target table from
|
||
/// the matched path so the worker re-persists the right CSV.
|
||
///
|
||
/// Dev-scaffold detail: the entry word is `sqlinsert` (not valid
|
||
/// SQL), so the statement is reconstructed as `insert` + the
|
||
/// matched tail. Sub-phase 3j wires the real `insert` entry word,
|
||
/// at which point this collapses to `source.trim()` like
|
||
/// `build_select`.
|
||
fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
|
||
let target_table = path
|
||
.items
|
||
.iter()
|
||
.find_map(|item| match item.kind {
|
||
MatchedKind::Ident {
|
||
role: "insert_target_table",
|
||
..
|
||
} => Some(item.text.clone()),
|
||
_ => None,
|
||
})
|
||
.unwrap_or_default();
|
||
// The user's explicit `(col, …)` list, in order (empty when the
|
||
// form omits it). Sub-phase 3d reads this to decide which
|
||
// `shortid` columns were left for the worker to auto-fill.
|
||
let listed_columns: Vec<String> = path
|
||
.items
|
||
.iter()
|
||
.filter_map(|item| match item.kind {
|
||
MatchedKind::Ident {
|
||
role: "insert_column",
|
||
..
|
||
} => Some(item.text.clone()),
|
||
_ => None,
|
||
})
|
||
.collect();
|
||
// The row source is everything from the `VALUES` / `SELECT` /
|
||
// `WITH` keyword onward. Located by the first matching *Word
|
||
// token* in the path (not a text scan), so a string literal
|
||
// like `values ('select')` can't be mistaken for the keyword.
|
||
let row_source = path
|
||
.items
|
||
.iter()
|
||
.find(|item| {
|
||
matches!(item.kind, MatchedKind::Word("values" | "select" | "with"))
|
||
})
|
||
.map(|item| {
|
||
source[item.span.0..]
|
||
.trim()
|
||
.trim_end_matches(';')
|
||
.trim()
|
||
.to_string()
|
||
})
|
||
.unwrap_or_default();
|
||
// Everything after the entry word is the `INTO …` tail; prefix
|
||
// the real `insert` keyword for the engine.
|
||
let tail = path
|
||
.items
|
||
.first()
|
||
.map_or(source, |entry| &source[entry.span.1..]);
|
||
let sql = format!("insert {}", tail.trim());
|
||
Ok(Command::SqlInsert {
|
||
sql,
|
||
target_table,
|
||
listed_columns,
|
||
row_source,
|
||
})
|
||
}
|
||
|
||
// =================================================================
|
||
// CommandNodes
|
||
// =================================================================
|
||
|
||
pub static SHOW: CommandNode = CommandNode {
|
||
entry: Word::keyword("show"),
|
||
shape: SHOW_SHAPE,
|
||
ast_builder: build_show,
|
||
help_id: Some("data.show"),
|
||
usage_ids: &["parse.usage.show_data", "parse.usage.show_table"],};
|
||
|
||
pub static INSERT: CommandNode = CommandNode {
|
||
entry: Word::keyword("insert"),
|
||
shape: INSERT_SHAPE,
|
||
ast_builder: build_insert,
|
||
help_id: Some("data.insert"),
|
||
usage_ids: &["parse.usage.insert"],};
|
||
|
||
pub static UPDATE: CommandNode = CommandNode {
|
||
entry: Word::keyword("update"),
|
||
shape: UPDATE_SHAPE,
|
||
ast_builder: build_update,
|
||
help_id: Some("data.update"),
|
||
usage_ids: &["parse.usage.update"],};
|
||
|
||
pub static DELETE: CommandNode = CommandNode {
|
||
entry: Word::keyword("delete"),
|
||
shape: DELETE_SHAPE,
|
||
ast_builder: build_delete,
|
||
help_id: Some("data.delete"),
|
||
usage_ids: &["parse.usage.delete"],};
|
||
|
||
pub static REPLAY: CommandNode = CommandNode {
|
||
entry: Word::keyword("replay"),
|
||
shape: REPLAY_PATH,
|
||
ast_builder: build_replay,
|
||
help_id: Some("data.replay"),
|
||
usage_ids: &["parse.usage.replay"],};
|
||
|
||
pub static EXPLAIN: CommandNode = CommandNode {
|
||
entry: Word::keyword("explain"),
|
||
shape: EXPLAIN_SHAPE,
|
||
ast_builder: build_explain,
|
||
help_id: Some("data.explain"),
|
||
usage_ids: &["parse.usage.explain"],};
|
||
|
||
/// SQL `SELECT` (ADR-0030 §6, ADR-0031, ADR-0032).
|
||
///
|
||
/// Advanced mode only — gated by `grammar::is_advanced_only`.
|
||
/// The shape is the post-`SELECT` portion of a top-level
|
||
/// statement; the registry's entry-word dispatch consumes the
|
||
/// leading `SELECT` keyword before the shape walks (sub-phase
|
||
/// 2c migration). `help_id` is `None` until the `help sql`
|
||
/// page lands (ADR-0030 Phase 6).
|
||
pub static SELECT: CommandNode = CommandNode {
|
||
entry: Word::keyword("select"),
|
||
shape: Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
|
||
ast_builder: build_select,
|
||
help_id: None,
|
||
usage_ids: &["parse.usage.select"],};
|
||
|
||
/// `WITH …` top-level statement (ADR-0032 §4 / sub-phase 2c).
|
||
///
|
||
/// Advanced mode only. Dispatched separately from `SELECT` so
|
||
/// the registry's entry-word dispatch routes `with` and
|
||
/// `select` to the right shapes; both reach the same
|
||
/// `Command::Select` AST since execution is grammar-as-text
|
||
/// (ADR-0030 §6, ADR-0031 §2).
|
||
pub static WITH: CommandNode = CommandNode {
|
||
entry: Word::keyword("with"),
|
||
shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
|
||
ast_builder: build_select,
|
||
help_id: None,
|
||
usage_ids: &["parse.usage.select"],};
|
||
|
||
/// SQL `INSERT` development scaffold (ADR-0033 sub-phase 3b–3i).
|
||
///
|
||
/// Registered under the temporary entry word `sqlinsert` so the
|
||
/// SQL INSERT grammar and execution path can be exercised in
|
||
/// isolation, WITHOUT yet making `insert` a shared DSL/SQL entry
|
||
/// word. Sharing `insert` is sub-phase 3j, which depends on
|
||
/// `shortid` auto-fill (3d) so advanced-mode DSL inserts keep
|
||
/// parity rather than regressing through an incomplete SQL path.
|
||
/// This scaffold (entry word + reconstruction in `build_sql_insert`)
|
||
/// is removed when 3j wires the real `insert` entry word.
|
||
pub static SQL_INSERT: CommandNode = CommandNode {
|
||
entry: Word::keyword("sqlinsert"),
|
||
shape: Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
|
||
ast_builder: build_sql_insert,
|
||
help_id: None,
|
||
usage_ids: &[],
|
||
};
|
||
|
||
// =================================================================
|
||
// Tests — `explain` grammar (ADR-0028 §1)
|
||
// =================================================================
|
||
|
||
#[cfg(test)]
mod explain_tests {
    use super::Command;
    use crate::dsl::parser::parse_command;

    /// Parse `input`, require the result to be a
    /// `Command::Explain`, and hand back the command it wraps.
    fn explain_inner(input: &str) -> Command {
        let parsed = parse_command(input).expect("explain should parse");
        match parsed {
            Command::Explain { query } => *query,
            other => panic!("expected Command::Explain, got {other:?}"),
        }
    }

    #[test]
    fn explain_show_data_wraps_a_show_data() {
        let inner = explain_inner("explain show data Customers");
        assert!(matches!(inner, Command::ShowData { .. }));
    }

    #[test]
    fn explain_show_data_carries_where_and_limit_through() {
        let inner = explain_inner("explain show data Customers where id = 1 limit 5");
        match inner {
            Command::ShowData {
                name,
                filter,
                limit,
            } => {
                assert_eq!(name, "Customers");
                assert!(filter.is_some(), "where clause should survive");
                assert_eq!(limit, Some(5));
            }
            other => panic!("expected ShowData, got {other:?}"),
        }
    }

    #[test]
    fn explain_update_wraps_an_update() {
        let inner = explain_inner("explain update Customers set Name='Bo' where id=1");
        assert!(matches!(inner, Command::Update { .. }));
    }

    #[test]
    fn explain_delete_wraps_a_delete() {
        let inner = explain_inner("explain delete from Customers where id=1");
        assert!(matches!(inner, Command::Delete { .. }));
    }

    #[test]
    fn explain_of_an_incomplete_update_is_a_parse_error() {
        // ADR-0028 §1: `explain` of an incomplete command fails
        // exactly as the command would on its own — and a bare
        // `update` still needs its `where` / `--all-rows`.
        let outcome = parse_command("explain update Customers set Name='Bo'");
        assert!(outcome.is_err());
    }

    #[test]
    fn explain_does_not_cover_show_table() {
        // Only `show data` is explainable (ADR-0028 §1).
        let outcome = parse_command("explain show table Customers");
        assert!(outcome.is_err());
    }

    #[test]
    fn bare_explain_is_a_parse_error() {
        // Neither the lone entry word nor a dangling `show` is a
        // complete command.
        for input in ["explain", "explain show"] {
            assert!(parse_command(input).is_err());
        }
    }
}
|