//! SQL `INSERT` grammar (ADR-0033 §1, sub-phase 3b). //! //! Grammar-as-text (ADR-0030 §4): the walker validates that the //! `INSERT` is in the supported subset; the worker executes the //! validated SQL text and re-persists the target table's CSV //! (ADR-0030 §11). The shape here is the post-`INSERT` portion — //! the entry-word dispatch consumes the leading `INSERT` keyword //! before this shape walks (mirroring `sql_select::SQL_SELECT_TAIL`). //! //! Scope (3b): single- and multi-row `VALUES`, an optional //! `(column_name_list)`, and the `__rdbms_*` target rejection. //! `INSERT … SELECT` (3c), `shortid` auto-fill (3d), `RETURNING` //! (3g), and `ON CONFLICT … ` UPSERT (3h) land in later //! sub-phases. use crate::completion::TableColumn; use crate::dsl::grammar::shared::{SET_VALUE, count_tuple_values}; use crate::dsl::grammar::sql_expr; use crate::dsl::grammar::sql_select::{ RETURNING_CLAUSE, SQL_SELECT_COMPOUND, WHERE_CLAUSE, reject_internal_table, }; use crate::dsl::grammar::{IdentSource, Node, Word}; use crate::dsl::walker::context::WalkContext; static COMMA: Node = Node::Punct(','); /// The `INSERT` target table. `__rdbms_*` rejected (ADR-0030 §6 / /// ADR-0033 §1). `writes_table` populates `current_table` / /// `current_table_columns` so the optional column list and the /// `VALUES` expressions get column completion against the target. const TARGET_TABLE: Node = Node::Ident { source: IdentSource::Tables, role: "insert_target_table", validator: Some(reject_internal_table), highlight_override: None, writes_table: true, writes_column: false, writes_user_listed_column: false, writes_table_alias: false, writes_cte_name: false, writes_projection_alias: false, }; /// One column name inside the optional `(col1, col2, …)` list. 
///
/// `writes_user_listed_column: true` records the listed columns into
/// `WalkContext::user_listed_columns` so the `VALUES` factory
/// (`sql_value_list`, ADR-0036 Phase 3b) maps each value position to
/// the listed column in the user's order (Form A). `build_sql_insert`
/// still collects `listed_columns` independently from the matched
/// `insert_column` idents, so this flag only adds the live typed-slot
/// mapping — nothing else reads `user_listed_columns` on the SQL path.
static COLUMN_NAME: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "insert_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: true,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// `'(' column (',' column)* ')'` — at least one column name.
static COLUMN_LIST_NODES: &[Node] = &[
    Node::Punct('('),
    Node::Repeated {
        inner: &COLUMN_NAME,
        separator: Some(&COMMA),
        min: 1,
    },
    Node::Punct(')'),
];

/// The optional explicit column list that may follow the target table.
const OPTIONAL_COLUMN_LIST: Node = Node::Optional(&Node::Seq(COLUMN_LIST_NODES));

/// One value expression inside a `VALUES` tuple. Consumes the
/// shared `sql_expr` grammar (ADR-0031), so literals, operators,
/// `CASE`, function calls, etc. are all admitted; the engine
/// evaluates them at execution time. Used as the schemaless / fallback
/// value (see `sql_value_list`).
static VALUE_EXPR: Node = Node::Subgrammar(&sql_expr::SQL_OR_EXPR);

/// The fallback value list — the pre-Phase-3b type-blind
/// `Repeated(sql_expr)`. Used for schemaless walks and (crucially) for
/// any tuple whose value-count does NOT match the target column count,
/// so the post-walk per-tuple arity diagnostic (ADR-0033 §8.1) still
/// sees all the values in the matched path and fires its friendly
/// message — a fixed-length typed `Seq` would instead reject the tuple
/// and suppress that diagnostic.
fn fallback_value_list() -> Node {
    Node::Repeated {
        inner: &VALUE_EXPR,
        separator: Some(&COMMA),
        min: 1,
    }
}

/// The target columns a `VALUES` tuple's positions map onto (ADR-0036
/// Phase 3b). Mirrors `db::do_sql_insert`'s positional rule — NOT the
/// DSL's `column_value_list`:
/// - **Form A** (`user_listed_columns` set, from the `(col, …)`
///   list): the listed columns, in the user's order. An *omitted*
///   `shortid` is auto-filled at execution (the X4 note) and has no
///   `VALUES` position, so it is correctly absent here.
/// - **Form B** (no column list): ALL columns in declaration order,
///   including `serial` / `shortid` — advanced-mode Form B auto-fills
///   *nothing* (`plan_autogen_autofill` returns early on an empty
///   column list), so the user supplies a value for every column.
///
/// Empty when schemaless, the table is unknown, the Form A list is
/// empty, or ANY Form A name fails to resolve against the table
/// (callers fall back to the type-blind list). Dropping only the
/// unresolved names would shift every later value position onto the
/// wrong column's typed slot, so a partially resolved list is treated
/// as unresolved.
fn target_value_columns(ctx: &WalkContext) -> Vec<TableColumn> {
    let Some(table_cols) = ctx.current_table_columns.as_ref() else {
        return Vec::new();
    };
    let Some(listed) = ctx.user_listed_columns.as_ref() else {
        // Form B: every declared column, in declaration order.
        return table_cols.clone();
    };
    // Form A: resolve each listed name case-insensitively. Collecting
    // into `Option<Vec<_>>` yields `None` as soon as one name is
    // unknown, which keeps positions aligned or gives up entirely.
    listed
        .iter()
        .map(|name| {
            table_cols
                .iter()
                .find(|c| c.name.eq_ignore_ascii_case(name))
                .cloned()
        })
        .collect::<Option<Vec<_>>>()
        .unwrap_or_default()
}

// `count_tuple_values` moved to `grammar::shared` (issue #17) so the
// simple-mode DSL insert arity gate can share it; the advanced grammar
// imports it above.

/// Tuple value-list lookahead (ADR-0036 Phase 3b).
Gates the typed /// per-column path on arity so the typed `Seq` is used only where it /// can succeed, leaving wrong-arity tuples to the type-blind path (and /// thus to the per-tuple arity diagnostic, ADR-0033 §8.1, which a /// fixed-length `Seq` would otherwise suppress by rejecting the tuple): /// - a **closed** tuple routes to typed slots only on an *exact* /// match (`count == columns`); /// - an **open** (still-typing) tuple routes to typed slots while /// there is still room (`count <= columns`), so the per-column hint /// shows from the moment `(` is opened through each position. /// /// Returns a small node — the heavy typed `Seq` is built + memoized by /// the `DynamicSubgrammar` — matching `insert_first_paren`'s leak /// discipline. Schemaless / unknown table → type-blind fallback. fn tuple_value_list(ctx: &WalkContext, source: &str, pos: usize) -> Node { let cols = target_value_columns(ctx); let (count, closed) = count_tuple_values(source, pos); let arity_ok = if closed { count == cols.len() } else { count <= cols.len() }; if !cols.is_empty() && arity_ok { Node::DynamicSubgrammar(sql_value_list) } else { fallback_value_list() } } /// Schema-aware typed value list for one correct-arity `VALUES` tuple /// (ADR-0036 Phase 3b). Emits, per target column, a zero-width /// `SetColumn(col)` marker (establishes the active column) followed by /// the shared boundary-aware [`SET_VALUE`] slot — so a lone literal /// routes to the column's typed slot (live hint + numeric-shape /// highlight) and any expression falls through to `sql_expr`. Reached /// only via [`tuple_value_list`] when arity matches and the schema is /// known; the empty-cols guard is defensive. 
fn sql_value_list(ctx: &WalkContext) -> Node {
    let cols = target_value_columns(ctx);
    if cols.is_empty() {
        return fallback_value_list();
    }
    // Per column: an optional leading `,`, the `SetColumn` marker and
    // the value slot — at most three nodes each.
    let mut children: Vec<Node> = Vec::with_capacity(cols.len() * 3);
    for (i, col) in cols.into_iter().enumerate() {
        if i > 0 {
            children.push(Node::Punct(','));
        }
        // `Node::SetColumn` holds a `&'static TableColumn`, so the
        // column is leaked to obtain that lifetime.
        let leaked: &'static TableColumn = Box::leak(Box::new(col));
        children.push(Node::SetColumn(leaked));
        children.push(SET_VALUE);
    }
    Node::Seq(Box::leak(children.into_boxed_slice()))
}

static VALUE_TUPLE_NODES: &[Node] = &[
    Node::Punct('('),
    Node::Lookahead(tuple_value_list),
    Node::Punct(')'),
];

/// `'(' <value_list> ')'` — one row of values. The value list is the
/// arity-gated `tuple_value_list` (ADR-0036 Phase 3b): a correct-arity
/// tuple gets per-column typed slots; a wrong-arity tuple keeps the
/// type-blind `sql_expr` repeat so the §8.1 arity diagnostic fires.
static VALUE_TUPLE: Node = Node::Seq(VALUE_TUPLE_NODES);

static VALUES_CLAUSE_NODES: &[Node] = &[
    Node::Word(Word::keyword("values")),
    Node::Repeated {
        inner: &VALUE_TUPLE,
        separator: Some(&COMMA),
        min: 1,
    },
];

/// `VALUES tuple (',' tuple)*` — single- or multi-row.
const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES);

/// The row source: either a `VALUES` clause or a `SELECT`
/// compound (ADR-0033 §4, sub-phase 3c). `SQL_SELECT_COMPOUND`
/// is itself a Choice that admits a leading `WITH` (ADR-0032
/// §10.3), so `INSERT INTO t WITH x AS (…) SELECT …` parses
/// through this slot for free (R4). The two branches start on
/// disjoint keywords (`values` vs `select`/`with`), so the
/// Choice never ambiguously commits.
static ROW_SOURCE_CHOICES: &[Node] = &[VALUES_CLAUSE, Node::Subgrammar(&SQL_SELECT_COMPOUND)]; const ROW_SOURCE: Node = Node::Choice(ROW_SOURCE_CHOICES); // ================================================================= // ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9, sub-phase 3h) // ================================================================= /// One column in the optional `ON CONFLICT (col, …)` conflict /// target. A DISTINCT role from `insert_column` — the conflict /// target names existing unique-constraint columns, not the /// inserted column list, and `build_sql_insert` collects only /// `insert_column` into `listed_columns` (which drives `shortid` /// auto-fill). Sharing the role would corrupt that set. static CONFLICT_TARGET_COLUMN: Node = Node::Ident { source: IdentSource::Columns, role: "conflict_target_column", validator: None, highlight_override: None, writes_table: false, writes_column: false, writes_user_listed_column: false, writes_table_alias: false, writes_cte_name: false, writes_projection_alias: false, }; static CONFLICT_TARGET_NODES: &[Node] = &[ Node::Punct('('), Node::Repeated { inner: &CONFLICT_TARGET_COLUMN, separator: Some(&COMMA), min: 1, }, Node::Punct(')'), ]; /// Optional `(col, …)` conflict target — which unique constraint /// to react to. Standard SQL allows omitting it (any conflict). const OPTIONAL_CONFLICT_TARGET: Node = Node::Optional(&Node::Seq(CONFLICT_TARGET_NODES)); /// The column on the left of one `DO UPDATE SET col = expr` /// assignment. Mirrors `sql_update`'s `ASSIGN_COLUMN` shape (same /// `update_set_column` role so it gets the same column completion / /// diagnostics against the target table). `writes_column: true` /// resolves the column type into `current_column` so the RHS /// `SET_VALUE` lookahead can dispatch the typed slot for a lone /// literal (ADR-0036 Phase 3a). 
const UPSERT_SET_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "update_set_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: true,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// `column '=' <expr>` — the RHS is the boundary-aware `SET_VALUE`
/// slot (ADR-0036 Phase 3a), shared with `sql_update`: a lone literal
/// routes to the column-typed slot (live hint + highlight) while an
/// expression — `excluded.col`, operators, `CASE`, function calls —
/// falls through to the full `sql_expr` grammar (ADR-0031). `excluded`
/// is the would-have-been-inserted row (ADR-0033 §9); it parses as a
/// qualified ref via `sql_expr` and the engine resolves it.
static UPSERT_ASSIGNMENT_NODES: &[Node] = &[UPSERT_SET_COLUMN, Node::Punct('='), SET_VALUE];

static UPSERT_ASSIGNMENT: Node = Node::Seq(UPSERT_ASSIGNMENT_NODES);

// `const` — used by value in `DO_UPDATE_NODES` (static-vs-const
// rule: a `Node` referenced by value in a `static [...]` must be
// `const`; `inner: &UPSERT_ASSIGNMENT` is fine since that one is
// referenced via `&`).
const UPSERT_ASSIGNMENT_LIST: Node = Node::Repeated {
    inner: &UPSERT_ASSIGNMENT,
    separator: Some(&COMMA),
    min: 1,
};

/// `UPDATE SET assignment (',' assignment)* [ WHERE … ]` — the
/// `DO UPDATE` action, walked after the shared `do` keyword.
static DO_UPDATE_NODES: &[Node] = &[
    Node::Word(Word::keyword("update")),
    Node::Word(Word::keyword("set")),
    UPSERT_ASSIGNMENT_LIST,
    Node::Optional(&WHERE_CLAUSE),
];

/// The action after the shared `do`: `NOTHING | UPDATE SET … [ WHERE
/// … ]`. The `do` keyword is factored OUT of this Choice
/// deliberately. A Choice whose branches *shared* a `do` prefix
/// would break on the walker's `walk_seq`/`walk_choice` interaction
/// (ADR-0033 Amendment 1): a branch matching `do` then failing its
/// *second* token returns a hard `Failed` past idx 0, which stops
/// `walk_choice` from trying the next branch.
With `do` hoisted into /// the enclosing Seq, each branch's FIRST token (`nothing` vs /// `update`) disambiguates, so a non-match of branch 0 is a clean /// `NoMatch` that falls through to branch 1. static DO_ACTION_CHOICES: &[Node] = &[ Node::Word(Word::keyword("nothing")), Node::Seq(DO_UPDATE_NODES), ]; // `const` — used by value in `ON_CONFLICT_CLAUSE_NODES`. const DO_ACTION: Node = Node::Choice(DO_ACTION_CHOICES); static ON_CONFLICT_CLAUSE_NODES: &[Node] = &[ Node::Word(Word::keyword("on")), Node::Word(Word::keyword("conflict")), OPTIONAL_CONFLICT_TARGET, Node::Word(Word::keyword("do")), DO_ACTION, ]; /// `ON CONFLICT [ (col, …) ] DO ( NOTHING | UPDATE SET … )` /// (ADR-0033 §9). Sits between the row source and `RETURNING` in /// `SQL_INSERT_SHAPE`. static ON_CONFLICT_CLAUSE: Node = Node::Seq(ON_CONFLICT_CLAUSE_NODES); static SQL_INSERT_TAIL_NODES: &[Node] = &[ Node::Word(Word::keyword("into")), TARGET_TABLE, OPTIONAL_COLUMN_LIST, ROW_SOURCE, Node::Optional(&ON_CONFLICT_CLAUSE), Node::Optional(&RETURNING_CLAUSE), Node::Optional(&Node::Punct(';')), ]; /// The post-`INSERT` portion of a SQL `INSERT` statement /// (ADR-0033 §1): `INTO [ '(' col_list ')' ] VALUES /// (',' )* [ ';' ]`. /// /// The entry-word dispatch consumes the leading `INSERT` keyword /// before this shape walks, so a `CommandNode` references it as /// its `shape` (sub-phase 3b registers a development entry word; /// sub-phase 3j wires the shared `insert` entry word). pub static SQL_INSERT_SHAPE: Node = Node::Seq(SQL_INSERT_TAIL_NODES); // ================================================================= // Tests — grammar accept/reject for the post-`INSERT` tail. // ================================================================= #[cfg(test)] mod tests { use super::SQL_INSERT_SHAPE; use crate::dsl::walker::context::WalkContext; use crate::dsl::walker::driver::{NodeWalkResult, walk_node}; use crate::dsl::walker::outcome::MatchedPath; /// Walk `input` against the INSERT tail. 
Returns `true` only /// when the walk matches *and* consumes all of `input` /// (trailing whitespace allowed). Schemaless context: the /// shape is structural, so table/column idents match by shape /// and `reject_internal_table` still fires on `__rdbms_*`. fn walks(input: &str) -> bool { let mut ctx = WalkContext::new(); let mut path = MatchedPath::new(); let mut per_byte = Vec::new(); match walk_node( input, 0, &SQL_INSERT_SHAPE, &mut ctx, &mut path, &mut per_byte, ) { NodeWalkResult::Matched { end, .. } => input[end..].trim().is_empty(), _ => false, } } fn good(input: &str) { assert!(walks(input), "{input:?} should be a valid INSERT tail"); } fn bad(input: &str) { assert!( !walks(input), "{input:?} should NOT walk as a complete INSERT tail" ); } #[test] fn single_row_values() { good("into orders values (1, 2.0)"); good("into orders values (1, 'text', true, null)"); good("into orders values (1);"); } #[test] fn multi_row_values() { good("into orders values (1, 'a'), (2, 'b')"); good("into orders values (1), (2), (3)"); good("into orders values (1, 'a'), (2, 'b');"); } #[test] fn explicit_column_list() { good("into orders (id, total) values (1, 2.0)"); good("into orders (id) values (1)"); good("into orders (a, b, c) values (1, 2, 3), (4, 5, 6)"); } #[test] fn value_expressions_admit_sql_expr() { good("into t values (1 + 2)"); good("into t values (case when 1 > 0 then 'y' else 'n' end)"); } #[test] fn returning_tail_admitted() { // 3g: optional RETURNING projection_list tail, on both row // sources. good("into orders values (1, 2.0) returning *"); good("into orders (id, total) values (1, 2.0) returning id"); good("into orders values (1, 'a'), (2, 'b') returning id, total"); good("into archive select * from orders returning *"); good("into orders values (1) returning id as new_id;"); } #[test] fn on_conflict_clause_admitted() { // 3h: ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9). 
good("into t (id, name) values (1, 'x') on conflict (id) do nothing"); good("into t (id, name) values (1, 'x') on conflict do nothing"); good( "into t (id, name) values (1, 'x') on conflict (id) do update set name = excluded.name", ); good( "into t (id, name) values (1, 'x') on conflict (id) do update set name = 'y' where id > 0", ); // Multi-column conflict target + multi-assignment DO UPDATE. good("into t (a, b) values (1, 2) on conflict (a, b) do update set b = excluded.b, a = 9"); // ON CONFLICT composes with RETURNING (order: row source, // ON CONFLICT, RETURNING). good("into t (id) values (1) on conflict (id) do nothing returning *"); good("into t (id) values (1) on conflict (id) do update set id = excluded.id returning id"); } #[test] fn on_conflict_structurally_incomplete_rejected() { // `do` with no action. bad("into t (id) values (1) on conflict (id) do"); // DO UPDATE with no SET. bad("into t (id) values (1) on conflict (id) do update"); // DO UPDATE SET with no assignment. bad("into t (id) values (1) on conflict (id) do update set"); // Bare ON with no CONFLICT. bad("into t (id) values (1) on do nothing"); } #[test] fn internal_target_table_rejected() { bad("into __rdbms_playground_columns values (1)"); bad("into __rdbms_playground_relationships (a) values (1)"); } #[test] fn select_row_source() { // 3c: the row source is a Choice between VALUES and a // SELECT compound (which itself admits a leading WITH). good("into archive select * from orders"); good("into archive select * from orders where created < '2025-01-01'"); good("into archive select * from orders;"); } #[test] fn select_row_source_with_column_list() { good("into target (a, b) select x, y from source"); good("into target (id) select id from source"); } #[test] fn with_prefixed_select_row_source() { // R4 invariant: a WITH-prefixed SELECT row source parses // through SQL_SELECT_COMPOUND's WITH-prefixed branch. 
good("into archive with t as (select * from orders) select * from t"); good( "into summary (id, total) with t as (select * from orders) \ select id, total from t", ); } #[test] fn select_row_source_rejects_internal_from_table() { // DA gate: the SELECT's FROM slot must still reject // `__rdbms_*` tables (Phase-2 gate, not silently dropped on // the DML path). bad("into archive select * from __rdbms_playground_columns"); } #[test] fn incomplete_select_row_source_rejected() { // A bare `select` with no projection is not a complete row // source. bad("into archive select"); bad("into archive select * from"); } #[test] fn structurally_incomplete_or_wrong_rejected() { // Missing VALUES. bad("into orders"); bad("into orders (id, total)"); // Empty value tuple — at least one expression required. bad("into orders values ()"); // Missing INTO. bad("orders values (1)"); // Trailing comma with no following tuple. bad("into orders values (1),"); // Unclosed tuple. bad("into orders values (1, 2"); } }