Files
rdbms-playground/src/dsl/grammar/sql_insert.rs
T
claude@clouddev1 41b7e9a049 style: format the whole tree with cargo fmt (stock defaults, #35)
One-time, mechanical reformat — no functional changes. The tree was not
rustfmt-clean (~1800 hunks across ~100 files); this brings it to stock
`cargo fmt` defaults so a `cargo fmt --check` CI gate can follow.
Behaviour-preserving: 2509 pass / 0 fail / 1 ignored (unchanged baseline),
clippy clean. A .git-blame-ignore-revs entry follows so `git blame`
skips this commit.
2026-06-17 21:39:19 +00:00

529 lines
20 KiB
Rust

//! SQL `INSERT` grammar (ADR-0033 §1, sub-phase 3b).
//!
//! Grammar-as-text (ADR-0030 §4): the walker validates that the
//! `INSERT` is in the supported subset; the worker executes the
//! validated SQL text and re-persists the target table's CSV
//! (ADR-0030 §11). The shape here is the post-`INSERT` portion —
//! the entry-word dispatch consumes the leading `INSERT` keyword
//! before this shape walks (mirroring `sql_select::SQL_SELECT_TAIL`).
//!
//! Scope: single- and multi-row `VALUES`, an optional
//! `(column_name_list)`, and the `__rdbms_*` target rejection (3b);
//! an `INSERT … SELECT` row source (3c); an optional `ON CONFLICT … `
//! UPSERT clause (3h); and an optional `RETURNING` tail (3g).
//! `shortid` auto-fill (3d) happens at execution, not in this grammar.
use crate::completion::TableColumn;
use crate::dsl::grammar::shared::{SET_VALUE, count_tuple_values};
use crate::dsl::grammar::sql_expr;
use crate::dsl::grammar::sql_select::{
RETURNING_CLAUSE, SQL_SELECT_COMPOUND, WHERE_CLAUSE, reject_internal_table,
};
use crate::dsl::grammar::{IdentSource, Node, Word};
use crate::dsl::walker::context::WalkContext;
/// The `,` punctuation node. A `static` so the comma-separated
/// `Repeated` lists in this file can share it by reference as their
/// `separator`.
static COMMA: Node = Node::Punct(',');
/// The `INSERT` target table. `__rdbms_*` rejected (ADR-0030 §6 /
/// ADR-0033 §1). `writes_table` populates `current_table` /
/// `current_table_columns` so the optional column list and the
/// `VALUES` expressions get column completion against the target.
///
/// `const` (not `static`) because it is placed by value in the
/// `SQL_INSERT_TAIL_NODES` array.
const TARGET_TABLE: Node = Node::Ident {
source: IdentSource::Tables,
role: "insert_target_table",
validator: Some(reject_internal_table),
highlight_override: None,
writes_table: true,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// One column name inside the optional `(col1, col2, …)` list.
///
/// `writes_user_listed_column: true` records the listed columns into
/// `WalkContext::user_listed_columns` so the `VALUES` factory
/// (`sql_value_list`, ADR-0036 Phase 3b) maps each value position to
/// the listed column in the user's order (Form A). `build_sql_insert`
/// still collects `listed_columns` independently from the matched
/// `insert_column` idents, so this flag only adds the live typed-slot
/// mapping — nothing else reads `user_listed_columns` on the SQL path.
static COLUMN_NAME: Node = Node::Ident {
source: IdentSource::Columns,
role: "insert_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: true,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `'(' column (',' column)* ')'` — the parenthesised column list.
/// `min: 1` means an empty `()` does not match.
static COLUMN_LIST_NODES: &[Node] = &[
Node::Punct('('),
Node::Repeated {
inner: &COLUMN_NAME,
separator: Some(&COMMA),
min: 1,
},
Node::Punct(')'),
];
/// The column list wrapped in `Optional`, so `INSERT INTO t VALUES …`
/// (no list) is admitted too. `const` because it is placed by value in
/// the `SQL_INSERT_TAIL_NODES` array.
const OPTIONAL_COLUMN_LIST: Node = Node::Optional(&Node::Seq(COLUMN_LIST_NODES));
/// A single value expression in a `VALUES` tuple, delegated to the
/// shared `sql_expr` grammar (ADR-0031): literals, operators, `CASE`,
/// function calls, etc. are all admitted, and the engine evaluates
/// them at execution time. This is the schemaless / fallback value
/// (see `sql_value_list`).
static VALUE_EXPR: Node = Node::Subgrammar(&sql_expr::SQL_OR_EXPR);

/// Builds the type-blind value list: one or more `sql_expr` values
/// separated by commas (the pre-Phase-3b shape).
///
/// Used for schemaless walks and, crucially, for any tuple whose value
/// count does NOT equal the target column count. Keeping such tuples
/// on this open-ended repeat leaves every value in the matched path,
/// so the post-walk per-tuple arity diagnostic (ADR-0033 §8.1) can
/// fire its friendly message; a fixed-length typed `Seq` would reject
/// the tuple outright and suppress that diagnostic.
fn fallback_value_list() -> Node {
    let inner = &VALUE_EXPR;
    let separator = Some(&COMMA);
    Node::Repeated {
        inner,
        separator,
        min: 1,
    }
}
/// The target columns a `VALUES` tuple's positions map onto (ADR-0036
/// Phase 3b). Mirrors `db::do_sql_insert`'s positional rule — NOT the
/// DSL's `column_value_list`:
/// - **Form A** (`user_listed_columns` set, from the `(col, …)`
///   list): the listed columns, in the user's order, matched against
///   the table's columns ASCII-case-insensitively. An *omitted*
///   `shortid` is auto-filled at execution (the X4 note) and has no
///   `VALUES` position, so it is correctly absent here.
/// - **Form B** (no column list): ALL columns in declaration order,
///   including `serial` / `shortid` — advanced-mode Form B auto-fills
///   *nothing* (`plan_autogen_autofill` returns early on an empty
///   column list), so the user supplies a value for every column.
///
/// Returns an empty `Vec` when schemaless / the table is unknown
/// (`current_table_columns` is `None`), when the Form A list is empty,
/// or when ANY Form A name fails to resolve against the table. Callers
/// treat empty as "fall back to the type-blind list".
///
/// The all-or-nothing rule for Form A matters because the result is
/// positional: silently dropping one unresolved name would shift every
/// later column left by one, so a value position would be offered the
/// typed slot of the wrong column.
fn target_value_columns(ctx: &WalkContext) -> Vec<TableColumn> {
    let Some(table_cols) = ctx.current_table_columns.as_ref() else {
        return Vec::new();
    };
    let Some(listed) = ctx.user_listed_columns.as_ref() else {
        // Form B: every column, in declaration order.
        return table_cols.clone();
    };
    // Form A: resolve each listed name in the user's order. Collecting
    // into `Option<Vec<_>>` yields `None` as soon as one name is
    // unknown, which keeps index `i` of the result aligned with value
    // position `i` or yields nothing at all.
    listed
        .iter()
        .map(|name| {
            table_cols
                .iter()
                .find(|col| col.name.eq_ignore_ascii_case(name))
                .cloned()
        })
        .collect::<Option<Vec<_>>>()
        .unwrap_or_default()
}
// `count_tuple_values` moved to `grammar::shared` (issue #17) so the
// simple-mode DSL insert arity gate can share it; the advanced grammar
// imports it above.
/// Lookahead that picks the value-list grammar for one `VALUES` tuple
/// (ADR-0036 Phase 3b).
///
/// `source` / `pos` are handed to `count_tuple_values`, which reports
/// how many values the tuple holds and whether it is closed. The typed
/// per-column path is chosen only where its fixed-length `Seq` can
/// succeed:
/// - a **closed** tuple needs an *exact* match (`count == columns`);
/// - an **open** (still-typing) tuple qualifies while there is still
///   room (`count <= columns`), so the per-column hint shows from the
///   moment `(` is opened through each position.
///
/// Everything else — wrong arity, schemaless walk, unknown table —
/// gets the type-blind fallback, which keeps the per-tuple arity
/// diagnostic (ADR-0033 §8.1) alive; a fixed-length `Seq` would
/// suppress it by rejecting the tuple.
///
/// The returned node is deliberately small: the heavy typed `Seq` is
/// built + memoized by the `DynamicSubgrammar`, matching
/// `insert_first_paren`'s leak discipline.
fn tuple_value_list(ctx: &WalkContext, source: &str, pos: usize) -> Node {
    let width = target_value_columns(ctx).len();
    let (value_count, is_closed) = count_tuple_values(source, pos);

    // No known target columns: nothing to type against.
    if width == 0 {
        return fallback_value_list();
    }

    let fits = if is_closed {
        value_count == width
    } else {
        value_count <= width
    };
    if fits {
        Node::DynamicSubgrammar(sql_value_list)
    } else {
        fallback_value_list()
    }
}
/// Builds the schema-aware typed value list for one correct-arity
/// `VALUES` tuple (ADR-0036 Phase 3b).
///
/// For each target column, in order, the sequence contains a
/// zero-width `SetColumn(col)` marker (establishing the active column)
/// followed by the shared boundary-aware [`SET_VALUE`] slot, with a
/// `,` between consecutive columns. A lone literal therefore routes to
/// the column's typed slot (live hint + numeric-shape highlight) while
/// any expression falls through to `sql_expr`.
///
/// Reached only via [`tuple_value_list`] when arity matches and the
/// schema is known, so the empty-columns guard is purely defensive.
/// Both the per-column data and the node slice are leaked to obtain
/// `'static` references.
fn sql_value_list(ctx: &WalkContext) -> Node {
    let columns = target_value_columns(ctx);
    if columns.is_empty() {
        return fallback_value_list();
    }

    let mut slots: Vec<Node> = Vec::with_capacity(columns.len() * 3);
    for (position, column) in columns.into_iter().enumerate() {
        // Separator before every column except the first.
        if position != 0 {
            slots.push(Node::Punct(','));
        }
        let column: &'static TableColumn = Box::leak(Box::new(column));
        slots.extend([Node::SetColumn(column), SET_VALUE]);
    }
    Node::Seq(Box::leak(slots.into_boxed_slice()))
}
/// Children of `VALUE_TUPLE`: `(`, the value list chosen per tuple by
/// the `tuple_value_list` lookahead, then `)`.
static VALUE_TUPLE_NODES: &[Node] = &[
Node::Punct('('),
Node::Lookahead(tuple_value_list),
Node::Punct(')'),
];
/// `'(' <value-list> ')'` — one row of values. The value list is the
/// arity-gated `tuple_value_list` (ADR-0036 Phase 3b): a correct-arity
/// tuple gets per-column typed slots; a wrong-arity tuple keeps the
/// type-blind `sql_expr` repeat so the §8.1 arity diagnostic fires.
static VALUE_TUPLE: Node = Node::Seq(VALUE_TUPLE_NODES);
/// Children of `VALUES_CLAUSE`: the `values` keyword followed by one
/// or more comma-separated tuples (`min: 1`).
static VALUES_CLAUSE_NODES: &[Node] = &[
Node::Word(Word::keyword("values")),
Node::Repeated {
inner: &VALUE_TUPLE,
separator: Some(&COMMA),
min: 1,
},
];
/// `VALUES tuple (',' tuple)*` — single- or multi-row. `const` because
/// it is placed by value in the `ROW_SOURCE_CHOICES` array.
const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES);
/// The row source: either a `VALUES` clause or a `SELECT`
/// compound (ADR-0033 §4, sub-phase 3c). `SQL_SELECT_COMPOUND`
/// is itself a Choice that admits a leading `WITH` (ADR-0032
/// §10.3), so `INSERT INTO t WITH x AS (…) SELECT …` parses
/// through this slot for free (R4). The two branches start on
/// disjoint keywords (`values` vs `select`/`with`), so the
/// Choice never ambiguously commits.
static ROW_SOURCE_CHOICES: &[Node] = &[VALUES_CLAUSE, Node::Subgrammar(&SQL_SELECT_COMPOUND)];
/// The `Choice` over `ROW_SOURCE_CHOICES`. `const` because it is
/// placed by value in the `SQL_INSERT_TAIL_NODES` array.
const ROW_SOURCE: Node = Node::Choice(ROW_SOURCE_CHOICES);
// =================================================================
// ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9, sub-phase 3h)
// =================================================================
/// One column in the optional `ON CONFLICT (col, …)` conflict
/// target. A DISTINCT role from `insert_column` — the conflict
/// target names existing unique-constraint columns, not the
/// inserted column list, and `build_sql_insert` collects only
/// `insert_column` into `listed_columns` (which drives `shortid`
/// auto-fill). Sharing the role would corrupt that set.
///
/// For the same reason `writes_user_listed_column` is `false` here:
/// these names must not be recorded as `VALUES` target columns.
static CONFLICT_TARGET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "conflict_target_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `'(' column (',' column)* ')'` — the parenthesised conflict target.
/// `min: 1` means an empty `()` does not match.
static CONFLICT_TARGET_NODES: &[Node] = &[
Node::Punct('('),
Node::Repeated {
inner: &CONFLICT_TARGET_COLUMN,
separator: Some(&COMMA),
min: 1,
},
Node::Punct(')'),
];
/// Optional `(col, …)` conflict target — which unique constraint
/// to react to. Standard SQL allows omitting it (any conflict).
/// `const` because it is placed by value in the
/// `ON_CONFLICT_CLAUSE_NODES` array.
const OPTIONAL_CONFLICT_TARGET: Node = Node::Optional(&Node::Seq(CONFLICT_TARGET_NODES));
/// The column on the left of one `DO UPDATE SET col = expr`
/// assignment. Mirrors `sql_update`'s `ASSIGN_COLUMN` shape (same
/// `update_set_column` role so it gets the same column completion /
/// diagnostics against the target table). `writes_column: true`
/// resolves the column type into `current_column` so the RHS
/// `SET_VALUE` lookahead can dispatch the typed slot for a lone
/// literal (ADR-0036 Phase 3a).
///
/// `const` because it is placed by value in the
/// `UPSERT_ASSIGNMENT_NODES` array.
const UPSERT_SET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "update_set_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: true,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `column '=' <value>` — the RHS is the boundary-aware `SET_VALUE`
/// slot (ADR-0036 Phase 3a), shared with `sql_update`: a lone literal
/// routes to the column-typed slot (live hint + highlight) while an
/// expression — `excluded.col`, operators, `CASE`, function calls —
/// falls through to the full `sql_expr` grammar (ADR-0031). `excluded`
/// is the would-have-been-inserted row (ADR-0033 §9); it parses as a
/// qualified ref via `sql_expr` and the engine resolves it.
static UPSERT_ASSIGNMENT_NODES: &[Node] = &[UPSERT_SET_COLUMN, Node::Punct('='), SET_VALUE];
/// One `column '=' <value>` assignment as a `Seq`. A `static` so
/// `UPSERT_ASSIGNMENT_LIST` can refer to it by reference as `inner`.
static UPSERT_ASSIGNMENT: Node = Node::Seq(UPSERT_ASSIGNMENT_NODES);
// `const` — used by value in `DO_UPDATE_NODES` (static-vs-const
// rule: a `Node` referenced by value in a `static [...]` must be
// `const`; `inner: &UPSERT_ASSIGNMENT` is fine since that one is
// referenced via `&`).
/// `assignment (',' assignment)*` — one or more assignments (`min: 1`),
/// so a bare `DO UPDATE SET` does not match.
const UPSERT_ASSIGNMENT_LIST: Node = Node::Repeated {
inner: &UPSERT_ASSIGNMENT,
separator: Some(&COMMA),
min: 1,
};
/// `UPDATE SET assignment (',' assignment)* [ WHERE … ]` — the body of
/// the `DO UPDATE` branch. The leading `do` is not part of this
/// sequence; it is matched in `ON_CONFLICT_CLAUSE_NODES`.
static DO_UPDATE_NODES: &[Node] = &[
Node::Word(Word::keyword("update")),
Node::Word(Word::keyword("set")),
UPSERT_ASSIGNMENT_LIST,
Node::Optional(&WHERE_CLAUSE),
];
/// The action after the shared `do`: `NOTHING | UPDATE SET … [ WHERE
/// … ]`. The `do` keyword is factored OUT of this Choice
/// deliberately. A Choice whose branches *shared* a `do` prefix
/// would break on the walker's `walk_seq`/`walk_choice` interaction
/// (ADR-0033 Amendment 1): a branch matching `do` then failing its
/// *second* token returns a hard `Failed` past idx 0, which stops
/// `walk_choice` from trying the next branch. With `do` hoisted into
/// the enclosing Seq, each branch's FIRST token (`nothing` vs
/// `update`) disambiguates, so a non-match of branch 0 is a clean
/// `NoMatch` that falls through to branch 1.
static DO_ACTION_CHOICES: &[Node] = &[
Node::Word(Word::keyword("nothing")),
Node::Seq(DO_UPDATE_NODES),
];
// `const` — used by value in `ON_CONFLICT_CLAUSE_NODES`.
/// The `Choice` over `DO_ACTION_CHOICES` (`NOTHING` or `UPDATE SET …`).
const DO_ACTION: Node = Node::Choice(DO_ACTION_CHOICES);
/// Children of `ON_CONFLICT_CLAUSE`: `on`, `conflict`, the optional
/// conflict target, the hoisted `do`, then the action.
static ON_CONFLICT_CLAUSE_NODES: &[Node] = &[
Node::Word(Word::keyword("on")),
Node::Word(Word::keyword("conflict")),
OPTIONAL_CONFLICT_TARGET,
Node::Word(Word::keyword("do")),
DO_ACTION,
];
/// `ON CONFLICT [ (col, …) ] DO ( NOTHING | UPDATE SET … )`
/// (ADR-0033 §9). Sits between the row source and `RETURNING` in
/// `SQL_INSERT_SHAPE`.
static ON_CONFLICT_CLAUSE: Node = Node::Seq(ON_CONFLICT_CLAUSE_NODES);
/// Children of `SQL_INSERT_SHAPE`, in source order: `into`, the target
/// table, the optional column list, the row source, then the optional
/// `ON CONFLICT` clause, optional `RETURNING` clause, and optional
/// trailing `;`.
static SQL_INSERT_TAIL_NODES: &[Node] = &[
Node::Word(Word::keyword("into")),
TARGET_TABLE,
OPTIONAL_COLUMN_LIST,
ROW_SOURCE,
Node::Optional(&ON_CONFLICT_CLAUSE),
Node::Optional(&RETURNING_CLAUSE),
Node::Optional(&Node::Punct(';')),
];
/// The post-`INSERT` portion of a SQL `INSERT` statement
/// (ADR-0033 §1): `INTO <table> [ '(' col_list ')' ]
/// ( VALUES <tuple> (',' <tuple>)* | <select-compound> )
/// [ ON CONFLICT … ] [ RETURNING … ] [ ';' ]`.
///
/// The entry-word dispatch consumes the leading `INSERT` keyword
/// before this shape walks, so a `CommandNode` references it as
/// its `shape` (sub-phase 3b registers a development entry word;
/// sub-phase 3j wires the shared `insert` entry word).
pub static SQL_INSERT_SHAPE: Node = Node::Seq(SQL_INSERT_TAIL_NODES);
// =================================================================
// Tests — grammar accept/reject for the post-`INSERT` tail.
// =================================================================
#[cfg(test)]
mod tests {
    use super::SQL_INSERT_SHAPE;
    use crate::dsl::walker::context::WalkContext;
    use crate::dsl::walker::driver::{NodeWalkResult, walk_node};
    use crate::dsl::walker::outcome::MatchedPath;

    /// Runs the INSERT tail over `input` with a fresh (schemaless)
    /// context and reports whether it is a *complete* match: the walk
    /// must match and leave nothing but whitespace after its end
    /// offset. In a schemaless context the shape is structural, so
    /// table/column idents match by shape and `reject_internal_table`
    /// still fires on `__rdbms_*`.
    fn walks(input: &str) -> bool {
        let mut ctx = WalkContext::new();
        let mut path = MatchedPath::new();
        let mut per_byte = Vec::new();
        let outcome = walk_node(
            input,
            0,
            &SQL_INSERT_SHAPE,
            &mut ctx,
            &mut path,
            &mut per_byte,
        );
        if let NodeWalkResult::Matched { end, .. } = outcome {
            input[end..].trim().is_empty()
        } else {
            false
        }
    }

    /// Asserts that `input` is accepted as a complete INSERT tail.
    fn good(input: &str) {
        assert!(walks(input), "{input:?} should be a valid INSERT tail");
    }

    /// Asserts that `input` is NOT accepted as a complete INSERT tail.
    fn bad(input: &str) {
        assert!(
            !walks(input),
            "{input:?} should NOT walk as a complete INSERT tail"
        );
    }

    /// `good` for each input, in order.
    fn all_good(inputs: &[&str]) {
        for input in inputs {
            good(input);
        }
    }

    /// `bad` for each input, in order.
    fn all_bad(inputs: &[&str]) {
        for input in inputs {
            bad(input);
        }
    }

    #[test]
    fn single_row_values() {
        all_good(&[
            "into orders values (1, 2.0)",
            "into orders values (1, 'text', true, null)",
            "into orders values (1);",
        ]);
    }

    #[test]
    fn multi_row_values() {
        all_good(&[
            "into orders values (1, 'a'), (2, 'b')",
            "into orders values (1), (2), (3)",
            "into orders values (1, 'a'), (2, 'b');",
        ]);
    }

    #[test]
    fn explicit_column_list() {
        all_good(&[
            "into orders (id, total) values (1, 2.0)",
            "into orders (id) values (1)",
            "into orders (a, b, c) values (1, 2, 3), (4, 5, 6)",
        ]);
    }

    #[test]
    fn value_expressions_admit_sql_expr() {
        all_good(&[
            "into t values (1 + 2)",
            "into t values (case when 1 > 0 then 'y' else 'n' end)",
        ]);
    }

    #[test]
    fn returning_tail_admitted() {
        // 3g: the optional RETURNING projection_list tail is accepted
        // after either row source.
        all_good(&[
            "into orders values (1, 2.0) returning *",
            "into orders (id, total) values (1, 2.0) returning id",
            "into orders values (1, 'a'), (2, 'b') returning id, total",
            "into archive select * from orders returning *",
            "into orders values (1) returning id as new_id;",
        ]);
    }

    #[test]
    fn on_conflict_clause_admitted() {
        // 3h: ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9).
        all_good(&[
            "into t (id, name) values (1, 'x') on conflict (id) do nothing",
            "into t (id, name) values (1, 'x') on conflict do nothing",
            "into t (id, name) values (1, 'x') on conflict (id) do update set name = excluded.name",
            "into t (id, name) values (1, 'x') on conflict (id) do update set name = 'y' where id > 0",
            // Multi-column conflict target + multi-assignment DO UPDATE.
            "into t (a, b) values (1, 2) on conflict (a, b) do update set b = excluded.b, a = 9",
            // ON CONFLICT composes with RETURNING (order: row source,
            // ON CONFLICT, RETURNING).
            "into t (id) values (1) on conflict (id) do nothing returning *",
            "into t (id) values (1) on conflict (id) do update set id = excluded.id returning id",
        ]);
    }

    #[test]
    fn on_conflict_structurally_incomplete_rejected() {
        all_bad(&[
            // `do` with no action.
            "into t (id) values (1) on conflict (id) do",
            // DO UPDATE with no SET.
            "into t (id) values (1) on conflict (id) do update",
            // DO UPDATE SET with no assignment.
            "into t (id) values (1) on conflict (id) do update set",
            // Bare ON with no CONFLICT.
            "into t (id) values (1) on do nothing",
        ]);
    }

    #[test]
    fn internal_target_table_rejected() {
        all_bad(&[
            "into __rdbms_playground_columns values (1)",
            "into __rdbms_playground_relationships (a) values (1)",
        ]);
    }

    #[test]
    fn select_row_source() {
        // 3c: the row source is a Choice between VALUES and a
        // SELECT compound (which itself admits a leading WITH).
        all_good(&[
            "into archive select * from orders",
            "into archive select * from orders where created < '2025-01-01'",
            "into archive select * from orders;",
        ]);
    }

    #[test]
    fn select_row_source_with_column_list() {
        all_good(&[
            "into target (a, b) select x, y from source",
            "into target (id) select id from source",
        ]);
    }

    #[test]
    fn with_prefixed_select_row_source() {
        // R4 invariant: a WITH-prefixed SELECT row source parses
        // through SQL_SELECT_COMPOUND's WITH-prefixed branch.
        all_good(&[
            "into archive with t as (select * from orders) select * from t",
            "into summary (id, total) with t as (select * from orders) \
             select id, total from t",
        ]);
    }

    #[test]
    fn select_row_source_rejects_internal_from_table() {
        // DA gate: the SELECT's FROM slot must still reject
        // `__rdbms_*` tables (Phase-2 gate, not silently dropped on
        // the DML path).
        bad("into archive select * from __rdbms_playground_columns");
    }

    #[test]
    fn incomplete_select_row_source_rejected() {
        // A bare `select` with no projection is not a complete row
        // source.
        all_bad(&["into archive select", "into archive select * from"]);
    }

    #[test]
    fn structurally_incomplete_or_wrong_rejected() {
        all_bad(&[
            // Missing VALUES.
            "into orders",
            "into orders (id, total)",
            // Empty value tuple — at least one expression required.
            "into orders values ()",
            // Missing INTO.
            "orders values (1)",
            // Trailing comma with no following tuple.
            "into orders values (1),",
            // Unclosed tuple.
            "into orders values (1, 2",
        ]);
    }
}