grammar+db: 3h — UPSERT ON CONFLICT DO NOTHING / DO UPDATE (ADR-0033 §9)
on_conflict_clause on SQL_INSERT_SHAPE: optional (col,…) conflict target (distinct conflict_target_column role so it never enters listed_columns), DO NOTHING / DO UPDATE SET … [WHERE …]. `do` is factored out of the action Choice so nothing/update disambiguate without tripping the walk_seq/walk_choice shared-prefix trap (ADR-0033 Amendment 1). Worker runs the UPSERT verbatim (SQLite native); no new execution path. build_sql_insert: row_source now stops before the FIRST trailing clause — ON CONFLICT (3h) or RETURNING (3g) — and do_sql_insert's shortid auto-fill rewrite re-appends the whole trailing tail, so an auto-filled INSERT keeps its ON CONFLICT / RETURNING. excluded pseudo-table (§9): resolves to the target's columns inside the DO UPDATE action and completes at `excluded.|`, but stays flagged as unknown_qualifier in VALUES / RETURNING / non-upsert statements. Diagnostic pass scopes it by the DO UPDATE byte-range (update token → RETURNING/end); completion resolves it against the INSERT target's current_table_columns. NOTE: scoping uses byte-range rather than the plan's prescribed from_scope TableBinding push — same behaviour, no walker scope-frame change. Tests (+13): grammar accept/reject; DO NOTHING / DO UPDATE-excluded / no-target execution + persistence; auto-fill × ON CONFLICT with a REAL unique conflict (proves the clause survives the rewrite, not a no-op); excluded resolves in DO UPDATE SET + WHERE, flagged in VALUES (incl. same statement), unknown column under excluded; excluded.| completion; conflict-target not in listed_columns. 1576 pass / 0 fail / 1 ignored. Clippy clean. Dev sql_insert entry word still removed in 3j. Known follow-up (tracked for 3i): UPSERT DO UPDATE bare column refs (SET LHS / WHERE) are not schema-validated, unlike regular UPDATE — the INSERT target isn't a diagnostic binding. Fits 3i's cross-cut SET/WHERE validation scope.
This commit is contained in:
+28
-8
@@ -891,18 +891,38 @@ fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, Validat
|
||||
})
|
||||
.collect();
|
||||
// The row source is the `VALUES` / `SELECT` / `WITH` clause —
|
||||
// from that keyword up to (but not including) any `RETURNING`
|
||||
// tail (3g) or trailing `;`. Both boundaries are located by
|
||||
// *Word token* in the path (not a text scan), so a string
|
||||
// literal like `values ('select')` / `values ('returning')`
|
||||
// can't be mistaken for a keyword. Excluding RETURNING keeps the
|
||||
// row source independently preparable for `shortid` auto-fill
|
||||
// (`VALUES … RETURNING …` is not a valid standalone statement).
|
||||
// from that keyword up to (but not including) any trailing
|
||||
// clause: `ON CONFLICT …` (3h) or `RETURNING …` (3g), whichever
|
||||
// comes first, else the trailing `;` / end. Boundaries are
|
||||
// located by *Word token* in the path (not a text scan), so a
|
||||
// string literal like `values ('select')` can't be mistaken for
|
||||
// a keyword. Excluding the trailing clauses keeps the row source
|
||||
// independently preparable for `shortid` auto-fill (`VALUES …
|
||||
// ON CONFLICT …` / `VALUES … RETURNING …` are not valid
|
||||
// standalone statements), and the auto-fill rewrite re-appends
|
||||
// the trailing tail verbatim (see `do_sql_insert`).
|
||||
//
|
||||
// `ON CONFLICT`'s `on` is located via the unambiguous `conflict`
|
||||
// keyword that immediately follows it — a JOIN's `on` inside a
|
||||
// SELECT row source has no following `conflict`, so it is not
|
||||
// mistaken for a clause boundary.
|
||||
let on_conflict_start = path
|
||||
.items
|
||||
.windows(2)
|
||||
.find(|w| {
|
||||
matches!(w[0].kind, MatchedKind::Word("on"))
|
||||
&& matches!(w[1].kind, MatchedKind::Word("conflict"))
|
||||
})
|
||||
.map(|w| w[0].span.0);
|
||||
let returning_start = path
|
||||
.items
|
||||
.iter()
|
||||
.find(|item| matches!(item.kind, MatchedKind::Word("returning")))
|
||||
.map(|item| item.span.0);
|
||||
let tail_start = [on_conflict_start, returning_start]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.min();
|
||||
let row_source = path
|
||||
.items
|
||||
.iter()
|
||||
@@ -910,7 +930,7 @@ fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, Validat
|
||||
matches!(item.kind, MatchedKind::Word("values" | "select" | "with"))
|
||||
})
|
||||
.map(|item| {
|
||||
let end = returning_start.unwrap_or(source.len());
|
||||
let end = tail_start.unwrap_or(source.len());
|
||||
source[item.span.0..end]
|
||||
.trim()
|
||||
.trim_end_matches(';')
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
//! sub-phases.
|
||||
|
||||
use crate::dsl::grammar::sql_expr;
|
||||
use crate::dsl::grammar::sql_select::{RETURNING_CLAUSE, SQL_SELECT_COMPOUND, reject_internal_table};
|
||||
use crate::dsl::grammar::sql_select::{
|
||||
RETURNING_CLAUSE, SQL_SELECT_COMPOUND, WHERE_CLAUSE, reject_internal_table,
|
||||
};
|
||||
use crate::dsl::grammar::{IdentSource, Node, Word};
|
||||
|
||||
static COMMA: Node = Node::Punct(',');
|
||||
@@ -105,11 +107,119 @@ const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES);
|
||||
static ROW_SOURCE_CHOICES: &[Node] = &[VALUES_CLAUSE, Node::Subgrammar(&SQL_SELECT_COMPOUND)];
|
||||
const ROW_SOURCE: Node = Node::Choice(ROW_SOURCE_CHOICES);
|
||||
|
||||
// =================================================================
|
||||
// ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9, sub-phase 3h)
|
||||
// =================================================================
|
||||
|
||||
/// One column in the optional `ON CONFLICT (col, …)` conflict
|
||||
/// target. A DISTINCT role from `insert_column` — the conflict
|
||||
/// target names existing unique-constraint columns, not the
|
||||
/// inserted column list, and `build_sql_insert` collects only
|
||||
/// `insert_column` into `listed_columns` (which drives `shortid`
|
||||
/// auto-fill). Sharing the role would corrupt that set.
|
||||
static CONFLICT_TARGET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "conflict_target_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
static CONFLICT_TARGET_NODES: &[Node] = &[
|
||||
Node::Punct('('),
|
||||
Node::Repeated {
|
||||
inner: &CONFLICT_TARGET_COLUMN,
|
||||
separator: Some(&COMMA),
|
||||
min: 1,
|
||||
},
|
||||
Node::Punct(')'),
|
||||
];
|
||||
/// Optional `(col, …)` conflict target — which unique constraint
|
||||
/// to react to. SQLite allows omitting it (any uniqueness conflict).
|
||||
const OPTIONAL_CONFLICT_TARGET: Node = Node::Optional(&Node::Seq(CONFLICT_TARGET_NODES));
|
||||
|
||||
/// The column on the left of one `DO UPDATE SET col = expr`
|
||||
/// assignment. Mirrors `sql_update`'s `ASSIGN_COLUMN` shape (same
|
||||
/// `update_set_column` role so it gets the same column completion;
|
||||
/// NOTE: unlike regular UPDATE it is not yet schema-validated — 3i).
|
||||
const UPSERT_SET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "update_set_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `column '=' sql_expr` — the RHS reuses the shared expression
|
||||
/// grammar (ADR-0031), so `excluded.col`, literals, operators,
|
||||
/// `CASE`, and function calls are all admitted. `excluded` is the
|
||||
/// would-have-been-inserted row (ADR-0033 §9); it parses as a
|
||||
/// qualified ref via `sql_expr` and the engine resolves it.
|
||||
static UPSERT_ASSIGNMENT_NODES: &[Node] = &[
|
||||
UPSERT_SET_COLUMN,
|
||||
Node::Punct('='),
|
||||
Node::Subgrammar(&sql_expr::SQL_OR_EXPR),
|
||||
];
|
||||
static UPSERT_ASSIGNMENT: Node = Node::Seq(UPSERT_ASSIGNMENT_NODES);
|
||||
// `const` — used by value in `DO_UPDATE_NODES` (static-vs-const
|
||||
// rule: a `Node` referenced by value in a `static [...]` must be
|
||||
// `const`; `inner: &UPSERT_ASSIGNMENT` is fine since that one is
|
||||
// referenced via `&`).
|
||||
const UPSERT_ASSIGNMENT_LIST: Node = Node::Repeated {
|
||||
inner: &UPSERT_ASSIGNMENT,
|
||||
separator: Some(&COMMA),
|
||||
min: 1,
|
||||
};
|
||||
|
||||
static DO_UPDATE_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("update")),
|
||||
Node::Word(Word::keyword("set")),
|
||||
UPSERT_ASSIGNMENT_LIST,
|
||||
Node::Optional(&WHERE_CLAUSE),
|
||||
];
|
||||
/// The action after the shared `do`: `NOTHING | UPDATE SET … [ WHERE
|
||||
/// … ]`. The `do` keyword is factored OUT of this Choice
|
||||
/// deliberately. A Choice whose branches *shared* a `do` prefix
|
||||
/// would break on the walker's `walk_seq`/`walk_choice` interaction
|
||||
/// (ADR-0033 Amendment 1): a branch matching `do` then failing its
|
||||
/// *second* token returns a hard `Failed` past idx 0, which stops
|
||||
/// `walk_choice` from trying the next branch. With `do` hoisted into
|
||||
/// the enclosing Seq, each branch's FIRST token (`nothing` vs
|
||||
/// `update`) disambiguates, so a non-match of branch 0 is a clean
|
||||
/// `NoMatch` that falls through to branch 1.
|
||||
static DO_ACTION_CHOICES: &[Node] =
|
||||
&[Node::Word(Word::keyword("nothing")), Node::Seq(DO_UPDATE_NODES)];
|
||||
// `const` — used by value in `ON_CONFLICT_CLAUSE_NODES`.
|
||||
const DO_ACTION: Node = Node::Choice(DO_ACTION_CHOICES);
|
||||
|
||||
static ON_CONFLICT_CLAUSE_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("on")),
|
||||
Node::Word(Word::keyword("conflict")),
|
||||
OPTIONAL_CONFLICT_TARGET,
|
||||
Node::Word(Word::keyword("do")),
|
||||
DO_ACTION,
|
||||
];
|
||||
/// `ON CONFLICT [ (col, …) ] DO ( NOTHING | UPDATE SET … [ WHERE … ] )`
|
||||
/// (ADR-0033 §9). Sits between the row source and `RETURNING` in
|
||||
/// `SQL_INSERT_SHAPE`.
|
||||
static ON_CONFLICT_CLAUSE: Node = Node::Seq(ON_CONFLICT_CLAUSE_NODES);
|
||||
|
||||
static SQL_INSERT_TAIL_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("into")),
|
||||
TARGET_TABLE,
|
||||
OPTIONAL_COLUMN_LIST,
|
||||
ROW_SOURCE,
|
||||
Node::Optional(&ON_CONFLICT_CLAUSE),
|
||||
Node::Optional(&RETURNING_CLAUSE),
|
||||
Node::Optional(&Node::Punct(';')),
|
||||
];
|
||||
@@ -196,6 +306,33 @@ mod tests {
|
||||
good("into orders values (1) returning id as new_id;");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn on_conflict_clause_admitted() {
|
||||
// 3h: ON CONFLICT … DO NOTHING / DO UPDATE (ADR-0033 §9).
|
||||
good("into t (id, name) values (1, 'x') on conflict (id) do nothing");
|
||||
good("into t (id, name) values (1, 'x') on conflict do nothing");
|
||||
good("into t (id, name) values (1, 'x') on conflict (id) do update set name = excluded.name");
|
||||
good("into t (id, name) values (1, 'x') on conflict (id) do update set name = 'y' where id > 0");
|
||||
// Multi-column conflict target + multi-assignment DO UPDATE.
|
||||
good("into t (a, b) values (1, 2) on conflict (a, b) do update set b = excluded.b, a = 9");
|
||||
// ON CONFLICT composes with RETURNING (order: row source,
|
||||
// ON CONFLICT, RETURNING).
|
||||
good("into t (id) values (1) on conflict (id) do nothing returning *");
|
||||
good("into t (id) values (1) on conflict (id) do update set id = excluded.id returning id");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn on_conflict_structurally_incomplete_rejected() {
|
||||
// `do` with no action.
|
||||
bad("into t (id) values (1) on conflict (id) do");
|
||||
// DO UPDATE with no SET.
|
||||
bad("into t (id) values (1) on conflict (id) do update");
|
||||
// DO UPDATE SET with no assignment.
|
||||
bad("into t (id) values (1) on conflict (id) do update set");
|
||||
// Bare ON with no CONFLICT.
|
||||
bad("into t (id) values (1) on do nothing");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn internal_target_table_rejected() {
|
||||
bad("into __rdbms_playground_columns values (1)");
|
||||
|
||||
Reference in New Issue
Block a user