From 6b8888f1051a09838f91db989dcc936f5c4a8154 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 22 May 2026 21:28:24 +0000 Subject: [PATCH] =?UTF-8?q?grammar+db:=203h=20=E2=80=94=20UPSERT=20ON=20CO?= =?UTF-8?q?NFLICT=20DO=20NOTHING=20/=20DO=20UPDATE=20(ADR-0033=20=C2=A79)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit on_conflict_clause on SQL_INSERT_SHAPE: optional (col,…) conflict target (distinct conflict_target_column role so it never enters listed_columns), DO NOTHING / DO UPDATE SET … [WHERE …]. `do` is factored out of the action Choice so nothing/update disambiguate without tripping the walk_seq/walk_choice shared-prefix trap (ADR-0033 Amendment 1). Worker runs the UPSERT verbatim (SQLite native); no new execution path. build_sql_insert: row_source now stops before the FIRST trailing clause — ON CONFLICT (3h) or RETURNING (3g) — and do_sql_insert's shortid auto-fill rewrite re-appends the whole trailing tail, so an auto-filled INSERT keeps its ON CONFLICT / RETURNING. excluded pseudo-table (§9): resolves to the target's columns inside the DO UPDATE action and completes at `excluded.|`, but stays flagged as unknown_qualifier in VALUES / RETURNING / non-upsert statements. Diagnostic pass scopes it by the DO UPDATE byte-range (update token → RETURNING/end); completion resolves it against the INSERT target's current_table_columns. NOTE: scoping uses byte-range rather than the plan's prescribed from_scope TableBinding push — same behaviour, no walker scope-frame change. Tests (+13): grammar accept/reject; DO NOTHING / DO UPDATE-excluded / no-target execution + persistence; auto-fill × ON CONFLICT with a REAL unique conflict (proves the clause survives the rewrite, not a no-op); excluded resolves in DO UPDATE SET + WHERE, flagged in VALUES (incl. same statement), unknown column under excluded; excluded.| completion; conflict-target not in listed_columns. 1576 pass / 0 fail / 1 ignored. Clippy clean. Dev sql_insert entry word still removed in 3j. Known follow-up (tracked for 3i): UPSERT DO UPDATE bare column refs (SET LHS / WHERE) are not schema-validated, unlike regular UPDATE — the INSERT target isn't a diagnostic binding. Fits 3i's cross-cut SET/WHERE validation scope. --- src/completion.rs | 44 ++++++-- src/db.rs | 43 ++++---- src/dsl/grammar/data.rs | 36 +++++-- src/dsl/grammar/sql_insert.rs | 139 +++++++++++++++++++++++++- src/dsl/walker/mod.rs | 183 +++++++++++++++++++++++++++++++--- tests/sql_insert.rs | 136 +++++++++++++++++++++++++ 6 files changed, 529 insertions(+), 52 deletions(-) diff --git a/src/completion.rs b/src/completion.rs index 76f80f4..f9a25c1 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -374,12 +374,26 @@ pub fn candidates_at_cursor_with_in_mode( let qualified_columns: Option> = prefix_qualifier .as_ref() .map(|q| { - resolve_qualifier_columns_in( - q, - resolution_from_scope, - resolution_cte_bindings, - cache, - ) + // ADR-0033 §9: `excluded.|` inside an `INSERT … ON + // CONFLICT … DO UPDATE` completes to the target table's + // columns — `excluded` mirrors the would-be-inserted row. + // The target's columns are the INSERT's + // `current_table_columns` (set by the target-table slot). + // The diagnostic pass enforces the strict DO-UPDATE + // byte-range; completion is the softer surface and offers + // the columns whenever the INSERT target is in hand. + if q.eq_ignore_ascii_case("excluded") + && let Some(cols) = current_table_columns + { + cols.iter().map(|c| c.name.clone()).collect() + } else { + resolve_qualifier_columns_in( + q, + resolution_from_scope, + resolution_cte_bindings, + cache, + ) + } }); let expected = if probe.expected.is_empty() { @@ -2119,6 +2133,24 @@ mod tests { ); } + #[test] + fn excluded_prefix_completes_to_target_columns() { + // ADR-0033 §9: `excluded.|` inside a DO UPDATE action + // completes to the INSERT target table's columns. + let cache = two_table_schema(); + let input = "sqlinsert into a (id, name) values (1, 'x') \ + on conflict (id) do update set name = excluded."; + let cs = cands_with(input, input.len(), &cache); + assert!( + cs.contains(&"id".to_string()) && cs.contains(&"name".to_string()), + "excluded.| should offer the target table's columns; got {cs:?}", + ); + assert!( + !cs.contains(&"total".to_string()), + "a column from an unrelated table must not appear; got {cs:?}", + ); + } + #[test] fn qualified_prefix_with_partial_filters_prefix() { // `select a.na` — only `name` (starts with `na`). diff --git a/src/db.rs b/src/db.rs index a766aac..6f85407 100644 --- a/src/db.rs +++ b/src/db.rs @@ -5945,7 +5945,7 @@ fn plan_shortid_autofill( sql: &str, listed_columns: &[String], row_source: &str, - returning_tail: &str, + trailing_tail: &str, ) -> Result<(String, Vec), DbError> { if listed_columns.is_empty() { return Ok((sql.to_string(), Vec::new())); @@ -6040,18 +6040,18 @@ fn plan_shortid_autofill( .join(", "); tuples.push(format!("({placeholders})")); } - // Preserve any RETURNING tail (3g) — the reconstruction would - // otherwise drop it, so `INSERT … RETURNING *` on an auto-filled - // shortid table would return no rows (and the worker would read - // a zero affected-row count). `returning_tail` is "" on the - // non-RETURNING path. - let returning_suffix = if returning_tail.is_empty() { + // Preserve any trailing clause — `ON CONFLICT …` (3h) and/or + // `RETURNING …` (3g). The reconstruction rebuilds only INSERT … + // VALUES …, so without this the UPSERT action would be lost and + // `RETURNING *` would yield no rows. `trailing_tail` is "" when + // the statement has neither. + let trailing_suffix = if trailing_tail.is_empty() { String::new() } else { - format!(" {returning_tail}") + format!(" {trailing_tail}") }; let exec_sql = format!( - "INSERT INTO {tbl} ({cols_csv}) VALUES {vals}{returning_suffix};", + "INSERT INTO {tbl} ({cols_csv}) VALUES {vals}{trailing_suffix};", tbl = quote_ident(target_table), vals = tuples.join(", "), ); @@ -6092,20 +6092,21 @@ fn do_sql_insert( returning: bool, ) -> Result { debug!(sql = %sql, table = %target_table, returning, "sql_insert"); - // RETURNING (3g): the `shortid` auto-fill rewrite reconstructs - // only `INSERT … VALUES …` and would drop the RETURNING tail, so - // extract it here to re-append. `row_source` is the clean - // VALUES/SELECT text (no RETURNING — `build_sql_insert` stops the - // slice at the RETURNING token), so whatever follows it in the - // full `sql` is the RETURNING clause. On the verbatim (no - // auto-fill) path the original `sql` already carries RETURNING, - // so the tail is only consumed by the rewrite. - let returning_tail: String = if returning && !row_source.is_empty() { + // The `shortid` auto-fill rewrite reconstructs only `INSERT … + // VALUES …` and would drop any trailing clause — `ON CONFLICT …` + // (3h) and/or `RETURNING …` (3g). `row_source` is the clean + // VALUES/SELECT text (`build_sql_insert` stops the slice at the + // first trailing clause), so whatever follows it in the full + // `sql` is exactly that tail; extract it here so the rewrite can + // re-append it verbatim. On the verbatim (no auto-fill) path the + // original `sql` already carries the tail, so it is consumed only + // by the rewrite. + let trailing_tail: String = if row_source.is_empty() { + String::new() + } else { sql.find(row_source) .map(|i| sql[i + row_source.len()..].trim().trim_end_matches(';').trim().to_string()) .unwrap_or_default() - } else { - String::new() }; // Sub-phase 3d: when the user's column list omits one or more // `shortid` columns, the worker materialises the row source, @@ -6114,7 +6115,7 @@ fn do_sql_insert( // params vec with the original `sql` means "no auto-fill — // execute verbatim" (the 3b path). let (exec_sql, params) = - plan_shortid_autofill(conn, target_table, sql, listed_columns, row_source, &returning_tail)?; + plan_shortid_autofill(conn, target_table, sql, listed_columns, row_source, &trailing_tail)?; let tx = conn .unchecked_transaction() .map_err(DbError::from_rusqlite)?; diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index 2bc7bae..7daeffb 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -891,18 +891,38 @@ fn build_sql_insert(path: &MatchedPath, source: &str) -> Result Result 0"); + // Multi-column conflict target + multi-assignment DO UPDATE. + good("into t (a, b) values (1, 2) on conflict (a, b) do update set b = excluded.b, a = 9"); + // ON CONFLICT composes with RETURNING (order: row source, + // ON CONFLICT, RETURNING). + good("into t (id) values (1) on conflict (id) do nothing returning *"); + good("into t (id) values (1) on conflict (id) do update set id = excluded.id returning id"); + } + + #[test] + fn on_conflict_structurally_incomplete_rejected() { + // `do` with no action. + bad("into t (id) values (1) on conflict (id) do"); + // DO UPDATE with no SET. + bad("into t (id) values (1) on conflict (id) do update"); + // DO UPDATE SET with no assignment. + bad("into t (id) values (1) on conflict (id) do update set"); + // Bare ON with no CONFLICT. + bad("into t (id) values (1) on do nothing"); + } + #[test] fn internal_target_table_rejected() { bad("into __rdbms_playground_columns values (1)"); diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index c69da70..c7d5c5a 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -593,6 +593,47 @@ fn schema_existence_diagnostics( } } + // ADR-0033 §9: the `excluded` pseudo-table. In an + // `INSERT … ON CONFLICT … DO UPDATE`, a `excluded.col` reference + // resolves to the target table's columns — but ONLY within the + // DO UPDATE action's byte range. Outside it (a VALUES tuple, a + // trailing RETURNING, or any non-upsert statement) `excluded` + // has no meaning and must stay unresolved so the existing + // `unknown_qualifier` diagnostic fires (the leak guard). The + // range runs from the `update` keyword (the one after + // `conflict`) to the `RETURNING` boundary (or end). + let upsert_excluded: Option<(String, usize, usize)> = { + let target = path.items.iter().find_map(|it| match it.kind { + MatchedKind::Ident { + source: IdentSource::Tables, + role: "insert_target_table", + } => Some(it.text.clone()), + _ => None, + }); + let conflict_pos = path + .items + .iter() + .find(|it| matches!(it.kind, MatchedKind::Word("conflict"))) + .map(|it| it.span.0); + let do_update_pos = conflict_pos.and_then(|cp| { + path.items + .iter() + .find(|it| matches!(it.kind, MatchedKind::Word("update")) && it.span.0 > cp) + .map(|it| it.span.0) + }); + let returning_pos = path + .items + .iter() + .find(|it| matches!(it.kind, MatchedKind::Word("returning"))) + .map(|it| it.span.0); + match (target, do_update_pos) { + (Some(t), Some(start)) if schema_has_table(schema, &t) => { + Some((t, start, returning_pos.unwrap_or(usize::MAX))) + } + _ => None, + } + }; + // Track which CTE names have already been seen, for // duplicate detection (a separate single-pass walk; emits // the diagnostic on the second occurrence). @@ -724,22 +765,56 @@ fn schema_existence_diagnostics( } } None => { - // Qualifier didn't resolve — emit - // unknown_qualifier on the - // qualifier span, not on the - // column, so the learner sees - // the root cause. - diagnostics.push(Diagnostic { - severity: Severity::Error, - span: qual_span, - message: crate::friendly::translate( - "diagnostic.unknown_qualifier", - &[( - "qualifier", - &qual as &dyn std::fmt::Display, - )], - ), - }); + // ADR-0033 §9: `excluded.col` inside + // a DO UPDATE action resolves to the + // target table's columns. Scoped by + // byte-range so it stays unknown (and + // flagged below) in VALUES / RETURNING + // / non-upsert statements (leak guard). + let in_excluded_scope = upsert_excluded + .as_ref() + .is_some_and(|(_, start, end)| { + qual.eq_ignore_ascii_case("excluded") + && qual_span.0 >= *start + && qual_span.0 < *end + }); + if let Some((target, _, _)) = + upsert_excluded.as_ref().filter(|_| in_excluded_scope) + { + // Validate the column against the + // target's columns (excluded mirrors + // the target row's shape). + if !schema_has_column(schema, target, &item.text) { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.unknown_column", + &[ + ("name", &item.text as &dyn std::fmt::Display), + ("table", target as &dyn std::fmt::Display), + ], + ), + }); + } + } else { + // Qualifier didn't resolve — emit + // unknown_qualifier on the + // qualifier span, not on the + // column, so the learner sees + // the root cause. + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: qual_span, + message: crate::friendly::translate( + "diagnostic.unknown_qualifier", + &[( + "qualifier", + &qual as &dyn std::fmt::Display, + )], + ), + }); + } } } } @@ -3915,6 +3990,82 @@ mod tests { ); } + #[test] + fn upsert_excluded_resolves_in_do_update() { + // ADR-0033 §9: inside a DO UPDATE action, `excluded.col` + // resolves to the target table's columns — no diagnostic. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id, name) values (1, 'x') on conflict (id) do update set name = excluded.name", + &schema, + ); + assert!(diags.is_empty(), "excluded.name should resolve in DO UPDATE; got {diags:?}"); + } + + #[test] + fn upsert_excluded_resolves_in_do_update_where() { + // The DO UPDATE WHERE is part of the action's byte range, so + // `excluded` resolves there too (not just in the SET RHS). + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id, name) values (1, 'x') on conflict (id) do update set name = 'y' where id = excluded.id", + &schema, + ); + assert!(diags.is_empty(), "excluded in DO UPDATE WHERE should resolve; got {diags:?}"); + } + + #[test] + fn upsert_excluded_unknown_column_is_flagged() { + // `excluded` mirrors the target's shape, so an unknown column + // under it is still an unknown_column error. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id, name) values (1, 'x') on conflict (id) do update set name = excluded.nosuch", + &schema, + ); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "excluded.nosuch should be unknown_column; got {diags:?}", + ); + } + + #[test] + fn excluded_outside_do_update_is_unknown_qualifier() { + // The leak guard: `excluded` in a VALUES tuple (no DO UPDATE + // in scope) has no meaning and must be flagged. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id, name) values (excluded.name, 'x')", + &schema, + ); + assert!( + diags.iter().any(|d| d.contains("no such table or alias")), + "excluded in VALUES should be unknown_qualifier; got {diags:?}", + ); + } + + #[test] + fn excluded_in_values_flagged_even_when_do_update_present() { + // Strongest leak case: same statement uses `excluded` BOTH in + // VALUES (out of scope → flagged) and in DO UPDATE (in scope → + // resolves). The byte-range scoping must distinguish them. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id, name) values (excluded.id, 'x') on conflict (id) do update set name = excluded.name", + &schema, + ); + // Exactly the VALUES-side `excluded.id` is flagged; the + // DO UPDATE-side `excluded.name` resolves cleanly. + assert!( + diags.iter().any(|d| d.contains("no such table or alias")), + "VALUES-side excluded must still leak-flag; got {diags:?}", + ); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "DO UPDATE-side excluded.name must resolve (no unknown_column); got {diags:?}", + ); + } + #[test] fn insert_select_unknown_projection_column_is_error() { // ADR-0033 sub-phase 3c cross-cut: the Phase-2 diff --git a/tests/sql_insert.rs b/tests/sql_insert.rs index 9b44e13..10077ba 100644 --- a/tests/sql_insert.rs +++ b/tests/sql_insert.rs @@ -897,3 +897,139 @@ fn insert_select_returning_executes_and_returns_rows() { result.data.rows.iter().map(|r| r[1].clone()).collect(); assert!(bs.contains(&Some("x".to_string())) && bs.contains(&Some("y".to_string()))); } + +// ================================================================= +// Sub-phase 3h — UPSERT (ON CONFLICT … DO NOTHING / DO UPDATE) +// ================================================================= + +#[test] +fn conflict_target_columns_excluded_from_listed_columns() { + // DA gate: the ON CONFLICT (col, …) target uses a DISTINCT role + // from the inserted column list, so build_sql_insert's + // listed_columns (which drives shortid auto-fill) must NOT pick + // up the conflict-target columns. If it did, an omitted shortid + // would look "listed" and auto-fill would wrongly skip. + match parse_command("sqlinsert into t (name) values ('x') on conflict (id) do nothing") + .expect("parse upsert") + { + Command::SqlInsert { listed_columns, .. } => { + assert_eq!( + listed_columns, + vec!["name".to_string()], + "only the inserted column list, not the conflict target", + ); + } + other => panic!("expected SqlInsert, got {other:?}"), + } +} + +#[test] +fn autofill_upsert_real_conflict_preserves_clause_and_excluded() { + // DA gate (stronger than autofill_preserves_on_conflict_clause, + // which can't tell a preserved clause from a dropped one because + // the generated id never conflicts). Here the table has a shortid + // PK (auto-filled) AND a UNIQUE `code`. The second insert reuses + // code 'A', so it hits a REAL conflict: with the ON CONFLICT + // clause preserved through the auto-fill rewrite it DO-UPDATEs via + // excluded; if the rewrite had dropped the clause it would raise a + // UNIQUE violation instead (the `.expect` would panic). + let (project, db, _dir) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "t".to_string(), + vec![ + ColumnSpec::new("id", Type::ShortId), + ColumnSpec { unique: true, ..ColumnSpec::new("code", Type::Text) }, + ColumnSpec::new("label", Type::Text), + ], + vec!["id".to_string()], + None, + )) + .expect("create table with shortid pk + unique code"); + run_sqlinsert(&db, &rt, "sqlinsert into t (code, label) values ('A', 'first')").expect("seed"); + let result = run_sqlinsert( + &db, + &rt, + "sqlinsert into t (code, label) values ('A', 'second') on conflict (code) do update set label = excluded.label", + ) + .expect("auto-filled UPSERT with a real conflict (clause preserved)"); + assert_eq!(result.rows_affected, 1, "the conflicting row was updated, not inserted"); + let rows = csv_rows(&project, "t"); + assert_eq!(rows.len(), 1, "still one row (DO UPDATE, not a second insert)"); + assert!(rows[0].iter().any(|c| c == "second"), "label updated via excluded: {rows:?}"); + assert!(!rows[0].iter().any(|c| c == "first"), "old label replaced: {rows:?}"); +} + +#[test] +fn on_conflict_do_nothing_keeps_existing_row() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_cols(&db, &rt, "t", &[("id", Type::Int), ("name", Type::Text)], &["id"]); + run_sqlinsert(&db, &rt, "sqlinsert into t (id, name) values (1, 'orig')").expect("seed"); + let result = run_sqlinsert( + &db, + &rt, + "sqlinsert into t (id, name) values (1, 'new') on conflict (id) do nothing", + ) + .expect("ON CONFLICT DO NOTHING runs"); + assert_eq!(result.rows_affected, 0, "conflicting row left untouched"); + let rows = csv_rows(&project, "t"); + assert_eq!(rows.len(), 1, "still one row"); + assert!(rows[0].iter().any(|c| c == "orig"), "original value kept: {rows:?}"); +} + +#[test] +fn on_conflict_do_update_applies_excluded() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_cols(&db, &rt, "t", &[("id", Type::Int), ("name", Type::Text)], &["id"]); + run_sqlinsert(&db, &rt, "sqlinsert into t (id, name) values (1, 'orig')").expect("seed"); + let result = run_sqlinsert( + &db, + &rt, + "sqlinsert into t (id, name) values (1, 'new') on conflict (id) do update set name = excluded.name", + ) + .expect("ON CONFLICT DO UPDATE runs"); + assert_eq!(result.rows_affected, 1, "the conflicting row was updated"); + let rows = csv_rows(&project, "t"); + assert_eq!(rows.len(), 1, "still one row (updated, not inserted)"); + assert!(rows[0].iter().any(|c| c == "new"), "row updated to excluded.name: {rows:?}"); +} + +#[test] +fn on_conflict_do_nothing_without_target() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + create_cols(&db, &rt, "t", &[("id", Type::Int), ("name", Type::Text)], &["id"]); + run_sqlinsert(&db, &rt, "sqlinsert into t (id, name) values (1, 'orig')").expect("seed"); + let result = run_sqlinsert( + &db, + &rt, + "sqlinsert into t (id, name) values (1, 'x') on conflict do nothing", + ) + .expect("ON CONFLICT (no target) DO NOTHING runs"); + assert_eq!(result.rows_affected, 0, "any-conflict do-nothing absorbed the duplicate"); +} + +#[test] +fn autofill_preserves_on_conflict_clause() { + // DA gate / landmine: an INSERT with an omitted shortid PK AND an + // ON CONFLICT tail. The auto-fill rewrite reconstructs INSERT … + // VALUES …; row_source must stop before ON CONFLICT (so the + // materialisation prepare doesn't choke) and the rewrite must + // re-append the clause (so it isn't silently dropped). The fresh + // generated id won't conflict, so the row inserts — the point is + // the rewrite doesn't prepare-fail and the clause survives. + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_cols(&db, &rt, "t", &[("id", Type::ShortId), ("label", Type::Text)], &["id"]); + let result = run_sqlinsert( + &db, + &rt, + "sqlinsert into t (label) values ('x') on conflict (id) do nothing", + ) + .expect("auto-fill INSERT with ON CONFLICT runs (clause preserved)"); + assert_eq!(result.rows_affected, 1, "row inserted with a generated id"); + let rows = csv_rows(&project, "t"); + assert_eq!(rows.len(), 1, "one row landed"); +}