From 8d17583fe0b4097f9e3e870268afc43ea8fd7f68 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 22 May 2026 22:23:15 +0000 Subject: [PATCH] =?UTF-8?q?walker:=203i=20/runda=20DA=20round=20=E2=80=94?= =?UTF-8?q?=20fix=20INSERT-target=20scope=20confusion=20(6=20cases)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A focused adversarial round (/runda) found a single root cause with six manifestations, all pre-existing latent false-positives: the INSERT target is recorded under the `insert_target_table` role, not as a diagnostic `bindings` entry, so refs that should resolve to the *target* row were instead checked against the statement's bindings — which for an `INSERT … SELECT` are the SELECT's *source* tables (the wrong scope), producing false unknown_column / unknown_qualifier diagnostics on valid input. New helper bare_ref_insert_target re-scopes a ref onto the INSERT target when it sits in a target-referencing region: the UPSERT DO UPDATE action (byte range) or an INSERT's RETURNING list. Applied across every ref form: 1. INSERT column list (insert_column) — validated vs the target, skipped in the bare-column branch (was checked vs SELECT source). 2. ON CONFLICT (col) target (conflict_target_column) — same. 3. DO UPDATE SET RHS / WHERE bare refs — validated vs the target (also closes the #12 residual for VALUES upserts). 4. RETURNING bare refs — validated vs the target. 5. target-qualified refs `t.col` in DO UPDATE / RETURNING — the unified `excluded` / target-qualifier resolution in the qualified-ref None branch. 6. target-qualified star `t.*` in RETURNING — same re-scoping in the qualified-star handler. Each fix has a positive (resolves cleanly) and negative (genuinely unknown column / unrelated qualifier still flagged) test; the `excluded` leak guard and all prior diagnostics remain green. 1613 pass / 0 fail / 1 ignored. Clippy clean. --- src/dsl/walker/mod.rs | 397 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 375 insertions(+), 22 deletions(-) diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index d09ab3f..60e5115 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -602,6 +602,23 @@ fn schema_existence_diagnostics( // `unknown_qualifier` diagnostic fires (the leak guard). The // range runs from the `update` keyword (the one after // `conflict`) to the `RETURNING` boundary (or end). + // Whether this statement is an INSERT/UPSERT (has an + // `insert_target_table`). When set, `insert_column` and + // `update_set_column` idents are validated against that target + // by `dml_target_column_diagnostics`, NOT against the flat + // `bindings` below — for an `INSERT … SELECT` those bindings are + // the SELECT's source tables, the wrong scope (a target-only + // column would be falsely flagged against the source). + let has_insert_target = path.items.iter().any(|it| { + matches!( + it.kind, + MatchedKind::Ident { + source: IdentSource::Tables, + role: "insert_target_table", + } + ) + }); + let upsert_excluded: Option<(String, usize, usize)> = { let target = path.items.iter().find_map(|it| match it.kind { MatchedKind::Ident { @@ -634,6 +651,30 @@ fn schema_existence_diagnostics( } }; + // The INSERT target + RETURNING-clause start, for re-scoping + // bare RETURNING refs onto the target (they reference the + // inserted row, not an `INSERT … SELECT`'s source). `None` for a + // top-level UPDATE/DELETE (whose target is already a binding). + let insert_target_name: Option = if has_insert_target { + path.items + .iter() + .find_map(|it| match it.kind { + MatchedKind::Ident { + source: IdentSource::Tables, + role: "insert_target_table", + } => Some(it.text.clone()), + _ => None, + }) + .filter(|t| schema_has_table(schema, t)) + } else { + None + }; + let returning_pos: Option = path + .items + .iter() + .find(|it| matches!(it.kind, MatchedKind::Word("returning"))) + .map(|it| it.span.0); + // Track which CTE names have already been seen, for // duplicate detection (a separate single-pass walk; emits // the diagnostic on the second occurrence). @@ -701,8 +742,20 @@ fn schema_existence_diagnostics( if role == "qualified_star_qualifier" { // The `t` in `t.*`. Resolve against bindings // (populated by the pre-pass); emit - // `unknown_qualifier` if it doesn't resolve. - if resolve_qualifier(&bindings, &item.text).is_none() + // `unknown_qualifier` if it doesn't resolve. A + // target-qualified `.*` inside an INSERT's + // RETURNING (or DO UPDATE) range resolves to the + // INSERT target, which isn't a binding (same + // re-scoping as bare/qualified refs). + let resolves_to_target = bare_ref_insert_target( + item.span.0, + upsert_excluded.as_ref(), + insert_target_name.as_deref(), + returning_pos, + ) + .is_some_and(|t| t.eq_ignore_ascii_case(&item.text)); + if !resolves_to_target + && resolve_qualifier(&bindings, &item.text).is_none() && !cte_names_contains(&cte_names, &item.text) { diagnostics.push(Diagnostic { @@ -765,25 +818,33 @@ fn schema_existence_diagnostics( } } None => { - // ADR-0033 §9: `excluded.col` inside - // a DO UPDATE action resolves to the - // target table's columns. Scoped by - // byte-range so it stays unknown (and - // flagged below) in VALUES / RETURNING - // / non-upsert statements (leak guard). - let in_excluded_scope = upsert_excluded - .as_ref() - .is_some_and(|(_, start, end)| { - qual.eq_ignore_ascii_case("excluded") - && qual_span.0 >= *start - && qual_span.0 < *end - }); - if let Some((target, _, _)) = - upsert_excluded.as_ref().filter(|_| in_excluded_scope) - { + // A qualifier resolves to the INSERT + // target's columns when it is, within a + // target-scoped region (the DO UPDATE + // action — ADR-0033 §9 — or an INSERT's + // RETURNING list): + // - `excluded` (the would-be-inserted + // row, mirroring the target), or + // - the target table's own name (a + // target-qualified ref). + // Both are byte-range-scoped, so they + // stay unresolved (and flagged below) in + // a VALUES tuple, an `INSERT … SELECT`'s + // own scope, or a non-upsert statement + // (the leak guard). + let scope_target = bare_ref_insert_target( + qual_span.0, + upsert_excluded.as_ref(), + insert_target_name.as_deref(), + returning_pos, + ) + .filter(|t| { + qual.eq_ignore_ascii_case("excluded") + || qual.eq_ignore_ascii_case(t) + }); + if let Some(target) = scope_target { // Validate the column against the - // target's columns (excluded mirrors - // the target row's shape). + // target's columns. if !schema_has_column(schema, target, &item.text) { diagnostics.push(Diagnostic { severity: Severity::Error, @@ -792,7 +853,7 @@ fn schema_existence_diagnostics( "diagnostic.unknown_column", &[ ("name", &item.text as &dyn std::fmt::Display), - ("table", target as &dyn std::fmt::Display), + ("table", &target as &dyn std::fmt::Display), ], ), }); @@ -826,6 +887,53 @@ fn schema_existence_diagnostics( // check on the next iteration. pending_qualifier = Some((item.text.clone(), item.span)); + } else if let Some(target) = (role == "sql_expr_ident") + .then(|| { + bare_ref_insert_target( + item.span.0, + upsert_excluded.as_ref(), + insert_target_name.as_deref(), + returning_pos, + ) + }) + .flatten() + { + // A bare column ref inside the UPSERT DO UPDATE + // action or an INSERT's RETURNING list references + // the target row's columns (and `excluded`, handled + // on the qualified path), NOT the row source's + // tables. Validate it against the target — for an + // `INSERT … SELECT …` the flat `bindings` are the + // SELECT's sources, so the bare-column branch below + // would check the wrong scope. (Also covers the + // VALUES upsert case where `bindings` is empty.) + if !schema_has_column(schema, target, &item.text) { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.unknown_column", + &[ + ("name", &item.text as &dyn std::fmt::Display), + ("table", &target as &dyn std::fmt::Display), + ], + ), + }); + } + } else if role == "insert_column" + || role == "conflict_target_column" + || (role == "update_set_column" && has_insert_target) + { + // The INSERT column list, the ON CONFLICT target, + // and the UPSERT DO UPDATE SET columns are all + // validated against the INSERT target by + // `dml_target_column_diagnostics`. They must NOT be + // checked against the flat `bindings` here: for + // `INSERT … SELECT` those are the SELECT's source + // tables, so a target-only column would be falsely + // flagged. (A top-level UPDATE's `update_set_column` + // has no insert target and so still flows through + // the bare-column check below.) } else if !bindings.is_empty() { // Bare column reference. Count which bindings // contain it (case-insensitive). CTE-binding @@ -1351,7 +1459,7 @@ fn dml_target_column_diagnostics( for item in &path.items { let MatchedKind::Ident { source: IdentSource::Columns, - role: "insert_column" | "update_set_column", + role: "insert_column" | "update_set_column" | "conflict_target_column", } = item.kind else { continue; @@ -1373,6 +1481,41 @@ fn dml_target_column_diagnostics( diagnostics } +/// The INSERT target a bare column ref should resolve against, if +/// the ref sits in a region that references the *target* row rather +/// than the statement's `FROM`/row-source tables (ADR-0033 §5/§9, +/// sub-phase 3i `/runda` round): +/// +/// - the UPSERT `DO UPDATE` action (`upsert` = `(target, start, +/// end)`), and +/// - an INSERT's `RETURNING` list (`insert_target` + `returning_pos`). +/// +/// For an `INSERT … SELECT …` the flat `bindings` are the SELECT's +/// source tables, so without this re-scoping a target-only column in +/// `DO UPDATE` / `RETURNING` is falsely flagged against the source. +/// Returns `None` for refs that should use the normal binding scope +/// (the row source's own projection / WHERE, a top-level +/// UPDATE/DELETE whose target is already a binding, etc.). +fn bare_ref_insert_target<'a>( + span_start: usize, + upsert: Option<&'a (String, usize, usize)>, + insert_target: Option<&'a str>, + returning_pos: Option, +) -> Option<&'a str> { + if let Some((t, start, end)) = upsert + && span_start >= *start + && span_start < *end + { + return Some(t); + } + if let (Some(t), Some(rs)) = (insert_target, returning_pos) + && span_start >= rs + { + return Some(t); + } + None +} + /// `not_null_missing` WARNING (ADR-0033 §8.3, sub-phase 3i). /// /// A SQL `INSERT` with an explicit `(column_name_list)` that omits a @@ -4386,6 +4529,216 @@ mod tests { ); } + #[test] + fn insert_select_target_only_column_not_misflagged() { + // Regression (/runda DA round): `name` is a column of the + // INSERT target `a` but NOT of the SELECT source `b`. The + // insert_column must be validated against the target, not the + // SELECT's source tables — otherwise the flat-scope + // bare-column branch falsely flags it against `b`. + let schema = two_table_schema(); // a(id,name), b(id,total) + let diags = diag_keys("sqlinsert into a (name) select total from b", &schema); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "target column `name` must not be flagged against the SELECT source; got {diags:?}", + ); + } + + #[test] + fn conflict_target_in_insert_select_validated_against_target() { + // Regression (/runda DA round): ON CONFLICT (name) where + // `name` is the target `a`'s column but not the SELECT source + // `b`'s — must validate against the target, not be flagged + // against `b`. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id) select id from b on conflict (name) do nothing", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "conflict target `name` (a's column) must not flag against `b`; got {diags:?}", + ); + } + + #[test] + fn do_update_bare_rhs_in_insert_select_validated_against_target() { + // Regression (/runda DA round): a DO UPDATE SET RHS bare ref + // (`name`, a target column) in an INSERT … SELECT … ON CONFLICT + // resolves to the target `a`, not the SELECT source `b`. + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id) select id from b on conflict (id) do update set name = name", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "DO UPDATE bare ref `name` must not flag against `b`; got {diags:?}", + ); + } + + #[test] + fn do_update_bare_ref_unknown_column_is_flagged() { + // Closes the #12 residual: a DO UPDATE bare ref to a column + // that doesn't exist on the target IS flagged (validated + // against the target). Covers the SET RHS and the WHERE. + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]); + let set_rhs = diag_keys( + "sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = nope", + &schema, + ); + assert!( + set_rhs.iter().any(|d| d.contains("no such column")), + "unknown DO UPDATE SET RHS ref should be flagged; got {set_rhs:?}", + ); + let where_ref = diag_keys( + "sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = 'y' where nope = 1", + &schema, + ); + assert!( + where_ref.iter().any(|d| d.contains("no such column")), + "unknown DO UPDATE WHERE ref should be flagged; got {where_ref:?}", + ); + } + + #[test] + fn returning_ref_in_insert_select_validated_against_target() { + // Regression (/runda DA round): RETURNING references the + // INSERTED (target) row. In an INSERT … SELECT … RETURNING, a + // target-only column must resolve to the target `a`, not be + // flagged against the SELECT source `b`. + let schema = two_table_schema(); // a(id,name), b(id,total) + let diags = diag_keys("sqlinsert into a (id) select id from b returning name", &schema); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "RETURNING `name` (a's column) must not flag against `b`; got {diags:?}", + ); + } + + #[test] + fn target_qualified_star_in_returning_resolves_against_target() { + // Regression (/runda DA round): `returning a.*` (target- + // qualified star) in an INSERT … SELECT resolves to the + // target `a`, not flagged against the SELECT source `b`. + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning a.*", &schema); + assert!( + diags.is_empty(), + "target-qualified `a.*` in RETURNING must resolve cleanly; got {diags:?}", + ); + } + + #[test] + fn unrelated_qualified_star_in_returning_still_flagged() { + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning zzz.*", &schema); + assert!( + diags.iter().any(|d| d.contains("no such table or alias")), + "unrelated `zzz.*` qualifier should still flag; got {diags:?}", + ); + } + + #[test] + fn target_qualified_ref_in_returning_resolves_against_target() { + // Regression (/runda DA round): a target-qualified ref + // `a.name` in an INSERT … SELECT … RETURNING resolves to the + // target `a`, not flagged as an unknown qualifier (a is the + // target, b is the SELECT source). + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning a.name", &schema); + assert!( + diags.is_empty(), + "target-qualified RETURNING ref must resolve cleanly; got {diags:?}", + ); + } + + #[test] + fn target_qualified_ref_unknown_column_still_flagged() { + // `a.nope` — a is the target but nope isn't its column. + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning a.nope", &schema); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "unknown column under the target qualifier should flag; got {diags:?}", + ); + } + + #[test] + fn unrelated_qualifier_in_returning_still_flagged() { + // A qualifier that's neither `excluded` nor the target is + // still an unknown qualifier (the leak guard holds). + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning zzz.name", &schema); + assert!( + diags.iter().any(|d| d.contains("no such table or alias")), + "unrelated qualifier should still flag; got {diags:?}", + ); + } + + #[test] + fn returning_ref_unknown_column_is_flagged() { + // Flip side: a RETURNING ref to a column on neither table is + // flagged (against the INSERT target). + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id) select id from b returning nope", &schema); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "unknown RETURNING column should be flagged; got {diags:?}", + ); + } + + #[test] + fn returning_ref_in_plain_insert_validated_against_target() { + // The VALUES INSERT … RETURNING case (no bindings at all): + // a valid target column is silent, an unknown one flags. + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]); + let ok = diag_keys("sqlinsert into t (a) values (1) returning b", &schema); + assert!(!ok.iter().any(|d| d.contains("no such column")), "got {ok:?}"); + let bad = diag_keys("sqlinsert into t (a) values (1) returning nope", &schema); + assert!(bad.iter().any(|d| d.contains("no such column")), "got {bad:?}"); + } + + #[test] + fn do_update_bare_ref_known_column_silent() { + // And a valid bare ref to a target column is silent. + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]); + let diags = diag_keys( + "sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = 'y' where a > 0", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "valid DO UPDATE refs must be silent; got {diags:?}", + ); + } + + #[test] + fn conflict_target_unknown_column_is_flagged() { + // The flip side: a conflict target naming a column that is in + // neither table is flagged (against the INSERT target). + let schema = two_table_schema(); + let diags = diag_keys( + "sqlinsert into a (id) values (1) on conflict (nope) do nothing", + &schema, + ); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "unknown conflict-target column should be flagged; got {diags:?}", + ); + } + + #[test] + fn insert_select_genuinely_unknown_insert_column_still_flagged() { + // The flip side: a column in neither the target nor the source + // is still flagged (against the target, by the dedicated pass). + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (nope) select total from b", &schema); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "a genuinely unknown INSERT column should still be flagged; got {diags:?}", + ); + } + #[test] fn insert_column_list_known_columns_silent() { let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]);