walker: 3i /runda DA round — fix INSERT-target scope confusion (6 cases)

A focused adversarial round (/runda) found a single root cause with
six manifestations, all pre-existing latent false-positives: the
INSERT target is recorded under the `insert_target_table` role, not
as a diagnostic `bindings` entry, so refs that should resolve to the
*target* row were instead checked against the statement's bindings —
which for an `INSERT … SELECT` are the SELECT's *source* tables (the
wrong scope), producing false unknown_column / unknown_qualifier
diagnostics on valid input.

New helper bare_ref_insert_target re-scopes a ref onto the INSERT
target when it sits in a target-referencing region: the UPSERT
DO UPDATE action (byte range) or an INSERT's RETURNING list. Applied
across every ref form:

  1. INSERT column list (insert_column) — validated vs the target,
     skipped in the bare-column branch (was checked vs SELECT source).
  2. ON CONFLICT (col) target (conflict_target_column) — same.
  3. DO UPDATE SET RHS / WHERE bare refs — validated vs the target
     (also closes the #12 residual for VALUES upserts).
  4. RETURNING bare refs — validated vs the target.
  5. target-qualified refs `t.col` in DO UPDATE / RETURNING — the
     unified `excluded` / target-qualifier resolution in the
     qualified-ref None branch.
  6. target-qualified star `t.*` in RETURNING — same re-scoping in
     the qualified-star handler.

Each fix has a positive (resolves cleanly) and negative (genuinely
unknown column / unrelated qualifier still flagged) test; the
`excluded` leak guard and all prior diagnostics remain green.
1613 pass / 0 fail / 1 ignored. Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-22 22:23:15 +00:00
parent 4fa0aa06e9
commit 8d17583fe0
+375 -22
View File
@@ -602,6 +602,23 @@ fn schema_existence_diagnostics(
// `unknown_qualifier` diagnostic fires (the leak guard). The
// range runs from the `update` keyword (the one after
// `conflict`) to the `RETURNING` boundary (or end).
// Whether this statement is an INSERT/UPSERT (has an
// `insert_target_table`). When set, `insert_column` and
// `update_set_column` idents are validated against that target
// by `dml_target_column_diagnostics`, NOT against the flat
// `bindings` below — for an `INSERT … SELECT` those bindings are
// the SELECT's source tables, the wrong scope (a target-only
// column would be falsely flagged against the source).
let has_insert_target = path.items.iter().any(|it| {
matches!(
it.kind,
MatchedKind::Ident {
source: IdentSource::Tables,
role: "insert_target_table",
}
)
});
let upsert_excluded: Option<(String, usize, usize)> = {
let target = path.items.iter().find_map(|it| match it.kind {
MatchedKind::Ident {
@@ -634,6 +651,30 @@ fn schema_existence_diagnostics(
}
};
// The INSERT target + RETURNING-clause start, for re-scoping
// bare RETURNING refs onto the target (they reference the
// inserted row, not an `INSERT … SELECT`'s source). `None` for a
// top-level UPDATE/DELETE (whose target is already a binding).
let insert_target_name: Option<String> = if has_insert_target {
path.items
.iter()
.find_map(|it| match it.kind {
MatchedKind::Ident {
source: IdentSource::Tables,
role: "insert_target_table",
} => Some(it.text.clone()),
_ => None,
})
.filter(|t| schema_has_table(schema, t))
} else {
None
};
let returning_pos: Option<usize> = path
.items
.iter()
.find(|it| matches!(it.kind, MatchedKind::Word("returning")))
.map(|it| it.span.0);
// Track which CTE names have already been seen, for
// duplicate detection (a separate single-pass walk; emits
// the diagnostic on the second occurrence).
@@ -701,8 +742,20 @@ fn schema_existence_diagnostics(
if role == "qualified_star_qualifier" {
// The `t` in `t.*`. Resolve against bindings
// (populated by the pre-pass); emit
// `unknown_qualifier` if it doesn't resolve.
if resolve_qualifier(&bindings, &item.text).is_none()
// `unknown_qualifier` if it doesn't resolve. A
// target-qualified `<target>.*` inside an INSERT's
// RETURNING (or DO UPDATE) range resolves to the
// INSERT target, which isn't a binding (same
// re-scoping as bare/qualified refs).
let resolves_to_target = bare_ref_insert_target(
item.span.0,
upsert_excluded.as_ref(),
insert_target_name.as_deref(),
returning_pos,
)
.is_some_and(|t| t.eq_ignore_ascii_case(&item.text));
if !resolves_to_target
&& resolve_qualifier(&bindings, &item.text).is_none()
&& !cte_names_contains(&cte_names, &item.text)
{
diagnostics.push(Diagnostic {
@@ -765,25 +818,33 @@ fn schema_existence_diagnostics(
}
}
None => {
// ADR-0033 §9: `excluded.col` inside
// a DO UPDATE action resolves to the
// target table's columns. Scoped by
// byte-range so it stays unknown (and
// flagged below) in VALUES / RETURNING
// / non-upsert statements (leak guard).
let in_excluded_scope = upsert_excluded
.as_ref()
.is_some_and(|(_, start, end)| {
qual.eq_ignore_ascii_case("excluded")
&& qual_span.0 >= *start
&& qual_span.0 < *end
});
if let Some((target, _, _)) =
upsert_excluded.as_ref().filter(|_| in_excluded_scope)
{
// A qualifier resolves to the INSERT
// target's columns when it is, within a
// target-scoped region (the DO UPDATE
// action — ADR-0033 §9 — or an INSERT's
// RETURNING list):
// - `excluded` (the would-be-inserted
// row, mirroring the target), or
// - the target table's own name (a
// target-qualified ref).
// Both are byte-range-scoped, so they
// stay unresolved (and flagged below) in
// a VALUES tuple, an `INSERT … SELECT`'s
// own scope, or a non-upsert statement
// (the leak guard).
let scope_target = bare_ref_insert_target(
qual_span.0,
upsert_excluded.as_ref(),
insert_target_name.as_deref(),
returning_pos,
)
.filter(|t| {
qual.eq_ignore_ascii_case("excluded")
|| qual.eq_ignore_ascii_case(t)
});
if let Some(target) = scope_target {
// Validate the column against the
// target's columns (excluded mirrors
// the target row's shape).
// target's columns.
if !schema_has_column(schema, target, &item.text) {
diagnostics.push(Diagnostic {
severity: Severity::Error,
@@ -792,7 +853,7 @@ fn schema_existence_diagnostics(
"diagnostic.unknown_column",
&[
("name", &item.text as &dyn std::fmt::Display),
("table", target as &dyn std::fmt::Display),
("table", &target as &dyn std::fmt::Display),
],
),
});
@@ -826,6 +887,53 @@ fn schema_existence_diagnostics(
// check on the next iteration.
pending_qualifier =
Some((item.text.clone(), item.span));
} else if let Some(target) = (role == "sql_expr_ident")
.then(|| {
bare_ref_insert_target(
item.span.0,
upsert_excluded.as_ref(),
insert_target_name.as_deref(),
returning_pos,
)
})
.flatten()
{
// A bare column ref inside the UPSERT DO UPDATE
// action or an INSERT's RETURNING list references
// the target row's columns (and `excluded`, handled
// on the qualified path), NOT the row source's
// tables. Validate it against the target — for an
// `INSERT … SELECT …` the flat `bindings` are the
// SELECT's sources, so the bare-column branch below
// would check the wrong scope. (Also covers the
// VALUES upsert case where `bindings` is empty.)
if !schema_has_column(schema, target, &item.text) {
diagnostics.push(Diagnostic {
severity: Severity::Error,
span: item.span,
message: crate::friendly::translate(
"diagnostic.unknown_column",
&[
("name", &item.text as &dyn std::fmt::Display),
("table", &target as &dyn std::fmt::Display),
],
),
});
}
} else if role == "insert_column"
|| role == "conflict_target_column"
|| (role == "update_set_column" && has_insert_target)
{
// The INSERT column list, the ON CONFLICT target,
// and the UPSERT DO UPDATE SET columns are all
// validated against the INSERT target by
// `dml_target_column_diagnostics`. They must NOT be
// checked against the flat `bindings` here: for
// `INSERT … SELECT` those are the SELECT's source
// tables, so a target-only column would be falsely
// flagged. (A top-level UPDATE's `update_set_column`
// has no insert target and so still flows through
// the bare-column check below.)
} else if !bindings.is_empty() {
// Bare column reference. Count which bindings
// contain it (case-insensitive). CTE-binding
@@ -1351,7 +1459,7 @@ fn dml_target_column_diagnostics(
for item in &path.items {
let MatchedKind::Ident {
source: IdentSource::Columns,
role: "insert_column" | "update_set_column",
role: "insert_column" | "update_set_column" | "conflict_target_column",
} = item.kind
else {
continue;
@@ -1373,6 +1481,41 @@ fn dml_target_column_diagnostics(
diagnostics
}
/// The INSERT target a bare column ref should resolve against, if
/// the ref sits in a region that references the *target* row rather
/// than the statement's `FROM`/row-source tables (ADR-0033 §5/§9,
/// sub-phase 3i `/runda` round):
///
/// - the UPSERT `DO UPDATE` action (`upsert` = `(target, start,
/// end)`), and
/// - an INSERT's `RETURNING` list (`insert_target` + `returning_pos`).
///
/// For an `INSERT … SELECT …` the flat `bindings` are the SELECT's
/// source tables, so without this re-scoping a target-only column in
/// `DO UPDATE` / `RETURNING` is falsely flagged against the source.
/// Returns `None` for refs that should use the normal binding scope
/// (the row source's own projection / WHERE, a top-level
/// UPDATE/DELETE whose target is already a binding, etc.).
fn bare_ref_insert_target<'a>(
span_start: usize,
upsert: Option<&'a (String, usize, usize)>,
insert_target: Option<&'a str>,
returning_pos: Option<usize>,
) -> Option<&'a str> {
if let Some((t, start, end)) = upsert
&& span_start >= *start
&& span_start < *end
{
return Some(t);
}
if let (Some(t), Some(rs)) = (insert_target, returning_pos)
&& span_start >= rs
{
return Some(t);
}
None
}
/// `not_null_missing` WARNING (ADR-0033 §8.3, sub-phase 3i).
///
/// A SQL `INSERT` with an explicit `(column_name_list)` that omits a
@@ -4386,6 +4529,216 @@ mod tests {
);
}
#[test]
fn insert_select_target_only_column_not_misflagged() {
// Regression (/runda DA round): `name` is a column of the
// INSERT target `a` but NOT of the SELECT source `b`. The
// insert_column must be validated against the target, not the
// SELECT's source tables — otherwise the flat-scope
// bare-column branch falsely flags it against `b`.
let schema = two_table_schema(); // a(id,name), b(id,total)
let diags = diag_keys("sqlinsert into a (name) select total from b", &schema);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"target column `name` must not be flagged against the SELECT source; got {diags:?}",
);
}
#[test]
fn conflict_target_in_insert_select_validated_against_target() {
// Regression (/runda DA round): ON CONFLICT (name) where
// `name` is the target `a`'s column but not the SELECT source
// `b`'s — must validate against the target, not be flagged
// against `b`.
let schema = two_table_schema();
let diags = diag_keys(
"sqlinsert into a (id) select id from b on conflict (name) do nothing",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"conflict target `name` (a's column) must not flag against `b`; got {diags:?}",
);
}
#[test]
fn do_update_bare_rhs_in_insert_select_validated_against_target() {
// Regression (/runda DA round): a DO UPDATE SET RHS bare ref
// (`name`, a target column) in an INSERT … SELECT … ON CONFLICT
// resolves to the target `a`, not the SELECT source `b`.
let schema = two_table_schema();
let diags = diag_keys(
"sqlinsert into a (id) select id from b on conflict (id) do update set name = name",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"DO UPDATE bare ref `name` must not flag against `b`; got {diags:?}",
);
}
#[test]
fn do_update_bare_ref_unknown_column_is_flagged() {
// Closes the #12 residual: a DO UPDATE bare ref to a column
// that doesn't exist on the target IS flagged (validated
// against the target). Covers the SET RHS and the WHERE.
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]);
let set_rhs = diag_keys(
"sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = nope",
&schema,
);
assert!(
set_rhs.iter().any(|d| d.contains("no such column")),
"unknown DO UPDATE SET RHS ref should be flagged; got {set_rhs:?}",
);
let where_ref = diag_keys(
"sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = 'y' where nope = 1",
&schema,
);
assert!(
where_ref.iter().any(|d| d.contains("no such column")),
"unknown DO UPDATE WHERE ref should be flagged; got {where_ref:?}",
);
}
#[test]
fn returning_ref_in_insert_select_validated_against_target() {
// Regression (/runda DA round): RETURNING references the
// INSERTED (target) row. In an INSERT … SELECT … RETURNING, a
// target-only column must resolve to the target `a`, not be
// flagged against the SELECT source `b`.
let schema = two_table_schema(); // a(id,name), b(id,total)
let diags = diag_keys("sqlinsert into a (id) select id from b returning name", &schema);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"RETURNING `name` (a's column) must not flag against `b`; got {diags:?}",
);
}
#[test]
fn target_qualified_star_in_returning_resolves_against_target() {
// Regression (/runda DA round): `returning a.*` (target-
// qualified star) in an INSERT … SELECT resolves to the
// target `a`, not flagged against the SELECT source `b`.
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning a.*", &schema);
assert!(
diags.is_empty(),
"target-qualified `a.*` in RETURNING must resolve cleanly; got {diags:?}",
);
}
#[test]
fn unrelated_qualified_star_in_returning_still_flagged() {
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning zzz.*", &schema);
assert!(
diags.iter().any(|d| d.contains("no such table or alias")),
"unrelated `zzz.*` qualifier should still flag; got {diags:?}",
);
}
#[test]
fn target_qualified_ref_in_returning_resolves_against_target() {
// Regression (/runda DA round): a target-qualified ref
// `a.name` in an INSERT … SELECT … RETURNING resolves to the
// target `a`, not flagged as an unknown qualifier (a is the
// target, b is the SELECT source).
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning a.name", &schema);
assert!(
diags.is_empty(),
"target-qualified RETURNING ref must resolve cleanly; got {diags:?}",
);
}
#[test]
fn target_qualified_ref_unknown_column_still_flagged() {
// `a.nope` — a is the target but nope isn't its column.
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning a.nope", &schema);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"unknown column under the target qualifier should flag; got {diags:?}",
);
}
#[test]
fn unrelated_qualifier_in_returning_still_flagged() {
// A qualifier that's neither `excluded` nor the target is
// still an unknown qualifier (the leak guard holds).
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning zzz.name", &schema);
assert!(
diags.iter().any(|d| d.contains("no such table or alias")),
"unrelated qualifier should still flag; got {diags:?}",
);
}
#[test]
fn returning_ref_unknown_column_is_flagged() {
// Flip side: a RETURNING ref to a column on neither table is
// flagged (against the INSERT target).
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id) select id from b returning nope", &schema);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"unknown RETURNING column should be flagged; got {diags:?}",
);
}
#[test]
fn returning_ref_in_plain_insert_validated_against_target() {
// The VALUES INSERT … RETURNING case (no bindings at all):
// a valid target column is silent, an unknown one flags.
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]);
let ok = diag_keys("sqlinsert into t (a) values (1) returning b", &schema);
assert!(!ok.iter().any(|d| d.contains("no such column")), "got {ok:?}");
let bad = diag_keys("sqlinsert into t (a) values (1) returning nope", &schema);
assert!(bad.iter().any(|d| d.contains("no such column")), "got {bad:?}");
}
#[test]
fn do_update_bare_ref_known_column_silent() {
// And a valid bare ref to a target column is silent.
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]);
let diags = diag_keys(
"sqlinsert into t (a, b) values (1, 'x') on conflict (a) do update set b = 'y' where a > 0",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"valid DO UPDATE refs must be silent; got {diags:?}",
);
}
#[test]
fn conflict_target_unknown_column_is_flagged() {
// The flip side: a conflict target naming a column that is in
// neither table is flagged (against the INSERT target).
let schema = two_table_schema();
let diags = diag_keys(
"sqlinsert into a (id) values (1) on conflict (nope) do nothing",
&schema,
);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"unknown conflict-target column should be flagged; got {diags:?}",
);
}
#[test]
fn insert_select_genuinely_unknown_insert_column_still_flagged() {
// The flip side: a column in neither the target nor the source
// is still flagged (against the target, by the dedicated pass).
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (nope) select total from b", &schema);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"a genuinely unknown INSERT column should still be flagged; got {diags:?}",
);
}
#[test]
fn insert_column_list_known_columns_silent() {
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Text)]);