walker: 2d.1 — projection-alias misplaced + compound-arity ERROR passes

Closes the two diagnostics deferred by sub-phase 2d that were not
attached to a user-approved deferral. `cte_arity_mismatch` stays
deferred — it depends on the §10.3 stage-2 CTE harvest, which IS a
user-approved deferral.

- `diagnostic.projection_alias_misplaced` (ADR-0032 §11.2): emitted
  when a projection alias is referenced from `WHERE` / `HAVING` /
  `GROUP BY`. `ORDER BY` references are allowed and silent. The
  pass is integrated into `schema_existence_diagnostics`: when a
  bare-column ref doesn't resolve to any binding's column but DOES
  match a projection alias in the current SELECT leg, the new
  diagnostic pre-empts the misleading `unknown_column` that would
  otherwise fire on the same span. Real-column-shadowed-by-alias
  cases (engine resolves to the table column) stay silent. Subquery
  scopes (paren depth > 0) keep their own implicit alias bag —
  outer aliases don't leak into inner WHERE.

- `diagnostic.compound_arity_mismatch` (ADR-0032 §11.2 / §11.7): a
  new MatchedPath-walking pass that counts projection items per
  SELECT leg by tallying top-level commas at the leg's own paren-
  depth, then compares adjacent legs across `UNION` / `UNION ALL` /
  `INTERSECT` / `EXCEPT` operators. The diagnostic anchors on the
  operator span. Per-depth book-keeping lets chained compound
  queries inside CTE bodies / subqueries report independently.
  Function-call argument commas (deeper depth) are correctly
  ignored.

Test totals: 1385 → 1399 passing (+14), 0 failed, 1 ignored.
Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-20 17:11:44 +00:00
parent 5d716e64ab
commit c20c6e05ca
+482
View File
@@ -533,8 +533,57 @@ fn schema_existence_diagnostics(
// qualifier of a `t.c` reference; consumed on iteration // qualifier of a `t.c` reference; consumed on iteration
// `i + 2` by the `sql_expr_qualified_ref` ident. // `i + 2` by the `sql_expr_qualified_ref` ident.
let mut pending_qualifier: Option<(String, (usize, usize))> = None; let mut pending_qualifier: Option<(String, (usize, usize))> = None;
// Projection-alias scope at top-level (ADR-0032 §11.2). Aliases
// declared in the current SELECT leg's projection list are
// visible to `ORDER BY` but NOT to `WHERE` / `HAVING` /
// `GROUP BY`. A flat matched-path single pass suffices: aliases
// are always written BEFORE these clauses are reached, and
// subquery levels (depth > 0) keep their own implicit scope.
let mut paren_depth: i32 = 0;
let mut current_clause: Option<&'static str> = None;
let mut leg_aliases: Vec<String> = Vec::new();
for (i, item) in path.items.iter().enumerate() { for (i, item) in path.items.iter().enumerate() {
// Maintain paren-depth, clause kind, and per-leg alias bag
// BEFORE dispatching on the item — these track context that
// the ident handlers below read.
let depth_at_item = paren_depth;
match &item.kind {
MatchedKind::Punct('(') => paren_depth += 1,
MatchedKind::Punct(')') => paren_depth -= 1,
MatchedKind::Word(w) if depth_at_item == 0 => match *w {
// A new SELECT leg (top-level or compound-leg
// start) resets the alias bag and clause kind so a
// following leg's projection / clauses are scoped
// to its own aliases only.
"select" => {
leg_aliases.clear();
current_clause = None;
}
"union" | "intersect" | "except" => {
leg_aliases.clear();
current_clause = None;
}
"where" => current_clause = Some("the WHERE clause"),
"having" => current_clause = Some("the HAVING clause"),
"group" => current_clause = Some("the GROUP BY clause"),
// ORDER BY / LIMIT / OFFSET / FROM are not forbidden
// contexts for alias references. Clearing here also
// protects ORDER BY from a sticky earlier clause.
"order" | "limit" | "offset" | "from" => {
current_clause = None;
}
_ => {}
},
MatchedKind::Ident {
source: IdentSource::NewName,
role: "projection_alias",
} if depth_at_item == 0 => {
leg_aliases.push(item.text.clone());
}
_ => {}
}
let MatchedKind::Ident { source, role } = item.kind else { let MatchedKind::Ident { source, role } = item.kind else {
continue; continue;
}; };
@@ -653,6 +702,35 @@ fn schema_existence_diagnostics(
.collect(); .collect();
match matched.len() { match matched.len() {
0 => { 0 => {
// ADR-0032 §11.2 — a top-level bare ref
// that doesn't resolve as a column but
// DOES match a projection alias in this
// leg is either misplaced (forbidden
// clause) or a valid alias reference
// (ORDER BY / LIMIT). Either way, the
// unknown_column diagnostic would
// mislead, so suppress it here.
let alias_match = depth_at_item == 0
&& leg_aliases.iter().any(|a| {
a.eq_ignore_ascii_case(&item.text)
});
if alias_match {
if let Some(clause) = current_clause {
diagnostics.push(Diagnostic {
severity: Severity::Error,
span: item.span,
message: crate::friendly::translate(
"diagnostic.projection_alias_misplaced",
&[
("alias", &item.text as &dyn std::fmt::Display),
("clause", &clause as &dyn std::fmt::Display),
],
),
});
}
// Allowed-clause alias ref — silent.
continue;
}
let table_arg = if bindings.len() == 1 { let table_arg = if bindings.len() == 1 {
bindings[0].table.clone() bindings[0].table.clone()
} else { } else {
@@ -736,6 +814,155 @@ fn cte_names_contains(names: &[String], candidate: &str) -> bool {
names.iter().any(|n| n.eq_ignore_ascii_case(candidate)) names.iter().any(|n| n.eq_ignore_ascii_case(candidate))
} }
/// Compound-query column-count mismatch ERROR pass (ADR-0032 §11.2
/// / §11.7). A `UNION` / `INTERSECT` / `EXCEPT` chain whose legs
/// have differing projection arities will be rejected by the
/// engine at execution time; this pass catches it pre-flight so
/// the learner sees the slot highlighted at the offending operator
/// instead of an engine string.
///
/// Counting strategy: the matched-path is flat, so we maintain a
/// per-depth book-keeping of in-progress legs. A leg starts at a
/// `SELECT` keyword and counts projection items as the number of
/// top-level commas (at the leg's own paren-depth) seen before
/// the first leg-end keyword (`FROM` / `WHERE` / `GROUP` / `HAVING`
/// / `ORDER` / `LIMIT` / `OFFSET`) or compound-leg keyword
/// (`UNION` / `INTERSECT` / `EXCEPT`) or matching `)` at the same
/// depth. Commas nested inside function calls or subqueries sit
/// at a deeper paren-depth and are ignored.
///
/// When a compound operator at depth `d` is encountered, the
/// just-completed leg's arity at depth `d` is stashed as a
/// pending comparand; the next leg's arity at depth `d` is
/// compared against it on that leg's close. The op token's span
/// is the diagnostic anchor — that's the join point the learner
/// pointed the chain at.
fn compound_arity_diagnostics(
path: &MatchedPath,
) -> Vec<outcome::Diagnostic> {
use outcome::{Diagnostic, MatchedKind, Severity};
use std::collections::HashMap;
struct LegState {
arity: usize,
in_projection: bool,
}
struct Pending {
op_text: &'static str,
op_span: (usize, usize),
prev_arity: usize,
}
let mut diagnostics = Vec::new();
let mut depth: i32 = 0;
let mut legs: HashMap<i32, LegState> = HashMap::new();
let mut pending: HashMap<i32, Pending> = HashMap::new();
let close_leg = |depth: i32,
legs: &mut HashMap<i32, LegState>,
pending: &mut HashMap<i32, Pending>,
diagnostics: &mut Vec<Diagnostic>|
-> Option<usize> {
let leg = legs.remove(&depth)?;
if let Some(p) = pending.remove(&depth)
&& p.prev_arity != leg.arity
{
diagnostics.push(Diagnostic {
severity: Severity::Error,
span: p.op_span,
message: crate::friendly::translate(
"diagnostic.compound_arity_mismatch",
&[
("op", &p.op_text as &dyn std::fmt::Display),
("left_n", &p.prev_arity as &dyn std::fmt::Display),
("right_n", &leg.arity as &dyn std::fmt::Display),
],
),
});
}
Some(leg.arity)
};
for item in &path.items {
let depth_at_item = depth;
match &item.kind {
MatchedKind::Punct('(') => {
depth += 1;
}
MatchedKind::Punct(')') => {
// Any leg at this depth closes before the depth
// pops. If a pending set-op was waiting, the leg
// it expected never arrived — drop the pending
// (the chain ended without a second leg, which
// the grammar shouldn't admit but is safer than
// panicking).
close_leg(depth, &mut legs, &mut pending, &mut diagnostics);
pending.remove(&depth);
depth -= 1;
}
MatchedKind::Punct(',') if depth_at_item == depth => {
if let Some(leg) = legs.get_mut(&depth)
&& leg.in_projection
{
leg.arity += 1;
}
}
MatchedKind::Word(w) if depth_at_item == depth => {
match *w {
"select" => {
// A leg already at this depth shouldn't
// happen (a previous compound op or
// close-paren would have removed it), but
// overwriting is safe — the new leg
// supersedes any stale state.
legs.insert(
depth,
LegState { arity: 1, in_projection: true },
);
}
"from" | "where" | "group" | "having" | "order"
| "limit" | "offset" => {
if let Some(leg) = legs.get_mut(&depth) {
leg.in_projection = false;
}
}
op_word @ ("union" | "intersect" | "except") => {
// Close the just-finished leg, comparing
// it against any pending set-op state at
// this depth.
if let Some(arity) = close_leg(
depth,
&mut legs,
&mut pending,
&mut diagnostics,
) {
pending.insert(
depth,
Pending {
op_text: op_word,
op_span: item.span,
prev_arity: arity,
},
);
}
}
_ => {}
}
}
_ => {}
}
}
// Drain any still-open legs at end-of-path. Same comparison
// as the close_leg helper does on `)`.
let depths: Vec<i32> = legs.keys().copied().collect();
for d in depths {
close_leg(d, &mut legs, &mut pending, &mut diagnostics);
}
diagnostics
}
/// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the /// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the
/// Phase-1 carry-over gap closure). /// Phase-1 carry-over gap closure).
/// ///
@@ -1565,6 +1792,11 @@ pub fn walk<'a>(
// sql_expr_ident roles so the two passes don't // sql_expr_ident roles so the two passes don't
// collide. // collide.
d.extend(sql_predicate_warnings(&path, ctx.schema)); d.extend(sql_predicate_warnings(&path, ctx.schema));
// ADR-0032 §11.2 / §11.7 — compound-arity ERROR pass.
// Catches `SELECT 1, 2 UNION SELECT 1` pre-flight so the
// operator slot is highlighted rather than the engine
// wording shown at execution time.
d.extend(compound_arity_diagnostics(&path));
d d
} else { } else {
Vec::new() Vec::new()
@@ -3577,4 +3809,254 @@ mod tests {
"expected like_numeric warning on JOIN ON; got {diags:?}", "expected like_numeric warning on JOIN ON; got {diags:?}",
); );
} }
// ---- ADR-0032 §11.2 — projection_alias_misplaced ----
#[test]
fn projection_alias_in_where_is_misplaced() {
// ADR-0032 §11.2 plan test: `SELECT a + b AS x FROM t
// WHERE x > 0` fires `projection_alias_misplaced`.
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select a + b as x from t where x > 0",
&schema,
);
assert!(
diags.iter().any(|d| {
d.contains("alias `x`") && d.contains("WHERE")
}),
"expected projection_alias_misplaced on WHERE; got {diags:?}",
);
// The unknown_column diagnostic must NOT also fire on
// the same span — the alias check pre-empts it.
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"unknown_column must be suppressed when alias matches; got {diags:?}",
);
}
#[test]
fn projection_alias_in_having_is_misplaced() {
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select a + b as x from t group by a having x > 0",
&schema,
);
assert!(
diags.iter().any(|d| {
d.contains("alias `x`") && d.contains("HAVING")
}),
"expected projection_alias_misplaced on HAVING; got {diags:?}",
);
}
#[test]
fn projection_alias_in_group_by_is_misplaced() {
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select a + b as x from t group by x",
&schema,
);
assert!(
diags.iter().any(|d| {
d.contains("alias `x`") && d.contains("GROUP BY")
}),
"expected projection_alias_misplaced on GROUP BY; got {diags:?}",
);
}
#[test]
fn projection_alias_in_order_by_is_allowed() {
// ADR-0032 §11.2 negative case: `… ORDER BY x` doesn't
// fire — aliases are bound by ORDER BY evaluation time.
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select a + b as x from t order by x",
&schema,
);
assert!(
diags.is_empty(),
"ORDER BY alias is allowed; got {diags:?}",
);
}
#[test]
fn real_column_shadowed_by_alias_is_not_misplaced() {
// `SELECT name AS id FROM t WHERE id > 0` — the alias
// `id` shadows nothing in the table, but a real column
// `id` exists. WHERE id refers to the table column (per
// SQL spec); the diagnostic must NOT fire.
let schema = schema_with(
"t",
&[("id", Type::Int), ("name", Type::Text)],
);
let diags = diag_keys(
"select name as id from t where id > 0",
&schema,
);
assert!(
diags.is_empty(),
"real-column WHERE ref must not be flagged as misplaced; got {diags:?}",
);
}
// ---- ADR-0032 §11.2 — compound_arity_mismatch ----
#[test]
fn compound_union_arity_mismatch_fires() {
// ADR-0032 §11.2 plan test: `SELECT 1, 2 UNION SELECT 1`
// fires `compound_arity_mismatch`.
let schema = schema_with("t", &[("a", Type::Int)]);
let diags = diag_keys("select 1, 2 union select 1", &schema);
assert!(
diags.iter().any(|d| {
d.contains("union")
&& d.contains("number of columns")
}),
"expected compound_arity_mismatch on UNION; got {diags:?}",
);
}
#[test]
fn compound_union_arity_match_no_diagnostic() {
// Matched-arity legs don't fire.
let schema = schema_with("t", &[("a", Type::Int)]);
let diags = diag_keys("select 1, 2 union select 3, 4", &schema);
assert!(
!diags.iter().any(|d| d.contains("number of columns")),
"matched arity should not fire; got {diags:?}",
);
}
#[test]
fn compound_intersect_arity_mismatch_fires() {
let schema = schema_with("t", &[("a", Type::Int)]);
let diags =
diag_keys("select 1 intersect select 1, 2", &schema);
assert!(
diags.iter().any(|d| {
d.contains("intersect")
&& d.contains("number of columns")
}),
"expected compound_arity_mismatch on INTERSECT; got {diags:?}",
);
}
#[test]
fn compound_except_arity_mismatch_fires() {
let schema = schema_with("t", &[("a", Type::Int)]);
let diags =
diag_keys("select 1, 2, 3 except select 1, 2", &schema);
assert!(
diags.iter().any(|d| {
d.contains("except")
&& d.contains("number of columns")
}),
"expected compound_arity_mismatch on EXCEPT; got {diags:?}",
);
}
#[test]
fn compound_arity_with_function_call_args_not_confused() {
// Function-call commas are at deeper depth — they must
// not be counted as projection items.
// `count(a, b)` is ONE projection item.
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select count(a, b) from t union select 1",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("number of columns")),
"function-call commas must not inflate arity; got {diags:?}",
);
}
#[test]
fn compound_union_all_arity_mismatch_fires() {
// `UNION ALL` keyword sequence is handled identically.
let schema = schema_with("t", &[("a", Type::Int)]);
let diags = diag_keys("select 1 union all select 1, 2", &schema);
assert!(
diags.iter().any(|d| {
d.contains("union")
&& d.contains("number of columns")
}),
"expected compound_arity_mismatch on UNION ALL; got {diags:?}",
);
}
#[test]
fn compound_three_leg_chain_emits_per_mismatch() {
// Chained legs at the same depth — each set-op compares
// its preceding leg against its following leg.
let schema = schema_with("t", &[("a", Type::Int)]);
let diags = diag_keys(
"select 1 union select 1, 2 union select 1",
&schema,
);
let mismatch_count = diags
.iter()
.filter(|d| d.contains("number of columns"))
.count();
assert_eq!(
mismatch_count, 2,
"expected two mismatch diagnostics; got {diags:?}",
);
}
#[test]
fn compound_arity_inside_cte_body_detected() {
// CTE body at depth 1 — the arity-mismatch is detected
// inside the parens, at the inner UNION.
let schema = schema_with("t", &[("a", Type::Int)]);
let diags = diag_keys(
"with x as (select 1, 2 union select 1) select * from x",
&schema,
);
assert!(
diags.iter().any(|d| {
d.contains("union")
&& d.contains("number of columns")
}),
"expected compound_arity_mismatch inside CTE body; got {diags:?}",
);
}
#[test]
fn alias_in_inner_subquery_does_not_affect_outer_aliases() {
// The inner `AS y` is inside parens (depth > 0) and
// must not be collected into the outer leg's alias bag.
// Outer `WHERE x` would otherwise (wrongly) match `y`
// — here we test that the outer `WHERE y` is flagged
// as unknown_column (not misplaced) because there is no
// alias `y` in the OUTER leg's projection.
let schema = schema_with(
"t",
&[("a", Type::Int), ("b", Type::Int)],
);
let diags = diag_keys(
"select (select a as y from t) from t where y > 0",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("misplaced")),
"inner-subquery alias must not affect outer scope; got {diags:?}",
);
}
} }