diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index fde9375..4e589f8 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -533,8 +533,57 @@ fn schema_existence_diagnostics( // qualifier of a `t.c` reference; consumed on iteration // `i + 2` by the `sql_expr_qualified_ref` ident. let mut pending_qualifier: Option<(String, (usize, usize))> = None; + // Projection-alias scope at top-level (ADR-0032 §11.2). Aliases + // declared in the current SELECT leg's projection list are + // visible to `ORDER BY` but NOT to `WHERE` / `HAVING` / + // `GROUP BY`. A flat matched-path single pass suffices: aliases + // are always written BEFORE these clauses are reached, and + // subquery levels (depth > 0) keep their own implicit scope. + let mut paren_depth: i32 = 0; + let mut current_clause: Option<&'static str> = None; + let mut leg_aliases: Vec = Vec::new(); for (i, item) in path.items.iter().enumerate() { + // Maintain paren-depth, clause kind, and per-leg alias bag + // BEFORE dispatching on the item — these track context that + // the ident handlers below read. + let depth_at_item = paren_depth; + match &item.kind { + MatchedKind::Punct('(') => paren_depth += 1, + MatchedKind::Punct(')') => paren_depth -= 1, + MatchedKind::Word(w) if depth_at_item == 0 => match *w { + // A new SELECT leg (top-level or compound-leg + // start) resets the alias bag and clause kind so a + // following leg's projection / clauses are scoped + // to its own aliases only. + "select" => { + leg_aliases.clear(); + current_clause = None; + } + "union" | "intersect" | "except" => { + leg_aliases.clear(); + current_clause = None; + } + "where" => current_clause = Some("the WHERE clause"), + "having" => current_clause = Some("the HAVING clause"), + "group" => current_clause = Some("the GROUP BY clause"), + // ORDER BY / LIMIT / OFFSET / FROM are not forbidden + // contexts for alias references. Clearing here also + // protects ORDER BY from a sticky earlier clause. + "order" | "limit" | "offset" | "from" => { + current_clause = None; + } + _ => {} + }, + MatchedKind::Ident { + source: IdentSource::NewName, + role: "projection_alias", + } if depth_at_item == 0 => { + leg_aliases.push(item.text.clone()); + } + _ => {} + } + let MatchedKind::Ident { source, role } = item.kind else { continue; }; @@ -653,6 +702,35 @@ fn schema_existence_diagnostics( .collect(); match matched.len() { 0 => { + // ADR-0032 §11.2 — a top-level bare ref + // that doesn't resolve as a column but + // DOES match a projection alias in this + // leg is either misplaced (forbidden + // clause) or a valid alias reference + // (ORDER BY / LIMIT). Either way, the + // unknown_column diagnostic would + // mislead, so suppress it here. + let alias_match = depth_at_item == 0 + && leg_aliases.iter().any(|a| { + a.eq_ignore_ascii_case(&item.text) + }); + if alias_match { + if let Some(clause) = current_clause { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.projection_alias_misplaced", + &[ + ("alias", &item.text as &dyn std::fmt::Display), + ("clause", &clause as &dyn std::fmt::Display), + ], + ), + }); + } + // Allowed-clause alias ref — silent. + continue; + } let table_arg = if bindings.len() == 1 { bindings[0].table.clone() } else { @@ -736,6 +814,155 @@ fn cte_names_contains(names: &[String], candidate: &str) -> bool { names.iter().any(|n| n.eq_ignore_ascii_case(candidate)) } +/// Compound-query column-count mismatch ERROR pass (ADR-0032 §11.2 +/// / §11.7). A `UNION` / `INTERSECT` / `EXCEPT` chain whose legs +/// have differing projection arities will be rejected by the +/// engine at execution time; this pass catches it pre-flight so +/// the learner sees the slot highlighted at the offending operator +/// instead of an engine string. +/// +/// Counting strategy: the matched-path is flat, so we maintain a +/// per-depth book-keeping of in-progress legs. A leg starts at a +/// `SELECT` keyword and counts projection items as the number of +/// top-level commas (at the leg's own paren-depth) seen before +/// the first leg-end keyword (`FROM` / `WHERE` / `GROUP` / `HAVING` +/// / `ORDER` / `LIMIT` / `OFFSET`) or compound-leg keyword +/// (`UNION` / `INTERSECT` / `EXCEPT`) or matching `)` at the same +/// depth. Commas nested inside function calls or subqueries sit +/// at a deeper paren-depth and are ignored. +/// +/// When a compound operator at depth `d` is encountered, the +/// just-completed leg's arity at depth `d` is stashed as a +/// pending comparand; the next leg's arity at depth `d` is +/// compared against it on that leg's close. The op token's span +/// is the diagnostic anchor — that's the join point the learner +/// pointed the chain at. +fn compound_arity_diagnostics( + path: &MatchedPath, +) -> Vec { + use outcome::{Diagnostic, MatchedKind, Severity}; + use std::collections::HashMap; + + struct LegState { + arity: usize, + in_projection: bool, + } + struct Pending { + op_text: &'static str, + op_span: (usize, usize), + prev_arity: usize, + } + + let mut diagnostics = Vec::new(); + let mut depth: i32 = 0; + let mut legs: HashMap = HashMap::new(); + let mut pending: HashMap = HashMap::new(); + + let close_leg = |depth: i32, + legs: &mut HashMap, + pending: &mut HashMap, + diagnostics: &mut Vec| + -> Option { + let leg = legs.remove(&depth)?; + if let Some(p) = pending.remove(&depth) + && p.prev_arity != leg.arity + { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: p.op_span, + message: crate::friendly::translate( + "diagnostic.compound_arity_mismatch", + &[ + ("op", &p.op_text as &dyn std::fmt::Display), + ("left_n", &p.prev_arity as &dyn std::fmt::Display), + ("right_n", &leg.arity as &dyn std::fmt::Display), + ], + ), + }); + } + Some(leg.arity) + }; + + for item in &path.items { + let depth_at_item = depth; + match &item.kind { + MatchedKind::Punct('(') => { + depth += 1; + } + MatchedKind::Punct(')') => { + // Any leg at this depth closes before the depth + // pops. If a pending set-op was waiting, the leg + // it expected never arrived — drop the pending + // (the chain ended without a second leg, which + // the grammar shouldn't admit but is safer than + // panicking). + close_leg(depth, &mut legs, &mut pending, &mut diagnostics); + pending.remove(&depth); + depth -= 1; + } + MatchedKind::Punct(',') if depth_at_item == depth => { + if let Some(leg) = legs.get_mut(&depth) + && leg.in_projection + { + leg.arity += 1; + } + } + MatchedKind::Word(w) if depth_at_item == depth => { + match *w { + "select" => { + // A leg already at this depth shouldn't + // happen (a previous compound op or + // close-paren would have removed it), but + // overwriting is safe — the new leg + // supersedes any stale state. + legs.insert( + depth, + LegState { arity: 1, in_projection: true }, + ); + } + "from" | "where" | "group" | "having" | "order" + | "limit" | "offset" => { + if let Some(leg) = legs.get_mut(&depth) { + leg.in_projection = false; + } + } + op_word @ ("union" | "intersect" | "except") => { + // Close the just-finished leg, comparing + // it against any pending set-op state at + // this depth. + if let Some(arity) = close_leg( + depth, + &mut legs, + &mut pending, + &mut diagnostics, + ) { + pending.insert( + depth, + Pending { + op_text: op_word, + op_span: item.span, + prev_arity: arity, + }, + ); + } + } + _ => {} + } + } + _ => {} + } + } + + // Drain any still-open legs at end-of-path. Same comparison + // as the close_leg helper does on `)`. + let depths: Vec = legs.keys().copied().collect(); + for d in depths { + close_leg(d, &mut legs, &mut pending, &mut diagnostics); + } + + diagnostics +} + /// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the /// Phase-1 carry-over gap closure). /// @@ -1565,6 +1792,11 @@ pub fn walk<'a>( // sql_expr_ident roles so the two passes don't // collide. d.extend(sql_predicate_warnings(&path, ctx.schema)); + // ADR-0032 §11.2 / §11.7 — compound-arity ERROR pass. + // Catches `SELECT 1, 2 UNION SELECT 1` pre-flight so the + // operator slot is highlighted rather than the engine + // wording shown at execution time. + d.extend(compound_arity_diagnostics(&path)); d } else { Vec::new() @@ -3577,4 +3809,254 @@ mod tests { "expected like_numeric warning on JOIN ON; got {diags:?}", ); } + + // ---- ADR-0032 §11.2 — projection_alias_misplaced ---- + + #[test] + fn projection_alias_in_where_is_misplaced() { + // ADR-0032 §11.2 plan test: `SELECT a + b AS x FROM t + // WHERE x > 0` fires `projection_alias_misplaced`. + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select a + b as x from t where x > 0", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("alias `x`") && d.contains("WHERE") + }), + "expected projection_alias_misplaced on WHERE; got {diags:?}", + ); + // The unknown_column diagnostic must NOT also fire on + // the same span — the alias check pre-empts it. + assert!( + !diags.iter().any(|d| d.contains("no such column")), + "unknown_column must be suppressed when alias matches; got {diags:?}", + ); + } + + #[test] + fn projection_alias_in_having_is_misplaced() { + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select a + b as x from t group by a having x > 0", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("alias `x`") && d.contains("HAVING") + }), + "expected projection_alias_misplaced on HAVING; got {diags:?}", + ); + } + + #[test] + fn projection_alias_in_group_by_is_misplaced() { + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select a + b as x from t group by x", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("alias `x`") && d.contains("GROUP BY") + }), + "expected projection_alias_misplaced on GROUP BY; got {diags:?}", + ); + } + + #[test] + fn projection_alias_in_order_by_is_allowed() { + // ADR-0032 §11.2 negative case: `… ORDER BY x` doesn't + // fire — aliases are bound by ORDER BY evaluation time. + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select a + b as x from t order by x", + &schema, + ); + assert!( + diags.is_empty(), + "ORDER BY alias is allowed; got {diags:?}", + ); + } + + #[test] + fn real_column_shadowed_by_alias_is_not_misplaced() { + // `SELECT name AS id FROM t WHERE id > 0` — the alias + // `id` shadows nothing in the table, but a real column + // `id` exists. WHERE id refers to the table column (per + // SQL spec); the diagnostic must NOT fire. + let schema = schema_with( + "t", + &[("id", Type::Int), ("name", Type::Text)], + ); + let diags = diag_keys( + "select name as id from t where id > 0", + &schema, + ); + assert!( + diags.is_empty(), + "real-column WHERE ref must not be flagged as misplaced; got {diags:?}", + ); + } + + // ---- ADR-0032 §11.2 — compound_arity_mismatch ---- + + #[test] + fn compound_union_arity_mismatch_fires() { + // ADR-0032 §11.2 plan test: `SELECT 1, 2 UNION SELECT 1` + // fires `compound_arity_mismatch`. + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = diag_keys("select 1, 2 union select 1", &schema); + assert!( + diags.iter().any(|d| { + d.contains("union") + && d.contains("number of columns") + }), + "expected compound_arity_mismatch on UNION; got {diags:?}", + ); + } + + #[test] + fn compound_union_arity_match_no_diagnostic() { + // Matched-arity legs don't fire. + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = diag_keys("select 1, 2 union select 3, 4", &schema); + assert!( + !diags.iter().any(|d| d.contains("number of columns")), + "matched arity should not fire; got {diags:?}", + ); + } + + #[test] + fn compound_intersect_arity_mismatch_fires() { + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = + diag_keys("select 1 intersect select 1, 2", &schema); + assert!( + diags.iter().any(|d| { + d.contains("intersect") + && d.contains("number of columns") + }), + "expected compound_arity_mismatch on INTERSECT; got {diags:?}", + ); + } + + #[test] + fn compound_except_arity_mismatch_fires() { + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = + diag_keys("select 1, 2, 3 except select 1, 2", &schema); + assert!( + diags.iter().any(|d| { + d.contains("except") + && d.contains("number of columns") + }), + "expected compound_arity_mismatch on EXCEPT; got {diags:?}", + ); + } + + #[test] + fn compound_arity_with_function_call_args_not_confused() { + // Function-call commas are at deeper depth — they must + // not be counted as projection items. + // `count(a, b)` is ONE projection item. + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select count(a, b) from t union select 1", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("number of columns")), + "function-call commas must not inflate arity; got {diags:?}", + ); + } + + #[test] + fn compound_union_all_arity_mismatch_fires() { + // `UNION ALL` keyword sequence is handled identically. + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = diag_keys("select 1 union all select 1, 2", &schema); + assert!( + diags.iter().any(|d| { + d.contains("union") + && d.contains("number of columns") + }), + "expected compound_arity_mismatch on UNION ALL; got {diags:?}", + ); + } + + #[test] + fn compound_three_leg_chain_emits_per_mismatch() { + // Chained legs at the same depth — each set-op compares + // its preceding leg against its following leg. + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = diag_keys( + "select 1 union select 1, 2 union select 1", + &schema, + ); + let mismatch_count = diags + .iter() + .filter(|d| d.contains("number of columns")) + .count(); + assert_eq!( + mismatch_count, 2, + "expected two mismatch diagnostics; got {diags:?}", + ); + } + + #[test] + fn compound_arity_inside_cte_body_detected() { + // CTE body at depth 1 — the arity-mismatch is detected + // inside the parens, at the inner UNION. + let schema = schema_with("t", &[("a", Type::Int)]); + let diags = diag_keys( + "with x as (select 1, 2 union select 1) select * from x", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("union") + && d.contains("number of columns") + }), + "expected compound_arity_mismatch inside CTE body; got {diags:?}", + ); + } + + #[test] + fn alias_in_inner_subquery_does_not_affect_outer_aliases() { + // The inner `AS y` is inside parens (depth > 0) and + // must not be collected into the outer leg's alias bag. + // Outer `WHERE x` would otherwise (wrongly) match `y` + // — here we test that the outer `WHERE y` is flagged + // as unknown_column (not misplaced) because there is no + // alias `y` in the OUTER leg's projection. + let schema = schema_with( + "t", + &[("a", Type::Int), ("b", Type::Int)], + ); + let diags = diag_keys( + "select (select a as y from t) from t where y > 0", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("misplaced")), + "inner-subquery alias must not affect outer scope; got {diags:?}", + ); + } }