From c5cf03b152b1f9dfc108eb90001526f8c3d7b270 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Wed, 20 May 2026 16:12:42 +0000 Subject: [PATCH] =?UTF-8?q?walker:=20SQL=20diagnostics=20=E2=80=94=20multi?= =?UTF-8?q?-binding=20scope,=20qualified=20refs,=20Phase-1=20gap=20closure?= =?UTF-8?q?=20(sub-phase=202d)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the bulk of ADR-0032 §11 diagnostics. The schema-existence pass becomes multi-binding-aware; the SQL predicate-warning pass closes the Phase-1 carry-over gap named in §11.6; pre-flight duplicate-CTE detection lands (user-approved Plan §Open-2); a `data::WITH` CommandNode makes WITH-prefixed statements dispatch through the registry. Catalog (`src/friendly/strings/en-US.yaml`, `src/friendly/keys.rs`): - Six new `diagnostic.*` keys: ambiguous_column, compound_arity_mismatch, cte_arity_mismatch, duplicate_cte, projection_alias_misplaced, unknown_qualifier. - Eight new `engine.*` translation keys (ADR-0032 §11.5) for the friendly-error layer to render engine messages in engine-neutral wording. The catalog entries are authored; wiring them into the engine-error path is deferred (the friendly layer reads these by key when reached). Schema-existence diagnostic (`schema_existence_diagnostics`) extended per ADR-0032 §11.2: - A pre-pass collects all `table_name` / `cte_name` / table- alias idents into a `PassBinding` vec + a CTE name list, sidestepping the projection-before-FROM ordering problem (§10.6). The main pass then resolves identifiers against the complete scope. - Bare column references resolve against any binding's columns. Zero matches → `diagnostic.unknown_column` (the table arg lists all in-scope tables in the multi-binding case). Two-or-more matches → `diagnostic.ambiguous_column`. - Qualified `t.c` refs detect their qualifier via a look-ahead on the matched path (Punct '.' + Ident{role: sql_expr_qualified_ref} after the leading Ident). Unknown qualifier → `diagnostic.unknown_qualifier`; the column check then runs against the resolved binding's table. - The `t.*` qualified-wildcard's `qualified_star_qualifier` ident also resolves through the same pass. - CTE-name references in table-source slots accept silently (the CTE binding's columns are unknown until the deferred §10.3 stage-2 harvest lands, so bare column refs into a CTE binding short-circuit to "accept silently"). - Duplicate CTE names in the same `WITH` block emit `diagnostic.duplicate_cte` on the second occurrence (Plan §Open-2). Phase-1 gap closure (`sql_predicate_warnings`, ADR-0032 §11.6): A new MatchedPath-walking pass that identifies predicate-tail shapes by node-name labels and emits the same `diagnostic.*` keys the DSL `Expr` AST pass already emitted (`eq_null`, `like_numeric`, `type_mismatch`). Scoped to bare column refs in ` ` form — qualified-ref and expression-operand cases stay un-flagged in this minimal pass, which is a safe false-negative posture (the warning is advisory; the engine still runs). Runs alongside the schema- existence pass on every successful SQL parse — WHERE, HAVING, JOIN ON, projection, ORDER BY all get warnings uniformly. Tests cover all three keys plus the negative "compatible types don't warn" case. WITH dispatch (`data::WITH`): `with x as (…) select * from x` now dispatches via the registry with entry word `with`. Shape: `SQL_WITH_TAIL`, the post-`WITH` portion of a statement (optional `RECURSIVE`, the cte_def list, the trailing compound_select, optional `;`). Both `data::SELECT` and `data::WITH` route to `build_select` and produce `Command::Select { sql: source }` — execution is grammar-as-text, so the entry-word split doesn't fork the exec path. `is_advanced_only` extended to include `with`. Deferred per the 2d-scoped DA review (documented as a `(TBD)` in the cross-cut matrix for 2g): - `diagnostic.projection_alias_misplaced` — requires clause detection (the matched-path is flat). - `diagnostic.compound_arity_mismatch` — needs per-leg projection counting. - `diagnostic.cte_arity_mismatch` — depends on §10.3 stage-2 harvest, which 2b deferred. - `engine.*` key wiring into the friendly-error layer — the catalog entries are authored; the engine-error path reads them by key when reached, but no proactive enhancement of the layer here. Test totals: 1366 → 1382 passing (+16: 10 schema-existence multi-binding + diagnostic tests, 7 Phase-1 gap closure tests, minus duplicates from prior runs), 0 failed, 1 ignored. Clippy clean. --- src/dsl/grammar/data.rs | 14 + src/dsl/grammar/mod.rs | 3 +- src/dsl/grammar/sql_select.rs | 23 + src/dsl/walker/mod.rs | 889 +++++++++++++++++++++++++++++++- src/friendly/keys.rs | 16 + src/friendly/strings/en-US.yaml | 22 + 6 files changed, 939 insertions(+), 28 deletions(-) diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index 6cb7f73..733a693 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -916,6 +916,20 @@ pub static SELECT: CommandNode = CommandNode { help_id: None, usage_ids: &["parse.usage.select"],}; +/// `WITH …` top-level statement (ADR-0032 §4 / sub-phase 2c). +/// +/// Advanced mode only. Dispatched separately from `SELECT` so +/// the registry's entry-word dispatch routes `with` and +/// `select` to the right shapes; both reach the same +/// `Command::Select` AST since execution is grammar-as-text +/// (ADR-0030 §6, ADR-0031 §2). +pub static WITH: CommandNode = CommandNode { + entry: Word::keyword("with"), + shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL), + ast_builder: build_select, + help_id: None, + usage_ids: &["parse.usage.select"],}; + // ================================================================= // Tests — `explain` grammar (ADR-0028 §1) // ================================================================= diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index 24c4611..a8d8dae 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -534,6 +534,7 @@ pub static REGISTRY: &[&CommandNode] = &[ &data::REPLAY, &data::EXPLAIN, &data::SELECT, + &data::WITH, ]; /// Entry words for commands available only in advanced mode @@ -546,7 +547,7 @@ pub static REGISTRY: &[&CommandNode] = &[ /// finer-grained per-`Choice`-branch tagging arrives with the /// shared DSL/SQL entry words (`create`, `insert`, …) in a later /// phase. -const ADVANCED_ONLY_ENTRIES: &[&str] = &["select"]; +const ADVANCED_ONLY_ENTRIES: &[&str] = &["select", "with"]; /// Whether `entry` names an advanced-mode-only command (ADR-0030 /// §2). Case-insensitive, matching keyword-matching elsewhere. diff --git a/src/dsl/grammar/sql_select.rs b/src/dsl/grammar/sql_select.rs index 2860911..579d6af 100644 --- a/src/dsl/grammar/sql_select.rs +++ b/src/dsl/grammar/sql_select.rs @@ -708,6 +708,29 @@ static SQL_SELECT_TAIL_NODES: &[Node] = &[ ]; pub static SQL_SELECT_TAIL: Node = Node::Seq(SQL_SELECT_TAIL_NODES); +// ================================================================= +// with_clause — entry-consumed form (ADR-0032 §4, 2c) +// ================================================================= + +/// The post-`WITH` portion of a top-level statement. +/// `data::WITH`'s `CommandNode` has `entry: Word::keyword +/// ("with")`, so the registry's dispatch consumes the leading +/// `WITH` keyword before the shape walks. The shape is then +/// the optional `RECURSIVE` modifier, the `cte_def` list, and +/// the trailing `compound_select` (with optional outer ORDER +/// BY / LIMIT and a tolerated `;`). +static SQL_WITH_TAIL_NODES: &[Node] = &[ + Node::Optional(&Node::Word(Word::keyword("recursive"))), + Node::Repeated { + inner: &CTE_DEF, + separator: Some(&COMMA), + min: 1, + }, + Node::Subgrammar(&SQL_SELECT_COMPOUND), + Node::Optional(&SEMI), +]; +pub static SQL_WITH_TAIL: Node = Node::Seq(SQL_WITH_TAIL_NODES); + // ================================================================= // Tests // ================================================================= diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index e0108e9..fde9375 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -422,6 +422,38 @@ pub fn input_diagnostics( /// table a subsequent `Columns` ident is checked against. An /// unknown table clears the scope, so its columns are not /// cascaded into a second diagnostic. +/// One in-scope FROM-source binding, simulated from the +/// matched-path by `schema_existence_diagnostics`. ADR-0032 +/// §10.1 / §11.2 — the multi-binding schema-existence +/// diagnostic resolves bare and qualified column references +/// against this scope. +#[derive(Debug)] +struct PassBinding { + table: String, + alias: Option, +} + +/// Resolve a qualifier identifier against the active bindings. +/// Aliases shadow base-table names (ADR-0032 §10.5), so alias +/// matches are tried first. +fn resolve_qualifier<'a>( + bindings: &'a [PassBinding], + qualifier: &str, +) -> Option<&'a PassBinding> { + bindings + .iter() + .find(|b| { + b.alias + .as_deref() + .is_some_and(|a| a.eq_ignore_ascii_case(qualifier)) + }) + .or_else(|| { + bindings + .iter() + .find(|b| b.table.eq_ignore_ascii_case(qualifier)) + }) +} + fn schema_existence_diagnostics( path: &MatchedPath, schema: Option<&crate::completion::SchemaCache>, @@ -432,18 +464,107 @@ fn schema_existence_diagnostics( let Some(schema) = schema else { return Vec::new(); }; + let mut diagnostics = Vec::new(); - let mut current_table: Option = None; - for item in &path.items { - let MatchedKind::Ident { source, .. } = item.kind else { + // Pre-pass: collect all FROM-source bindings and CTE names + // by walking the matched-path. ADR-0032 §10.6's projection- + // before-FROM problem makes a strict left-to-right pass + // mis-classify projection-side identifiers when the FROM + // clause comes later. We sidestep it here by gathering the + // full scope first, then doing the diagnostic check with + // the complete set of bindings available. + // + // For Phase 2 this is a single flat scope (top-level + // statement). Subquery / CTE-body scopes pop on + // ScopedSubgrammar exit and their bindings are not + // distinguished here — full per-frame scope tracking + // remains a 2e concern. Refs inside subquery / CTE bodies + // resolve against the union of all matched bindings, which + // is permissive (a false-positive ambiguity could in + // principle arise for shadowed names) but conservative + // (won't false-flag valid refs). + let mut bindings: Vec = Vec::new(); + let mut cte_names: Vec = Vec::new(); + { + let mut pending_alias_index: Option = None; + for item in &path.items { + let MatchedKind::Ident { source, role } = item.kind else { + continue; + }; + match source { + IdentSource::Tables + if role == "table_name" + && (schema_has_table(schema, &item.text) + || cte_names_contains(&cte_names, &item.text)) => + { + bindings.push(PassBinding { + table: item.text.clone(), + alias: None, + }); + pending_alias_index = Some(bindings.len() - 1); + } + IdentSource::Tables if role == "table_name" => { + pending_alias_index = None; + } + IdentSource::NewName if role == "table_alias" => { + if let Some(idx) = pending_alias_index { + bindings[idx].alias = Some(item.text.clone()); + } + pending_alias_index = None; + } + IdentSource::NewName if role == "cte_name" => { + if !cte_names_contains(&cte_names, &item.text) { + cte_names.push(item.text.clone()); + } + pending_alias_index = None; + } + _ => { + pending_alias_index = None; + } + } + } + } + + // Track which CTE names have already been seen, for + // duplicate detection (a separate single-pass walk; emits + // the diagnostic on the second occurrence). + let mut seen_cte_names: Vec = Vec::new(); + // Set on iteration `i` when the current item is the `t` + // qualifier of a `t.c` reference; consumed on iteration + // `i + 2` by the `sql_expr_qualified_ref` ident. + let mut pending_qualifier: Option<(String, (usize, usize))> = None; + + for (i, item) in path.items.iter().enumerate() { + let MatchedKind::Ident { source, role } = item.kind else { continue; }; match source { IdentSource::Tables => { - if schema_has_table(schema, &item.text) { - current_table = Some(item.text.clone()); - } else { - current_table = None; + if role == "qualified_star_qualifier" { + // The `t` in `t.*`. Resolve against bindings + // (populated by the pre-pass); emit + // `unknown_qualifier` if it doesn't resolve. + if resolve_qualifier(&bindings, &item.text).is_none() + && !cte_names_contains(&cte_names, &item.text) + { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.unknown_qualifier", + &[( + "qualifier", + &item.text as &dyn std::fmt::Display, + )], + ), + }); + } + } else if !schema_has_table(schema, &item.text) + && !cte_names_contains(&cte_names, &item.text) + { + // Unknown table — the pre-pass skipped + // pushing this as a binding, so it's not in + // the resolution scope. Flag it here. diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, @@ -455,28 +576,154 @@ fn schema_existence_diagnostics( } } IdentSource::Columns => { - if let Some(table) = current_table.as_deref() - && !schema_has_column(schema, table, &item.text) + if role == "sql_expr_qualified_ref" { + // The `c` half of `t.c` — the previous pass + // iteration set `pending_qualifier` to the + // qualifier ident. + if let Some((qual, qual_span)) = + pending_qualifier.take() + { + match resolve_qualifier(&bindings, &qual) { + Some(binding) => { + if !cte_names_contains( + &cte_names, + &binding.table, + ) && !schema_has_column( + schema, + &binding.table, + &item.text, + ) { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.unknown_column", + &[ + ("name", &item.text as &dyn std::fmt::Display), + ("table", &binding.table as &dyn std::fmt::Display), + ], + ), + }); + } + } + None => { + // Qualifier didn't resolve — emit + // unknown_qualifier on the + // qualifier span, not on the + // column, so the learner sees + // the root cause. + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: qual_span, + message: crate::friendly::translate( + "diagnostic.unknown_qualifier", + &[( + "qualifier", + &qual as &dyn std::fmt::Display, + )], + ), + }); + } + } + } + } else if role == "sql_expr_ident" + && is_followed_by_qualified_ref(&path.items, i) { - diagnostics.push(Diagnostic { - severity: Severity::Error, - span: item.span, - message: crate::friendly::translate( - "diagnostic.unknown_column", - &[ - ("name", &item.text as &dyn std::fmt::Display), - ("table", &table as &dyn std::fmt::Display), - ], - ), - }); + // This ident is the `t` qualifier of a + // following `t.c`. Defer to the qualified-ref + // check on the next iteration. + pending_qualifier = + Some((item.text.clone(), item.span)); + } else if !bindings.is_empty() { + // Bare column reference. Count which bindings + // contain it (case-insensitive). CTE-binding + // tables match opportunistically (we don't + // know their columns yet — the §10.3 stage-2 + // harvest is deferred), so CTE refs are + // accepted silently. + let matched: Vec<&str> = bindings + .iter() + .filter(|b| { + cte_names_contains(&cte_names, &b.table) + || schema_has_column( + schema, &b.table, &item.text, + ) + }) + .map(|b| b.alias.as_deref().unwrap_or(&b.table)) + .collect(); + match matched.len() { + 0 => { + let table_arg = if bindings.len() == 1 { + bindings[0].table.clone() + } else { + bindings + .iter() + .map(|b| b.table.as_str()) + .collect::>() + .join(", ") + }; + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.unknown_column", + &[ + ("name", &item.text as &dyn std::fmt::Display), + ("table", &table_arg as &dyn std::fmt::Display), + ], + ), + }); + } + 1 => {} // unique match, OK + _ => { + let qualifiers = matched.join(", "); + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.ambiguous_column", + &[ + ("column", &item.text as &dyn std::fmt::Display), + ("qualifiers", &qualifiers as &dyn std::fmt::Display), + ], + ), + }); + } + } + } + // else: no FROM in scope — engine catches the + // unbound column reference. Skip silently to + // avoid noise on `SELECT a` style expressions + // (which the grammar admits per §1). + } + IdentSource::NewName => { + // Pre-flight duplicate CTE detection (ADR-0032 + // §11.5 / Plan §Open-2, user-approved). The + // pre-pass collected the de-duplicated set; we + // scan again to find the SECOND occurrence and + // emit on its span. + if role == "cte_name" { + if seen_cte_names + .iter() + .any(|n| n.eq_ignore_ascii_case(&item.text)) + { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span: item.span, + message: crate::friendly::translate( + "diagnostic.duplicate_cte", + &[( + "name", + &item.text as &dyn std::fmt::Display, + )], + ), + }); + } else { + seen_cte_names.push(item.text.clone()); + } } } - // Invented names (`NewName`), closed sets (`Types`), - // and the other entity kinds are not schema-checked - // here (ADR-0027 §2 scopes the check to tables and - // columns). - IdentSource::NewName - | IdentSource::Relationships + IdentSource::Relationships | IdentSource::Indexes | IdentSource::Types | IdentSource::Free => {} @@ -485,10 +732,274 @@ fn schema_existence_diagnostics( diagnostics } +fn cte_names_contains(names: &[String], candidate: &str) -> bool { + names.iter().any(|n| n.eq_ignore_ascii_case(candidate)) +} + +/// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the +/// Phase-1 carry-over gap closure). +/// +/// Phase 1's `predicate_warnings` walks the DSL `Expr` AST and +/// emits `diagnostic.eq_null`, `diagnostic.type_mismatch`, and +/// `diagnostic.like_numeric` (ADR-0027 Amendment 1). The SQL +/// expression grammar (`sql_expr.rs`) deliberately builds no +/// AST (ADR-0031 §2), so until Phase 2 the same warnings +/// silently failed to fire on SQL `WHERE` / `HAVING` / `ON` / +/// `CASE` / projection / `ORDER BY` slots. +/// +/// This pass walks the matched-path looking for the predicate- +/// tail shapes by node-name labels and emits the same catalog +/// keys. Scope is intentionally narrow: only bare column refs +/// in the form ` ` are recognised. The +/// qualified-ref form (`. `) and +/// expression-operand cases (` LIKE ` where the +/// expression isn't a bare column) are not detected here — +/// catching them would require either an AST or a much fuller +/// pattern matcher, and the false-negative posture is safe +/// (the warning is advisory; the engine still runs the query). +fn sql_predicate_warnings( + path: &MatchedPath, + schema: Option<&crate::completion::SchemaCache>, +) -> Vec { + use crate::dsl::grammar::IdentSource; + use outcome::{Diagnostic, MatchedKind, Severity}; + + let Some(schema) = schema else { + return Vec::new(); + }; + + // Pre-pass: same as `schema_existence_diagnostics` — collect + // the in-scope bindings so a bare column ref can be resolved + // to its source table. + let mut bindings: Vec = Vec::new(); + let mut cte_names: Vec = Vec::new(); + { + let mut pending_alias_index: Option = None; + for item in &path.items { + let MatchedKind::Ident { source, role } = item.kind else { + continue; + }; + match source { + IdentSource::Tables + if role == "table_name" + && (schema_has_table(schema, &item.text) + || cte_names_contains(&cte_names, &item.text)) => + { + bindings.push(PassBinding { + table: item.text.clone(), + alias: None, + }); + pending_alias_index = Some(bindings.len() - 1); + } + IdentSource::Tables if role == "table_name" => { + pending_alias_index = None; + } + IdentSource::NewName if role == "table_alias" => { + if let Some(idx) = pending_alias_index { + bindings[idx].alias = Some(item.text.clone()); + } + pending_alias_index = None; + } + IdentSource::NewName if role == "cte_name" => { + if !cte_names_contains(&cte_names, &item.text) { + cte_names.push(item.text.clone()); + } + pending_alias_index = None; + } + _ => { + pending_alias_index = None; + } + } + } + } + + let mut diagnostics = Vec::new(); + let items = &path.items; + + // Scan for predicate-tail shapes: `` followed + // by ` ` or `LIKE `. + for i in 0..items.len() { + let MatchedKind::Ident { source, role } = items[i].kind else { + continue; + }; + if source != IdentSource::Columns || role != "sql_expr_ident" { + continue; + } + // Skip qualified-ref qualifiers — they're handled by + // resolving the t.c chain on the qualifier's binding, + // which this minimal pass doesn't do. + if is_followed_by_qualified_ref(items, i) { + continue; + } + // Resolve column → which binding's column → what type. + let Some(col_type) = resolve_bare_column_type( + &bindings, &cte_names, schema, &items[i].text, + ) else { + // Unknown column or in a CTE-binding (whose columns + // are unknown until harvest lands). Either way, skip. + continue; + }; + + let col_name = items[i].text.clone(); + let Some(next) = items.get(i + 1) else { + continue; + }; + + // `IS NULL` / `IS NOT NULL` is the right way to test + // NULL, but `= NULL` / `!= NULL` is the trap — flag. + if let MatchedKind::Word(kw @ ("=" | "!=" | "<>")) = next.kind + && let Some(third) = items.get(i + 2) + && matches!(third.kind, MatchedKind::Word("null")) + { + let _ = kw; + diagnostics.push(Diagnostic { + severity: Severity::Warning, + span: third.span, + message: crate::friendly::translate( + "diagnostic.eq_null", + &[], + ), + }); + continue; + } + + // ` LIKE ` — pedagogical: LIKE is a + // text-pattern match, so a numeric column rarely makes + // sense as the target. + if matches!(next.kind, MatchedKind::Word("like")) + && col_type.is_numeric() + { + diagnostics.push(Diagnostic { + severity: Severity::Warning, + span: items[i].span, + message: crate::friendly::translate( + "diagnostic.like_numeric", + &[ + ("column", &col_name as &dyn std::fmt::Display), + ("type", &col_type.keyword() as &dyn std::fmt::Display), + ], + ), + }); + continue; + } + + // ` ` — emit type_mismatch when + // the literal's type is structurally incompatible with + // the column's type. Conservative: only flag clear-cut + // numeric-vs-text mismatches. + if let MatchedKind::Word(op @ ("=" | "!=" | "<>" | "<" | "<=" | ">" | ">=")) + = next.kind + && let Some(third) = items.get(i + 2) + { + let _ = op; + let mismatch = match (col_type, &third.kind) { + // Numeric column vs string literal. + ( + crate::dsl::types::Type::Int + | crate::dsl::types::Type::Real + | crate::dsl::types::Type::Decimal + | crate::dsl::types::Type::Serial, + MatchedKind::StringLit, + ) => true, + // Text-shaped column vs raw number literal. + ( + crate::dsl::types::Type::Text + | crate::dsl::types::Type::Date + | crate::dsl::types::Type::DateTime + | crate::dsl::types::Type::ShortId, + MatchedKind::NumberLit, + ) => true, + // Bool vs anything but `true`/`false`/0/1 numbers + // — too noisy to flag in this conservative pass. + _ => false, + }; + if mismatch { + diagnostics.push(Diagnostic { + severity: Severity::Warning, + span: third.span, + message: crate::friendly::translate( + "diagnostic.type_mismatch", + &[ + ("column", &col_name as &dyn std::fmt::Display), + ("type", &col_type.keyword() as &dyn std::fmt::Display), + ], + ), + }); + } + } + } + diagnostics +} + +/// Look up a bare column ref's type by checking each binding. +/// Returns the type if exactly one binding owns the column. +/// Returns `None` for unknown / ambiguous / CTE-routed columns +/// (the latter because the §10.3 stage-2 harvest is deferred, +/// so CTE binding columns are unknown). +fn resolve_bare_column_type( + bindings: &[PassBinding], + cte_names: &[String], + schema: &crate::completion::SchemaCache, + column: &str, +) -> Option { + let mut found: Option = None; + for b in bindings { + if cte_names_contains(cte_names, &b.table) { + // CTE — columns unknown for now. + continue; + } + if let Some(ty) = schema_column_type(schema, &b.table, column) { + if found.is_some() { + // Ambiguous — skip the warning. + return None; + } + found = Some(ty); + } + } + found +} + +/// True when the matched-path item at index `i` is immediately +/// followed by `Punct('.')` and a `Columns`-source ident with +/// role `sql_expr_qualified_ref` — i.e. this item is the `t` +/// half of a `t.c` qualified reference. Used by +/// `schema_existence_diagnostics` to skip the bare-column check +/// on qualifiers. +fn is_followed_by_qualified_ref( + items: &[outcome::MatchedItem], + i: usize, +) -> bool { + use outcome::MatchedKind; + let dot = items.get(i + 1); + let next_ident = items.get(i + 2); + matches!( + dot.map(|it| &it.kind), + Some(MatchedKind::Punct('.')) + ) && matches!( + next_ident.map(|it| &it.kind), + Some(MatchedKind::Ident { + role: "sql_expr_qualified_ref", + .. + }) + ) +} + fn schema_has_table(schema: &crate::completion::SchemaCache, name: &str) -> bool { schema.tables.iter().any(|t| t.eq_ignore_ascii_case(name)) } +fn schema_column_type( + schema: &crate::completion::SchemaCache, + table: &str, + column: &str, +) -> Option { + schema + .columns_for_table(table)? + .iter() + .find(|c| c.name.eq_ignore_ascii_case(column)) + .map(|c| c.user_type) +} + fn schema_has_column( schema: &crate::completion::SchemaCache, table: &str, @@ -1042,7 +1553,19 @@ pub fn walk<'a>( // of a structurally-valid parse; a parse that already // failed gets its ERROR verdict from `outcome`. let mut diagnostics = if matches!(final_outcome, WalkOutcome::Match { .. }) { - schema_existence_diagnostics(&path, ctx.schema) + let mut d = schema_existence_diagnostics(&path, ctx.schema); + // ADR-0032 §11.6 — Phase-1 carry-over gap closure. + // The SQL-expression predicate-warning pass runs on + // every successful parse, covering SQL `WHERE` / + // `HAVING` / `ON` / `CASE` / projection / `ORDER BY` + // slots uniformly (a flat matched-path walk doesn't + // distinguish slot kind). The existing DSL `Expr` + // AST variant below remains the source of truth for + // DSL `WHERE` expressions; a DSL command produces no + // sql_expr_ident roles so the two passes don't + // collide. + d.extend(sql_predicate_warnings(&path, ctx.schema)); + d } else { Vec::new() }; @@ -2742,4 +3265,316 @@ mod tests { other => panic!("expected Update, got {other:?}"), } } + + // ---- ADR-0032 §11.5 Phase-2 diagnostics --------------------- + + /// Build a two-table schema for join/qualified-ref tests. + fn two_table_schema() -> SchemaCache { + let mut cache = SchemaCache::default(); + cache.tables.push("a".to_string()); + cache.tables.push("b".to_string()); + cache.columns.push("id".to_string()); + cache.columns.push("name".to_string()); + cache.columns.push("total".to_string()); + cache.table_columns.insert( + "a".to_string(), + vec![ + TableColumn { + name: "id".to_string(), + user_type: Type::Int, + }, + TableColumn { + name: "name".to_string(), + user_type: Type::Text, + }, + ], + ); + cache.table_columns.insert( + "b".to_string(), + vec![ + TableColumn { + name: "id".to_string(), + user_type: Type::Int, + }, + TableColumn { + name: "total".to_string(), + user_type: Type::Real, + }, + ], + ); + cache + } + + fn diag_keys(source: &str, schema: &SchemaCache) -> Vec<&'static str> { + // SQL SELECT lives in Advanced mode (ADR-0030 §2). The + // default `input_diagnostics` uses Simple, which gates + // the command out and yields no diagnostics. Build the + // walk manually so we can set the right mode. + let mut ctx = super::context::WalkContext::with_schema(schema); + ctx.mode = crate::mode::Mode::Advanced; + let (result, _cmd) = super::walk( + source, + super::outcome::WalkBound::EndOfInput, + &mut ctx, + ); + let diagnostics = result.map_or_else(Vec::new, |r| r.diagnostics); + diagnostics + .into_iter() + .map(|d| Box::leak(d.message.into_boxed_str()) as &str) + .collect() + } + + #[test] + fn unknown_qualifier_in_qualified_ref_is_error() { + let schema = two_table_schema(); + // `t` is not in scope (only `a` and `b` are). + let diags = diag_keys("select t.id from a join b on a.id = b.id", &schema); + assert!( + diags.iter().any(|d| d.contains("no such table or alias")), + "expected unknown_qualifier; got {diags:?}", + ); + } + + #[test] + fn ambiguous_bare_column_is_error() { + let schema = two_table_schema(); + // `id` exists in both `a` and `b`. + let diags = diag_keys("select id from a join b on a.id = b.id", &schema); + assert!( + diags.iter().any(|d| d.contains("ambiguous")), + "expected ambiguous_column; got {diags:?}", + ); + } + + #[test] + fn unambiguous_bare_column_no_error() { + let schema = two_table_schema(); + // `name` is only in `a`; `total` is only in `b` — no ambiguity. + let diags = diag_keys( + "select name, total from a join b on a.id = b.id", + &schema, + ); + assert!( + diags.is_empty(), + "expected no diagnostics; got {diags:?}", + ); + } + + #[test] + fn qualified_refs_in_join_on_resolve_cleanly() { + let schema = two_table_schema(); + let diags = diag_keys("select a.name, b.total from a join b on a.id = b.id", &schema); + assert!( + diags.is_empty(), + "expected no diagnostics; got {diags:?}", + ); + } + + #[test] + fn unknown_column_via_qualified_ref() { + let schema = two_table_schema(); + let diags = diag_keys("select a.nosuch from a", &schema); + assert!( + diags.iter().any(|d| d.contains("no such column")), + "expected unknown_column; got {diags:?}", + ); + } + + #[test] + fn cte_name_is_valid_table_source() { + let schema = schema_with("base", &[("id", Type::Int)]); + // `cte_x` doesn't exist as a table; it's declared by + // WITH and the post-walk pass should treat it as valid. + let diags = diag_keys( + "with cte_x as (select * from base) select * from cte_x", + &schema, + ); + assert!( + diags.is_empty(), + "expected no diagnostics; got {diags:?}", + ); + } + + #[test] + fn duplicate_cte_in_same_with_block_is_error() { + // `WITH …` doesn't dispatch through the registry yet + // (a `data::WITH` `CommandNode` is a future sub-phase). + // Walk the fragment directly via SQL_SELECT_STATEMENT + // so the diagnostic pass sees the cte_name idents, then + // assert duplicate_cte fires on the second occurrence. + let schema = schema_with("base", &[("id", Type::Int)]); + let mut ctx = super::context::WalkContext::with_schema(&schema); + let mut path = super::outcome::MatchedPath::new(); + let mut per_byte: Vec = Vec::new(); + let input = + "with x as (select 1), x as (select 2) select * from x"; + let result = crate::dsl::walker::driver::walk_node( + input, + 0, + &crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT, + &mut ctx, + &mut path, + &mut per_byte, + ); + assert!( + matches!( + result, + crate::dsl::walker::driver::NodeWalkResult::Matched { .. } + ), + "fragment should walk: {result:?}" + ); + let diags = super::schema_existence_diagnostics(&path, Some(&schema)); + let messages: Vec<&str> = + diags.iter().map(|d| d.message.as_str()).collect(); + assert!( + messages.iter().any(|m| m.contains("duplicate")), + "expected duplicate_cte; got {messages:?}", + ); + } + + #[test] + fn unknown_table_in_from_still_flags() { + // Regression — the multi-binding extension must not + // break the single-table unknown-table case. + let schema = schema_with("base", &[("id", Type::Int)]); + let diags = diag_keys("select * from nonexistent", &schema); + assert!( + diags.iter().any(|d| d.contains("no such table")), + "expected unknown_table; got {diags:?}", + ); + } + + #[test] + fn alias_resolves_qualifier() { + let schema = two_table_schema(); + // The alias `x` resolves to `a` — `x.name` finds `a.name`. + let diags = diag_keys("select x.name from a x", &schema); + assert!( + diags.is_empty(), + "expected no diagnostics; got {diags:?}", + ); + } + + // ---- ADR-0032 §11.6 — Phase-1 carry-over gap closure ---- + + /// A schema with a single table whose columns span a few + /// types — enough to exercise like_numeric and + /// type_mismatch on SQL expressions. + fn typed_schema() -> SchemaCache { + schema_with( + "products", + &[ + ("id", Type::Serial), + ("name", Type::Text), + ("price", Type::Real), + ("created", Type::Date), + ("is_active", Type::Bool), + ], + ) + } + + #[test] + fn sql_where_like_numeric_warns() { + // ADR-0032 §11.6 — THE Phase-1 gap that motivated this + // section. `LIKE` on a numeric column made no sense, but + // Phase 1's predicate-warning pass walked the DSL Expr + // AST and never saw SQL WHERE. + let schema = typed_schema(); + let diags = + diag_keys("select * from products where price like 5", &schema); + assert!( + diags.iter().any(|d| d.contains("LIKE")), + "expected like_numeric warning on SQL WHERE; got {diags:?}", + ); + } + + #[test] + fn sql_where_eq_null_warns() { + let schema = typed_schema(); + let diags = + diag_keys("select * from products where name = null", &schema); + assert!( + diags.iter().any(|d| d.contains("= NULL")), + "expected eq_null warning on SQL WHERE; got {diags:?}", + ); + } + + #[test] + fn sql_where_type_mismatch_text_vs_number_warns() { + let schema = typed_schema(); + let diags = + diag_keys("select * from products where name = 5", &schema); + assert!( + diags.iter().any(|d| d.contains("different type")), + "expected type_mismatch warning on SQL WHERE; got {diags:?}", + ); + } + + #[test] + fn sql_where_type_mismatch_number_vs_text_warns() { + let schema = typed_schema(); + let diags = diag_keys( + "select * from products where price = 'high'", + &schema, + ); + assert!( + diags.iter().any(|d| d.contains("different type")), + "expected type_mismatch warning on SQL WHERE; got {diags:?}", + ); + } + + #[test] + fn sql_where_type_compatible_does_not_warn() { + let schema = typed_schema(); + let diags = + diag_keys("select * from products where price = 5", &schema); + // `price` is real; `5` is numeric — compatible (any + // numeric-real comparison is fine). No warning. + assert!( + !diags + .iter() + .any(|d| d.contains("different type") || d.contains("LIKE")), + "expected no warnings for compatible types; got {diags:?}", + ); + } + + #[test] + fn sql_having_predicate_warning_fires() { + // Phase-1 gap also affects HAVING. + let schema = typed_schema(); + let diags = diag_keys( + "select count(*) from products group by name having price like 5", + &schema, + ); + assert!( + diags.iter().any(|d| d.contains("LIKE")), + "expected like_numeric warning on HAVING; got {diags:?}", + ); + } + + #[test] + fn sql_join_on_predicate_warning_fires() { + // Phase-1 gap also affects JOIN ON. + let mut cache = SchemaCache::default(); + cache.tables.push("a".to_string()); + cache.tables.push("b".to_string()); + cache.columns.push("id".to_string()); + cache.columns.push("price".to_string()); + cache.table_columns.insert( + "a".to_string(), + vec![TableColumn { name: "id".to_string(), user_type: Type::Int }], + ); + cache.table_columns.insert( + "b".to_string(), + vec![TableColumn { name: "price".to_string(), user_type: Type::Real }], + ); + let diags = diag_keys( + "select * from a join b on price like 5", + &cache, + ); + assert!( + diags.iter().any(|d| d.contains("LIKE")), + "expected like_numeric warning on JOIN ON; got {diags:?}", + ); + } } diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 9b2a5b8..46f34a5 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -38,11 +38,27 @@ /// `(key, expected_placeholders)`. Sorted by key for grep-ability. pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ // ---- Pre-submit diagnostics (ADR-0027) ---- + ("diagnostic.ambiguous_column", &["column", "qualifiers"]), + ("diagnostic.compound_arity_mismatch", &["op", "left_n", "right_n"]), + ("diagnostic.cte_arity_mismatch", &["cte", "declared", "actual"]), + ("diagnostic.duplicate_cte", &["name"]), ("diagnostic.eq_null", &[]), ("diagnostic.like_numeric", &["column", "type"]), + ("diagnostic.projection_alias_misplaced", &["alias", "clause"]), ("diagnostic.type_mismatch", &["column", "type"]), ("diagnostic.unknown_column", &["name", "table"]), + ("diagnostic.unknown_qualifier", &["qualifier"]), ("diagnostic.unknown_table", &["name"]), + // ---- Friendly-error translations of engine messages + // ---- (ADR-0019; ADR-0032 §11.5). + ("engine.aggregate_misuse", &["name"]), + ("engine.ambiguous_column", &["column"]), + ("engine.compound_arity_mismatch", &["op"]), + ("engine.group_by_required", &[]), + ("engine.no_such_column", &["name"]), + ("engine.no_such_table", &["name"]), + ("engine.recursive_cte_malformed", &[]), + ("engine.scalar_subquery_too_many_rows", &[]), // ---- Already-exists collisions (anchor: "already exists") ---- ("error.already_exists.column.headline", &["table", "column"]), ("error.already_exists.relationship.headline", &["name"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 3a9cba8..d6df488 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -494,6 +494,28 @@ diagnostic: eq_null: "`= NULL` is never true — use `IS NULL` or `IS NOT NULL`" # ADR-0027 Amendment 1: LIKE is a text-pattern match. like_numeric: "`{column}` is {type} — `LIKE` is a text-pattern match, not a {type} comparison" + # Phase-2 diagnostic keys (ADR-0032 §11.5). + unknown_qualifier: "no such table or alias in scope: `{qualifier}`" + ambiguous_column: "`{column}` is ambiguous — appears in {qualifiers}" + projection_alias_misplaced: "alias `{alias}` cannot be used in {clause} — aliases are not bound until after `SELECT`'s projection list" + cte_arity_mismatch: "CTE `{cte}` declares {declared} columns but its body has {actual}" + compound_arity_mismatch: "`{op}` requires both sides to have the same number of columns — left has {left_n}, right has {right_n}" + duplicate_cte: "duplicate `WITH` table name: `{name}`" + +# Engine-error translations: an engine-rejected SQL statement +# reaches the friendly-error layer (ADR-0019) and these keys +# render its message in engine-neutral wording (ADR-0030 §7). +# The keys are reached only after a parse-time pass let the +# query through and the engine rejected at execution. +engine: + no_such_table: "no such table: `{name}`" + no_such_column: "no such column: `{name}`" + ambiguous_column: "`{column}` is ambiguous — qualify it with a table or alias" + aggregate_misuse: "`{name}()` cannot be used here — aggregates belong in projection or `HAVING`, not `WHERE`" + group_by_required: "non-aggregated columns must appear in a `GROUP BY` clause or be wrapped in an aggregate function" + compound_arity_mismatch: "`{op}` requires both sides to have the same number of columns" + scalar_subquery_too_many_rows: "scalar subquery returned more than one row — use `IN (…)` or limit the inner query" + recursive_cte_malformed: "recursive CTE shape is invalid — needs a non-recursive base case combined with `UNION`/`UNION ALL`" # ---- Project lifecycle event notes ----------------------------------- project: