//! Walker entry point (ADR-0024 §architecture).
//!
//! The walker is the single source of truth for the migrated
//! commands. Phase A wires the parse consumer; completion +
//! highlighting still flow through the chumsky path until
//! Phase D / F.
//!
//! Routing rule (ADR-0024 §migration): the input's first
//! identifier-shape token decides whether the walker owns this
//! command. If it matches a registered entry word, the walker
//! takes over end-to-end (success or failure). Otherwise, the
//! router falls through to the chumsky parser, which still
//! carries every non-migrated command's grammar through Phase F.

pub mod context;
pub mod driver;
pub mod highlight;
pub mod lex_helpers;
pub mod outcome;

use crate::dsl::command::{
    Command, CompareOp, Expr, Operand, Predicate, RowFilter,
};
use crate::dsl::grammar;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::driver::{FailureKind, NodeWalkResult, walk_node};
use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
use crate::dsl::walker::outcome::{
    Expectation, MatchedPath, WalkBound, WalkOutcome, WalkResult,
};

pub use context::ColumnInfo;
pub use highlight::{highlight_runs, highlight_runs_in_mode};
pub use outcome::{Diagnostic, Severity};

/// Resolve the hint-panel mode at the end of `source`
/// (ADR-0024 §HintMode-per-node, §Phase D §typed-value-slots).
///
/// Schemaless variant. Surfaces:
/// - `HintMode::ProseOnly("hint.value_literal_slot")` at generic
///   value-literal positions (all five forms in the expected
///   set), and
/// - `HintMode::ForceProse("hint.ambient_typing_name")` at
///   `NewName` ident slots.
///
/// Schema-aware callers should use `hint_mode_at_input_with_schema`
/// instead — that variant narrows the prose to the column's
/// user-facing type at typed value slots (e.g. "Type a date
/// as 'YYYY-MM-DD'" at a date column).
#[must_use]
pub fn hint_mode_at_input(source: &str) -> Option<crate::dsl::grammar::HintMode> {
    hint_mode_at_input_inner(source, None)
}

/// Schema-aware hint-mode resolution (ADR-0024 §Phase D).
///
/// Uses the same schema reference the walker drives parse-time
/// dispatch from. When the walker enters a `Node::TypedValueSlot`
/// at the cursor position, the catalog prose narrows to the
/// column's user-facing type (e.g. `hint.value_slot_int` at an
/// int column).
#[must_use]
pub fn hint_mode_at_input_with_schema(
    source: &str,
    schema: &crate::completion::SchemaCache,
) -> Option<crate::dsl::grammar::HintMode> {
    hint_mode_at_input_inner(source, Some(schema))
}

/// Resolution of the hint-panel mode at the cursor, plus the
/// column name (if known) the cursor's value slot is keyed on.
///
/// Returned by [`hint_resolution_at_input`]. The renderer
/// composes per-column prose ("for `Email`: Type a quoted
/// string …") when `column` is `Some`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HintResolution {
    /// The hint mode resolved for the slot at the cursor.
    pub mode: crate::dsl::grammar::HintMode,
    /// Name of the column the value slot is keyed on, when the
    /// walker recorded one.
    pub column: Option<String>,
    /// Auto-generated columns (serial / shortid) that Form B
    /// `insert into <table> values (…)` silently skips from the
    /// value list (ADR-0018 §3). Populated *only* at the first
    /// value slot of a Form B insert whose table has such
    /// columns — empty everywhere else. The renderer appends a
    /// pedagogical note pointing the user at Form A so the
    /// skipped column is discoverable without reading help
    /// (handoff-12 §2.2).
    pub form_b_autogen_skipped: Vec<String>,
}

/// Single-walk hint resolver (ADR-0024 §Phase D §typed-value-slots).
///
/// Walks `source` against `schema`, then reports both the
/// resolved `HintMode` and the walker's `pending_value_column`
/// (if any). Returns `None` when no HintMode applies.
///
/// Delegates to [`hint_resolution_at_input_in_mode`] with
/// `Mode::Simple`.
#[must_use]
pub fn hint_resolution_at_input(
    source: &str,
    schema: Option<&crate::completion::SchemaCache>,
) -> Option<HintResolution> {
    hint_resolution_at_input_in_mode(source, schema, crate::mode::Mode::Simple)
}

/// Mode-aware hint resolver (ADR-0022 Amendment 1).
/// /// Walks `source` in `mode` so advanced-mode SQL resolves slot /// hints instead of being gated by the simple-mode "this is SQL" /// path. The no-mode [`hint_resolution_at_input`] defaults to /// `Mode::Simple`. #[must_use] pub fn hint_resolution_at_input_in_mode( source: &str, schema: Option<&crate::completion::SchemaCache>, mode: crate::mode::Mode, ) -> Option { use crate::dsl::grammar::HintMode; let snap = expected_for_hint_snapshot(source, schema, mode); // Empty expected set means the command is already complete // (`WalkOutcome::Match`) — no slot to hint at. if snap.expected.is_empty() { return None; } // Typed value slot: the walker tagged `pending_value_type` // on entry to a `Node::TypedValueSlot`. Per-column-type // prose, narrowed by the column's user-facing type, plus // the Form B auto-gen pedagogical note. if let Some(ty) = snap.pending_value_type { return Some(HintResolution { mode: HintMode::ProseOnly(catalog_key_for_value_type(ty)), form_b_autogen_skipped: form_b_autogen_skipped( source, snap.user_listed_columns.as_ref(), snap.current_table_columns.as_ref(), snap.pending_value_column.as_deref(), ), column: snap.pending_value_column, }); } // Node-attached HintMode (ADR-0024 §HintMode-per-node): the // grammar declares the mode at the slot via `Node::Hinted`; // the walker recorded it in `pending_hint_mode`. The hint // resolver reads it directly — no signature-matching on the // shape of the expected set. `ProseOnly` covers the // value-literal fallback slot; `ForceProse` covers `NewName` // ident slots ("Type a name"). 
match snap.pending_hint_mode { Some(mode @ (HintMode::ProseOnly(_) | HintMode::ForceProse(_))) => { Some(HintResolution { mode, column: None, form_b_autogen_skipped: Vec::new(), }) } Some(HintMode::SuppressProse | HintMode::Default) | None => None, } } /// Auto-generated columns a Form B insert skips from its value /// list — but only when the cursor sits at the *first* value /// slot, so the pedagogical note fires once per command rather /// than at every comma. /// /// Returns empty unless: the command is an `insert`; no explicit /// column list was given (Form B — `user_listed` is `None`); the /// table has serial / shortid columns; and `pending_column` is /// the first non-auto-generated column (the first slot). fn form_b_autogen_skipped( source: &str, user_listed: Option<&Vec>, table_columns: Option<&Vec>, pending_column: Option<&str>, ) -> Vec { use crate::dsl::types::Type; // Form A (explicit column list) and non-insert commands // (`update T set …` value slots also leave user_listed // None) are excluded — the note is insert-Form-B only. if user_listed.is_some() { return Vec::new(); } if !source.trim_start().to_ascii_lowercase().starts_with("insert") { return Vec::new(); } let Some(cols) = table_columns else { return Vec::new(); }; let is_auto = |t: Type| matches!(t, Type::Serial | Type::ShortId); let skipped: Vec = cols .iter() .filter(|c| is_auto(c.user_type)) .map(|c| c.name.clone()) .collect(); if skipped.is_empty() { return Vec::new(); } // Fire only at the first value slot — i.e. when the slot's // column is the first non-auto-generated column. 
let first_non_auto = cols.iter().find(|c| !is_auto(c.user_type)); match (first_non_auto, pending_column) { (Some(first), Some(pending)) if first.name == pending => skipped, _ => Vec::new(), } } fn hint_mode_at_input_inner( source: &str, schema: Option<&crate::completion::SchemaCache>, ) -> Option { // Single source of truth: `hint_resolution_at_input` already // resolves the slot's HintMode (typed-value-slot per-type // prose, or the node-attached `Node::Hinted` annotation). // This thin wrapper just drops the resolution's column / // skip detail for callers that only need the mode. hint_resolution_at_input(source, schema).map(|r| r.mode) } const fn catalog_key_for_value_type(ty: crate::dsl::types::Type) -> &'static str { use crate::dsl::types::Type; match ty { Type::Int => "hint.value_slot_int", Type::Real => "hint.value_slot_real", Type::Decimal => "hint.value_slot_decimal", Type::Bool => "hint.value_slot_bool", Type::Text => "hint.value_slot_text", Type::Date => "hint.value_slot_date", Type::DateTime => "hint.value_slot_datetime", Type::Blob => "hint.value_slot_blob", Type::Serial => "hint.value_slot_serial", Type::ShortId => "hint.value_slot_shortid", } } /// Completion-engine probe (ADR-0024 §Phase D §column-narrowing). /// /// Runs a single schema-aware walk and returns the structured /// pieces the completion engine needs: the expected set plus /// the table-context snapshot the engine reads to narrow /// column candidates to the active table. #[derive(Debug, Clone)] pub struct CompletionProbe { pub expected: Vec, /// Columns of `current_table` resolved at the cursor (set /// by an `Ident { source: Tables, writes_table: true }` /// earlier in the walk). `None` when the walker is /// schemaless or the table didn't resolve. pub current_table_columns: Option>, /// The grammar-declared `HintMode` at the cursor's slot /// (`Node::Hinted`), if any. 
A `ProseOnly` slot tells the /// completion engine to suppress its keyword candidates — /// the node-attached signal that supersedes the /// expected-set signature heuristic where the grammar /// explicitly marks a slot prose-only (e.g. the /// WHERE-expression operand, which also accepts a column /// reference — ADR-0026 §8). pub pending_hint_mode: Option, /// The active `from_scope` at the cursor (top frame on /// the walker's scope stack). Empty when no FROM has been /// reached or the walker is schemaless. Used by the /// completion engine to narrow `cte.|` / `t.|` qualified- /// prefix candidates to a single binding's columns /// (ADR-0032 §10.5). pub from_scope: Vec, /// CTE bindings visible at the cursor across all in-scope /// frames (innermost to outermost). The same source the /// qualified-prefix completion consults for `cte.|` shapes. pub cte_bindings: Vec, } /// Run a schema-aware walk and report the completion-engine's /// view (ADR-0024 §Phase D §column-narrowing). #[must_use] pub fn completion_probe( source: &str, schema: &crate::completion::SchemaCache, ) -> CompletionProbe { completion_probe_in_mode(source, schema, crate::mode::Mode::Advanced) } /// Mode-aware [`completion_probe`] (ADR-0030 §2). /// /// In `Mode::Simple` the empty-input / fall-through fallback /// omits advanced-only entry words so Tab does not offer SQL /// commands in simple mode, and the walker — running with /// `ctx.mode = mode` — gates SQL-only forms inline. 
pub fn completion_probe_in_mode( source: &str, schema: &crate::completion::SchemaCache, mode: crate::mode::Mode, ) -> CompletionProbe { use crate::dsl::grammar::{REGISTRY, is_advanced_only}; let mode_filtered_entries = || -> Vec { REGISTRY .iter() .filter(|(c, _)| { mode == crate::mode::Mode::Advanced || !is_advanced_only(c.entry.primary) }) .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; if source.trim().is_empty() { return CompletionProbe { expected: mode_filtered_entries(), current_table_columns: None, pending_hint_mode: None, from_scope: Vec::new(), cte_bindings: Vec::new(), }; } let mut ctx = context::WalkContext::with_schema(schema); ctx.mode = mode; let (result, _cmd) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx); let Some(result) = result else { return CompletionProbe { expected: mode_filtered_entries(), current_table_columns: None, pending_hint_mode: None, from_scope: Vec::new(), cte_bindings: Vec::new(), }; }; let expected = match result.outcome { outcome::WalkOutcome::Match { .. } => result.tail_expected, // A trailing-junk Mismatch (the shape matched, then the // user kept typing) still carries the outer shape's // skipped trailing optionals in `tail_expected` — e.g. // an optional `--create-fk` flag the trailing `--` is // starting to type. Merge them so completion still // offers the optional continuation. A genuine // mid-command mismatch has an empty `tail_expected`. outcome::WalkOutcome::Mismatch { expected, .. } => { let mut merged = expected; merged.extend(result.tail_expected); merged } outcome::WalkOutcome::Incomplete { expected, .. } => expected, // Validation failure path: the walker matched the // structural shape but the AST builder rejected (e.g. // Form C with column-shaped items). The walker still // captured the skipped-Optional expectations before the // validation fired — surface those so the user gets // useful Tab candidates even at a validation-flagged // position. 
outcome::WalkOutcome::ValidationFailed { .. } => result.tail_expected, }; // Snapshot the cursor's lexical scope: top frame's // from_scope and the union of every frame's cte_bindings // (innermost first so a shadowing inner CTE wins on name // collision per ADR-0032 §10.3). let (from_scope, cte_bindings) = { let top_from = ctx .from_scope_stack .last() .map(|f| f.from_scope.clone()) .unwrap_or_default(); let mut ctes: Vec = Vec::new(); for frame in ctx.from_scope_stack.iter().rev() { for binding in &frame.cte_bindings { if !ctes .iter() .any(|c| c.name.eq_ignore_ascii_case(&binding.name)) { ctes.push(binding.clone()); } } } (top_from, ctes) }; CompletionProbe { expected, current_table_columns: ctx.current_table_columns, pending_hint_mode: ctx.pending_hint_mode, from_scope, cte_bindings, } } /// The validity-indicator verdict for `source` (ADR-0027 §3). /// /// `None` — the input would run clean (the indicator shows /// nothing); empty / whitespace-only input is also `None`. /// `Some(Error)` — pressing Enter now fails (a structural /// parse failure, or a schema-existence diagnostic). /// `Some(Warning)` — it runs, but is very likely not intended /// (the ADR-0026 expression flags). /// /// The verdict is the highest severity across the parse /// outcome and the `diagnostics` set (ADR-0027 §2). #[must_use] pub fn input_verdict( source: &str, schema: Option<&crate::completion::SchemaCache>, ) -> Option { input_verdict_in_mode(source, schema, crate::mode::Mode::Advanced) } /// Mode-aware [`input_verdict`] (ADR-0030 §2). /// /// The `[ERR]` / `[WRN]` indicator reads this through /// `App::input_verdict_for_indicator` passing the line's /// effective mode, so a simple-mode `select` lights up ERROR /// (the SQL-hint validation failure) and an advanced-mode /// `select` does not. 
#[must_use] pub fn input_verdict_in_mode( source: &str, schema: Option<&crate::completion::SchemaCache>, mode: crate::mode::Mode, ) -> Option { use outcome::Severity; if source.trim().is_empty() { return None; } let mut ctx = schema.map_or_else( context::WalkContext::new, context::WalkContext::with_schema, ); ctx.mode = mode; let (result, _cmd) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx); let Some(result) = result else { // The first token is not a registered command word — // typing this and pressing Enter fails. return Some(Severity::Error); }; let outcome_severity = match result.outcome { outcome::WalkOutcome::Match { .. } => None, _ => Some(Severity::Error), }; let diag_severity = result.diagnostics.iter().map(|d| d.severity).max(); outcome_severity.into_iter().chain(diag_severity).max() } /// The schema-aware diagnostics for `source` (ADR-0027 §2). /// /// Schema-existence ERRORs (unknown table / column) and /// expression WARNINGs. The highlight overlay and the hint /// panel both read these for *where* and *why*; the indicator /// ([`input_verdict`]) is the severity summary over them. /// /// Empty for empty input, an unrecognised command, or a parse /// that never reached a structural `Match` — a parse failure /// carries its own ERROR through the outcome, not through a /// `Diagnostic`, and is highlighted by the existing /// definite-error path. #[must_use] pub fn input_diagnostics( source: &str, schema: Option<&crate::completion::SchemaCache>, ) -> Vec { input_diagnostics_in_mode(source, schema, crate::mode::Mode::Simple) } /// Mode-aware [`input_diagnostics`]. Advanced mode lets the /// Phase-2 SQL-side diagnostics (ADR-0032 §11) emit alongside /// the existing DSL diagnostics. 
#[must_use]
pub fn input_diagnostics_in_mode(
    source: &str,
    schema: Option<&crate::completion::SchemaCache>,
    mode: crate::mode::Mode,
) -> Vec<Diagnostic> {
    if source.trim().is_empty() {
        return Vec::new();
    }

    // Schemaless context when no schema is supplied.
    let mut ctx = schema.map_or_else(
        context::WalkContext::new,
        context::WalkContext::with_schema,
    );
    ctx.mode = mode;
    let (result, _cmd) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx);
    // No walk result (unrecognised command) means no diagnostics.
    result.map_or_else(Vec::new, |r| r.diagnostics)
}

// NOTE(review): the paragraph below describes
// `schema_existence_diagnostics`, not `PassBinding`; it is kept as
// a plain comment so it no longer renders as the struct's rustdoc.
// Its "single left-to-right pass" wording does not match the
// binding pre-pass that function performs in this file — confirm
// and move/refresh it next to the function.
//
// Schema-existence diagnostics (ADR-0027 §2).
//
// A matched `IdentSource::Tables` token whose name is not in
// the schema — or a `Columns` token absent from the table in
// scope — is an ERROR: the command parses but would fail at
// execution. Runs only on a structural `Match`.
//
// Column scope is resolved by a single left-to-right pass:
// every command places its table ident before the columns
// that belong to it (a qualified `T.c` puts `T` immediately
// before `c`), so the most recent valid `Tables` ident is the
// table a subsequent `Columns` ident is checked against. An
// unknown table clears the scope, so its columns are not
// cascaded into a second diagnostic.

/// One in-scope FROM-source binding, simulated from the
/// matched-path by `schema_existence_diagnostics`. ADR-0032
/// §10.1 / §11.2 — the multi-binding schema-existence
/// diagnostic resolves bare and qualified column references
/// against this scope.
#[derive(Debug)]
struct PassBinding {
    /// Table (or CTE) name as written in the input.
    table: String,
    /// Alias attached to the binding, if one followed it.
    alias: Option<String>,
}

/// Resolve a qualifier identifier against the active bindings.
/// Aliases shadow base-table names (ADR-0032 §10.5), so alias
/// matches are tried first.
fn resolve_qualifier<'a>( bindings: &'a [PassBinding], qualifier: &str, ) -> Option<&'a PassBinding> { bindings .iter() .find(|b| { b.alias .as_deref() .is_some_and(|a| a.eq_ignore_ascii_case(qualifier)) }) .or_else(|| { bindings .iter() .find(|b| b.table.eq_ignore_ascii_case(qualifier)) }) } fn schema_existence_diagnostics( path: &MatchedPath, schema: Option<&crate::completion::SchemaCache>, ) -> Vec { use crate::dsl::grammar::IdentSource; use outcome::{Diagnostic, MatchedKind, Severity}; let Some(schema) = schema else { return Vec::new(); }; let mut diagnostics = Vec::new(); // Pre-pass: collect all FROM-source bindings and CTE names // by walking the matched-path. ADR-0032 §10.6's projection- // before-FROM problem makes a strict left-to-right pass // mis-classify projection-side identifiers when the FROM // clause comes later. We sidestep it here by gathering the // full scope first, then doing the diagnostic check with // the complete set of bindings available. // // For Phase 2 this is a single flat scope (top-level // statement). Subquery / CTE-body scopes pop on // ScopedSubgrammar exit and their bindings are not // distinguished here — full per-frame scope tracking // remains a 2e concern. Refs inside subquery / CTE bodies // resolve against the union of all matched bindings, which // is permissive (a false-positive ambiguity could in // principle arise for shadowed names) but conservative // (won't false-flag valid refs). 
let mut bindings: Vec = Vec::new(); let mut cte_names: Vec = Vec::new(); { let mut pending_alias_index: Option = None; for item in &path.items { let MatchedKind::Ident { source, role } = item.kind else { continue; }; match source { IdentSource::Tables if role == "table_name" && (schema_has_table(schema, &item.text) || cte_names_contains(&cte_names, &item.text)) => { bindings.push(PassBinding { table: item.text.clone(), alias: None, }); pending_alias_index = Some(bindings.len() - 1); } IdentSource::Tables if role == "table_name" => { pending_alias_index = None; } IdentSource::NewName if role == "table_alias" => { if let Some(idx) = pending_alias_index { bindings[idx].alias = Some(item.text.clone()); } pending_alias_index = None; } IdentSource::NewName if role == "cte_name" => { if !cte_names_contains(&cte_names, &item.text) { cte_names.push(item.text.clone()); } pending_alias_index = None; } _ => { pending_alias_index = None; } } } } // Track which CTE names have already been seen, for // duplicate detection (a separate single-pass walk; emits // the diagnostic on the second occurrence). let mut seen_cte_names: Vec = Vec::new(); // Set on iteration `i` when the current item is the `t` // qualifier of a `t.c` reference; consumed on iteration // `i + 2` by the `sql_expr_qualified_ref` ident. let mut pending_qualifier: Option<(String, (usize, usize))> = None; // Projection-alias scope at top-level (ADR-0032 §11.2). Aliases // declared in the current SELECT leg's projection list are // visible to `ORDER BY` but NOT to `WHERE` / `HAVING` / // `GROUP BY`. A flat matched-path single pass suffices: aliases // are always written BEFORE these clauses are reached, and // subquery levels (depth > 0) keep their own implicit scope. 
let mut paren_depth: i32 = 0; let mut current_clause: Option<&'static str> = None; let mut leg_aliases: Vec = Vec::new(); for (i, item) in path.items.iter().enumerate() { // Maintain paren-depth, clause kind, and per-leg alias bag // BEFORE dispatching on the item — these track context that // the ident handlers below read. let depth_at_item = paren_depth; match &item.kind { MatchedKind::Punct('(') => paren_depth += 1, MatchedKind::Punct(')') => paren_depth -= 1, MatchedKind::Word(w) if depth_at_item == 0 => match *w { // A new SELECT leg (top-level or compound-leg // start) resets the alias bag and clause kind so a // following leg's projection / clauses are scoped // to its own aliases only. "select" => { leg_aliases.clear(); current_clause = None; } "union" | "intersect" | "except" => { leg_aliases.clear(); current_clause = None; } "where" => current_clause = Some("the WHERE clause"), "having" => current_clause = Some("the HAVING clause"), "group" => current_clause = Some("the GROUP BY clause"), // ORDER BY / LIMIT / OFFSET / FROM are not forbidden // contexts for alias references. Clearing here also // protects ORDER BY from a sticky earlier clause. "order" | "limit" | "offset" | "from" => { current_clause = None; } _ => {} }, MatchedKind::Ident { source: IdentSource::NewName, role: "projection_alias", } if depth_at_item == 0 => { leg_aliases.push(item.text.clone()); } _ => {} } let MatchedKind::Ident { source, role } = item.kind else { continue; }; match source { IdentSource::Tables => { if role == "qualified_star_qualifier" { // The `t` in `t.*`. Resolve against bindings // (populated by the pre-pass); emit // `unknown_qualifier` if it doesn't resolve. 
if resolve_qualifier(&bindings, &item.text).is_none() && !cte_names_contains(&cte_names, &item.text) { diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.unknown_qualifier", &[( "qualifier", &item.text as &dyn std::fmt::Display, )], ), }); } } else if !schema_has_table(schema, &item.text) && !cte_names_contains(&cte_names, &item.text) { // Unknown table — the pre-pass skipped // pushing this as a binding, so it's not in // the resolution scope. Flag it here. diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.unknown_table", &[("name", &item.text as &dyn std::fmt::Display)], ), }); } } IdentSource::Columns => { if role == "sql_expr_qualified_ref" { // The `c` half of `t.c` — the previous pass // iteration set `pending_qualifier` to the // qualifier ident. if let Some((qual, qual_span)) = pending_qualifier.take() { match resolve_qualifier(&bindings, &qual) { Some(binding) => { if !cte_names_contains( &cte_names, &binding.table, ) && !schema_has_column( schema, &binding.table, &item.text, ) { diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.unknown_column", &[ ("name", &item.text as &dyn std::fmt::Display), ("table", &binding.table as &dyn std::fmt::Display), ], ), }); } } None => { // Qualifier didn't resolve — emit // unknown_qualifier on the // qualifier span, not on the // column, so the learner sees // the root cause. diagnostics.push(Diagnostic { severity: Severity::Error, span: qual_span, message: crate::friendly::translate( "diagnostic.unknown_qualifier", &[( "qualifier", &qual as &dyn std::fmt::Display, )], ), }); } } } } else if role == "sql_expr_ident" && is_followed_by_qualified_ref(&path.items, i) { // This ident is the `t` qualifier of a // following `t.c`. Defer to the qualified-ref // check on the next iteration. 
pending_qualifier = Some((item.text.clone(), item.span)); } else if !bindings.is_empty() { // Bare column reference. Count which bindings // contain it (case-insensitive). CTE-binding // tables match opportunistically (we don't // know their columns yet — the §10.3 stage-2 // harvest is deferred), so CTE refs are // accepted silently. let matched: Vec<&str> = bindings .iter() .filter(|b| { cte_names_contains(&cte_names, &b.table) || schema_has_column( schema, &b.table, &item.text, ) }) .map(|b| b.alias.as_deref().unwrap_or(&b.table)) .collect(); match matched.len() { 0 => { // ADR-0032 §11.2 — a top-level bare ref // that doesn't resolve as a column but // DOES match a projection alias in this // leg is either misplaced (forbidden // clause) or a valid alias reference // (ORDER BY / LIMIT). Either way, the // unknown_column diagnostic would // mislead, so suppress it here. let alias_match = depth_at_item == 0 && leg_aliases.iter().any(|a| { a.eq_ignore_ascii_case(&item.text) }); if alias_match { if let Some(clause) = current_clause { diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.projection_alias_misplaced", &[ ("alias", &item.text as &dyn std::fmt::Display), ("clause", &clause as &dyn std::fmt::Display), ], ), }); } // Allowed-clause alias ref — silent. 
continue; } let table_arg = if bindings.len() == 1 { bindings[0].table.clone() } else { bindings .iter() .map(|b| b.table.as_str()) .collect::>() .join(", ") }; diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.unknown_column", &[ ("name", &item.text as &dyn std::fmt::Display), ("table", &table_arg as &dyn std::fmt::Display), ], ), }); } 1 => {} // unique match, OK _ => { let qualifiers = matched.join(", "); diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.ambiguous_column", &[ ("column", &item.text as &dyn std::fmt::Display), ("qualifiers", &qualifiers as &dyn std::fmt::Display), ], ), }); } } } // else: no FROM in scope — engine catches the // unbound column reference. Skip silently to // avoid noise on `SELECT a` style expressions // (which the grammar admits per §1). } IdentSource::NewName => { // Pre-flight duplicate CTE detection (ADR-0032 // §11.5 / Plan §Open-2, user-approved). The // pre-pass collected the de-duplicated set; we // scan again to find the SECOND occurrence and // emit on its span. if role == "cte_name" { if seen_cte_names .iter() .any(|n| n.eq_ignore_ascii_case(&item.text)) { diagnostics.push(Diagnostic { severity: Severity::Error, span: item.span, message: crate::friendly::translate( "diagnostic.duplicate_cte", &[( "name", &item.text as &dyn std::fmt::Display, )], ), }); } else { seen_cte_names.push(item.text.clone()); } } } IdentSource::Relationships | IdentSource::Indexes | IdentSource::Types | IdentSource::Free => {} } } diagnostics } fn cte_names_contains(names: &[String], candidate: &str) -> bool { names.iter().any(|n| n.eq_ignore_ascii_case(candidate)) } /// Compound-query column-count mismatch ERROR pass (ADR-0032 §11.2 /// / §11.7). 
A `UNION` / `INTERSECT` / `EXCEPT` chain whose legs /// have differing projection arities will be rejected by the /// engine at execution time; this pass catches it pre-flight so /// the learner sees the slot highlighted at the offending operator /// instead of an engine string. /// /// Counting strategy: the matched-path is flat, so we maintain a /// per-depth book-keeping of in-progress legs. A leg starts at a /// `SELECT` keyword and counts projection items as the number of /// top-level commas (at the leg's own paren-depth) seen before /// the first leg-end keyword (`FROM` / `WHERE` / `GROUP` / `HAVING` /// / `ORDER` / `LIMIT` / `OFFSET`) or compound-leg keyword /// (`UNION` / `INTERSECT` / `EXCEPT`) or matching `)` at the same /// depth. Commas nested inside function calls or subqueries sit /// at a deeper paren-depth and are ignored. /// /// When a compound operator at depth `d` is encountered, the /// just-completed leg's arity at depth `d` is stashed as a /// pending comparand; the next leg's arity at depth `d` is /// compared against it on that leg's close. The op token's span /// is the diagnostic anchor — that's the join point the learner /// pointed the chain at. 
fn compound_arity_diagnostics( path: &MatchedPath, ) -> Vec { use outcome::{Diagnostic, MatchedKind, Severity}; use std::collections::HashMap; struct LegState { arity: usize, in_projection: bool, } struct Pending { op_text: &'static str, op_span: (usize, usize), prev_arity: usize, } let mut diagnostics = Vec::new(); let mut depth: i32 = 0; let mut legs: HashMap = HashMap::new(); let mut pending: HashMap = HashMap::new(); let close_leg = |depth: i32, legs: &mut HashMap, pending: &mut HashMap, diagnostics: &mut Vec| -> Option { let leg = legs.remove(&depth)?; if let Some(p) = pending.remove(&depth) && p.prev_arity != leg.arity { diagnostics.push(Diagnostic { severity: Severity::Error, span: p.op_span, message: crate::friendly::translate( "diagnostic.compound_arity_mismatch", &[ ("op", &p.op_text as &dyn std::fmt::Display), ("left_n", &p.prev_arity as &dyn std::fmt::Display), ("right_n", &leg.arity as &dyn std::fmt::Display), ], ), }); } Some(leg.arity) }; for item in &path.items { let depth_at_item = depth; match &item.kind { MatchedKind::Punct('(') => { depth += 1; } MatchedKind::Punct(')') => { // Any leg at this depth closes before the depth // pops. If a pending set-op was waiting, the leg // it expected never arrived — drop the pending // (the chain ended without a second leg, which // the grammar shouldn't admit but is safer than // panicking). close_leg(depth, &mut legs, &mut pending, &mut diagnostics); pending.remove(&depth); depth -= 1; } MatchedKind::Punct(',') if depth_at_item == depth => { if let Some(leg) = legs.get_mut(&depth) && leg.in_projection { leg.arity += 1; } } MatchedKind::Word(w) if depth_at_item == depth => { match *w { "select" => { // A leg already at this depth shouldn't // happen (a previous compound op or // close-paren would have removed it), but // overwriting is safe — the new leg // supersedes any stale state. 
legs.insert( depth, LegState { arity: 1, in_projection: true }, ); } "from" | "where" | "group" | "having" | "order" | "limit" | "offset" => { if let Some(leg) = legs.get_mut(&depth) { leg.in_projection = false; } } op_word @ ("union" | "intersect" | "except") => { // Close the just-finished leg, comparing // it against any pending set-op state at // this depth. if let Some(arity) = close_leg( depth, &mut legs, &mut pending, &mut diagnostics, ) { pending.insert( depth, Pending { op_text: op_word, op_span: item.span, prev_arity: arity, }, ); } } _ => {} } } _ => {} } } // Drain any still-open legs at end-of-path. Same comparison // as the close_leg helper does on `)`. let depths: Vec = legs.keys().copied().collect(); for d in depths { close_leg(d, &mut legs, &mut pending, &mut diagnostics); } diagnostics } /// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the /// Phase-1 carry-over gap closure). /// /// Phase 1's `predicate_warnings` walks the DSL `Expr` AST and /// emits `diagnostic.eq_null`, `diagnostic.type_mismatch`, and /// `diagnostic.like_numeric` (ADR-0027 Amendment 1). The SQL /// expression grammar (`sql_expr.rs`) deliberately builds no /// AST (ADR-0031 §2), so until Phase 2 the same warnings /// silently failed to fire on SQL `WHERE` / `HAVING` / `ON` / /// `CASE` / projection / `ORDER BY` slots. /// /// This pass walks the matched-path looking for the predicate- /// tail shapes by node-name labels and emits the same catalog /// keys. Scope is intentionally narrow: only bare column refs /// in the form ` ` are recognised. The /// qualified-ref form (`. `) and /// expression-operand cases (` LIKE ` where the /// expression isn't a bare column) are not detected here — /// catching them would require either an AST or a much fuller /// pattern matcher, and the false-negative posture is safe /// (the warning is advisory; the engine still runs the query). 
fn sql_predicate_warnings( path: &MatchedPath, schema: Option<&crate::completion::SchemaCache>, ) -> Vec { use crate::dsl::grammar::IdentSource; use outcome::{Diagnostic, MatchedKind, Severity}; let Some(schema) = schema else { return Vec::new(); }; // Pre-pass: same as `schema_existence_diagnostics` — collect // the in-scope bindings so a bare column ref can be resolved // to its source table. let mut bindings: Vec = Vec::new(); let mut cte_names: Vec = Vec::new(); { let mut pending_alias_index: Option = None; for item in &path.items { let MatchedKind::Ident { source, role } = item.kind else { continue; }; match source { IdentSource::Tables if role == "table_name" && (schema_has_table(schema, &item.text) || cte_names_contains(&cte_names, &item.text)) => { bindings.push(PassBinding { table: item.text.clone(), alias: None, }); pending_alias_index = Some(bindings.len() - 1); } IdentSource::Tables if role == "table_name" => { pending_alias_index = None; } IdentSource::NewName if role == "table_alias" => { if let Some(idx) = pending_alias_index { bindings[idx].alias = Some(item.text.clone()); } pending_alias_index = None; } IdentSource::NewName if role == "cte_name" => { if !cte_names_contains(&cte_names, &item.text) { cte_names.push(item.text.clone()); } pending_alias_index = None; } _ => { pending_alias_index = None; } } } } let mut diagnostics = Vec::new(); let items = &path.items; // Scan for predicate-tail shapes: `` followed // by ` ` or `LIKE `. for i in 0..items.len() { let MatchedKind::Ident { source, role } = items[i].kind else { continue; }; if source != IdentSource::Columns || role != "sql_expr_ident" { continue; } // Skip qualified-ref qualifiers — they're handled by // resolving the t.c chain on the qualifier's binding, // which this minimal pass doesn't do. if is_followed_by_qualified_ref(items, i) { continue; } // Resolve column → which binding's column → what type. 
let Some(col_type) = resolve_bare_column_type( &bindings, &cte_names, schema, &items[i].text, ) else { // Unknown column or in a CTE-binding (whose columns // are unknown until harvest lands). Either way, skip. continue; }; let col_name = items[i].text.clone(); let Some(next) = items.get(i + 1) else { continue; }; // `IS NULL` / `IS NOT NULL` is the right way to test // NULL, but `= NULL` / `!= NULL` is the trap — flag. if let MatchedKind::Word(kw @ ("=" | "!=" | "<>")) = next.kind && let Some(third) = items.get(i + 2) && matches!(third.kind, MatchedKind::Word("null")) { let _ = kw; diagnostics.push(Diagnostic { severity: Severity::Warning, span: third.span, message: crate::friendly::translate( "diagnostic.eq_null", &[], ), }); continue; } // ` LIKE ` — pedagogical: LIKE is a // text-pattern match, so a numeric column rarely makes // sense as the target. if matches!(next.kind, MatchedKind::Word("like")) && col_type.is_numeric() { diagnostics.push(Diagnostic { severity: Severity::Warning, span: items[i].span, message: crate::friendly::translate( "diagnostic.like_numeric", &[ ("column", &col_name as &dyn std::fmt::Display), ("type", &col_type.keyword() as &dyn std::fmt::Display), ], ), }); continue; } // ` ` — emit type_mismatch when // the literal's type is structurally incompatible with // the column's type. Conservative: only flag clear-cut // numeric-vs-text mismatches. if let MatchedKind::Word(op @ ("=" | "!=" | "<>" | "<" | "<=" | ">" | ">=")) = next.kind && let Some(third) = items.get(i + 2) { let _ = op; let mismatch = match (col_type, &third.kind) { // Numeric column vs string literal. ( crate::dsl::types::Type::Int | crate::dsl::types::Type::Real | crate::dsl::types::Type::Decimal | crate::dsl::types::Type::Serial, MatchedKind::StringLit, ) => true, // Text-shaped column vs raw number literal. 
( crate::dsl::types::Type::Text | crate::dsl::types::Type::Date | crate::dsl::types::Type::DateTime | crate::dsl::types::Type::ShortId, MatchedKind::NumberLit, ) => true, // Bool vs anything but `true`/`false`/0/1 numbers // — too noisy to flag in this conservative pass. _ => false, }; if mismatch { diagnostics.push(Diagnostic { severity: Severity::Warning, span: third.span, message: crate::friendly::translate( "diagnostic.type_mismatch", &[ ("column", &col_name as &dyn std::fmt::Display), ("type", &col_type.keyword() as &dyn std::fmt::Display), ], ), }); } } } diagnostics } /// Look up a bare column ref's type by checking each binding. /// Returns the type if exactly one binding owns the column. /// Returns `None` for unknown / ambiguous / CTE-routed columns /// (the latter because the §10.3 stage-2 harvest is deferred, /// so CTE binding columns are unknown). fn resolve_bare_column_type( bindings: &[PassBinding], cte_names: &[String], schema: &crate::completion::SchemaCache, column: &str, ) -> Option { let mut found: Option = None; for b in bindings { if cte_names_contains(cte_names, &b.table) { // CTE — columns unknown for now. continue; } if let Some(ty) = schema_column_type(schema, &b.table, column) { if found.is_some() { // Ambiguous — skip the warning. return None; } found = Some(ty); } } found } /// True when the matched-path item at index `i` is immediately /// followed by `Punct('.')` and a `Columns`-source ident with /// role `sql_expr_qualified_ref` — i.e. this item is the `t` /// half of a `t.c` qualified reference. Used by /// `schema_existence_diagnostics` to skip the bare-column check /// on qualifiers. fn is_followed_by_qualified_ref( items: &[outcome::MatchedItem], i: usize, ) -> bool { use outcome::MatchedKind; let dot = items.get(i + 1); let next_ident = items.get(i + 2); matches!( dot.map(|it| &it.kind), Some(MatchedKind::Punct('.')) ) && matches!( next_ident.map(|it| &it.kind), Some(MatchedKind::Ident { role: "sql_expr_qualified_ref", .. 
}) ) } fn schema_has_table(schema: &crate::completion::SchemaCache, name: &str) -> bool { schema.tables.iter().any(|t| t.eq_ignore_ascii_case(name)) } fn schema_column_type( schema: &crate::completion::SchemaCache, table: &str, column: &str, ) -> Option { schema .columns_for_table(table)? .iter() .find(|c| c.name.eq_ignore_ascii_case(column)) .map(|c| c.user_type) } fn schema_has_column( schema: &crate::completion::SchemaCache, table: &str, column: &str, ) -> bool { schema .columns_for_table(table) .is_some_and(|cols| cols.iter().any(|c| c.name.eq_ignore_ascii_case(column))) } /// The WHERE expression of a filter command, if it has one. const fn command_where_expr(command: &Command) -> Option<&Expr> { match command { Command::Update { filter: RowFilter::Where(expr), .. } | Command::Delete { filter: RowFilter::Where(expr), .. } | Command::ShowData { filter: Some(expr), .. } => Some(expr), _ => None, } } /// WARNING diagnostics for a WHERE expression (ADR-0026 §7): /// a type-mismatched comparison, or `= NULL` / `!= NULL`. /// Both are valid and runnable — the warning is advisory. /// /// Each diagnostic's span is the offending **literal operand** /// — precise enough for a per-literal highlight (ADR-0027). 
fn expr_warnings( expr: &Expr, columns: &[crate::completion::TableColumn], ) -> Vec { let mut out = Vec::new(); collect_expr_warnings(expr, columns, &mut out); out } fn collect_expr_warnings( expr: &Expr, columns: &[crate::completion::TableColumn], out: &mut Vec, ) { match expr { Expr::Or(terms) | Expr::And(terms) => { for term in terms { collect_expr_warnings(term, columns, out); } } Expr::Not(inner) => collect_expr_warnings(inner, columns, out), Expr::Predicate(predicate) => { predicate_warnings(predicate, columns, out); } } } fn predicate_warnings( predicate: &Predicate, columns: &[crate::completion::TableColumn], out: &mut Vec, ) { use outcome::{Diagnostic, Severity}; let warn = |message: String, span: (usize, usize)| Diagnostic { severity: Severity::Warning, span, message, }; match predicate { Predicate::Compare { left, op, right } => { // `= NULL` / `!= NULL`: valid syntax that is never // true — the user almost certainly means IS NULL. // The highlight points at the `null` literal itself. let null_operand = if matches!(op, CompareOp::Eq | CompareOp::NotEq) { [left, right].into_iter().find(|&o| is_null_literal(o)) } else { None }; if let Some(operand) = null_operand { out.push(warn( crate::friendly::translate("diagnostic.eq_null", &[]), operand.span(), )); } else if let Some((message, span)) = pair_type_mismatch(left, right, columns) { out.push(warn(message, span)); } } Predicate::Between { target, low, high, .. } => { for bound in [low, high] { if let Some((message, span)) = pair_type_mismatch(target, bound, columns) { out.push(warn(message, span)); } } } Predicate::In { target, items, .. } => { for item in items { if let Some((message, span)) = pair_type_mismatch(target, item, columns) { out.push(warn(message, span)); } } } // `LIKE` is a text-pattern test; against a numeric // column it runs but is almost never intended // (ADR-0027, Amendment 1). The negation is irrelevant — // `NOT LIKE` on a numeric column is just as dubious. 
Predicate::Like { target, .. } => { if let Some((message, span)) = like_numeric_warning(target, columns) { out.push(warn(message, span)); } } // `IS [NOT] NULL` is the *correct* null test — never // flagged. Predicate::IsNull { .. } => {} } } /// A `LIKE` whose target is a numeric column: `LIKE` matches /// text patterns, so a numeric target is almost certainly a /// mistake (ADR-0027, Amendment 1). The message is paired with /// the target column operand's span. `None` when the target is /// a literal, an unknown column, or a non-numeric column. fn like_numeric_warning( target: &Operand, columns: &[crate::completion::TableColumn], ) -> Option<(String, (usize, usize))> { let Operand::Column { name, span } = target else { return None; }; let ty = columns .iter() .find(|tc| tc.name.eq_ignore_ascii_case(name))? .user_type; if !ty.is_numeric() { return None; } Some(( crate::friendly::translate( "diagnostic.like_numeric", &[ ("column", name as &dyn std::fmt::Display), ("type", &ty.keyword() as &dyn std::fmt::Display), ], ), *span, )) } const fn is_null_literal(operand: &Operand) -> bool { matches!( operand, Operand::Literal { value: crate::dsl::value::Value::Null, .. } ) } /// If one operand is a known column and the other a non-null /// literal whose type the column cannot hold, the type-mismatch /// WARNING message paired with the **literal operand's span**; /// otherwise `None` (column-to-column, literal-to-literal, an /// unknown column — already an ERROR — or a compatible pair). fn pair_type_mismatch( a: &Operand, b: &Operand, columns: &[crate::completion::TableColumn], ) -> Option<(String, (usize, usize))> { let (column, literal, span) = match (a, b) { ( Operand::Column { name, .. }, Operand::Literal { value, span }, ) | ( Operand::Literal { value, span }, Operand::Column { name, .. }, ) => (name, value, *span), _ => return None, }; // `null` fits any column; `= NULL` is flagged separately. 
if matches!(literal, crate::dsl::value::Value::Null) { return None; } let ty = columns .iter() .find(|tc| tc.name.eq_ignore_ascii_case(column))? .user_type; if literal.bind_for_column(column, ty).is_ok() { return None; } Some(( crate::friendly::translate( "diagnostic.type_mismatch", &[ ("column", column as &dyn std::fmt::Display), ("type", &ty.keyword() as &dyn std::fmt::Display), ], ), span, )) } /// What the grammar would accept at the end of `source` /// (ADR-0024 §architecture, Phase F walker-driven completion). /// /// Empty / whitespace-only input yields every command-entry word /// as `Expectation::Word(primary)`. Otherwise the walker is /// driven to `EndOfInput`; if the input completes a command, /// the result is empty; if it fails or is incomplete, the /// walker's expected-set surfaces verbatim — `Ident { source, /// role }` carries its `IdentSource` (so the completion engine /// can schema-look-up without a string round-trip), `Word` / /// `Literal` carry their primary literal, etc. /// /// Inputs whose first token is not a registered entry word /// fall back to listing every entry word — matches the /// synthetic "unknown command" expectation set the parser /// produces. #[must_use] pub fn expected_at_input(source: &str) -> Vec { expected_at_input_in_mode(source, crate::mode::Mode::Advanced) } /// Mode-aware [`expected_at_input`] (ADR-0030 §2). Filters the /// empty / unknown-entry fallback by mode so simple mode does /// not surface advanced-only entry words. 
#[must_use] pub fn expected_at_input_in_mode( source: &str, mode: crate::mode::Mode, ) -> Vec { use crate::dsl::grammar::{REGISTRY, is_advanced_only}; let mode_filtered = || -> Vec { REGISTRY .iter() .filter(|(c, _)| { mode == crate::mode::Mode::Advanced || !is_advanced_only(c.entry.primary) }) .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; if source.trim().is_empty() { return mode_filtered(); } let mut ctx = context::WalkContext::new(); ctx.mode = mode; let (result, _cmd) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx); let Some(result) = result else { // Walker didn't engage (unknown entry word): the // completion engine should still surface the available // entry words so the user can recover. return mode_filtered(); }; match result.outcome { // On Match, surface the outer-shape's skipped-Optional // expectations so the completion engine can offer // optional-suffix candidates at the end of a valid // command (`save` → `as`, etc.). outcome::WalkOutcome::Match { .. } => result.tail_expected, // A trailing-junk Mismatch (the shape matched, then the // user kept typing) still carries the outer shape's // skipped trailing optionals in `tail_expected` — e.g. // an optional `--create-fk` flag the trailing `--` is // starting to type. Surface those alongside the // mismatch's own expected set so completion still offers // them. A genuine mid-command mismatch has an empty // `tail_expected`, so this is a no-op there. outcome::WalkOutcome::Mismatch { expected, .. } => { let mut merged = expected; merged.extend(result.tail_expected); merged } outcome::WalkOutcome::Incomplete { expected, .. } => expected, // Validation failure path: the walker matched the // structural shape but the AST builder rejected (e.g. // Form C with column-shaped items). The walker still // captured the skipped-Optional expectations before the // validation fired — surface those so the user gets // useful Tab candidates even at a validation-flagged // position. 
outcome::WalkOutcome::ValidationFailed { .. } => result.tail_expected, } } /// Strict-required expected set at the end of `source`, plus /// the walker's `pending_value_type` at the cursor. /// /// Like `expected_at_input` but returns empty on /// `WalkOutcome::Match` — optional-suffix continuations are not /// surfaced. Used by the hint resolver to distinguish "must /// type more" from "could continue", and to dispatch per-type /// prose when the cursor is inside a typed value slot. /// Post-walk snapshot the hint resolver needs: the strict /// expected set plus the `WalkContext` fields that survive the /// walk and feed per-column / pedagogical prose. struct HintWalkSnapshot { expected: Vec, pending_value_type: Option, pending_value_column: Option, /// The grammar-declared `HintMode` at the cursor's slot /// (`Node::Hinted` annotation, ADR-0024 §HintMode-per-node). pending_hint_mode: Option, current_table_columns: Option>, /// `Some` when the input used Form A's explicit column list. /// `None` for Form B (`insert into T values …`) and for /// every non-insert command. 
user_listed_columns: Option>, } fn expected_for_hint_snapshot( source: &str, schema: Option<&crate::completion::SchemaCache>, mode: crate::mode::Mode, ) -> HintWalkSnapshot { use crate::dsl::grammar::REGISTRY; let entry_words = || -> Vec { REGISTRY .iter() .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; let empty_snapshot = || HintWalkSnapshot { expected: entry_words(), pending_value_type: None, pending_value_column: None, pending_hint_mode: None, current_table_columns: None, user_listed_columns: None, }; if source.trim().is_empty() { return empty_snapshot(); } let mut ctx = schema.map_or_else(context::WalkContext::new, |s| { context::WalkContext::with_schema(s) }); ctx.mode = mode; let (result, _cmd) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx); let Some(result) = result else { return empty_snapshot(); }; let expected = match result.outcome { outcome::WalkOutcome::Match { .. } | outcome::WalkOutcome::ValidationFailed { .. } => { Vec::new() } outcome::WalkOutcome::Incomplete { expected, .. } | outcome::WalkOutcome::Mismatch { expected, .. } => expected, }; HintWalkSnapshot { expected, pending_value_type: ctx.pending_value_type, pending_value_column: ctx.pending_value_column, pending_hint_mode: ctx.pending_hint_mode, current_table_columns: ctx.current_table_columns, user_listed_columns: ctx.user_listed_columns, } } /// Public walk entry. `bound` is `EndOfInput` for parse; /// `Position(cursor)` for completion / hint (Phase A: not yet /// wired). /// /// Returns: /// - `(Some(WalkResult), Some(Command))` on full match — the /// AST builder produced a typed Command. /// - `(Some(WalkResult), None)` on failure where the walker /// committed (matched the entry word). Caller surfaces the /// walker's error. /// - `(None, None)` when the entry word doesn't match any /// registered command — the router falls through to chumsky. 
pub fn walk<'a>( source: &str, bound: WalkBound, ctx: &mut WalkContext<'a>, ) -> (Option, Option) { // Phase A only consumes EndOfInput; Position would slice // the source, which is the same operation. let effective_source: &str = match bound { WalkBound::EndOfInput => source, WalkBound::Position(end) => &source[..end.min(source.len())], }; let start = skip_whitespace(effective_source, 0); if start >= effective_source.len() { return (None, None); } // Identify the command by its entry word. If the first // identifier-shape token isn't a registered entry, the // walker yields to chumsky. let Some((kw_start, kw_end)) = consume_ident(effective_source, start) else { return (None, None); }; let entry_text = &effective_source[kw_start..kw_end]; let candidates = grammar::commands_for_entry_word(entry_text); if candidates.is_empty() { // First token isn't a registered entry word — yield to // the chumsky path. return (None, None); } // ADR-0033 Amendment 1 — category-grouped, mode-aware // dispatch. `decide` chooses which registered candidate to // commit (or emits the "this is SQL" hint), running any // speculative match-testing on scratch contexts so the // caller's `ctx` is only ever touched by the committed walk. match decide( effective_source, kw_start, kw_end, &candidates, ctx.mode, ctx.schema, ) { Decision::Commit { idx, node } => { let (result, cmd) = walk_one_command(effective_source, source, kw_start, kw_end, idx, node, ctx); (Some(result), cmd) } Decision::ThisIsSql { primary } => ( Some(this_is_sql_result(entry_text, primary, kw_start, kw_end)), None, ), } } /// The dispatcher's choice for a given input (ADR-0033 /// Amendment 1): commit a specific registered candidate, or emit /// the simple-mode "this is SQL" hint. enum Decision { /// Walk this candidate into the caller's `WalkContext`. 
Commit { idx: usize, node: &'static crate::dsl::grammar::CommandNode, }, /// Simple mode with SQL-shaped input: emit /// `advanced_mode.sql_in_simple`, carrying this entry literal. ThisIsSql { primary: &'static str }, } /// Category-grouped, mode-aware dispatch decision (ADR-0033 /// Amendment 1). /// /// Pure with respect to the caller's context: any speculative /// match-testing runs on a fresh scratch `WalkContext` (see /// `scratch_outcome`), so `decide` never mutates the caller's /// accumulators. /// /// - **Simple mode** commits the DSL (`Simple`) candidate. With /// no DSL candidate (a SQL-only entry word) it emits the /// "this is SQL" hint. For a shared entry word whose DSL shape /// does not match but whose SQL shape does, it also emits the /// hint — so `delete … returning *` in simple mode points the /// user at advanced mode rather than at a bare DSL parse error. /// - **Advanced mode** tries `Advanced` candidates first, then /// the `Simple` candidate as a fallback; the first full match /// wins. When none fully match it commits the candidate that /// progressed furthest (advanced-first on ties) so the surfaced /// error is the most informative. fn decide( effective_source: &str, kw_start: usize, kw_end: usize, candidates: &[( usize, &'static crate::dsl::grammar::CommandNode, crate::dsl::grammar::CommandCategory, )], mode: crate::mode::Mode, schema: Option<&crate::completion::SchemaCache>, ) -> Decision { use crate::dsl::grammar::CommandCategory; let advanced: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = candidates .iter() .filter(|(_, _, cat)| *cat == CommandCategory::Advanced) .map(|(i, n, _)| (*i, *n)) .collect(); let simple: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = candidates .iter() .filter(|(_, _, cat)| *cat == CommandCategory::Simple) .map(|(i, n, _)| (*i, *n)) .collect(); match mode { crate::mode::Mode::Simple => { let Some(&(sidx, snode)) = simple.first() else { // No DSL candidate — the entry word is SQL-only. 
let primary = candidates.first().map_or("", |(_, n, _)| n.entry.primary); return Decision::ThisIsSql { primary }; }; if advanced.is_empty() { return Decision::Commit { idx: sidx, node: snode }; } // Shared entry word: prefer the DSL node; only point // at advanced mode when the DSL shape does not match // but the SQL shape does. if scratch_full_match(effective_source, kw_start, kw_end, snode, mode, schema) { return Decision::Commit { idx: sidx, node: snode }; } let (_, anode) = advanced[0]; if scratch_full_match(effective_source, kw_start, kw_end, anode, mode, schema) { return Decision::ThisIsSql { primary: anode.entry.primary, }; } Decision::Commit { idx: sidx, node: snode } } crate::mode::Mode::Advanced => { // Advanced candidates first, DSL as the fallback. let ordered: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = advanced.iter().chain(simple.iter()).copied().collect(); // `candidates` is non-empty (the caller checked), so // `ordered` is non-empty too. if ordered.len() == 1 { let (idx, node) = ordered[0]; return Decision::Commit { idx, node }; } for &(idx, node) in &ordered { if scratch_full_match(effective_source, kw_start, kw_end, node, mode, schema) { return Decision::Commit { idx, node }; } } // None fully matched — commit the furthest-progress // candidate, keeping the first (advanced) on ties. let mut best = ordered[0]; let mut best_progress = scratch_progress(effective_source, kw_start, kw_end, best.1, mode, schema); for &(idx, node) in &ordered[1..] { let progress = scratch_progress(effective_source, kw_start, kw_end, node, mode, schema); if progress > best_progress { best = (idx, node); best_progress = progress; } } Decision::Commit { idx: best.0, node: best.1, } } } } /// Build the `advanced_mode.sql_in_simple` result for a SQL entry /// word typed in simple mode (ADR-0030 §2, ADR-0033 Amendment 1). /// The entry word stays highlighted as a keyword; the input /// carries an ERROR verdict (it will not run here). 
fn this_is_sql_result( entry_text: &str, primary: &'static str, kw_start: usize, kw_end: usize, ) -> WalkResult { let mut path = MatchedPath::new(); let mut per_byte = Vec::new(); path.push(crate::dsl::walker::outcome::MatchedItem { kind: crate::dsl::walker::outcome::MatchedKind::Word(primary), text: entry_text.to_string(), span: (kw_start, kw_end), }); per_byte.push(crate::dsl::walker::outcome::ByteClass { start: kw_start, end: kw_end, class: grammar::HighlightClass::Keyword, }); WalkResult { outcome: WalkOutcome::ValidationFailed { position: kw_start, error: crate::dsl::grammar::ValidationError { message_key: "advanced_mode.sql_in_simple", args: vec![("command", primary.to_string())], }, }, matched_path: path, per_byte_class: per_byte, diagnostics: Vec::new(), tail_expected: Vec::new(), } } /// Run `walk_one_command` on a fresh scratch `WalkContext` so the /// dispatcher can test a candidate without disturbing the /// caller's accumulators (ADR-0033 Amendment 1). fn scratch_outcome( effective_source: &str, kw_start: usize, kw_end: usize, node: &'static crate::dsl::grammar::CommandNode, mode: crate::mode::Mode, schema: Option<&crate::completion::SchemaCache>, ) -> WalkOutcome { let mut sctx = schema.map_or_else(context::WalkContext::new, context::WalkContext::with_schema); sctx.mode = mode; let (result, _cmd) = walk_one_command(effective_source, effective_source, kw_start, kw_end, 0, node, &mut sctx); result.outcome } /// Whether a candidate fully matches the input (a clean /// `WalkOutcome::Match`), tested on a scratch context. fn scratch_full_match( effective_source: &str, kw_start: usize, kw_end: usize, node: &'static crate::dsl::grammar::CommandNode, mode: crate::mode::Mode, schema: Option<&crate::completion::SchemaCache>, ) -> bool { matches!( scratch_outcome(effective_source, kw_start, kw_end, node, mode, schema), WalkOutcome::Match { .. } ) } /// How far (byte position) a candidate's walk progressed. 
A full /// match scores the whole input; a failure scores its failure /// position. Used only to tie-break when no candidate fully /// matches. fn scratch_progress( effective_source: &str, kw_start: usize, kw_end: usize, node: &'static crate::dsl::grammar::CommandNode, mode: crate::mode::Mode, schema: Option<&crate::completion::SchemaCache>, ) -> usize { match scratch_outcome(effective_source, kw_start, kw_end, node, mode, schema) { WalkOutcome::Match { .. } => effective_source.len(), WalkOutcome::Incomplete { position, .. } | WalkOutcome::Mismatch { position, .. } | WalkOutcome::ValidationFailed { position, .. } => position, } } /// Walk a *single* committed command's shape and produce its /// `WalkResult` + optional `Command` (ADR-0033 Amendment 1). /// /// Factored out of `walk` so the dispatcher's speculative /// match-testing (`scratch_outcome`) reuses the exact same walk + /// outcome-mapping + AST-builder + diagnostic path on a scratch /// context, while the committed walk runs into the caller's /// context. `source` is the full (unbounded) input the AST /// builder reads for SQL command text; `effective_source` is the /// bound-trimmed slice the walker matches against. fn walk_one_command<'a>( effective_source: &str, source: &str, kw_start: usize, kw_end: usize, command_idx: usize, command_node: &'static crate::dsl::grammar::CommandNode, ctx: &mut WalkContext<'a>, ) -> (WalkResult, Option) { let entry_text = &effective_source[kw_start..kw_end]; let mut path = MatchedPath::new(); let mut per_byte = Vec::new(); // Record the entry-word match. 
path.push(crate::dsl::walker::outcome::MatchedItem { kind: crate::dsl::walker::outcome::MatchedKind::Word(command_node.entry.primary), text: entry_text.to_string(), span: (kw_start, kw_end), }); per_byte.push(crate::dsl::walker::outcome::ByteClass { start: kw_start, end: kw_end, class: grammar::HighlightClass::Keyword, }); let mut tail_expected: Vec = Vec::new(); let outcome = match walk_node( effective_source, kw_end, &command_node.shape, ctx, &mut path, &mut per_byte, ) { NodeWalkResult::Matched { end, skipped } => { // Carry the outer shape's skipped-Optional // expectations into WalkResult so completion can // surface optional-suffix candidates (`save` → // `as`). Empty for shapes with no trailing // optionals. tail_expected = skipped; NodeWalkResult::Matched { end, skipped: Vec::new(), } } other => other, }; let outcome = match outcome { NodeWalkResult::Matched { end, .. } => { let trailing = skip_whitespace(effective_source, end); if trailing < effective_source.len() { // The shape matched but the user kept typing. // Don't merge skipped-Optional expectations // into the trailing-input error: the completion // engine reads `expected` to decide what to // suggest, and adding "what could have come // before this trailing token" would suggest // candidates the user has already passed. WalkOutcome::Mismatch { position: trailing, expected: vec![Expectation::EndOfInput], } } else { WalkOutcome::Match { command_idx } } } NodeWalkResult::NoMatch { position, expected } => { // The shape required content the user hasn't typed. // (Optional/empty-Seq shapes always return Matched // even when skipped, so reaching NoMatch here means // the command really wanted something more.) 
let post = skip_whitespace(effective_source, position); if post >= effective_source.len() { WalkOutcome::Incomplete { position: post, expected } } else { WalkOutcome::Mismatch { position: post, expected } } } NodeWalkResult::Incomplete { position, expected } => { WalkOutcome::Incomplete { position, expected } } NodeWalkResult::Failed { position, kind } => match kind { FailureKind::Mismatch { expected } => { WalkOutcome::Mismatch { position, expected } } FailureKind::Validation(error) => { WalkOutcome::ValidationFailed { position, error } } }, }; // Apply the AST builder. A validation error here surfaces // as a `ValidationFailed` outcome (so the bridge can render // the catalog wording correctly) rather than as a generic // "AST builder failed" fallback. let (final_outcome, cmd) = match outcome { WalkOutcome::Match { .. } => match (command_node.ast_builder)(&path, source) { Ok(c) => (outcome, Some(c)), Err(error) => ( WalkOutcome::ValidationFailed { position: path .items .last() .map_or(kw_start, |i| i.span.0), error, }, None, ), }, other => (other, None), }; // Schema-existence diagnostics (ADR-0027 §2) layer on top // of a structurally-valid parse; a parse that already // failed gets its ERROR verdict from `outcome`. let mut diagnostics = if matches!(final_outcome, WalkOutcome::Match { .. }) { let mut d = schema_existence_diagnostics(&path, ctx.schema); // ADR-0032 §11.6 — Phase-1 carry-over gap closure. // The SQL-expression predicate-warning pass runs on // every successful parse, covering SQL `WHERE` / // `HAVING` / `ON` / `CASE` / projection / `ORDER BY` // slots uniformly (a flat matched-path walk doesn't // distinguish slot kind). The existing DSL `Expr` // AST variant below remains the source of truth for // DSL `WHERE` expressions; a DSL command produces no // sql_expr_ident roles so the two passes don't // collide. d.extend(sql_predicate_warnings(&path, ctx.schema)); // ADR-0032 §11.2 / §11.7 — compound-arity ERROR pass. 
// Catches `SELECT 1, 2 UNION SELECT 1` pre-flight so the // operator slot is highlighted rather than the engine // wording shown at execution time. d.extend(compound_arity_diagnostics(&path)); // ADR-0032 §10.3 / §11.2 — diagnostics emitted during // the walk by node handlers with direct context the // post-walk passes can't reconstruct (primarily the // CTE harvest's arity-check at body-frame exit). Drain // unconditionally so accumulated entries don't leak // into a subsequent walk via a re-used WalkContext. d.extend(std::mem::take(&mut ctx.pending_diagnostics)); d } else { ctx.pending_diagnostics.clear(); Vec::new() }; // Expression WARNING diagnostics — type-mismatched // comparisons and `= NULL` (ADR-0026 §7, surfaced through // ADR-0027's model). Only a successfully-built command has // a `where` expression to inspect. if let Some(command) = &cmd && let Some(expr) = command_where_expr(command) { let columns = ctx.current_table_columns.as_deref().unwrap_or(&[]); diagnostics.extend(expr_warnings(expr, columns)); } let result = WalkResult { outcome: final_outcome, matched_path: path, per_byte_class: per_byte, tail_expected, diagnostics, }; (result, cmd) } #[cfg(test)] mod tests { //! Walker behaviour tests — Phase A (ADR-0024 §migration). //! //! These cover every app-lifecycle command the walker now //! owns. Each input is paired with its expected `Command` //! output (the differential-against-chumsky check //! materialised as hand-curated expectations — same role //! the differential test scaffolding plays per ADR-0024 //! §test-discipline). //! //! The handoff document lists these tests as "walker- //! specific tests for trie-only features" — they pin down //! the walker's contract for the migrated commands so //! Phase B-F migrations can refactor without regression. 
use crate::dsl::command::{AppCommand, Command, MessagesValue, ModeValue}; use crate::dsl::parser::parse_command; fn parse(input: &str) -> Result { parse_command(input) } // ---- Bare no-arg commands --------------------------------- #[test] fn walker_parses_quit() { assert_eq!(parse("quit").unwrap(), Command::App(AppCommand::Quit)); } #[test] fn walker_parses_help() { assert_eq!(parse("help").unwrap(), Command::App(AppCommand::Help)); } #[test] fn walker_parses_rebuild() { assert_eq!(parse("rebuild").unwrap(), Command::App(AppCommand::Rebuild)); } #[test] fn walker_parses_new() { assert_eq!(parse("new").unwrap(), Command::App(AppCommand::New)); } #[test] fn walker_parses_load() { assert_eq!(parse("load").unwrap(), Command::App(AppCommand::Load)); } // ---- Save / save as --------------------------------------- #[test] fn walker_parses_save() { assert_eq!(parse("save").unwrap(), Command::App(AppCommand::Save)); } #[test] fn walker_parses_save_as() { assert_eq!(parse("save as").unwrap(), Command::App(AppCommand::SaveAs)); } #[test] fn walker_save_keywords_case_insensitive() { assert_eq!(parse("SAVE").unwrap(), Command::App(AppCommand::Save)); assert_eq!(parse("Save AS").unwrap(), Command::App(AppCommand::SaveAs)); } // ---- Mode ------------------------------------------------- #[test] fn walker_parses_mode_simple() { assert_eq!( parse("mode simple").unwrap(), Command::App(AppCommand::Mode { value: ModeValue::Simple, }) ); } #[test] fn walker_parses_mode_advanced() { assert_eq!( parse("mode advanced").unwrap(), Command::App(AppCommand::Mode { value: ModeValue::Advanced, }) ); } #[test] fn walker_mode_unknown_value_emits_friendly_error() { let err = parse("mode foo").unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. } => { // The catalog wording for `mode.unknown` carries // the user's value verbatim. 
assert!(message.contains("foo"), "got: {message}"); } other => panic!("expected Invalid, got {other:?}"), } } // ---- Messages --------------------------------------------- #[test] fn walker_parses_messages_bare() { assert_eq!( parse("messages").unwrap(), Command::App(AppCommand::Messages { value: None }) ); } #[test] fn walker_parses_messages_short() { assert_eq!( parse("messages short").unwrap(), Command::App(AppCommand::Messages { value: Some(MessagesValue::Short), }) ); } #[test] fn walker_parses_messages_verbose() { assert_eq!( parse("messages verbose").unwrap(), Command::App(AppCommand::Messages { value: Some(MessagesValue::Verbose), }) ); } #[test] fn walker_messages_unknown_value_emits_friendly_error() { let err = parse("messages bogus").unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. } => { assert!(message.contains("bogus"), "got: {message}"); } other => panic!("expected Invalid, got {other:?}"), } } // ---- Export ----------------------------------------------- #[test] fn walker_parses_export_bare() { assert_eq!( parse("export").unwrap(), Command::App(AppCommand::Export { path: None }) ); } #[test] fn walker_parses_export_with_path() { assert_eq!( parse("export backups/MyExport.zip").unwrap(), Command::App(AppCommand::Export { path: Some("backups/MyExport.zip".to_string()), }) ); } #[test] fn walker_export_trims_trailing_whitespace() { // Pre-migration the source-slice helper trimmed; the // walker treats " " after `export` as zero BarePath // matches and produces the bare form. 
assert_eq!( parse("export ").unwrap(), Command::App(AppCommand::Export { path: None }) ); } // ---- Import ----------------------------------------------- #[test] fn walker_parses_import_bare() { assert_eq!( parse("import").unwrap(), Command::App(AppCommand::Import { path: String::new(), target: None, }) ); } #[test] fn walker_parses_import_with_path() { assert_eq!( parse("import some/file.zip").unwrap(), Command::App(AppCommand::Import { path: "some/file.zip".to_string(), target: None, }) ); } #[test] fn walker_parses_import_with_path_and_target() { assert_eq!( parse("import some/file.zip as MyImported").unwrap(), Command::App(AppCommand::Import { path: "some/file.zip".to_string(), target: Some("MyImported".to_string()), }) ); } #[test] fn walker_import_keeps_as_inside_path() { // The lexer-free walker terminates `BarePath` at the // first whitespace byte. `path/asfile.zip` is one // token; the `as` *inside* it stays part of the path. assert_eq!( parse("import path/asfile.zip").unwrap(), Command::App(AppCommand::Import { path: "path/asfile.zip".to_string(), target: None, }) ); } #[test] fn walker_import_trailing_as_without_target_errors() { // Phase B Optional-backtracking: when the user types // `import foo.zip as ` and stops, the inner Optional // `(as )` partial-matches `as` then runs out // of input → backtracks (matches chumsky's `or_not` // semantics). The walker reports a successful parse of // `import foo.zip` followed by trailing `as ` → a // structural Mismatch with expected=`end of input`. // The friendly "import: empty target after `as`" // wording is no longer produced by the walker, but the // integration test // (`import_with_empty_target_after_as_errors`) still // passes because the rendered `import_usage` template // line in the dispatch output contains both "import" // and "target". let err = parse("import foo.zip as ").unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. 
} => { assert!( message.contains("import"), "expected `import` in 'after ``' framing; got: {message}" ); } other => panic!("expected Invalid, got {other:?}"), } } // ---- Routing fall-through --------------------------------- #[test] fn walker_does_not_engage_for_non_app_keywords() { // The router falls through to the chumsky path. The // existing chumsky parser produces this Command. assert!(matches!( parse("drop table Customers").unwrap(), Command::DropTable { .. } )); } #[test] fn walker_does_not_engage_for_unknown_first_token() { // Not an entry word — chumsky yields its usual // unknown-command error. assert!(parse("frobulate").is_err()); } // ---- Trailing-garbage detection --------------------------- #[test] fn walker_quit_with_trailing_garbage_errors() { assert!(parse("quit nonsense").is_err()); } #[test] fn walker_save_with_trailing_garbage_errors() { assert!(parse("save Customers").is_err()); } // ---- Whitespace tolerance --------------------------------- #[test] fn walker_tolerates_leading_and_internal_whitespace() { assert_eq!(parse(" quit ").unwrap(), Command::App(AppCommand::Quit)); assert_eq!( parse("save as").unwrap(), Command::App(AppCommand::SaveAs) ); assert_eq!( parse("mode\tadvanced").unwrap(), Command::App(AppCommand::Mode { value: ModeValue::Advanced, }) ); } // ========================================================= // Phase B — DDL commands. 
// ========================================================= use crate::dsl::action::ReferentialAction; use crate::dsl::command::{ChangeColumnMode, RelationshipSelector}; use crate::dsl::types::Type; #[test] fn walker_parses_drop_table() { assert_eq!( parse("drop table Customers").unwrap(), Command::DropTable { name: "Customers".to_string(), } ); } #[test] fn walker_parses_drop_column_with_optional_connectives() { let want = Command::DropColumn { table: "Customers".to_string(), column: "Email".to_string(), cascade: false, }; assert_eq!(parse("drop column Customers: Email").unwrap(), want); assert_eq!(parse("drop column from Customers: Email").unwrap(), want); assert_eq!(parse("drop column from table Customers: Email").unwrap(), want); assert_eq!(parse("drop column table Customers: Email").unwrap(), want); } #[test] fn walker_parses_drop_relationship_named() { assert_eq!( parse("drop relationship Orders_to_Customers").unwrap(), Command::DropRelationship { selector: RelationshipSelector::Named { name: "Orders_to_Customers".to_string(), }, } ); } #[test] fn walker_parses_drop_relationship_endpoints() { assert_eq!( parse("drop relationship from Customers.id to Orders.customer_id").unwrap(), Command::DropRelationship { selector: RelationshipSelector::Endpoints { parent_table: "Customers".to_string(), parent_column: "id".to_string(), child_table: "Orders".to_string(), child_column: "customer_id".to_string(), }, } ); } #[test] fn walker_parses_add_column() { assert_eq!( parse("add column Customers: Email (text)").unwrap(), Command::AddColumn { table: "Customers".to_string(), column: "Email".to_string(), ty: Type::Text, not_null: false, unique: false, default: None, check: None, } ); } #[test] fn walker_add_column_unknown_type_errors_with_friendly_wording() { let err = parse("add column Customers: Email (varchar)").unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. 
} => { assert!(message.contains("varchar"), "got: {message}"); } other => panic!("expected Invalid, got {other:?}"), } } #[test] fn walker_parses_rename_column() { assert_eq!( parse("rename column Customers: Email to ContactEmail").unwrap(), Command::RenameColumn { table: "Customers".to_string(), old: "Email".to_string(), new: "ContactEmail".to_string(), } ); } #[test] fn walker_parses_change_column() { assert_eq!( parse("change column Customers: Email (text)").unwrap(), Command::ChangeColumnType { table: "Customers".to_string(), column: "Email".to_string(), ty: Type::Text, mode: ChangeColumnMode::Default, } ); } #[test] fn walker_parses_change_column_with_force_conversion_flag() { assert_eq!( parse("change column Customers: Email (int) --force-conversion").unwrap(), Command::ChangeColumnType { table: "Customers".to_string(), column: "Email".to_string(), ty: Type::Int, mode: ChangeColumnMode::ForceConversion, } ); } #[test] fn walker_change_column_rejects_both_flags() { let err = parse("change column Customers: Email (int) --force-conversion --dont-convert") .unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. 
} => { assert!(message.contains("mutually exclusive"), "got: {message}"); } other => panic!("expected Invalid, got {other:?}"), } } #[test] fn walker_parses_add_relationship_minimal() { assert_eq!( parse("add 1:n relationship from Customers.id to Orders.customer_id").unwrap(), Command::AddRelationship { name: None, parent_table: "Customers".to_string(), parent_column: "id".to_string(), child_table: "Orders".to_string(), child_column: "customer_id".to_string(), on_delete: ReferentialAction::default_action(), on_update: ReferentialAction::default_action(), create_fk: false, } ); } #[test] fn walker_parses_add_relationship_with_name_and_actions_and_flag() { assert_eq!( parse( "add 1:n relationship as cust_orders from Customers.id to Orders.customer_id \ on delete cascade on update set null --create-fk" ) .unwrap(), Command::AddRelationship { name: Some("cust_orders".to_string()), parent_table: "Customers".to_string(), parent_column: "id".to_string(), child_table: "Orders".to_string(), child_column: "customer_id".to_string(), on_delete: ReferentialAction::Cascade, on_update: ReferentialAction::SetNull, create_fk: true, } ); } #[test] fn walker_add_relationship_repeated_clause_errors() { let err = parse( "add 1:n relationship from Customers.id to Orders.customer_id \ on delete cascade on delete restrict", ) .unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. } => { assert!( message.contains("delete") && message.contains("twice"), "got: {message}" ); } other => panic!("expected Invalid, got {other:?}"), } } // ========================================================= // Phase C — create table. 
// ========================================================= use crate::dsl::command::ColumnSpec; fn col(name: &str, ty: Type) -> ColumnSpec { ColumnSpec::new(name, ty) } #[test] fn walker_parses_create_table_with_pk_default_id_serial() { assert_eq!( parse("create table Customers with pk").unwrap(), Command::CreateTable { name: "Customers".to_string(), columns: vec![col("id", Type::Serial)], primary_key: vec!["id".to_string()], } ); } #[test] fn walker_parses_create_table_named_typed_pk() { assert_eq!( parse("create table Customers with pk email(text)").unwrap(), Command::CreateTable { name: "Customers".to_string(), columns: vec![col("email", Type::Text)], primary_key: vec!["email".to_string()], } ); } #[test] fn walker_parses_create_table_compound_pk() { assert_eq!( parse("create table OrderLines with pk order_id(int),product_id(int)").unwrap(), Command::CreateTable { name: "OrderLines".to_string(), columns: vec![col("order_id", Type::Int), col("product_id", Type::Int)], primary_key: vec!["order_id".to_string(), "product_id".to_string()], } ); } #[test] fn walker_create_table_pk_tolerates_whitespace_around_punct() { assert_eq!( parse("create table T with pk id ( serial )").unwrap(), Command::CreateTable { name: "T".to_string(), columns: vec![col("id", Type::Serial)], primary_key: vec!["id".to_string()], } ); assert_eq!( parse("create table T with pk a ( int ) , b ( int )").unwrap(), Command::CreateTable { name: "T".to_string(), columns: vec![col("a", Type::Int), col("b", Type::Int)], primary_key: vec!["a".to_string(), "b".to_string()], } ); } #[test] fn walker_bare_create_table_errors_with_with_pk_hint() { let err = parse("create table Customers").unwrap_err(); match err { crate::dsl::ParseError::Invalid { message, .. 
} => { assert!( message.contains("with pk"), "error should mention `with pk`:\n{message}" ); } other => panic!("expected Invalid, got {other:?}"), } } #[test] fn walker_create_table_keywords_are_case_insensitive() { assert_eq!( parse("CREATE TABLE Customers WITH PK email(TEXT)").unwrap(), Command::CreateTable { name: "Customers".to_string(), columns: vec![col("email", Type::Text)], primary_key: vec!["email".to_string()], } ); } // ========================================================= // Phase D — data commands (show, insert, update, delete). // ========================================================= use crate::dsl::value::Value; use crate::dsl::command::RowFilter; #[test] fn walker_parses_show_data() { assert_eq!( parse("show data Customers").unwrap(), Command::ShowData { name: "Customers".to_string(), filter: None, limit: None, } ); } #[test] fn walker_parses_show_table() { assert_eq!( parse("show table Customers").unwrap(), Command::ShowTable { name: "Customers".to_string() } ); } #[test] fn walker_parses_show_data_with_where_and_limit() { // ADR-0026 §5: `show data` gains an optional `where` // and an optional `limit `. match parse("show data Customers where id=1 limit 10").unwrap() { Command::ShowData { name, filter: Some(_), limit: Some(10), } => assert_eq!(name, "Customers"), other => panic!("expected ShowData with filter + limit, got {other:?}"), } } #[test] fn walker_parses_show_data_with_limit_only() { assert!(matches!( parse("show data Customers limit 5").unwrap(), Command::ShowData { filter: None, limit: Some(5), .. } )); } #[test] fn walker_parses_update_with_complex_where() { // The WHERE is a full boolean expression, not a single // equality (ADR-0026). match parse("update T set Active=true where Age>30 and Name like 'A%'") .unwrap() { Command::Update { filter: RowFilter::Where(crate::dsl::Expr::And(terms)), .. 
} => assert_eq!(terms.len(), 2, "two AND-ed predicates"), other => panic!("expected Update with And-expression filter, got {other:?}"), } } #[test] fn walker_parses_delete_with_or_where() { assert!(matches!( parse("delete from T where id=1 or id=2").unwrap(), Command::Delete { filter: RowFilter::Where(crate::dsl::Expr::Or(_)), .. } )); } // ---- input_verdict (ADR-0027 §3) -------------------------- #[test] fn input_verdict_clean_command_is_none() { assert_eq!(super::input_verdict("quit", None), None); assert_eq!(super::input_verdict("show table Customers", None), None); } #[test] fn input_verdict_empty_input_is_none() { assert_eq!(super::input_verdict("", None), None); assert_eq!(super::input_verdict(" ", None), None); } #[test] fn input_verdict_incomplete_command_is_error() { assert_eq!( super::input_verdict("create table", None), Some(super::Severity::Error), ); } #[test] fn input_verdict_unknown_command_is_error() { assert_eq!( super::input_verdict("frobnicate the gizmo", None), Some(super::Severity::Error), ); } #[test] fn input_verdict_mismatched_token_is_error() { // `quit` takes no argument — trailing junk fails. assert_eq!( super::input_verdict("quit now", None), Some(super::Severity::Error), ); } #[test] fn input_verdict_unknown_table_is_error() { // The command parses, but the table does not exist — // an ERROR diagnostic (ADR-0027 §2). 
let schema = schema_with("Customers", &[("id", Type::Int)]); assert_eq!( super::input_verdict("show data NoSuchTable", Some(&schema)), Some(super::Severity::Error), ); } #[test] fn input_verdict_unknown_column_is_error() { let schema = schema_with("Customers", &[("id", Type::Int), ("Name", Type::Text)]); assert_eq!( super::input_verdict( "show data Customers where NoSuchCol = 1", Some(&schema), ), Some(super::Severity::Error), ); } #[test] fn input_verdict_known_table_and_column_is_clean() { let schema = schema_with("Customers", &[("id", Type::Int), ("Name", Type::Text)]); assert_eq!( super::input_verdict( "show data Customers where id = 1", Some(&schema), ), None, ); } #[test] fn input_verdict_type_mismatch_is_warning() { // `Age` is int; comparing it with a text literal runs, // but is flagged (ADR-0026 §7). let schema = schema_with("Customers", &[("id", Type::Int), ("Age", Type::Int)]); assert_eq!( super::input_verdict( "delete from Customers where Age = 'hello'", Some(&schema), ), Some(super::Severity::Warning), ); } #[test] fn input_verdict_eq_null_is_warning() { let schema = schema_with("Customers", &[("id", Type::Int), ("Name", Type::Text)]); assert_eq!( super::input_verdict( "delete from Customers where Name = null", Some(&schema), ), Some(super::Severity::Warning), ); } #[test] fn input_verdict_compatible_comparison_is_clean() { let schema = schema_with("Customers", &[("id", Type::Int), ("Name", Type::Text)]); assert_eq!( super::input_verdict( "delete from Customers where id = 5", Some(&schema), ), None, ); } #[test] fn input_verdict_error_outranks_warning() { // An unknown column (ERROR) alongside `= NULL` // (WARNING) — the indicator shows the higher severity. 
let schema = schema_with("Customers", &[("id", Type::Int)]); assert_eq!( super::input_verdict( "delete from Customers where NoSuchCol = null", Some(&schema), ), Some(super::Severity::Error), ); } // ---- Existing-cases sweep (ADR-0027 §6) ------------------- #[test] fn input_verdict_sweep_unknown_table_across_commands() { let schema = schema_with("Customers", &[("id", Type::Int)]); for input in [ "drop table NoSuchTable", "show table NoSuchTable", "show data NoSuchTable", "add column to NoSuchTable: x (int)", ] { assert_eq!( super::input_verdict(input, Some(&schema)), Some(super::Severity::Error), "unknown table in {input:?} should be flagged", ); } } #[test] fn input_verdict_sweep_unknown_column_across_commands() { let schema = schema_with( "Customers", &[("id", Type::Int), ("Name", Type::Text)], ); for input in [ "drop column from table Customers: NoSuchCol", "update Customers set NoSuchCol = 1 where id = 1", ] { assert_eq!( super::input_verdict(input, Some(&schema)), Some(super::Severity::Error), "unknown column in {input:?} should be flagged", ); } } #[test] fn input_verdict_known_entities_across_commands_are_clean() { let schema = schema_with( "Customers", &[("id", Type::Int), ("Name", Type::Text)], ); for input in [ "show table Customers", "drop table Customers", "add column to Customers: Email (text)", "drop column from table Customers: Name", ] { assert_eq!( super::input_verdict(input, Some(&schema)), None, "{input:?} references only known entities — clean", ); } } // ---- precise diagnostic spans (ADR-0027 highlight wiring) - /// Walk `input` with `schema` and return the diagnostics the /// walk produced. 
fn diagnostics( input: &str, schema: &SchemaCache, ) -> Vec { let mut ctx = super::context::WalkContext::with_schema(schema); let (result, _cmd) = super::walk(input, super::outcome::WalkBound::EndOfInput, &mut ctx); result.map_or_else(Vec::new, |r| r.diagnostics) } #[test] fn type_mismatch_warning_span_covers_only_the_literal() { // The coarse pre-ADR-0027 span was the whole WHERE // clause; it is now exactly the offending literal. let schema = schema_with("Customers", &[("Age", Type::Int)]); let input = "delete from Customers where Age = 'hello'"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 1); assert_eq!(diags[0].severity, super::Severity::Warning); let (s, e) = diags[0].span; assert_eq!( &input[s..e], "'hello'", "the WARNING span should cover exactly the literal", ); } #[test] fn eq_null_warning_span_covers_the_null_literal() { let schema = schema_with("Customers", &[("Name", Type::Text)]); let input = "delete from Customers where Name = null"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 1); let (s, e) = diags[0].span; assert_eq!(&input[s..e], "null"); } #[test] fn between_warning_spans_each_offending_bound() { // `Age` is int; both text bounds mismatch — two // distinct WARNINGs, each spanning its own bound. let schema = schema_with("Customers", &[("Age", Type::Int)]); let input = "delete from Customers where Age between 'a' and 'z'"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 2); let spans: Vec<&str> = diags.iter().map(|d| &input[d.span.0..d.span.1]).collect(); assert_eq!(spans, vec!["'a'", "'z'"]); } #[test] fn in_warning_spans_only_the_mismatched_item() { // `Age` is int; of `(1, 'two', 3)` only `'two'` is wrong. 
let schema = schema_with("Customers", &[("Age", Type::Int)]); let input = "delete from Customers where Age in (1, 'two', 3)"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 1); let (s, e) = diags[0].span; assert_eq!(&input[s..e], "'two'"); } #[test] fn unknown_column_error_span_covers_the_identifier() { let schema = schema_with("Customers", &[("id", Type::Int)]); let input = "delete from Customers where NoSuchCol = 1"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 1); assert_eq!(diags[0].severity, super::Severity::Error); let (s, e) = diags[0].span; assert_eq!(&input[s..e], "NoSuchCol"); } // ---- LIKE on a numeric column (ADR-0027, Amendment 1) ----- #[test] fn like_on_a_numeric_column_is_a_warning() { // `LIKE` is a text-pattern match — against an int // column it runs but is almost never intended. let schema = schema_with("Customers", &[("Age", Type::Int)]); let input = "delete from Customers where Age like '1%'"; let diags = diagnostics(input, &schema); assert_eq!(diags.len(), 1); assert_eq!(diags[0].severity, super::Severity::Warning); let (s, e) = diags[0].span; assert_eq!(&input[s..e], "Age", "the span is the numeric column"); } #[test] fn not_like_on_a_numeric_column_is_also_a_warning() { let schema = schema_with("Orders", &[("Total", Type::Decimal)]); assert_eq!( super::input_verdict( "delete from Orders where Total not like '9%'", Some(&schema), ), Some(super::Severity::Warning), ); } #[test] fn like_on_a_text_column_is_clean() { // `LIKE 'A%'` on a text column is its intended use. 
let schema = schema_with("Customers", &[("Name", Type::Text)]); assert_eq!( super::input_verdict( "delete from Customers where Name like 'A%'", Some(&schema), ), None, ); } #[test] fn walker_parses_insert_with_explicit_column_list() { assert_eq!( parse("insert into Customers (Email, Name) values ('a@b.c', 'Alice')").unwrap(), Command::Insert { table: "Customers".to_string(), columns: Some(vec!["Email".to_string(), "Name".to_string()]), values: vec![Value::Text("a@b.c".to_string()), Value::Text("Alice".to_string())], } ); } #[test] fn walker_parses_insert_with_values_keyword_only() { assert_eq!( parse("insert into Customers values (1, 'Alice', null)").unwrap(), Command::Insert { table: "Customers".to_string(), columns: None, values: vec![ Value::Number("1".to_string()), Value::Text("Alice".to_string()), Value::Null, ], } ); } #[test] fn walker_parses_insert_short_form_without_column_list() { assert_eq!( parse("insert into Customers (1, 'Alice', true)").unwrap(), Command::Insert { table: "Customers".to_string(), columns: None, values: vec![ Value::Number("1".to_string()), Value::Text("Alice".to_string()), Value::Bool(true), ], } ); } #[test] fn walker_parses_insert_supports_negative_numbers() { assert_eq!( parse("insert into T values (-5)").unwrap(), Command::Insert { table: "T".to_string(), columns: None, values: vec![Value::Number("-5".to_string())], } ); } #[test] fn walker_parses_update_with_where() { assert_eq!( parse("update Customers set Email='new@b.c' where id=1").unwrap(), Command::Update { table: "Customers".to_string(), assignments: vec![("Email".to_string(), Value::Text("new@b.c".to_string()))], filter: RowFilter::eq("id", Value::Number("1".to_string())), } ); } #[test] fn walker_parses_update_with_multiple_assignments() { assert_eq!( parse("update Customers set Email='a@b.c', Name='Alice' where id=1").unwrap(), Command::Update { table: "Customers".to_string(), assignments: vec![ ("Email".to_string(), Value::Text("a@b.c".to_string())), 
("Name".to_string(), Value::Text("Alice".to_string())), ], filter: RowFilter::eq("id", Value::Number("1".to_string())), } ); } #[test] fn walker_parses_update_with_all_rows_flag() { assert_eq!( parse("update Customers set Active=true --all-rows").unwrap(), Command::Update { table: "Customers".to_string(), assignments: vec![("Active".to_string(), Value::Bool(true))], filter: RowFilter::AllRows, } ); } #[test] fn walker_parses_delete_with_where() { assert_eq!( parse("delete from Customers where id=42").unwrap(), Command::Delete { table: "Customers".to_string(), filter: RowFilter::eq("id", Value::Number("42".to_string())), } ); } #[test] fn walker_parses_delete_with_all_rows() { assert_eq!( parse("delete from Customers --all-rows").unwrap(), Command::Delete { table: "Customers".to_string(), filter: RowFilter::AllRows, } ); } #[test] fn walker_delete_without_where_or_flag_errors() { assert!(parse("delete from Customers").is_err()); } #[test] fn walker_update_without_where_or_flag_errors() { assert!(parse("update Customers set Email='x'").is_err()); } // ========================================================= // Phase E — replay. // ========================================================= #[test] fn walker_parses_replay_with_bare_relative_path() { assert_eq!( parse("replay history.log").unwrap(), Command::Replay { path: "history.log".to_string(), } ); } #[test] fn walker_parses_replay_with_bare_absolute_path() { assert_eq!( parse("replay /tmp/seed.commands").unwrap(), Command::Replay { path: "/tmp/seed.commands".to_string(), } ); } #[test] fn walker_parses_replay_with_quoted_path_supports_whitespace() { // Phase A's path-bearing UX change: paths with spaces use // the quoted form. 
assert_eq!( parse("replay 'my project/seed.commands'").unwrap(), Command::Replay { path: "my project/seed.commands".to_string(), } ); } #[test] fn walker_parses_replay_with_quoted_path_supports_escaped_quote() { assert_eq!( parse("replay 'O''Brien.commands'").unwrap(), Command::Replay { path: "O'Brien.commands".to_string(), } ); } #[test] fn walker_replay_keyword_case_insensitive() { assert_eq!( parse("REPLAY foo.txt").unwrap(), Command::Replay { path: "foo.txt".to_string(), } ); } #[test] fn walker_replay_without_path_errors() { assert!(parse("replay").is_err()); } #[test] fn walker_replay_with_empty_quoted_path_parses_as_empty() { // Parser layer accepts; runtime rejects empty paths // before any I/O. Mirrors the chumsky-side contract // (parser.rs `replay_with_empty_quoted_path_errors`). assert_eq!( parse("replay ''").unwrap(), Command::Replay { path: String::new(), } ); } // ========================================================= // hint_mode_at_input (ADR-0024 §HintMode-per-node) // ========================================================= use crate::dsl::grammar::HintMode; use super::hint_mode_at_input; #[test] fn hint_mode_value_literal_slot_after_insert_open_paren() { // `insert into T (` expects a value-literal or column // ident at the inner position. After `values (` it's // strictly value-literals — the signature triggers // ProseOnly. 
match hint_mode_at_input("insert into T values (") { Some(HintMode::ProseOnly("hint.value_literal_slot")) => {} other => panic!("expected ProseOnly value_literal_slot, got {other:?}"), } } #[test] fn hint_mode_value_literal_slot_after_update_set_assign() { match hint_mode_at_input("update T set col=") { Some(HintMode::ProseOnly("hint.value_literal_slot")) => {} other => panic!("expected ProseOnly value_literal_slot, got {other:?}"), } } #[test] fn hint_mode_value_literal_slot_in_where_clause() { match hint_mode_at_input("delete from T where col=") { Some(HintMode::ProseOnly("hint.value_literal_slot")) => {} other => panic!("expected ProseOnly value_literal_slot, got {other:?}"), } } #[test] fn hint_mode_new_name_slot_for_create_table() { // `create table ` expects a NewName ident. match hint_mode_at_input("create table ") { Some(HintMode::ForceProse("hint.ambient_typing_name")) => {} other => panic!("expected ForceProse typing_name, got {other:?}"), } } #[test] fn hint_mode_new_name_slot_for_add_column_name() { // `add column T: ` expects a NewName ident. match hint_mode_at_input("add column to table T: ") { Some(HintMode::ForceProse("hint.ambient_typing_name")) => {} other => panic!("expected ForceProse typing_name, got {other:?}"), } } #[test] fn hint_mode_none_for_keyword_position() { // Entry-keyword position: no HintMode override applies. assert!(hint_mode_at_input("").is_none()); assert!(hint_mode_at_input("cr").is_none()); } #[test] fn hint_mode_none_for_complete_command() { // Valid complete command: no expected, no override. assert!(hint_mode_at_input("create table T with pk").is_none()); } #[test] fn hint_mode_none_at_schema_ident_slot() { // `show data ` expects a table-name ident from the // schema — schema-listable slot, not a HintMode case. assert!(hint_mode_at_input("show data ").is_none()); } // ========================================================= // Phase D full — schema-aware value typing. 
// ========================================================= use crate::completion::{SchemaCache, TableColumn}; use crate::dsl::parser::parse_command_with_schema; fn schema_with(table: &str, columns: &[(&str, Type)]) -> SchemaCache { let cols: Vec = columns .iter() .map(|(n, t)| TableColumn { name: (*n).to_string(), user_type: *t, }) .collect(); let mut cache = SchemaCache::default(); cache.tables.push(table.to_string()); for c in &cols { cache.columns.push(c.name.clone()); } cache.table_columns.insert(table.to_string(), cols); cache } #[test] fn phase_d_insert_with_schema_accepts_typed_values_per_column() { // Form B: the grammar dispatches one slot per // non-auto-generated column — the serial `id` is // skipped because the dispatch path (`db::do_insert`) // auto-fills it (ADR-0018 §3). let schema = schema_with( "Customers", &[("id", Type::Serial), ("Name", Type::Text), ("Active", Type::Bool)], ); // 2 user-typed values: Name (text), Active (bool). let cmd = parse_command_with_schema( "insert into Customers values ('Alice', true)", &schema, ) .expect("parse"); match cmd { Command::Insert { table, values, .. } => { assert_eq!(table, "Customers"); assert_eq!(values.len(), 2); } other => panic!("expected Insert, got {other:?}"), } } #[test] fn phase_d_insert_form_b_skips_serial_column() { // Form B: `insert into values (…)` excludes // auto-generated columns from the value list. Supplying // a value for the serial column is a count mismatch. let schema = schema_with( "Customers", &[("id", Type::Serial), ("Name", Type::Text)], ); // Two values where Form B expects one (Name only): let err = parse_command_with_schema( "insert into Customers values (1, 'Alice')", &schema, ) .expect_err("Form B should reject user-supplied serial"); match err { crate::dsl::ParseError::Invalid { .. } => {} other => panic!("expected Invalid, got {other:?}"), } } #[test] fn phase_d_insert_form_a_accepts_serial_when_listed() { // Form A: user explicitly lists `id`. 
#[test]
fn phase_d_insert_rejects_decimal_in_int_column() {
    // `id` is declared Int in the schema, while `3.14` is a Number
    // carrying a decimal part — the typed `int_slot` validator is
    // expected to reject it with an `Invalid` parse error.
    let schema = schema_with("T", &[("id", Type::Int)]);
    let outcome = parse_command_with_schema("insert into T values (3.14)", &schema);
    let failure = outcome.expect_err("should reject");

    if let crate::dsl::ParseError::Invalid { message, .. } = &failure {
        // Either wording is accepted: the message names the
        // expected type or echoes the offending literal.
        let names_the_problem = message.contains("integer") || message.contains("3.14");
        assert!(names_the_problem, "got: {message}");
    } else {
        panic!("expected Invalid, got {failure:?}");
    }
}
#[test]
fn phase_d_schemaless_parse_command_still_works() {
    // `parse(input)` is the pre-Phase-D entry point and passes no
    // schema, so the DynamicSubgrammar falls back to the schemaless
    // value-literal list: all three mixed-shape values are kept.
    let parsed = parse("insert into T values (1, 'Alice', null)").expect("parse");

    if let Command::Insert { values, .. } = &parsed {
        assert_eq!(values.len(), 3);
    } else {
        panic!("expected Insert, got {parsed:?}");
    }
}
#[test]
fn phase_d_update_rejects_decimal_in_int_set_column() {
    // Both `id` and `Score` are Int columns. Assigning `3.14` to
    // `Score` in the SET clause hits the int_slot validator, so the
    // parse must fail with an `Invalid` error whose message either
    // names the expected type or echoes the offending literal.
    let schema = schema_with(
        "T",
        &[("id", Type::Int), ("Score", Type::Int)],
    );
    let err = parse_command_with_schema(
        "update T set Score=3.14 where id=1",
        &schema,
    )
    .expect_err("should reject");
    match err {
        crate::dsl::ParseError::Invalid { message, .. } => {
            assert!(
                message.contains("integer") || message.contains("3.14"),
                "got: {message}"
            );
        }
        other => panic!("expected Invalid, got {other:?}"),
    }
}
#[test]
fn typed_hint_at_insert_first_value_position_for_int_column() {
    // The first Form B value slot is keyed on `id` (Int), so the
    // schema-aware resolver should narrow the prose to the int
    // value-slot hint.
    let columns = [("id", Type::Int), ("Name", Type::Text)];
    let schema = schema_with("Customers", &columns);
    let mode = hint_mode_at_input_with_schema("insert into Customers values (", &schema);

    if !matches!(mode, Some(HintMode::ProseOnly("hint.value_slot_int"))) {
        panic!("expected ProseOnly value_slot_int, got {mode:?}");
    }
}
#[test]
fn typed_hint_not_emitted_after_complete_value() {
    // Input: `insert into T values (1`. The int slot has already
    // matched (`1` is a valid int), so — per the original note —
    // the walker's pending value-type state was cleared on that
    // successful match and the cursor now sits between values.
    let schema = schema_with("T", &[("id", Type::Int)]);
    // The walker is waiting for `,` or `)` here, which is not a
    // typed value slot.
    let mode = hint_mode_at_input_with_schema("insert into T values (1", &schema);
    // Hence no HintMode may be reported at this position.
    assert!(mode.is_none(), "got {mode:?}");
}
#[test]
fn typed_hint_for_auto_generated_types_routes_via_form_a() {
    // Serial / shortid columns are user-settable only through
    // Form A (`insert into T (col) values (…)`); Form B skips them
    // because the dispatch path auto-fills. Each case lists the
    // column explicitly and expects that type's value-slot prose.
    let cases = [
        (Type::Serial, "hint.value_slot_serial"),
        (Type::ShortId, "hint.value_slot_shortid"),
    ];

    for (ty, key) in cases {
        let schema = schema_with("T", &[("c", ty)]);
        let mode = hint_mode_at_input_with_schema("insert into T (c) values (", &schema);
        let routed = match &mode {
            Some(HintMode::ProseOnly(k)) => *k == key,
            _ => false,
        };
        assert!(
            routed,
            "expected ProseOnly({key}) for type {ty:?}, got {mode:?}",
        );
    }
}
let schema = schema_with("T", &[("id", Type::Serial)]); let mode = hint_mode_at_input_with_schema("insert into T values (", &schema); assert!( matches!(mode, Some(HintMode::ProseOnly("hint.value_literal_slot"))), "got {mode:?}", ); } #[test] fn phase_d_update_multi_assignment_uses_per_column_types() { let schema = schema_with( "Customers", &[ ("id", Type::Int), ("Name", Type::Text), ("Score", Type::Int), ], ); // `Score=42` (int slot) and `Name='Alice'` (text slot) // — each value slot dispatches on the column whose // ident matched immediately before. let cmd = parse_command_with_schema( "update Customers set Score=42, Name='Alice' where id=1", &schema, ) .expect("parse"); match cmd { Command::Update { assignments, .. } => { assert_eq!(assignments.len(), 2); assert_eq!(assignments[0].0, "Score"); assert_eq!(assignments[1].0, "Name"); } other => panic!("expected Update, got {other:?}"), } } // ---- ADR-0032 §11.5 Phase-2 diagnostics --------------------- /// Build a two-table schema for join/qualified-ref tests. fn two_table_schema() -> SchemaCache { let mut cache = SchemaCache::default(); cache.tables.push("a".to_string()); cache.tables.push("b".to_string()); cache.columns.push("id".to_string()); cache.columns.push("name".to_string()); cache.columns.push("total".to_string()); cache.table_columns.insert( "a".to_string(), vec![ TableColumn { name: "id".to_string(), user_type: Type::Int, }, TableColumn { name: "name".to_string(), user_type: Type::Text, }, ], ); cache.table_columns.insert( "b".to_string(), vec![ TableColumn { name: "id".to_string(), user_type: Type::Int, }, TableColumn { name: "total".to_string(), user_type: Type::Real, }, ], ); cache } fn diag_keys(source: &str, schema: &SchemaCache) -> Vec<&'static str> { // SQL SELECT lives in Advanced mode (ADR-0030 §2). The // default `input_diagnostics` uses Simple, which gates // the command out and yields no diagnostics. Build the // walk manually so we can set the right mode. 
let mut ctx = super::context::WalkContext::with_schema(schema); ctx.mode = crate::mode::Mode::Advanced; let (result, _cmd) = super::walk( source, super::outcome::WalkBound::EndOfInput, &mut ctx, ); let diagnostics = result.map_or_else(Vec::new, |r| r.diagnostics); diagnostics .into_iter() .map(|d| Box::leak(d.message.into_boxed_str()) as &str) .collect() } #[test] fn unknown_qualifier_in_qualified_ref_is_error() { let schema = two_table_schema(); // `t` is not in scope (only `a` and `b` are). let diags = diag_keys("select t.id from a join b on a.id = b.id", &schema); assert!( diags.iter().any(|d| d.contains("no such table or alias")), "expected unknown_qualifier; got {diags:?}", ); } #[test] fn ambiguous_bare_column_is_error() { let schema = two_table_schema(); // `id` exists in both `a` and `b`. let diags = diag_keys("select id from a join b on a.id = b.id", &schema); assert!( diags.iter().any(|d| d.contains("ambiguous")), "expected ambiguous_column; got {diags:?}", ); } #[test] fn unambiguous_bare_column_no_error() { let schema = two_table_schema(); // `name` is only in `a`; `total` is only in `b` — no ambiguity. 
#[test]
fn duplicate_cte_in_same_with_block_is_error() {
    // Walks the `WITH …` fragment directly against
    // SQL_SELECT_STATEMENT (rather than through the command
    // registry) so the diagnostic pass sees the cte_name idents,
    // then asserts duplicate_cte fires for the repeated name `x`.
    //
    // NOTE(review): the original rationale was that `WITH …` does
    // not dispatch through the registry yet (a `data::WITH`
    // `CommandNode` being a future sub-phase). Other tests in this
    // module feed `with …` input through the full walk, so that
    // rationale may be stale — confirm before relying on it.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let mut ctx = super::context::WalkContext::with_schema(&schema);
    let mut path = super::outcome::MatchedPath::new();
    // Element type is inferred from `walk_node`'s parameter; a bare
    // `Vec` annotation without a generic argument does not compile.
    let mut per_byte: Vec<_> = Vec::new();
    let input = "with x as (select 1), x as (select 2) select * from x";

    let result = crate::dsl::walker::driver::walk_node(
        input,
        0,
        &crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT,
        &mut ctx,
        &mut path,
        &mut per_byte,
    );
    assert!(
        matches!(
            result,
            crate::dsl::walker::driver::NodeWalkResult::Matched { .. }
        ),
        "fragment should walk: {result:?}"
    );

    let diags = super::schema_existence_diagnostics(&path, Some(&schema));
    let messages: Vec<&str> = diags.iter().map(|d| d.message.as_str()).collect();
    assert!(
        messages.iter().any(|m| m.contains("duplicate")),
        "expected duplicate_cte; got {messages:?}",
    );
}
#[test]
fn sql_where_type_compatible_does_not_warn() {
    // `price` is real and `5` is numeric — a compatible pairing
    // (any numeric-real comparison is fine), so neither the
    // type-mismatch nor the LIKE warning may appear.
    let schema = typed_schema();
    let diags = diag_keys("select * from products where price = 5", &schema);

    let free_of_warnings = diags
        .iter()
        .all(|d| !d.contains("different type") && !d.contains("LIKE"));
    assert!(
        free_of_warnings,
        "expected no warnings for compatible types; got {diags:?}",
    );
}
#[test]
fn sql_join_on_predicate_warning_fires() {
    // The Phase-1 gap also affects JOIN ON: a `LIKE` against the
    // real-typed `b.price` inside the ON clause must produce the
    // like_numeric warning.
    //
    // Two single-column tables: `a(id: Int)` and `b(price: Real)`.
    let mut cache = SchemaCache::default();
    for (table, column, user_type) in [("a", "id", Type::Int), ("b", "price", Type::Real)] {
        cache.tables.push(table.to_string());
        cache.columns.push(column.to_string());
        cache.table_columns.insert(
            table.to_string(),
            vec![TableColumn {
                name: column.to_string(),
                user_type,
            }],
        );
    }

    let diags = diag_keys("select * from a join b on price like 5", &cache);
    let like_flagged = diags.iter().any(|d| d.contains("LIKE"));
    assert!(
        like_flagged,
        "expected like_numeric warning on JOIN ON; got {diags:?}",
    );
}
#[test]
fn real_column_shadowed_by_alias_is_not_misplaced() {
    // `SELECT name AS id FROM t WHERE id > 0` — the projection
    // alias `id` has the same name as a real column `id` of `t`.
    // In WHERE, `id` refers to the table column (per SQL spec),
    // not to the alias, so projection_alias_misplaced must NOT
    // fire. The test requires the walk to yield no diagnostics
    // at all.
    let schema = schema_with(
        "t",
        &[("id", Type::Int), ("name", Type::Text)],
    );
    let diags = diag_keys(
        "select name as id from t where id > 0",
        &schema,
    );
    assert!(
        diags.is_empty(),
        "real-column WHERE ref must not be flagged as misplaced; got {diags:?}",
    );
}
#[test]
fn compound_three_leg_chain_emits_per_mismatch() {
    // Three legs chained at the same depth with arities 1, 2, 1:
    // each set-op compares the leg before it with the leg after
    // it, so both UNIONs must report a mismatch.
    let schema = schema_with("t", &[("a", Type::Int)]);
    let query = "select 1 union select 1, 2 union select 1";
    let diags = diag_keys(query, &schema);

    let mut mismatch_count = 0;
    for message in &diags {
        if message.contains("number of columns") {
            mismatch_count += 1;
        }
    }
    assert_eq!(
        mismatch_count, 2,
        "expected two mismatch diagnostics; got {diags:?}",
    );
}
#[test]
fn alias_in_inner_subquery_does_not_affect_outer_aliases() {
    // The inner `AS y` sits inside parens (depth > 0) and must not
    // be collected into the outer leg's alias bag. If it were, the
    // outer `WHERE y` would (wrongly) be reported as a misplaced
    // projection alias.
    //
    // This test only asserts the absence of that report. The
    // sibling projection_alias tests identify the diagnostic by the
    // text "alias `<name>`" plus the clause name, so matching on
    // the word "misplaced" alone may never trigger; the alias-name
    // check below is what makes the assertion effective.
    // NOTE(review): whatever other diagnostic `y` receives (e.g.
    // unknown_column) is deliberately not asserted here.
    let schema = schema_with(
        "t",
        &[("a", Type::Int), ("b", Type::Int)],
    );
    let diags = diag_keys(
        "select (select a as y from t) from t where y > 0",
        &schema,
    );
    assert!(
        !diags
            .iter()
            .any(|d| d.contains("misplaced") || d.contains("alias `y`")),
        "inner-subquery alias must not affect outer scope; got {diags:?}",
    );
}
use super::*; use crate::completion::{SchemaCache, TableColumn}; use crate::dsl::types::Type; fn schema_with_table_and_columns() -> SchemaCache { let mut s = SchemaCache::default(); s.tables.push("mytable".to_string()); s.columns.push("real_col".to_string()); s.columns.push("another_col".to_string()); s.table_columns.insert( "mytable".to_string(), vec![ TableColumn { name: "real_col".to_string(), user_type: Type::Text, }, TableColumn { name: "another_col".to_string(), user_type: Type::Int, }, ], ); s } fn diagnostics_advanced( source: &str, schema: &SchemaCache, ) -> Vec { let mut ctx = context::WalkContext::with_schema(schema); ctx.mode = crate::mode::Mode::Advanced; let (result, _) = walk(source, outcome::WalkBound::EndOfInput, &mut ctx); result.map_or_else(Vec::new, |r| r.diagnostics) } #[test] fn projection_before_from_resolves_via_eventual_from() { // `select real_col from mytable` — the projection // ident appears in the path BEFORE the FROM binding, // but the two-pass diagnostic resolves correctly // against the eventual scope. No diagnostic. let schema = schema_with_table_and_columns(); let diags = diagnostics_advanced("select real_col from mytable", &schema); assert!( diags.is_empty(), "projection-before-FROM legit column must not be flagged; got {diags:?}", ); } #[test] fn projection_before_from_flags_unknown_column() { // `select bogus_col from mytable` — bogus_col doesn't // belong to mytable. The diagnostic fires on the // projection ident's span; the renderer overlays this // as Error in `input_render.rs`. let schema = schema_with_table_and_columns(); let diags = diagnostics_advanced("select bogus_col from mytable", &schema); assert_eq!(diags.len(), 1, "{diags:?}"); assert_eq!(diags[0].severity, outcome::Severity::Error); // Span should cover `bogus_col` (offset 7..16). 
assert_eq!(diags[0].span, (7, 16)); assert!( diags[0].message.contains("no such column"), "expected unknown_column wording; got {:?}", diags[0].message, ); } #[test] fn multi_projection_before_from_flags_only_unknowns() { // `select real_col, bogus_col, another_col from mytable` // — only bogus_col flags; the two real ones resolve. let schema = schema_with_table_and_columns(); let diags = diagnostics_advanced( "select real_col, bogus_col, another_col from mytable", &schema, ); assert_eq!( diags.len(), 1, "expected exactly one diagnostic; got {diags:?}", ); assert!(diags[0].message.contains("bogus_col")); } #[test] fn projection_without_from_is_silent() { // `select c1, c2` — no FROM in scope at all. The // current behavior is to skip the bare-column check // entirely (avoid noise on `SELECT 1` style // expressions). This is documented in the // schema_existence pass. let schema = schema_with_table_and_columns(); let diags = diagnostics_advanced("select c1, c2", &schema); assert!( diags.is_empty(), "no FROM → silent; got {diags:?}", ); } } /// Sub-phase 3a — category-grouped, mode-aware dispatch /// (ADR-0033 Amendment 1). /// /// These exercise the dispatch mechanism end-to-end on a *smoke* /// registry: a single shared entry word (`smk`) carrying a /// `Simple` DSL node and an `Advanced` SQL node with /// distinguishable tails (`dsltail` / `sqltail`). The dispatch /// functions (`decide`, `walk_one_command`, `this_is_sql_result`) /// are module-private; this child module reaches them via /// `super::*`. The smoke nodes never enter the real `REGISTRY`, /// so production dispatch is unaffected. 
#[cfg(test)]
mod dispatch_3a_tests {
    use super::*;
    use crate::dsl::command::{AppCommand, Command};
    use crate::dsl::grammar::{
        CommandCategory, CommandNode, Node, ValidationError, Word,
    };
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
    use crate::dsl::walker::outcome::MatchedPath;
    use crate::mode::Mode;

    // Distinct dummy commands so a test can tell which node a walk
    // committed to (the outcome alone doesn't distinguish them).

    /// AST builder for the smoke DSL node: ignores its inputs and
    /// always yields `AppCommand::Help`.
    fn dsl_builder(
        _: &MatchedPath,
        _: &str,
    ) -> Result<Command, ValidationError> {
        Ok(Command::App(AppCommand::Help))
    }

    /// AST builder for the smoke SQL node: ignores its inputs and
    /// always yields `AppCommand::Quit`.
    fn sql_builder(
        _: &MatchedPath,
        _: &str,
    ) -> Result<Command, ValidationError> {
        Ok(Command::App(AppCommand::Quit))
    }

    /// Smoke DSL node: entry word `smk`, tail keyword `dsltail`.
    static SMOKE_DSL: CommandNode = CommandNode {
        entry: Word::keyword("smk"),
        shape: Node::Word(Word::keyword("dsltail")),
        ast_builder: dsl_builder,
        help_id: None,
        usage_ids: &[],
    };

    /// Smoke SQL node: same entry word `smk`, tail keyword `sqltail`.
    static SMOKE_SQL: CommandNode = CommandNode {
        entry: Word::keyword("smk"),
        shape: Node::Word(Word::keyword("sqltail")),
        ast_builder: sql_builder,
        help_id: None,
        usage_ids: &[],
    };

    /// Candidate list handed to `decide`: an index, the node, and
    /// the node's category.
    type Candidates = Vec<(usize, &'static CommandNode, CommandCategory)>;

    /// A shared entry word: both a DSL and a SQL node under `smk`.
    /// Listed SQL-first to prove `decide` partitions by category
    /// rather than relying on registry order.
    fn shared() -> Candidates {
        vec![
            (0, &SMOKE_SQL, CommandCategory::Advanced),
            (1, &SMOKE_DSL, CommandCategory::Simple),
        ]
    }

    /// A SQL-only entry word (no DSL fallback) — models `select`.
    fn sql_only() -> Candidates {
        vec![(0, &SMOKE_SQL, CommandCategory::Advanced)]
    }

    /// Locate the entry word of `input`: skip leading whitespace,
    /// then consume one identifier and return what `consume_ident`
    /// reports for it.
    ///
    /// Panics if `input` has no leading identifier — every test
    /// input in this module starts with one.
    fn kw(input: &str) -> (usize, usize) {
        let start = skip_whitespace(input, 0);
        consume_ident(input, start).expect("entry word")
    }

    /// Run `decide` for `input` under `mode` against `cands`, using
    /// the entry-word bounds from `kw`.
    fn run_decide(input: &str, mode: Mode, cands: &Candidates) -> Decision {
        let (ks, ke) = kw(input);
        decide(input, ks, ke, cands, mode, None)
    }

    /// Mirror `walk`'s dispatch: decide, then either walk the
    /// committed node or build the "this is SQL" result. Returns
    /// the resulting outcome plus the committed command (if any).
fn dispatch(input: &str, mode: Mode, cands: &Candidates) -> (WalkOutcome, Option) { let (ks, ke) = kw(input); let entry_text = &input[ks..ke]; match decide(input, ks, ke, cands, mode, None) { Decision::Commit { idx, node } => { let mut ctx = context::WalkContext::new(); ctx.mode = mode; let (res, cmd) = walk_one_command(input, input, ks, ke, idx, node, &mut ctx); (res.outcome, cmd) } Decision::ThisIsSql { primary } => { (this_is_sql_result(entry_text, primary, ks, ke).outcome, None) } } } fn committed_node(input: &str, mode: Mode, cands: &Candidates) -> &'static CommandNode { match run_decide(input, mode, cands) { Decision::Commit { node, .. } => node, Decision::ThisIsSql { .. } => panic!("expected Commit, got ThisIsSql for {input:?}"), } } // ---- Exit-gate case 1: Simple + DSL input → DSL match ------ #[test] fn simple_mode_dsl_input_matches_dsl() { let cands = shared(); assert!( std::ptr::eq(committed_node("smk dsltail", Mode::Simple, &cands), &SMOKE_DSL), "simple mode must commit the DSL node for DSL input", ); let (outcome, cmd) = dispatch("smk dsltail", Mode::Simple, &cands); assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); assert_eq!(cmd, Some(Command::App(AppCommand::Help))); } // ---- Exit-gate case 2: Advanced + SQL input → SQL match ---- #[test] fn advanced_mode_sql_input_matches_sql() { let cands = shared(); assert!( std::ptr::eq(committed_node("smk sqltail", Mode::Advanced, &cands), &SMOKE_SQL), "advanced mode must commit the SQL node for SQL input", ); let (outcome, cmd) = dispatch("smk sqltail", Mode::Advanced, &cands); assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); assert_eq!(cmd, Some(Command::App(AppCommand::Quit))); } // ---- Exit-gate case 3: Simple + SQL-only input → // ValidationFailed advanced_mode.sql_in_simple ---------- #[test] fn simple_mode_sql_only_input_is_this_is_sql() { // Shared word, but the input matches only the SQL tail. 
let cands = shared(); match run_decide("smk sqltail", Mode::Simple, &cands) { Decision::ThisIsSql { primary } => assert_eq!(primary, "smk"), Decision::Commit { idx, .. } => { panic!("expected ThisIsSql, got Commit {{ idx: {idx} }}") } } let (outcome, cmd) = dispatch("smk sqltail", Mode::Simple, &cands); match outcome { WalkOutcome::ValidationFailed { error, .. } => { assert_eq!(error.message_key, "advanced_mode.sql_in_simple"); } other => panic!("expected ValidationFailed, got {other:?}"), } assert_eq!(cmd, None); } /// A pure SQL-only entry word (no DSL node, like `select`) in /// simple mode also yields the "this is SQL" hint — the /// behaviour the old whole-command `is_advanced_only` gate /// produced, now via `decide`. #[test] fn simple_mode_sql_only_entry_word_is_this_is_sql() { let cands = sql_only(); let (outcome, _) = dispatch("smk sqltail", Mode::Simple, &cands); match outcome { WalkOutcome::ValidationFailed { error, .. } => { assert_eq!(error.message_key, "advanced_mode.sql_in_simple"); } other => panic!("expected ValidationFailed, got {other:?}"), } } // ---- Exit-gate case 4 / 5: Advanced + DSL-only input → // DSL match via fallback (the R1-equivalent invariant) -- #[test] fn advanced_mode_dsl_input_falls_back_to_dsl() { // `dsltail` matches the DSL node but NOT the SQL node. // Advanced mode tries SQL first; it must fall back to the // DSL node rather than surfacing the SQL node's failure. let cands = shared(); assert!( std::ptr::eq(committed_node("smk dsltail", Mode::Advanced, &cands), &SMOKE_DSL), "advanced mode must fall back to DSL when SQL doesn't match", ); let (outcome, cmd) = dispatch("smk dsltail", Mode::Advanced, &cands); assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); assert_eq!(cmd, Some(Command::App(AppCommand::Help))); } /// In advanced mode a non-shared DSL entry word (no Advanced /// candidate) still commits the single DSL node. 
#[test]
    fn advanced_mode_dsl_only_entry_word_commits_dsl() {
        // Candidate list with the DSL node only — no Advanced entry.
        let cands: Candidates = vec![(0, &SMOKE_DSL, CommandCategory::Simple)];

        let committed = committed_node("smk dsltail", Mode::Advanced, &cands);
        assert!(std::ptr::eq(committed, &SMOKE_DSL));

        let (outcome, _) = dispatch("smk dsltail", Mode::Advanced, &cands);
        let is_match = matches!(outcome, WalkOutcome::Match { .. });
        assert!(is_match, "got {outcome:?}");
    }
}

#[cfg(test)]
mod order_by_expected_set_tests {
    //! F5 (handoff 30 §3.3) — once `order by` has been consumed
    //! and the walker is waiting for a sort item, the expected set
    //! must not carry clause keywords from clauses that sit
    //! *before* ORDER BY (the FROM's JOIN options, WHERE /
    //! GROUP BY / HAVING, set-ops). Those optionals were skipped
    //! earlier in the seq; after ORDER BY commits past them they
    //! are no longer valid continuations at the cursor.

    use super::*;
    use crate::dsl::walker::outcome::Expectation;
    use crate::mode::Mode;

    /// Collect only the `Expectation::Word` entries that
    /// `expected_at_input_in_mode` reports for `source` in
    /// advanced mode, in the order they are reported.
    fn expected_words(source: &str) -> Vec<&'static str> {
        let mut collected = Vec::new();
        for expectation in expected_at_input_in_mode(source, Mode::Advanced).iter() {
            if let Expectation::Word(word) = expectation {
                collected.push(*word);
            }
        }
        collected
    }

    #[test]
    fn order_by_excludes_preceding_clause_keywords() {
        let words = expected_words("select Name from T order by ");
        // Keywords owned by clauses that precede ORDER BY.
        let preceding_clause_kw = [
            "where", "group", "having", "join", "union", "intersect",
            "except", "left", "right", "full", "cross", "inner", "as",
        ];

        let mut leaked: Vec<&str> = Vec::new();
        for keyword in &preceding_clause_kw {
            if words.contains(keyword) {
                leaked.push(*keyword);
            }
        }

        assert!(
            leaked.is_empty(),
            "ORDER BY expected set leaked preceding-clause keywords \
             {leaked:?}; full word set: {words:?}",
        );
    }

    #[test]
    fn order_by_after_sort_item_offers_direction() {
        // With a complete sort item typed (`order by Name`), the
        // direction keywords are legal next tokens. walk_repeated
        // previously dropped the item's trailing optionals, which
        // left completion offering neither of them.
        let source = "select Name from T order by Name ";

        let words = expected_words(source);
        let offers_asc = words.contains(&"asc");
        let offers_desc = words.contains(&"desc");
        assert!(offers_asc, "expected `asc`; got {words:?}");
        assert!(offers_desc, "expected `desc`; got {words:?}");

        // The `,` separator is intentionally kept out of the set
        // (user choice).
        let full = expected_at_input_in_mode(source, Mode::Advanced);
        let offers_comma = full
            .iter()
            .any(|e| matches!(e, Expectation::Punct(',')));
        assert!(
            !offers_comma,
            "`,` separator should not be surfaced; got {full:?}",
        );
    }

    #[test]
    fn order_by_still_offers_a_sort_item() {
        // Over-correction guard: suppressing the pending-skipped
        // keywords must not also remove the legitimate sort-item
        // continuation (a column identifier).
        let expected = expected_at_input_in_mode(
            "select Name from T order by ",
            Mode::Advanced,
        );
        let offers_sort_item = expected.iter().any(|e| match e {
            Expectation::Ident { .. } | Expectation::NumberLit => true,
            _ => false,
        });
        assert!(
            offers_sort_item,
            "ORDER BY must still offer a sort item; got {expected:?}",
        );
    }
}