dd37a1cbfc
Implements the six ADR-0032 §10.3 output-column derivation rules at CTE body-frame exit, populating the placeholder `CteBinding`'s columns. Unblocks `diagnostic.cte_arity_mismatch` (which compares declared col-list arity vs derived projection arity) and the upcoming qualified-prefix completion in 2e proper.

- `WalkContext::pending_cte_harvest`: bookkeeping for an in-progress CTE harvest, armed by `writes_cte_name` + extended by `cte_column` idents, consumed by the next `walk_scoped_subgrammar` invocation (CTE syntax has no intervening `ScopedSubgrammar`, so timing is deterministic). Cleared on every `walk_scoped_subgrammar` entry to prevent stale state surviving a speculative walk rollback.
- `run_cte_harvest`: post-walk path-scan classifier that reconstructs the projection list of the body's first leg and applies the six derivation rules. Compound bodies take columns from the first leg per spec; recursive CTE bodies take the non-recursive (first) leg. An optional `(col-list)` renames positionally with preserved types.
- `expand_binding`: bridges a `TableBinding` to a `CteColumn` list, resolving CTE-source bindings (empty columns + table name matches an in-scope `CteBinding`) through to the CTE's harvested columns. Enables sibling CTEs to project correctly: in `WITH a AS (...), b AS (SELECT * FROM a) ...`, b's harvest sees a's derived columns through the body's `from_scope` binding.
- `WalkContext::pending_diagnostics`: accumulator for diagnostics emitted DURING the walk by node handlers with context the post-walk passes can't reconstruct. Drained by the top-level walk function on both match and non-match paths so a re-used context can't leak entries between walks.

Test totals: 1399 → 1414 passing (+15: 10 derivation rules + 1 sibling CTE + 4 arity match/mismatch tests). Clippy clean.
276 lines
12 KiB
Rust
276 lines
12 KiB
Rust
//! `WalkContext` — per-walk mutable state that flows through the
|
|
//! walker (ADR-0024 §WalkContext, §Phase D).
|
|
//!
|
|
//! Phase D plumbed a schema reference through the context so
|
|
//! schema-aware nodes (`Ident { source: Tables }` writing
|
|
//! `current_table`, `DynamicSubgrammar` reading
|
|
//! `current_table_columns`) can resolve real entities at walk
|
|
//! time. Pre-Phase-D `default()` callers (tests, the chumsky-
|
|
//! era `parse_command(input)` signature) still work — the
|
|
//! schema slot is `None` and dynamic dispatch falls back to a
|
|
//! generic value-literal slot.
|
|
|
|
use crate::completion::{SchemaCache, TableColumn};
|
|
use crate::dsl::types::Type;
|
|
use crate::mode::Mode;
|
|
|
|
/// A single `FROM`-source binding in the active lexical scope
|
|
/// (ADR-0032 §10.1). One binding per `FROM` table or `JOIN`
|
|
/// target, populated as the walker descends through
|
|
/// `from_clause` / `join_clause`.
|
|
#[derive(Debug, Clone)]
|
|
pub struct TableBinding {
|
|
/// The table name as the user typed it (case-preserving
|
|
/// per ADR-0009).
|
|
pub table: String,
|
|
/// The optional `AS` alias or bare alias (ADR-0032 §1).
|
|
pub alias: Option<String>,
|
|
/// The schema-resolved columns for the table. Empty if the
|
|
/// schema did not know the table (the unknown-table
|
|
/// diagnostic will fire in 2d).
|
|
pub columns: Vec<TableColumn>,
|
|
}
|
|
|
|
/// A CTE definition visible from inside its own body
|
|
/// (`WITH RECURSIVE` self-reference) and from the outer scope
|
|
/// after the body completes (ADR-0032 §10.3).
|
|
#[derive(Debug, Clone)]
|
|
pub struct CteBinding {
|
|
pub name: String,
|
|
pub columns: Vec<CteColumn>,
|
|
}
|
|
|
|
/// One output column derived from a CTE body's projection
|
|
/// list per ADR-0032 §10.3's derivation rules.
|
|
#[derive(Debug, Clone)]
|
|
pub struct CteColumn {
|
|
/// `None` for computed projections without an alias —
|
|
/// the engine assigns an implementation-defined name and
|
|
/// the slot is silently skipped from the qualified-prefix
|
|
/// candidate list (ADR-0032 §10.3).
|
|
pub name: Option<String>,
|
|
/// The resolved playground type if the body's projection
|
|
/// yields one (a single column reference). `None` for
|
|
/// computed columns and recursive CTE bodies (per ADR-0032
|
|
/// Amendment 1's empirical findings).
|
|
pub type_: Option<Type>,
|
|
}
|
|
|
|
/// One lexical scope on the walker's `from_scope_stack`.
|
|
///
|
|
/// Pushed on entry to a `Node::ScopedSubgrammar` and popped on
|
|
/// exit (ADR-0032 §10.2). The bottom of the stack is the
|
|
/// implicit top-level scope DSL paths and top-level SQL
|
|
/// statements operate in.
|
|
#[derive(Debug, Default, Clone)]
|
|
pub struct ScopeFrame {
|
|
/// In-scope FROM-source bindings for this frame. Populated
|
|
/// by `from_clause` / `join_clause` walks.
|
|
pub from_scope: Vec<TableBinding>,
|
|
/// CTE definitions visible in this frame. Populated by
|
|
/// `with_clause` walks before each CTE's body; the body's
|
|
/// output columns are harvested into the placeholder
|
|
/// binding at the body's frame exit (§10.3).
|
|
pub cte_bindings: Vec<CteBinding>,
|
|
/// Projection-list aliases observed in this frame.
|
|
/// `ORDER BY` slots offer these as additional candidates
|
|
/// per ADR-0032 §10.4.
|
|
pub projection_aliases: Vec<String>,
|
|
}
|
|
|
|
/// Per-walk state.
|
|
///
|
|
/// Carries an optional schema reference (so callers without a
|
|
/// schema continue to work) plus mutable accumulators that
|
|
/// nodes can write to during the walk:
|
|
///
|
|
/// - `current_table` / `current_table_columns` — populated when
|
|
/// an `Ident { source: Tables }` node with `writes_table:
|
|
/// true` matches a known table.
|
|
/// - `current_column` — populated by `Ident { source: Columns
|
|
/// writes_column: true }` for `set col = …` / `where col =
|
|
/// …` slots so the next value-slot picks the column's typed
|
|
/// sub-grammar.
|
|
#[derive(Debug)]
|
|
pub struct WalkContext<'a> {
|
|
pub schema: Option<&'a SchemaCache>,
|
|
/// The input mode this walk runs under (ADR-0030 §2). In
|
|
/// `Mode::Simple` the walker gates out SQL-only commands —
|
|
/// an advanced-only entry word yields the "this is SQL"
|
|
/// hint rather than a normal parse. Defaults to
|
|
/// `Mode::Simple`; real call sites set it from the active
|
|
/// `App` mode.
|
|
pub mode: Mode,
|
|
pub current_table: Option<String>,
|
|
pub current_table_columns: Option<Vec<TableColumn>>,
|
|
pub current_column: Option<TableColumn>,
|
|
/// The column type the walker is *about* to consume a value
|
|
/// for (ADR-0024 §Phase D §typed-value-slots). Set by the
|
|
/// walker on entry to a `Node::TypedValueSlot`, cleared on
|
|
/// successful inner match. The hint resolver reads this to
|
|
/// emit per-type prose ("Type an integer", "Type a date as
|
|
/// 'YYYY-MM-DD'", …) at empty prefix at typed value slots.
|
|
pub pending_value_type: Option<crate::dsl::types::Type>,
|
|
/// The column name (if known) the walker is about to
|
|
/// consume a value for.
|
|
///
|
|
/// Populated by:
|
|
/// - `Ident { source: Columns, writes_column: true }` for
|
|
/// `update set <col>=` and `where <col>=` positions, where
|
|
/// the column ident matches in the path immediately
|
|
/// before the value slot.
|
|
/// - `Node::TypedValueSlot { column_name: Some(name), … }`
|
|
/// for the per-column typed slots in `column_value_list`
|
|
/// (insert-into-T-values positions, where the column name
|
|
/// is keyed by position in the table's column list).
|
|
///
|
|
/// Cleared on successful inner match alongside
|
|
/// `pending_value_type`.
|
|
pub pending_value_column: Option<String>,
|
|
/// The hint-panel `HintMode` declared by the grammar node
|
|
/// the walker is currently inside (ADR-0024
|
|
/// §HintMode-per-node). Set on entry to a `Node::Hinted`
|
|
/// wrapper, cleared on successful inner match. The hint
|
|
/// resolver reads this directly instead of inferring the
|
|
/// slot kind from the shape of the expected set.
|
|
pub pending_hint_mode: Option<crate::dsl::grammar::HintMode>,
|
|
/// The columns the user explicitly listed in
|
|
/// `insert into <T> (col1, col2, …) values (…)` (Form A),
|
|
/// in declaration order.
|
|
///
|
|
/// Populated as each ident-shape token in the leading paren
|
|
/// matches an `Ident` node with `writes_user_listed_column:
|
|
/// true`. `None` (default) means no explicit list was
|
|
/// observed — the inner `values (…)` slot list then
|
|
/// defaults to "every non-auto-generated column of the
|
|
/// current table" (Form B `insert into T values (…)`
|
|
/// behavior; ADR-0018 §3 — auto-generated columns are
|
|
/// skipped from the value list because the dispatch path
|
|
/// auto-fills them).
|
|
pub user_listed_columns: Option<Vec<String>>,
|
|
/// Count of active `Node::Subgrammar` frames on the walk
|
|
/// stack (ADR-0026 §2). The walker increments on entry to a
|
|
/// `Subgrammar`, restores the saved value on exit, and
|
|
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
|
|
/// pathologically nested expression fails with a friendly
|
|
/// error instead of overflowing the process stack.
|
|
/// `Node::ScopedSubgrammar` shares the same counter
|
|
/// uniformly (ADR-0032 §9).
|
|
pub subgrammar_depth: usize,
|
|
/// The stack of lexical scope frames (ADR-0032 §10.2).
|
|
/// The bottom frame is the implicit top-level scope DSL
|
|
/// paths and top-level SQL statements operate in;
|
|
/// `Node::ScopedSubgrammar` entries push and pop new frames
|
|
/// on top. Always non-empty: the bottom frame is created at
|
|
/// `WalkContext::new` / `with_schema` time and never popped.
|
|
pub from_scope_stack: Vec<ScopeFrame>,
|
|
/// Diagnostics emitted *during* the walk by node handlers
|
|
/// that have context the post-walk path scanners can no
|
|
/// longer reconstruct (notably the §10.3 CTE harvest, which
|
|
/// runs at body-frame exit and has direct access to both
|
|
/// the declared col-list and the derived columns). The
|
|
/// walker's top-level `walk` function drains this on
|
|
/// successful parses and folds the entries into the final
|
|
/// diagnostic vector.
|
|
pub pending_diagnostics: Vec<crate::dsl::walker::outcome::Diagnostic>,
|
|
/// Set by the `writes_cte_name` ident path right after the
|
|
/// placeholder `CteBinding` is pushed onto the outer frame.
|
|
/// Tells the very next `walk_scoped_subgrammar` invocation
|
|
/// that the body it's about to walk is a CTE body and that,
|
|
/// on `Matched` exit, it should run the §10.3 harvest into
|
|
/// the recorded placeholder. `cte_column` idents (the
|
|
/// optional `(c1, c2)` list between the cte name and `AS`)
|
|
/// append to `col_list` as they're seen.
|
|
///
|
|
/// CTE syntax has no intervening `ScopedSubgrammar` between
|
|
/// the cte-name ident and the body, so the timing is
|
|
/// deterministic. Cleared by `walk_scoped_subgrammar` whether
|
|
/// or not the inner walk matched (a speculatively-walked
|
|
/// then-rolled-back body must not leave a stale request).
|
|
pub pending_cte_harvest: Option<PendingCteHarvest>,
|
|
}
|
|
|
|
/// Bookkeeping for an in-progress CTE harvest (ADR-0032 §10.3
/// stage 2).
///
/// The `writes_cte_name` ident sets one of these after pushing
/// the placeholder `CteBinding`; the next
/// `walk_scoped_subgrammar` invocation takes it and runs the
/// harvest after the body matches.
#[derive(Debug, Clone)]
pub struct PendingCteHarvest {
    /// Index of the placeholder `CteBinding` in the *outer*
    /// frame's `cte_bindings`. The outer frame is
    /// `from_scope_stack[len() - 2]` at the moment the body's
    /// frame is on top.
    pub placeholder_index: usize,
    /// Explicit `(c1, c2, …)` rename list — empty when the CTE
    /// declared no column list. The harvest's derived column
    /// names are overridden positionally by this list per
    /// ADR-0032 §10.3.
    pub col_list: Vec<String>,
    /// Name of the CTE whose body is about to be harvested.
    pub cte_name: String,
    /// Span of the `cte_name` ident — the diagnostic anchor for
    /// `cte_arity_mismatch` if the col-list arity disagrees with
    /// the body's derived arity.
    ///
    /// NOTE(review): presumably `(start, end)` offsets into the
    /// walked input — confirm the unit and whether `end` is
    /// exclusive at the site that builds the diagnostic.
    pub cte_name_span: (usize, usize),
}
|
|
|
|
impl<'a> WalkContext<'a> {
|
|
/// Schemaless walk context — the legacy default used by
|
|
/// pre-Phase-D callers and tests that don't care about
|
|
/// schema-aware narrowing. Carries a single empty
|
|
/// `ScopeFrame` on `from_scope_stack` (ADR-0032 §10.2).
|
|
#[must_use]
|
|
pub fn new() -> Self {
|
|
Self {
|
|
schema: None,
|
|
mode: Mode::Simple,
|
|
current_table: None,
|
|
current_table_columns: None,
|
|
current_column: None,
|
|
pending_value_type: None,
|
|
pending_value_column: None,
|
|
pending_hint_mode: None,
|
|
user_listed_columns: None,
|
|
subgrammar_depth: 0,
|
|
from_scope_stack: vec![ScopeFrame::default()],
|
|
pending_diagnostics: Vec::new(),
|
|
pending_cte_harvest: None,
|
|
}
|
|
}
|
|
|
|
/// Schema-aware walk context. Dynamic sub-grammars read
|
|
/// `schema` (via `current_table_columns`) to unfold typed
|
|
/// per-column value slots.
|
|
#[must_use]
|
|
pub fn with_schema(schema: &'a SchemaCache) -> Self {
|
|
Self {
|
|
schema: Some(schema),
|
|
mode: Mode::Simple,
|
|
current_table: None,
|
|
current_table_columns: None,
|
|
current_column: None,
|
|
pending_value_type: None,
|
|
pending_value_column: None,
|
|
pending_hint_mode: None,
|
|
user_listed_columns: None,
|
|
subgrammar_depth: 0,
|
|
from_scope_stack: vec![ScopeFrame::default()],
|
|
pending_diagnostics: Vec::new(),
|
|
pending_cte_harvest: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Default for WalkContext<'_> {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
/// Convenience re-export so non-walker modules don't reach
|
|
/// across `completion::TableColumn` directly.
|
|
#[allow(dead_code)]
|
|
pub type ColumnInfo = TableColumn;
|