From 4f89106a639d6e104aa2bfc39503d44ab85fab72 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Wed, 20 May 2026 11:34:53 +0000 Subject: [PATCH] =?UTF-8?q?walker:=20Node::ScopedSubgrammar=20variant=20+?= =?UTF-8?q?=20scope-frame=20stack=20(ADR-0032=20=C2=A710.2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sub-phase 2b checkpoint 1 — adds the foundation for SQL SELECT lexical-scope discipline without changing existing walker semantics. New types in `dsl::walker::context`: - `TableBinding` — one FROM-source binding with table name, optional alias, and schema-resolved columns (§10.1). - `CteBinding` + `CteColumn` — a CTE definition visible from inside its body (WITH RECURSIVE self-reference) and from the outer scope after harvest (§10.3). - `ScopeFrame` — `from_scope`, `cte_bindings`, and `projection_aliases` for one lexical scope. Default-empty; the fields will be populated by later 2b checkpoints. `WalkContext` gains `from_scope_stack: Vec`, initialised with one bottom frame in both `new()` and `with_schema()`. The bottom frame is the implicit top-level scope DSL paths and top-level SQL statements operate in; `Node::ScopedSubgrammar` entries push and pop additional frames on top. `current_table` / `current_table_columns` remain as direct fields for this checkpoint — converting them to derived helpers is a later 2b step. New grammar-tree variant: - `Node::ScopedSubgrammar(&'static Self)` — like `Subgrammar`, but pushes a fresh `ScopeFrame` on entry and pops it on exit (ADR-0032 §10.2). Shares `subgrammar_depth` with the plain Subgrammar variant so the MAX_SUBGRAMMAR_DEPTH = 64 cap fires uniformly across both — §9's "no new walker capability for grammar recursion" claim holds. DSL Expr (ADR-0026) and sql_expr.rs ladder (ADR-0031) recursion continue to use the plain Subgrammar variant and never push a scope. Driver gains a parallel `walk_scoped_subgrammar` arm; the push/pop is unconditional so a speculatively-walked branch a later Choice rolls back leaves the stack clean. Test coverage in `driver.rs`: - A recursive ScopedSubgrammar test grammar walks correctly through depths 0-3. - The depth cap fires the same `expression_too_deep` friendly validation error as for plain Subgrammar. - The bottom frame invariant: `WalkContext::new` seeds exactly one frame, and after a walk the stack is restored. No grammar tree references the new variant yet — the rewire of sql_select.rs CTE bodies and the sql_expr.rs additive extensions for §5/§6 are the next 2b checkpoint. Test totals: 1330 baseline + 3 = 1333 passing, 0 failed, 1 ignored. Clippy clean. --- src/dsl/grammar/mod.rs | 18 ++++++ src/dsl/walker/context.rs | 102 ++++++++++++++++++++++++++++++-- src/dsl/walker/driver.rs | 120 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 4 deletions(-) diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index f090738..cdcc9d5 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -316,6 +316,24 @@ pub enum Node { /// this one references a fixed fragment already in the /// grammar tree. Subgrammar(&'static Self), + /// Like `Subgrammar`, but the walker additionally **pushes a + /// new `ScopeFrame`** onto `WalkContext::from_scope_stack` on + /// entry and pops it on exit (ADR-0032 §10.2). The + /// `subgrammar_depth` counter increments uniformly across + /// both variants — the depth cap applies the same way — so + /// this variant introduces no new walker capability for + /// grammar recursion; it only layers lexical-scope discipline + /// on top. + /// + /// Used at every SQL `SELECT` recursion point: subqueries + /// in `sql_expr.rs` (scalar `(SELECT …)`, `IN (SELECT …)`, + /// `[NOT] EXISTS (SELECT …)`) and CTE bodies in + /// `sql_select.rs` reference the compound-SELECT through + /// `Node::ScopedSubgrammar(&SQL_SELECT_COMPOUND)`. DSL `Expr` + /// recursion (ADR-0026) and the `sql_expr.rs` precedence- + /// ladder recursion (ADR-0031) keep using the plain + /// `Subgrammar` variant and never push a scope. + ScopedSubgrammar(&'static Self), /// Resolves at walk time using the active `WalkContext`. /// Phase D+ uses this for `column_value_list`. The factory /// is pure in `ctx`, so the walker memoizes the resolution diff --git a/src/dsl/walker/context.rs b/src/dsl/walker/context.rs index 587b70d..130e147 100644 --- a/src/dsl/walker/context.rs +++ b/src/dsl/walker/context.rs @@ -11,8 +11,73 @@ //! generic value-literal slot. use crate::completion::{SchemaCache, TableColumn}; +use crate::dsl::types::Type; use crate::mode::Mode; +/// A single `FROM`-source binding in the active lexical scope +/// (ADR-0032 §10.1). One binding per `FROM` table or `JOIN` +/// target, populated as the walker descends through +/// `from_clause` / `join_clause`. +#[derive(Debug, Clone)] +pub struct TableBinding { + /// The table name as the user typed it (case-preserving + /// per ADR-0009). + pub table: String, + /// The optional `AS` alias or bare alias (ADR-0032 §1). + pub alias: Option, + /// The schema-resolved columns for the table. Empty if the + /// schema did not know the table (the unknown-table + /// diagnostic will fire in 2d). + pub columns: Vec, +} + +/// A CTE definition visible from inside its own body +/// (`WITH RECURSIVE` self-reference) and from the outer scope +/// after the body completes (ADR-0032 §10.3). +#[derive(Debug, Clone)] +pub struct CteBinding { + pub name: String, + pub columns: Vec, +} + +/// One output column derived from a CTE body's projection +/// list per ADR-0032 §10.3's derivation rules. +#[derive(Debug, Clone)] +pub struct CteColumn { + /// `None` for computed projections without an alias — + /// the engine assigns an implementation-defined name and + /// the slot is silently skipped from the qualified-prefix + /// candidate list (ADR-0032 §10.3). + pub name: Option, + /// The resolved playground type if the body's projection + /// yields one (a single column reference). `None` for + /// computed columns and recursive CTE bodies (per ADR-0032 + /// Amendment 1's empirical findings). + pub type_: Option, +} + +/// One lexical scope on the walker's `from_scope_stack`. +/// +/// Pushed on entry to a `Node::ScopedSubgrammar` and popped on +/// exit (ADR-0032 §10.2). The bottom of the stack is the +/// implicit top-level scope DSL paths and top-level SQL +/// statements operate in. +#[derive(Debug, Default, Clone)] +pub struct ScopeFrame { + /// In-scope FROM-source bindings for this frame. Populated + /// by `from_clause` / `join_clause` walks. + pub from_scope: Vec, + /// CTE definitions visible in this frame. Populated by + /// `with_clause` walks before each CTE's body; the body's + /// output columns are harvested into the placeholder + /// binding at the body's frame exit (§10.3). + pub cte_bindings: Vec, + /// Projection-list aliases observed in this frame. + /// `ORDER BY` slots offer these as additional candidates + /// per ADR-0032 §10.4. + pub projection_aliases: Vec, +} + /// Per-walk state. /// /// Carries an optional schema reference (so callers without a @@ -26,7 +91,7 @@ use crate::mode::Mode; /// writes_column: true }` for `set col = …` / `where col = /// …` slots so the next value-slot picks the column's typed /// sub-grammar. -#[derive(Debug, Default)] +#[derive(Debug)] pub struct WalkContext<'a> { pub schema: Option<&'a SchemaCache>, /// The input mode this walk runs under (ADR-0030 §2). In @@ -89,23 +154,45 @@ pub struct WalkContext<'a> { /// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a /// pathologically nested expression fails with a friendly /// error instead of overflowing the process stack. + /// `Node::ScopedSubgrammar` shares the same counter + /// uniformly (ADR-0032 §9). pub subgrammar_depth: usize, + /// The stack of lexical scope frames (ADR-0032 §10.2). + /// The bottom frame is the implicit top-level scope DSL + /// paths and top-level SQL statements operate in; + /// `Node::ScopedSubgrammar` entries push and pop new frames + /// on top. Always non-empty: the bottom frame is created at + /// `WalkContext::new` / `with_schema` time and never popped. + pub from_scope_stack: Vec, } impl<'a> WalkContext<'a> { /// Schemaless walk context — the legacy default used by /// pre-Phase-D callers and tests that don't care about - /// schema-aware narrowing. + /// schema-aware narrowing. Carries a single empty + /// `ScopeFrame` on `from_scope_stack` (ADR-0032 §10.2). #[must_use] pub fn new() -> Self { - Self::default() + Self { + schema: None, + mode: Mode::Simple, + current_table: None, + current_table_columns: None, + current_column: None, + pending_value_type: None, + pending_value_column: None, + pending_hint_mode: None, + user_listed_columns: None, + subgrammar_depth: 0, + from_scope_stack: vec![ScopeFrame::default()], + } } /// Schema-aware walk context. Dynamic sub-grammars read /// `schema` (via `current_table_columns`) to unfold typed /// per-column value slots. #[must_use] - pub const fn with_schema(schema: &'a SchemaCache) -> Self { + pub fn with_schema(schema: &'a SchemaCache) -> Self { Self { schema: Some(schema), mode: Mode::Simple, @@ -117,10 +204,17 @@ impl<'a> WalkContext<'a> { pending_hint_mode: None, user_listed_columns: None, subgrammar_depth: 0, + from_scope_stack: vec![ScopeFrame::default()], } } } +impl Default for WalkContext<'_> { + fn default() -> Self { + Self::new() + } +} + /// Convenience re-export so non-walker modules don't reach /// across `completion::TableColumn` directly. #[allow(dead_code)] diff --git a/src/dsl/walker/driver.rs b/src/dsl/walker/driver.rs index 900d9bb..af4c5c5 100644 --- a/src/dsl/walker/driver.rs +++ b/src/dsl/walker/driver.rs @@ -214,6 +214,9 @@ fn walk_node_inner( Node::Subgrammar(inner) => { walk_subgrammar(source, pos, inner, ctx, path, per_byte) } + Node::ScopedSubgrammar(inner) => { + walk_scoped_subgrammar(source, pos, inner, ctx, path, per_byte) + } Node::DynamicSubgrammar(factory) => { // ADR-0024 §sub-grammars: resolve the inner Node at // walk time from the active `WalkContext`, then walk @@ -941,6 +944,42 @@ fn walk_subgrammar( result } +/// Walk a `ScopedSubgrammar` reference once (ADR-0032 §10.2). +/// +/// Pushes a fresh `ScopeFrame` onto `from_scope_stack` on +/// entry and pops it back on exit. The push/pop is +/// unconditional — a speculatively-walked branch that a +/// `Choice` later rolls back leaves the stack clean. Shares +/// the `subgrammar_depth` counter with the plain `Subgrammar` +/// variant so the depth cap fires uniformly. +fn walk_scoped_subgrammar( + source: &str, + pos: usize, + inner: &'static Node, + ctx: &mut WalkContext, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let saved_depth = ctx.subgrammar_depth; + ctx.subgrammar_depth += 1; + if ctx.subgrammar_depth > MAX_SUBGRAMMAR_DEPTH { + ctx.subgrammar_depth = saved_depth; + return NodeWalkResult::Failed { + position: pos, + kind: FailureKind::Validation(ValidationError { + message_key: "parse.custom.expression_too_deep", + args: Vec::new(), + }), + }; + } + ctx.from_scope_stack + .push(crate::dsl::walker::context::ScopeFrame::default()); + let result = walk_node(source, pos, inner, ctx, path, per_byte); + ctx.from_scope_stack.pop(); + ctx.subgrammar_depth = saved_depth; + result +} + fn merge_expected(dst: &mut Vec, src: Vec) { for e in src { if !dst.contains(&e) { @@ -1074,4 +1113,85 @@ mod tests { "resolve_dynamic should populate the memo cache", ); } + + // ---- ScopedSubgrammar (ADR-0032 §10.2) ----------------------- + + // Recursive test grammar parallel to NESTED, but using + // `ScopedSubgrammar` for the recursion — exercises the + // push/pop discipline. Same shape (`x` | `( )`), + // different recursion variant. + static SCOPED_NESTED_GROUP: &[Node] = &[ + Node::Punct('('), + Node::ScopedSubgrammar(&SCOPED_NESTED), + Node::Punct(')'), + ]; + static SCOPED_NESTED_CHOICES: &[Node] = &[ + Node::Seq(SCOPED_NESTED_GROUP), + Node::Word(Word::keyword("x")), + ]; + static SCOPED_NESTED: Node = Node::Choice(SCOPED_NESTED_CHOICES); + + fn walk_scoped_nested(input: &str) -> (NodeWalkResult, usize) { + let mut ctx = WalkContext::new(); + let mut path = MatchedPath::new(); + let mut per_byte = Vec::new(); + let baseline_frames = ctx.from_scope_stack.len(); + let result = walk_node( + input, + 0, + &SCOPED_NESTED, + &mut ctx, + &mut path, + &mut per_byte, + ); + assert_eq!( + ctx.subgrammar_depth, 0, + "subgrammar_depth must be restored to 0 after the walk", + ); + assert_eq!( + ctx.from_scope_stack.len(), + baseline_frames, + "from_scope_stack must be restored after the walk", + ); + (result, baseline_frames) + } + + #[test] + fn scoped_subgrammar_walks_a_recursive_grammar() { + for input in ["x", "(x)", "((x))", "(((x)))"] { + let (result, _) = walk_scoped_nested(input); + assert!( + matches!(result, NodeWalkResult::Matched { .. }), + "{input:?} should match the recursive ScopedSubgrammar grammar", + ); + } + } + + #[test] + fn scoped_subgrammar_shares_depth_cap_with_subgrammar() { + // Over the cap with ScopedSubgrammar recursion fails the + // same way as the Subgrammar test above — both variants + // share `WalkContext::subgrammar_depth`. + let over = MAX_SUBGRAMMAR_DEPTH + 1; + let input = format!("{}x{}", "(".repeat(over), ")".repeat(over)); + match walk_scoped_nested(&input).0 { + NodeWalkResult::Failed { + kind: FailureKind::Validation(err), + .. + } => assert_eq!(err.message_key, "parse.custom.expression_too_deep"), + other => panic!( + "expected expression_too_deep on pathological scoped nesting, got {other:?}", + ), + } + } + + #[test] + fn scoped_subgrammar_baseline_frame_is_always_present() { + let ctx = WalkContext::new(); + assert_eq!( + ctx.from_scope_stack.len(), + 1, + "WalkContext::new should seed exactly one bottom frame", + ); + } }