From fd259048dad098e4cdc940e89ebaed12bf8c4091 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Wed, 20 May 2026 20:34:42 +0000 Subject: [PATCH] =?UTF-8?q?grammar:=20admit=20WITH=20inside=20subqueries?= =?UTF-8?q?=20/=20CTE=20bodies=20(ADR-0032=20=C2=A710.3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADR-0032 §10.3 says cte_bindings lives on the scope frame, with inner subqueries free to declare their own CTEs that shadow outer ones. The grammar didn't actually admit nested WITH inside SQL_SELECT_COMPOUND — a real ADR-vs-implementation gap. Closes the gap by making SQL_SELECT_COMPOUND a Choice between a WITH-prefixed form and a plain form. The naive Optional-prefix approach silently broke the paren-vs-subquery dispatch in sql_expr.rs's PAREN_GROUP: Optional matches 0 bytes, committing the Seq, so SELECT_CORE's NoMatch on `(a + b)` became Failed and the Choice couldn't fall through to or_expr. The Choice-fronted form keeps the fast NoMatch on non-WITH non-SELECT first tokens. Side effect: scalar subquery / IN / EXISTS / derived-table bodies now admit a leading WITH too, which matches standard SQL. Updated two tests that were guarding the old `(WITH …)` rejection behavior. Added one new harvest test exercising nested-WITH inside a CTE body — the harvest's `expand_binding` mechanism already handled the data correctly; the grammar gap was the sole blocker. Test totals: 1414 → 1415 passing (+1 nested-with-in-cte test). Clippy clean. --- src/dsl/grammar/sql_expr.rs | 17 ++++++------- src/dsl/grammar/sql_select.rs | 48 +++++++++++++++++++++++++++++------ src/dsl/walker/driver.rs | 30 ++++++++++++++++++++++ 3 files changed, 78 insertions(+), 17 deletions(-) diff --git a/src/dsl/grammar/sql_expr.rs b/src/dsl/grammar/sql_expr.rs index 6b4be93..8b33eb7 100644 --- a/src/dsl/grammar/sql_expr.rs +++ b/src/dsl/grammar/sql_expr.rs @@ -706,17 +706,16 @@ mod tests { #[test] fn scalar_subquery_dispatches_against_paren_group() { - // Both `(or_expr)` and `(SELECT …)` start with `(`. - // The ADR factors the `(` and the first inside token - // discriminates — `SELECT` → subquery, anything else - // → expression. (Per ADR-0032 §1 / §9, subqueries - // recurse through `SQL_SELECT_COMPOUND` which omits - // the outer `WITH` — so `(WITH …)` is NOT admitted as - // a scalar subquery; that form is only valid at - // statement top-level.) + // Both `(or_expr)` and `(SELECT …)` / `(WITH …)` start + // with `(`. The ADR factors the `(` and the first inside + // token discriminates — `SELECT` / `WITH` → subquery, + // anything else → expression. Per ADR-0032 §10.3, a + // subquery body may declare its own CTEs (shadowing + // outer ones), so `(WITH …)` IS admitted as a scalar + // subquery shape. good("(a + 1)"); good("(select 1)"); - bad("(with x as (select 1) select * from x)"); + good("(with x as (select 1) select * from x)"); } #[test] diff --git a/src/dsl/grammar/sql_select.rs b/src/dsl/grammar/sql_select.rs index 579d6af..266fffd 100644 --- a/src/dsl/grammar/sql_select.rs +++ b/src/dsl/grammar/sql_select.rs @@ -562,7 +562,7 @@ static SET_OP_TAIL_NODES: &[Node] = &[Node::Subgrammar(&SET_OP), Node::Subgrammar(&SELECT_CORE)]; static SET_OP_TAIL: Node = Node::Seq(SET_OP_TAIL_NODES); -static COMPOUND_SELECT_NODES: &[Node] = &[ +static PLAIN_COMPOUND_NODES: &[Node] = &[ Node::Subgrammar(&SELECT_CORE), Node::Repeated { inner: &SET_OP_TAIL, @@ -572,11 +572,40 @@ static COMPOUND_SELECT_NODES: &[Node] = &[ Node::Optional(&ORDER_BY_CLAUSE), Node::Optional(&LIMIT_CLAUSE), ]; +/// The compound select shape **without** a leading `WITH_CLAUSE`. +/// The Choice-fronted `SQL_SELECT_COMPOUND` references this twice: +/// once after a `WITH_CLAUSE` prefix and once on its own. +static PLAIN_COMPOUND: Node = Node::Seq(PLAIN_COMPOUND_NODES); + +// ADR-0032 §10.3: a subquery / CTE body may declare its own +// CTEs (shadowing outer ones of the same name). The compound +// recursion point therefore accepts an optional leading WITH. +// +// `Optional(WITH_CLAUSE)` would soft-commit the Seq on the +// 0-byte skip and turn downstream `NoMatch` into `Failed` for +// the Choice that decides "is `(...)` a paren expression or a +// scalar subquery?" (see sql_expr.rs PAREN_INSIDE_CHOICES). A +// Choice between WITH-prefixed and plain forms preserves the +// fast NoMatch on non-`with` non-`select` first tokens, so the +// expression branch of that Choice still wins on `(a + b)`. +static WITH_PREFIXED_COMPOUND_NODES: &[Node] = &[ + Node::Subgrammar(&WITH_CLAUSE), + Node::Subgrammar(&PLAIN_COMPOUND), +]; +static WITH_PREFIXED_COMPOUND: Node = + Node::Seq(WITH_PREFIXED_COMPOUND_NODES); + +static COMPOUND_CHOICES: &[Node] = &[ + Node::Subgrammar(&WITH_PREFIXED_COMPOUND), + Node::Subgrammar(&PLAIN_COMPOUND), +]; /// The compound-select fragment that subqueries / CTE bodies -/// recurse into via `Subgrammar` (2a) / `ScopedSubgrammar` (2b). -/// Omits the outer `with_clause`; that lives on -/// `SQL_SELECT_STATEMENT`. -pub static SQL_SELECT_COMPOUND: Node = Node::Seq(COMPOUND_SELECT_NODES); +/// recurse into. +/// +/// Referenced via `Subgrammar` (2a) / `ScopedSubgrammar` (2b). +/// A Choice between the `WITH …` prefix form and the plain +/// `SELECT …` form (ADR-0032 §10.3). +pub static SQL_SELECT_COMPOUND: Node = Node::Choice(COMPOUND_CHOICES); // ================================================================= // CTE definitions @@ -1268,16 +1297,19 @@ mod tests { // ----- compound-select fragment entry point ----- #[test] - fn compound_fragment_walks_without_with_clause() { + fn compound_fragment_walks_with_or_without_with_clause() { // SQL_SELECT_COMPOUND is what subqueries / CTE bodies // recurse into. It admits a select_core + optional - // set-op chain + outer ORDER/LIMIT. + // set-op chain + outer ORDER/LIMIT, optionally + // prefixed by a WITH clause (ADR-0032 §10.3 — a + // subquery body may declare its own CTEs that shadow + // outer ones). assert!(walks_via(&SQL_SELECT_COMPOUND, "select 1")); assert!(walks_via( &SQL_SELECT_COMPOUND, "select a from t union select b from u", )); - assert!(!walks_via( + assert!(walks_via( &SQL_SELECT_COMPOUND, "with x as (select 1) select * from x", )); diff --git a/src/dsl/walker/driver.rs b/src/dsl/walker/driver.rs index 1de6c7f..f7b8d3e 100644 --- a/src/dsl/walker/driver.rs +++ b/src/dsl/walker/driver.rs @@ -2151,6 +2151,36 @@ mod tests { assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id")); } + #[test] + fn cte_harvest_nested_with_in_cte_body() { + // Nested WITH inside a CTE body now parses (ADR-0032 + // §10.3 — inner subqueries may declare their own CTEs). + // The outer CTE's body has its own scope and its own + // CTE inside it. The outer's `*` projects from its + // body's FROM, which references the inner CTE; the + // inner CTE's columns flow through `expand_binding`. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with outer_cte as (with inner_cte as (select id, name from users) select * from inner_cte) select * from outer_cte", + &schema, + ); + let outer = ctes + .iter() + .find(|c| c.name == "outer_cte") + .expect("outer_cte binding"); + assert_eq!(outer.columns.len(), 2); + assert_eq!(outer.columns[0].name.as_deref(), Some("id")); + assert_eq!( + outer.columns[0].type_, + Some(crate::dsl::types::Type::Int), + ); + assert_eq!(outer.columns[1].name.as_deref(), Some("name")); + assert_eq!( + outer.columns[1].type_, + Some(crate::dsl::types::Type::Text), + ); + } + #[test] fn cte_harvest_sibling_b_sees_a_columns() { // Sibling CTEs at the same level. When `b`'s body