grammar: admit WITH inside subqueries / CTE bodies (ADR-0032 §10.3)

ADR-0032 §10.3 says cte_bindings lives on the scope frame, with
inner subqueries free to declare their own CTEs that shadow outer
ones. The grammar didn't actually admit nested WITH inside
SQL_SELECT_COMPOUND — a real ADR-vs-implementation gap.

Closes the gap by making SQL_SELECT_COMPOUND a Choice between a
WITH-prefixed form and a plain form. The naive Optional-prefix
approach silently broke the paren-vs-subquery dispatch in
sql_expr.rs's PAREN_GROUP: Optional matches 0 bytes, committing
the Seq, so SELECT_CORE's NoMatch on `(a + b)` became Failed and
the Choice couldn't fall through to or_expr. The Choice-fronted
form keeps the fast NoMatch on non-WITH non-SELECT first tokens.

Side effect: scalar subquery / IN / EXISTS / derived-table
bodies now admit a leading WITH too, which matches standard SQL.

Updated two tests that were guarding the old `(WITH …)` rejection
behavior. Added one new harvest test exercising nested-WITH inside
a CTE body — the harvest's `expand_binding` mechanism already
handled the data correctly; the grammar gap was the sole blocker.

Test totals: 1414 → 1415 passing (+1 nested-with-in-cte test).
Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-20 20:34:42 +00:00
parent dd37a1cbfc
commit fd259048da
3 changed files with 78 additions and 17 deletions
+8 -9
View File
@@ -706,17 +706,16 @@ mod tests {
#[test]
fn scalar_subquery_dispatches_against_paren_group() {
// Both `(or_expr)` and `(SELECT …)` start with `(`.
// The ADR factors the `(` and the first inside token
// discriminates — `SELECT` → subquery, anything else
// → expression. (Per ADR-0032 §1 / §9, subqueries
// recurse through `SQL_SELECT_COMPOUND` which omits
// the outer `WITH` — so `(WITH …)` is NOT admitted as
// a scalar subquery; that form is only valid at
// statement top-level.)
// Both `(or_expr)` and `(SELECT …)` / `(WITH …)` start
// with `(`. The ADR factors the `(` and the first inside
// token discriminates — `SELECT` / `WITH` → subquery,
// anything else → expression. Per ADR-0032 §10.3, a
// subquery body may declare its own CTEs (shadowing
// outer ones), so `(WITH …)` IS admitted as a scalar
// subquery shape.
good("(a + 1)");
good("(select 1)");
bad("(with x as (select 1) select * from x)");
good("(with x as (select 1) select * from x)");
}
#[test]
+40 -8
View File
@@ -562,7 +562,7 @@ static SET_OP_TAIL_NODES: &[Node] =
&[Node::Subgrammar(&SET_OP), Node::Subgrammar(&SELECT_CORE)];
static SET_OP_TAIL: Node = Node::Seq(SET_OP_TAIL_NODES);
static COMPOUND_SELECT_NODES: &[Node] = &[
static PLAIN_COMPOUND_NODES: &[Node] = &[
Node::Subgrammar(&SELECT_CORE),
Node::Repeated {
inner: &SET_OP_TAIL,
@@ -572,11 +572,40 @@ static COMPOUND_SELECT_NODES: &[Node] = &[
Node::Optional(&ORDER_BY_CLAUSE),
Node::Optional(&LIMIT_CLAUSE),
];
/// The compound select shape **without** a leading `WITH_CLAUSE`.
/// The Choice-fronted `SQL_SELECT_COMPOUND` references this twice:
/// once after a `WITH_CLAUSE` prefix and once on its own.
static PLAIN_COMPOUND: Node = Node::Seq(PLAIN_COMPOUND_NODES);
// ADR-0032 §10.3: a subquery / CTE body may declare its own
// CTEs (shadowing outer ones of the same name). The compound
// recursion point therefore accepts an optional leading WITH.
//
// `Optional(WITH_CLAUSE)` would soft-commit the Seq on the
// 0-byte skip and turn downstream `NoMatch` into `Failed` for
// the Choice that decides "is `(...)` a paren expression or a
// scalar subquery?" (see sql_expr.rs PAREN_INSIDE_CHOICES). A
// Choice between WITH-prefixed and plain forms preserves the
// fast NoMatch on non-`with` non-`select` first tokens, so the
// expression branch of that Choice still wins on `(a + b)`.
static WITH_PREFIXED_COMPOUND_NODES: &[Node] = &[
Node::Subgrammar(&WITH_CLAUSE),
Node::Subgrammar(&PLAIN_COMPOUND),
];
static WITH_PREFIXED_COMPOUND: Node =
Node::Seq(WITH_PREFIXED_COMPOUND_NODES);
static COMPOUND_CHOICES: &[Node] = &[
Node::Subgrammar(&WITH_PREFIXED_COMPOUND),
Node::Subgrammar(&PLAIN_COMPOUND),
];
/// The compound-select fragment that subqueries / CTE bodies
/// recurse into via `Subgrammar` (2a) / `ScopedSubgrammar` (2b).
/// Omits the outer `with_clause`; that lives on
/// `SQL_SELECT_STATEMENT`.
pub static SQL_SELECT_COMPOUND: Node = Node::Seq(COMPOUND_SELECT_NODES);
/// recurse into.
///
/// Referenced via `Subgrammar` (2a) / `ScopedSubgrammar` (2b).
/// A Choice between the `WITH …` prefix form and the plain
/// `SELECT …` form (ADR-0032 §10.3).
pub static SQL_SELECT_COMPOUND: Node = Node::Choice(COMPOUND_CHOICES);
// =================================================================
// CTE definitions
@@ -1268,16 +1297,19 @@ mod tests {
// ----- compound-select fragment entry point -----
#[test]
fn compound_fragment_walks_without_with_clause() {
fn compound_fragment_walks_with_or_without_with_clause() {
// SQL_SELECT_COMPOUND is what subqueries / CTE bodies
// recurse into. It admits a select_core + optional
// set-op chain + outer ORDER/LIMIT.
// set-op chain + outer ORDER/LIMIT, optionally
// prefixed by a WITH clause (ADR-0032 §10.3 — a
// subquery body may declare its own CTEs that shadow
// outer ones).
assert!(walks_via(&SQL_SELECT_COMPOUND, "select 1"));
assert!(walks_via(
&SQL_SELECT_COMPOUND,
"select a from t union select b from u",
));
assert!(!walks_via(
assert!(walks_via(
&SQL_SELECT_COMPOUND,
"with x as (select 1) select * from x",
));
+30
View File
@@ -2151,6 +2151,36 @@ mod tests {
assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id"));
}
#[test]
fn cte_harvest_nested_with_in_cte_body() {
// Nested WITH inside a CTE body now parses (ADR-0032
// §10.3 — inner subqueries may declare their own CTEs).
// The outer CTE's body has its own scope and its own
// CTE inside it. The outer's `*` projects from its
// body's FROM, which references the inner CTE; the
// inner CTE's columns flow through `expand_binding`.
let schema = schema_users();
let ctes = cte_bindings_after_walk_with_schema(
"with outer_cte as (with inner_cte as (select id, name from users) select * from inner_cte) select * from outer_cte",
&schema,
);
let outer = ctes
.iter()
.find(|c| c.name == "outer_cte")
.expect("outer_cte binding");
assert_eq!(outer.columns.len(), 2);
assert_eq!(outer.columns[0].name.as_deref(), Some("id"));
assert_eq!(
outer.columns[0].type_,
Some(crate::dsl::types::Type::Int),
);
assert_eq!(outer.columns[1].name.as_deref(), Some("name"));
assert_eq!(
outer.columns[1].type_,
Some(crate::dsl::types::Type::Text),
);
}
#[test]
fn cte_harvest_sibling_b_sees_a_columns() {
// Sibling CTEs at the same level. When `b`'s body