grammar: sql_expr additive extensions for §5/§6, CTE body rewires to ScopedSubgrammar
Sub-phase 2b checkpoint 2 — closes the recursion loop between sql_expr.rs and sql_select.rs so subquery expressions and qualified column refs become structurally valid in every SQL context where they belong. sql_expr.rs: - §5 qualified-ref tail. `name_or_call` gains a `.identifier` suffix as a Choice sibling of the function-call `(args)` tail. The leading identifier is still matched once (per ADR-0031 §1's factoring); the optional tail dispatches between the two suffixes by their first character (`.` vs `(`). - §6.1 scalar subquery as primary. The `(or_expr)` and `(SELECT …)` branches share the leading `(`; the first inside token (`SELECT` → subquery, anything else → expression) discriminates. The subquery recurses through `Node::ScopedSubgrammar(&sql_select::SQL_SELECT_COMPOUND)`. - §6.2 IN (subquery) predicate. Sibling of the existing IN-value-list; same `(` factoring, same dispatch. - §6.3 [NOT] EXISTS primary. Bare `EXISTS (compound_select)` lives in `primary`; `NOT EXISTS` falls out via the existing `not_expr := NOT not_expr` tier above `primary`. sql_select.rs: - CTE body recursion rewires `Node::Subgrammar` → `Node::ScopedSubgrammar`, matching §10.2. The top-level statement's COMPOUND embedding stays plain Subgrammar — the implicit bottom frame is the right scope for a statement-level SELECT. Structural side-effect — const-eval cycle workaround: Closing the sql_expr ⇄ sql_select reference loop made Rust's const-evaluator follow the cycle through every `const Node` that transitively reaches it. Mirroring sql_expr.rs's existing pattern, composition Nodes in sql_select.rs (Seq / Choice / Optional / Repeated / Lookahead) are now `static Node` and appear in slice positions through `Node::Subgrammar(&NAME)` wraps; only leaf items (Punct, Word, Ident) remain `const`. Same workaround applies to data.rs's SELECT_PROJ_LIST / SELECT_PROJECTION chain and the inlined `SQL_EXPR` reference.
Statics resolve lazily at link time, so the cycle is valid between statics; it is not valid under const-eval, and the named `const SQL_EXPR` alias is gone in both files (replaced with the inline `Node::Subgrammar(&sql_expr::SQL_OR_EXPR)` expression at every use site). Test coverage: - sql_expr.rs gains 11 new tests for qualified refs, scalar subquery, IN-subquery, EXISTS / NOT EXISTS, nested subqueries, and the existing IN-value-list form (regression). - sql_select.rs gains 7 new tests for qualified refs in WHERE, scalar subqueries in WHERE / projection, IN / EXISTS / NOT EXISTS in WHERE, nested subqueries, and qualified refs inside CTE bodies. - All 70 prior sql_select tests still pass; the 2a baseline is preserved. `(WITH x AS (…) SELECT * FROM x)` is explicitly NOT admitted as a scalar subquery — ADR-0032 §1 / §9 wire subqueries to SQL_SELECT_COMPOUND, which omits the outer with_clause. WITH remains a statement-level-only construct. Documented in the relevant test. Test totals: 1333 → 1351 passing, 0 failed, 1 ignored (unchanged). Clippy clean.
This commit is contained in:
+156
-11
@@ -57,7 +57,7 @@
|
||||
//! validation, highlight, completion, and the no-left-recursion
|
||||
//! guarantee; it simply has no tree to hand back.
|
||||
|
||||
use crate::dsl::grammar::{IdentSource, Node, Word};
|
||||
use crate::dsl::grammar::{IdentSource, Node, Word, sql_select};
|
||||
|
||||
// =================================================================
|
||||
// Shared leaf nodes
|
||||
@@ -206,16 +206,26 @@ static BETWEEN_FORM_NODES: &[Node] = &[
|
||||
Node::Subgrammar(&ADDITIVE),
|
||||
];
|
||||
|
||||
/// `IN ( additive [, additive]* )`.
|
||||
/// `IN ( additive [, additive]* | compound_select )` —
|
||||
/// ADR-0032 §6.2. The `IN (` prefix is factored; after the
|
||||
/// opening paren a `Choice` dispatches between the
|
||||
/// compound-select subquery and the comma-separated value
|
||||
/// list. The first inside token disambiguates the same way the
|
||||
/// scalar-subquery `primary` does. The subquery recurses
|
||||
/// through `ScopedSubgrammar`.
|
||||
static IN_ITEM: Node = Node::Subgrammar(&ADDITIVE);
|
||||
static IN_FORM_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("in")),
|
||||
Node::Punct('('),
|
||||
static IN_INSIDE_CHOICES: &[Node] = &[
|
||||
Node::ScopedSubgrammar(&sql_select::SQL_SELECT_COMPOUND),
|
||||
Node::Repeated {
|
||||
inner: &IN_ITEM,
|
||||
separator: Some(&COMMA),
|
||||
min: 1,
|
||||
},
|
||||
];
|
||||
static IN_FORM_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("in")),
|
||||
Node::Punct('('),
|
||||
Node::Choice(IN_INSIDE_CHOICES),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
|
||||
@@ -315,10 +325,32 @@ static UNARY: Node = Node::Choice(UNARY_CHOICES);
|
||||
// primary := literal | EXISTS ( compound_select ) | ( compound_select | or_expr ) | case_expr | name_or_call
|
||||
// =================================================================
|
||||
|
||||
/// `( or_expr )` — a parenthesised group is a whole expression.
|
||||
/// `( or_expr )` and the scalar subquery `( compound_select )`
|
||||
/// share a leading `(`. Per ADR-0032 §6.1, the `(` is matched
|
||||
/// once and the inside is a `Choice` between
|
||||
/// `compound_select` (the scalar subquery) and `or_expr` (the
|
||||
/// parenthesised expression). The first inside token
|
||||
/// disambiguates: `SELECT` → subquery; anything else →
|
||||
/// expression. Subquery recursion goes through
|
||||
/// `ScopedSubgrammar` to push a new lexical scope (§10.2).
|
||||
static PAREN_INSIDE_CHOICES: &[Node] = &[
|
||||
Node::ScopedSubgrammar(&sql_select::SQL_SELECT_COMPOUND),
|
||||
Node::Subgrammar(&SQL_OR_EXPR),
|
||||
];
|
||||
static PAREN_GROUP_NODES: &[Node] = &[
|
||||
Node::Punct('('),
|
||||
Node::Subgrammar(&SQL_OR_EXPR),
|
||||
Node::Choice(PAREN_INSIDE_CHOICES),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
|
||||
// ADR-0032 §6.3 — `EXISTS ( compound_select )` as a primary.
|
||||
// `[ NOT ] EXISTS` falls out via the existing `not_expr := NOT
|
||||
// not_expr` tier above `primary`, so only the bare `EXISTS`
|
||||
// form lives here.
|
||||
static EXISTS_PRIMARY_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("exists")),
|
||||
Node::Punct('('),
|
||||
Node::ScopedSubgrammar(&sql_select::SQL_SELECT_COMPOUND),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
|
||||
@@ -417,19 +449,45 @@ static CALL_TAIL_NODES: &[Node] = &[
|
||||
Node::Optional(&CALL_ARGS),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
static CALL_TAIL: Node = Node::Seq(CALL_TAIL_NODES);
|
||||
|
||||
static NAME_OR_CALL_NODES: &[Node] = &[EXPR_IDENT, Node::Optional(&CALL_TAIL)];
|
||||
// ADR-0032 §5 — qualified column reference. `name_or_call` gains
|
||||
// a `.identifier` suffix as a Choice sibling of the function-call
|
||||
// `( args )` tail. The leading identifier is matched once (no
|
||||
// Choice branch begins with an identifier per ADR-0031 §1's
|
||||
// factoring); the optional tail dispatches between the two
|
||||
// suffixes by their first character (`.` vs `(`).
|
||||
const QUALIFIED_REF_IDENT: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "sql_expr_qualified_ref",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
};
|
||||
static QUALIFIED_REF_TAIL_NODES: &[Node] = &[
|
||||
Node::Punct('.'),
|
||||
QUALIFIED_REF_IDENT,
|
||||
];
|
||||
|
||||
static NAME_OR_CALL_TAIL_CHOICES: &[Node] = &[
|
||||
Node::Seq(QUALIFIED_REF_TAIL_NODES),
|
||||
Node::Seq(CALL_TAIL_NODES),
|
||||
];
|
||||
static NAME_OR_CALL_TAIL: Node = Node::Choice(NAME_OR_CALL_TAIL_CHOICES);
|
||||
|
||||
static NAME_OR_CALL_NODES: &[Node] = &[EXPR_IDENT, Node::Optional(&NAME_OR_CALL_TAIL)];
|
||||
|
||||
/// `primary`. Keyword literals (`null` / `true` / `false`) and the
|
||||
/// `CASE` keyword come before `name_or_call`, so they parse as
|
||||
/// what they are rather than as column references.
|
||||
/// `CASE` / `EXISTS` keywords come before `name_or_call`, so they
|
||||
/// parse as what they are rather than as column references.
|
||||
static PRIMARY_CHOICES: &[Node] = &[
|
||||
Node::Word(Word::keyword("null")),
|
||||
Node::Word(Word::keyword("true")),
|
||||
Node::Word(Word::keyword("false")),
|
||||
Node::NumberLit { validator: None },
|
||||
Node::StringLit,
|
||||
Node::Seq(EXISTS_PRIMARY_NODES),
|
||||
Node::Seq(PAREN_GROUP_NODES),
|
||||
Node::Seq(CASE_NODES),
|
||||
Node::Seq(NAME_OR_CALL_NODES),
|
||||
@@ -596,4 +654,91 @@ mod tests {
|
||||
let input = format!("{}1{}", "(".repeat(depth), ")".repeat(depth));
|
||||
assert!(!walks(&input), "pathological nesting must be rejected");
|
||||
}
|
||||
|
||||
// ---- ADR-0032 §5 additive: qualified column references ----
|
||||
|
||||
#[test]
|
||||
fn qualified_ref_basic_shapes() {
|
||||
good("t.c");
|
||||
good("t.c = 1");
|
||||
good("a.b + c.d");
|
||||
good("upper(t.name)");
|
||||
good("t.a is null");
|
||||
good("t.x like 'A%'");
|
||||
good("t.x between 1 and 10");
|
||||
good("t.x in (1, 2, 3)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qualified_ref_function_call_disambiguation() {
|
||||
// The optional tail dispatches `.identifier` (qualified
|
||||
// ref) vs `(args)` (function call) by first token — a
|
||||
// bare ident remains a column ref.
|
||||
good("foo(x)"); // function call
|
||||
good("foo.bar"); // qualified ref
|
||||
good("foo"); // bare ref
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn qualified_ref_not_admitted_as_function() {
|
||||
// No schema.fn() form — qualified ref and call don't
|
||||
// compose. `t.foo(x)` would only parse as `t.foo`
|
||||
// followed by `(x)` — but `(x)` is not a valid
|
||||
// continuation of an expression, so the walk fails.
|
||||
bad("t.foo(x)");
|
||||
}
|
||||
|
||||
// ---- ADR-0032 §6 additive: subquery expressions ----
|
||||
|
||||
#[test]
|
||||
fn scalar_subquery_as_primary() {
|
||||
good("(select 1)");
|
||||
good("x = (select y from t)");
|
||||
good("(select count(*) from t) > 100");
|
||||
good("upper((select name from t where id = 1))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scalar_subquery_dispatches_against_paren_group() {
|
||||
// Both `(or_expr)` and `(SELECT …)` start with `(`.
|
||||
// The ADR factors the `(` and the first inside token
|
||||
// discriminates — `SELECT` → subquery, anything else
|
||||
// → expression. (Per ADR-0032 §1 / §9, subqueries
|
||||
// recurse through `SQL_SELECT_COMPOUND` which omits
|
||||
// the outer `WITH` — so `(WITH …)` is NOT admitted as
|
||||
// a scalar subquery; that form is only valid at
|
||||
// statement top-level.)
|
||||
good("(a + 1)");
|
||||
good("(select 1)");
|
||||
bad("(with x as (select 1) select * from x)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn in_subquery_predicate() {
|
||||
good("x in (select y from t)");
|
||||
good("x not in (select y from t)");
|
||||
good("x in (select y from t union select z from u)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn in_value_list_still_works() {
|
||||
// The existing IN-value-list form is preserved
|
||||
// alongside the new IN-subquery form.
|
||||
good("status in (1, 2, 3)");
|
||||
good("name not in ('a', 'b', 'c')");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exists_primary() {
|
||||
good("exists (select 1)");
|
||||
good("not exists (select 1)");
|
||||
good("exists (select 1 from t where x = 1)");
|
||||
good("exists (select * from t) and a > 0");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subquery_recursion_through_compound() {
|
||||
good("x in (select y from t where y in (select z from u))");
|
||||
good("exists (select 1 from t where exists (select 1 from u))");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user