walker: add Subgrammar node + recursion-depth cap (ADR-0026 step 1)

New `Node::Subgrammar(&'static Node)` variant lets a named
static grammar fragment recurse through a reference — `Seq` /
`Choice` embed children by value and cannot close a cycle, but
a `&'static Node` can point back at an enclosing fragment. This
is the mechanism the stratified WHERE-expression grammar
(ADR-0026 §2) recurses through.

The walker counts active Subgrammar frames in
`WalkContext::subgrammar_depth` and refuses past
`MAX_SUBGRAMMAR_DEPTH` (64), surfacing a friendly
`parse.custom.expression_too_deep` error instead of a stack
overflow. Depth is saved/restored per frame so a
speculatively-walked-then-rolled-back Choice branch leaves no
residue.

No grammar references the node yet; covered by walker unit
tests with a small recursive `( x )` test grammar.
This commit is contained in:
claude@clouddev1
2026-05-18 22:36:19 +00:00
parent ac41938365
commit f0b2043a39
5 changed files with 164 additions and 1 deletions
+22
View File
@@ -292,6 +292,28 @@ pub enum Node {
separator: Option<&'static Self>,
min: usize,
},
/// Walks the referenced `&'static Node` once, mandatory
/// (ADR-0026 §2). The reference indirection is what lets a
/// named `static` grammar fragment appear inside its own
/// subtree: a `Seq` / `Choice` embeds its children by value
/// and so cannot close a cycle, but a `&'static Node`
/// reference can point back at an enclosing fragment. This
/// is the mechanism the stratified WHERE-expression grammar
/// recurses through — the `( or_expr )` branch and the
/// `not_expr` self-reference.
///
/// The walker counts active `Subgrammar` frames in
/// `WalkContext::subgrammar_depth` and refuses past
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
/// nested input (`((((…))))`) fails with a friendly error
/// rather than overflowing the parser stack.
///
/// The static counterpart of `DynamicSubgrammar`: that one
/// builds a fresh node from the `WalkContext` at walk time;
/// this one references a fixed fragment already in the
/// grammar tree.
#[allow(dead_code)]
Subgrammar(&'static Self),
/// Resolves at walk time using the active `WalkContext`.
/// Phase D+ uses this for `column_value_list`. The factory
/// is pure in `ctx`, so the walker memoizes the resolution
+8
View File
@@ -75,6 +75,13 @@ pub struct WalkContext<'a> {
/// skipped from the value list because the dispatch path
/// auto-fills them).
pub user_listed_columns: Option<Vec<String>>,
/// Count of active `Node::Subgrammar` frames on the walk
/// stack (ADR-0026 §2). The walker increments on entry to a
/// `Subgrammar`, restores the saved value on exit, and
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
/// pathologically nested expression fails with a friendly
/// error instead of overflowing the process stack.
pub subgrammar_depth: usize,
}
impl<'a> WalkContext<'a> {
@@ -100,6 +107,7 @@ impl<'a> WalkContext<'a> {
pending_value_column: None,
pending_hint_mode: None,
user_listed_columns: None,
subgrammar_depth: 0,
}
}
}
+128 -1
View File
@@ -35,6 +35,18 @@ use crate::dsl::walker::outcome::{
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
};
/// Maximum nesting of `Node::Subgrammar` frames (ADR-0026 §1).
///
/// The stratified WHERE-expression grammar descends one
/// `Subgrammar` hop per precedence tier, plus a tier-stack per
/// parenthesised group, so this bounds real expression nesting
/// many parentheses deep — far past any hand-written filter.
/// Its purpose is purely a stack-overflow guard: input nested
/// past the cap (`((((…))))`) fails with a friendly
/// `expression_too_deep` error instead of recursing until the
/// process stack is exhausted.
pub const MAX_SUBGRAMMAR_DEPTH: usize = 64;
/// Memo cache for `Node::DynamicSubgrammar` resolution.
///
/// A factory builds a `Node` from the active `WalkContext`; the
@@ -199,6 +211,9 @@ fn walk_node_inner(
kind: FailureKind::Mismatch { expected: vec![] },
}
}
Node::Subgrammar(inner) => {
walk_subgrammar(source, pos, inner, ctx, path, per_byte)
}
Node::DynamicSubgrammar(factory) => {
// ADR-0024 §sub-grammars: resolve the inner Node at
// walk time from the active `WalkContext`, then walk
@@ -884,6 +899,48 @@ fn walk_optional(
}
}
/// Walk a `&'static Node` reference once (ADR-0026 §2).
///
/// The reference indirection is what lets a named `static`
/// grammar fragment recurse: `Seq` / `Choice` embed children by
/// value and so cannot close a cycle, but a `Subgrammar` node
/// holding a `&'static Node` can point back into an enclosing
/// fragment. The stratified WHERE-expression grammar's
/// `( or_expr )` branch and `not_expr` self-reference both
/// recurse this way.
///
/// `WalkContext::subgrammar_depth` counts active frames. Past
/// `MAX_SUBGRAMMAR_DEPTH` the walk fails with a friendly
/// `expression_too_deep` validation error rather than
/// overflowing the process stack. The depth is saved on entry
/// and restored on exit unconditionally, so a speculatively-
/// walked branch that a `Choice` later rolls back leaves the
/// counter clean.
fn walk_subgrammar(
source: &str,
pos: usize,
inner: &'static Node,
ctx: &mut WalkContext,
path: &mut MatchedPath,
per_byte: &mut Vec<ByteClass>,
) -> NodeWalkResult {
let saved_depth = ctx.subgrammar_depth;
ctx.subgrammar_depth += 1;
if ctx.subgrammar_depth > MAX_SUBGRAMMAR_DEPTH {
ctx.subgrammar_depth = saved_depth;
return NodeWalkResult::Failed {
position: pos,
kind: FailureKind::Validation(ValidationError {
message_key: "parse.custom.expression_too_deep",
args: Vec::new(),
}),
};
}
let result = walk_node(source, pos, inner, ctx, path, per_byte);
ctx.subgrammar_depth = saved_depth;
result
}
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
for e in src {
if !dst.contains(&e) {
@@ -894,9 +951,79 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
#[cfg(test)]
mod tests {
use super::{DYNAMIC_CACHE, resolve_dynamic};
use super::{
DYNAMIC_CACHE, FailureKind, MAX_SUBGRAMMAR_DEPTH, NodeWalkResult,
resolve_dynamic, walk_node,
};
use crate::dsl::grammar::{Node, Word};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::outcome::MatchedPath;
// Recursive test grammar for the `Subgrammar` node
// (ADR-0026 §2): `x` | `( <self> )`. `NESTED_GROUP` reaches
// back to `NESTED` through `Subgrammar(&NESTED)` — the cycle
// a by-value `Seq` slice could not express.
static NESTED_GROUP: &[Node] = &[
Node::Punct('('),
Node::Subgrammar(&NESTED),
Node::Punct(')'),
];
static NESTED_CHOICES: &[Node] = &[
Node::Seq(NESTED_GROUP),
Node::Word(Word::keyword("x")),
];
static NESTED: Node = Node::Choice(NESTED_CHOICES);
fn walk_nested(input: &str) -> NodeWalkResult {
let mut ctx = WalkContext::new();
let mut path = MatchedPath::new();
let mut per_byte = Vec::new();
let result =
walk_node(input, 0, &NESTED, &mut ctx, &mut path, &mut per_byte);
assert_eq!(
ctx.subgrammar_depth, 0,
"subgrammar_depth must be restored to 0 after the walk",
);
result
}
#[test]
fn subgrammar_walks_a_recursive_grammar() {
for input in ["x", "(x)", "((x))", "(((x)))"] {
assert!(
matches!(walk_nested(input), NodeWalkResult::Matched { .. }),
"{input:?} should match the recursive Subgrammar grammar",
);
}
}
#[test]
fn subgrammar_depth_cap_allows_exactly_the_limit() {
let input = format!(
"{}x{}",
"(".repeat(MAX_SUBGRAMMAR_DEPTH),
")".repeat(MAX_SUBGRAMMAR_DEPTH),
);
assert!(
matches!(walk_nested(&input), NodeWalkResult::Matched { .. }),
"exactly MAX_SUBGRAMMAR_DEPTH nested groups should still walk",
);
}
#[test]
fn subgrammar_depth_cap_rejects_pathological_nesting() {
let over = MAX_SUBGRAMMAR_DEPTH + 1;
let input = format!("{}x{}", "(".repeat(over), ")".repeat(over));
match walk_nested(&input) {
NodeWalkResult::Failed {
kind: FailureKind::Validation(err),
..
} => assert_eq!(err.message_key, "parse.custom.expression_too_deep"),
other => {
panic!("expected an expression_too_deep failure, got {other:?}")
}
}
}
/// Trivial factory — ignores the context. The memo behaviour
/// is keyed on the context, not the factory's output, so a
+1
View File
@@ -183,6 +183,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("parse.custom.bind_type_mismatch", &["found", "expected"]),
("parse.custom.change_column_flags_exclusive", &[]),
("parse.custom.create_table_needs_pk", &[]),
("parse.custom.expression_too_deep", &[]),
("parse.custom.insert_form_a_missing_values", &["columns"]),
("parse.custom.on_action_specified_twice", &["target"]),
("parse.custom.replay_path_expected", &[]),
+5
View File
@@ -372,6 +372,11 @@ parse:
replay_path_expected: "expected a path after `replay`"
create_table_needs_pk: |-
tables need at least one column. Add `with pk` for a default `id INTEGER PRIMARY KEY`, or `with pk <name>(<type>)` to choose. Use a comma-separated list for compound primary keys.
# ADR-0026 §1: the recursion-depth guard on the
# WHERE-expression grammar. Input nested past the cap
# (`((((…))))`) stops here with a friendly error instead
# of overflowing the parser stack.
expression_too_deep: "expression nested too deeply"
on_action_specified_twice: "`on {target}` specified twice"
change_column_flags_exclusive: "`--force-conversion` and `--dont-convert` are mutually exclusive — pick one."
unknown_type: "unknown type '{found}' (expected one of: {expected})"