walker: add Subgrammar node + recursion-depth cap (ADR-0026 step 1)

New `Node::Subgrammar(&'static Node)` variant lets a named
static grammar fragment recurse through a reference — `Seq` /
`Choice` embed children by value and cannot close a cycle, but
a `&'static Node` can point back at an enclosing fragment. This
is the mechanism the stratified WHERE-expression grammar
(ADR-0026 §2) recurses through.

The walker counts active Subgrammar frames in
`WalkContext::subgrammar_depth` and refuses past
`MAX_SUBGRAMMAR_DEPTH` (64), surfacing a friendly
`parse.custom.expression_too_deep` error instead of a stack
overflow. Depth is saved/restored per frame so a
speculatively-walked-then-rolled-back Choice branch leaves no
residue.

No grammar references the node yet; covered by walker unit
tests with a small recursive `( x )` test grammar.
This commit is contained in:
claude@clouddev1
2026-05-18 22:36:19 +00:00
parent ac41938365
commit f0b2043a39
5 changed files with 164 additions and 1 deletions
+8
View File
@@ -75,6 +75,13 @@ pub struct WalkContext<'a> {
/// skipped from the value list because the dispatch path
/// auto-fills them).
pub user_listed_columns: Option<Vec<String>>,
/// Count of active `Node::Subgrammar` frames on the walk
/// stack (ADR-0026 §2). The walker increments on entry to a
/// `Subgrammar`, restores the saved value on exit, and
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
/// pathologically nested expression fails with a friendly
/// error instead of overflowing the process stack.
pub subgrammar_depth: usize,
}
impl<'a> WalkContext<'a> {
@@ -100,6 +107,7 @@ impl<'a> WalkContext<'a> {
pending_value_column: None,
pending_hint_mode: None,
user_listed_columns: None,
subgrammar_depth: 0,
}
}
}
+128 -1
View File
@@ -35,6 +35,18 @@ use crate::dsl::walker::outcome::{
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
};
/// Maximum nesting of `Node::Subgrammar` frames (ADR-0026 §2).
///
/// The stratified WHERE-expression grammar descends one
/// `Subgrammar` hop per precedence tier, plus a tier-stack per
/// parenthesised group, so this bounds real expression nesting
/// many parentheses deep — far past any hand-written filter.
/// Its purpose is purely a stack-overflow guard: input nested
/// past the cap (`((((…))))`) fails with a friendly
/// `expression_too_deep` error instead of recursing until the
/// process stack is exhausted.
///
/// The cap is inclusive: exactly `MAX_SUBGRAMMAR_DEPTH` active
/// frames are allowed, and entering one more is what fails.
pub const MAX_SUBGRAMMAR_DEPTH: usize = 64;
/// Memo cache for `Node::DynamicSubgrammar` resolution.
///
/// A factory builds a `Node` from the active `WalkContext`; the
@@ -199,6 +211,9 @@ fn walk_node_inner(
kind: FailureKind::Mismatch { expected: vec![] },
}
}
Node::Subgrammar(inner) => {
walk_subgrammar(source, pos, inner, ctx, path, per_byte)
}
Node::DynamicSubgrammar(factory) => {
// ADR-0024 §sub-grammars: resolve the inner Node at
// walk time from the active `WalkContext`, then walk
@@ -884,6 +899,48 @@ fn walk_optional(
}
}
/// Follow one `Node::Subgrammar` hop: walk the referenced
/// `&'static Node` at `pos` (ADR-0026 §2).
///
/// `Seq` / `Choice` hold their children by value, so they can
/// never form a cycle. A `Subgrammar` holds a reference instead,
/// which lets a named `static` fragment point back at a fragment
/// that encloses it. The stratified WHERE-expression grammar's
/// `( or_expr )` branch and `not_expr` self-reference both
/// recurse through this node.
///
/// Every active hop is counted in
/// `WalkContext::subgrammar_depth`. Entering a hop that would
/// push the count past `MAX_SUBGRAMMAR_DEPTH` does not recurse;
/// it fails at `pos` with the
/// `parse.custom.expression_too_deep` validation error, so
/// pathological nesting is reported instead of exhausting the
/// process stack.
///
/// Whatever the inner walk returns, the counter is put back to
/// the value it had on entry, so a branch that a `Choice`
/// speculatively walks and then rolls back leaves no residue.
fn walk_subgrammar(
    source: &str,
    pos: usize,
    inner: &'static Node,
    ctx: &mut WalkContext,
    path: &mut MatchedPath,
    per_byte: &mut Vec<ByteClass>,
) -> NodeWalkResult {
    let depth_on_entry = ctx.subgrammar_depth;
    let depth_inside = depth_on_entry + 1;

    // Guard clause: refuse before touching the counter, so the
    // context is left exactly as the caller handed it over.
    if depth_inside > MAX_SUBGRAMMAR_DEPTH {
        let too_deep = ValidationError {
            message_key: "parse.custom.expression_too_deep",
            args: Vec::new(),
        };
        return NodeWalkResult::Failed {
            position: pos,
            kind: FailureKind::Validation(too_deep),
        };
    }

    ctx.subgrammar_depth = depth_inside;
    let outcome = walk_node(source, pos, inner, ctx, path, per_byte);
    // Restore unconditionally — match or failure alike.
    ctx.subgrammar_depth = depth_on_entry;
    outcome
}
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
for e in src {
if !dst.contains(&e) {
@@ -894,9 +951,79 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
#[cfg(test)]
mod tests {
use super::{DYNAMIC_CACHE, resolve_dynamic};
use super::{
DYNAMIC_CACHE, FailureKind, MAX_SUBGRAMMAR_DEPTH, NodeWalkResult,
resolve_dynamic, walk_node,
};
use crate::dsl::grammar::{Node, Word};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::outcome::MatchedPath;
// Recursive test grammar for the `Subgrammar` node
// (ADR-0026 §2): `x` | `( <self> )`. `NESTED_GROUP` reaches
// back to `NESTED` through `Subgrammar(&NESTED)` — the cycle
// a by-value `Seq` slice could not express.
//
// `NESTED_GROUP` is the parenthesised alternative: an opening
// `(`, one `Subgrammar` hop back to the root `NESTED`, then the
// closing `)`. It contains exactly one `Subgrammar` node, so
// each nesting level of the input accounts for one hop.
static NESTED_GROUP: &[Node] = &[
Node::Punct('('),
Node::Subgrammar(&NESTED),
Node::Punct(')'),
];
// The two alternatives of the root: the parenthesised group
// (listed first) and the bare keyword `x` that ends the
// recursion.
static NESTED_CHOICES: &[Node] = &[
Node::Seq(NESTED_GROUP),
Node::Word(Word::keyword("x")),
];
// Root of the test grammar and the target of the back-reference
// in `NESTED_GROUP`.
static NESTED: Node = Node::Choice(NESTED_CHOICES);
/// Walk `input` from byte 0 against the recursive `NESTED` test
/// grammar using a fresh context, path, and per-byte buffer.
///
/// Before handing the walk result back, this asserts that the
/// context's `subgrammar_depth` is 0 again, so every test that
/// goes through this helper also checks that the depth counter
/// was restored — whether the walk matched or failed.
fn walk_nested(input: &str) -> NodeWalkResult {
    let mut ctx = WalkContext::new();
    let mut path = MatchedPath::new();
    let mut per_byte = Vec::new();

    let outcome = walk_node(input, 0, &NESTED, &mut ctx, &mut path, &mut per_byte);

    let depth_after_walk = ctx.subgrammar_depth;
    assert_eq!(
        depth_after_walk, 0,
        "subgrammar_depth must be restored to 0 after the walk",
    );
    outcome
}
/// The bare keyword and one, two, and three levels of
/// parenthesised nesting must all match the recursive grammar.
#[test]
fn subgrammar_walks_a_recursive_grammar() {
    const INPUTS: [&str; 4] = ["x", "(x)", "((x))", "(((x)))"];

    for input in INPUTS {
        let outcome = walk_nested(input);
        let matched = matches!(outcome, NodeWalkResult::Matched { .. });
        assert!(
            matched,
            "{input:?} should match the recursive Subgrammar grammar",
        );
    }
}
/// Boundary check on the allowed side: an `x` wrapped in exactly
/// `MAX_SUBGRAMMAR_DEPTH` parenthesised groups must still match.
#[test]
fn subgrammar_depth_cap_allows_exactly_the_limit() {
    let opens = "(".repeat(MAX_SUBGRAMMAR_DEPTH);
    let closes = ")".repeat(MAX_SUBGRAMMAR_DEPTH);
    let input = format!("{opens}x{closes}");

    let outcome = walk_nested(&input);
    assert!(
        matches!(outcome, NodeWalkResult::Matched { .. }),
        "exactly MAX_SUBGRAMMAR_DEPTH nested groups should still walk",
    );
}
/// Boundary check on the rejected side: one group more than
/// `MAX_SUBGRAMMAR_DEPTH` must fail with the
/// `parse.custom.expression_too_deep` validation error rather
/// than match or fail some other way.
#[test]
fn subgrammar_depth_cap_rejects_pathological_nesting() {
    let groups = MAX_SUBGRAMMAR_DEPTH + 1;
    let opens = "(".repeat(groups);
    let closes = ")".repeat(groups);
    let input = format!("{opens}x{closes}");

    let outcome = walk_nested(&input);
    if let NodeWalkResult::Failed {
        kind: FailureKind::Validation(err),
        ..
    } = &outcome
    {
        assert_eq!(err.message_key, "parse.custom.expression_too_deep");
    } else {
        panic!("expected an expression_too_deep failure, got {outcome:?}")
    }
}
/// Trivial factory — ignores the context. The memo behaviour
/// is keyed on the context, not the factory's output, so a