walker: add Subgrammar node + recursion-depth cap (ADR-0026 step 1)
New `Node::Subgrammar(&'static Node)` variant lets a named static grammar fragment recurse through a reference — `Seq` / `Choice` embed children by value and cannot close a cycle, but a `&'static Node` can point back at an enclosing fragment. This is the mechanism the stratified WHERE-expression grammar (ADR-0026 §2) recurses through. The walker counts active Subgrammar frames in `WalkContext::subgrammar_depth` and refuses past `MAX_SUBGRAMMAR_DEPTH` (64), surfacing a friendly `parse.custom.expression_too_deep` error instead of a stack overflow. Depth is saved/restored per frame so a speculatively-walked-then-rolled-back Choice branch leaves no residue. No grammar references the node yet; covered by walker unit tests with a small recursive `( x )` test grammar.
This commit is contained in:
@@ -75,6 +75,13 @@ pub struct WalkContext<'a> {
|
||||
/// skipped from the value list because the dispatch path
|
||||
/// auto-fills them).
|
||||
pub user_listed_columns: Option<Vec<String>>,
|
||||
/// Count of active `Node::Subgrammar` frames on the walk
|
||||
/// stack (ADR-0026 §2). The walker increments on entry to a
|
||||
/// `Subgrammar`, restores the saved value on exit, and
|
||||
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
|
||||
/// pathologically nested expression fails with a friendly
|
||||
/// error instead of overflowing the process stack.
|
||||
pub subgrammar_depth: usize,
|
||||
}
|
||||
|
||||
impl<'a> WalkContext<'a> {
|
||||
@@ -100,6 +107,7 @@ impl<'a> WalkContext<'a> {
|
||||
pending_value_column: None,
|
||||
pending_hint_mode: None,
|
||||
user_listed_columns: None,
|
||||
subgrammar_depth: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+128
-1
@@ -35,6 +35,18 @@ use crate::dsl::walker::outcome::{
|
||||
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
|
||||
};
|
||||
|
||||
/// Maximum nesting of `Node::Subgrammar` frames (ADR-0026 §1).
|
||||
///
|
||||
/// The stratified WHERE-expression grammar descends one
|
||||
/// `Subgrammar` hop per precedence tier, plus a tier-stack per
|
||||
/// parenthesised group, so this bounds real expression nesting
|
||||
/// many parentheses deep — far past any hand-written filter.
|
||||
/// Its purpose is purely a stack-overflow guard: input nested
|
||||
/// past the cap (`((((…))))`) fails with a friendly
|
||||
/// `expression_too_deep` error instead of recursing until the
|
||||
/// process stack is exhausted.
|
||||
pub const MAX_SUBGRAMMAR_DEPTH: usize = 64;
|
||||
|
||||
/// Memo cache for `Node::DynamicSubgrammar` resolution.
|
||||
///
|
||||
/// A factory builds a `Node` from the active `WalkContext`; the
|
||||
@@ -199,6 +211,9 @@ fn walk_node_inner(
|
||||
kind: FailureKind::Mismatch { expected: vec![] },
|
||||
}
|
||||
}
|
||||
Node::Subgrammar(inner) => {
|
||||
walk_subgrammar(source, pos, inner, ctx, path, per_byte)
|
||||
}
|
||||
Node::DynamicSubgrammar(factory) => {
|
||||
// ADR-0024 §sub-grammars: resolve the inner Node at
|
||||
// walk time from the active `WalkContext`, then walk
|
||||
@@ -884,6 +899,48 @@ fn walk_optional(
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk a `&'static Node` reference once (ADR-0026 §2).
|
||||
///
|
||||
/// The reference indirection is what lets a named `static`
|
||||
/// grammar fragment recurse: `Seq` / `Choice` embed children by
|
||||
/// value and so cannot close a cycle, but a `Subgrammar` node
|
||||
/// holding a `&'static Node` can point back into an enclosing
|
||||
/// fragment. The stratified WHERE-expression grammar's
|
||||
/// `( or_expr )` branch and `not_expr` self-reference both
|
||||
/// recurse this way.
|
||||
///
|
||||
/// `WalkContext::subgrammar_depth` counts active frames. Past
|
||||
/// `MAX_SUBGRAMMAR_DEPTH` the walk fails with a friendly
|
||||
/// `expression_too_deep` validation error rather than
|
||||
/// overflowing the process stack. The depth is saved on entry
|
||||
/// and restored on exit unconditionally, so a speculatively-
|
||||
/// walked branch that a `Choice` later rolls back leaves the
|
||||
/// counter clean.
|
||||
fn walk_subgrammar(
|
||||
source: &str,
|
||||
pos: usize,
|
||||
inner: &'static Node,
|
||||
ctx: &mut WalkContext,
|
||||
path: &mut MatchedPath,
|
||||
per_byte: &mut Vec<ByteClass>,
|
||||
) -> NodeWalkResult {
|
||||
let saved_depth = ctx.subgrammar_depth;
|
||||
ctx.subgrammar_depth += 1;
|
||||
if ctx.subgrammar_depth > MAX_SUBGRAMMAR_DEPTH {
|
||||
ctx.subgrammar_depth = saved_depth;
|
||||
return NodeWalkResult::Failed {
|
||||
position: pos,
|
||||
kind: FailureKind::Validation(ValidationError {
|
||||
message_key: "parse.custom.expression_too_deep",
|
||||
args: Vec::new(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
let result = walk_node(source, pos, inner, ctx, path, per_byte);
|
||||
ctx.subgrammar_depth = saved_depth;
|
||||
result
|
||||
}
|
||||
|
||||
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
for e in src {
|
||||
if !dst.contains(&e) {
|
||||
@@ -894,9 +951,79 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{DYNAMIC_CACHE, resolve_dynamic};
|
||||
use super::{
|
||||
DYNAMIC_CACHE, FailureKind, MAX_SUBGRAMMAR_DEPTH, NodeWalkResult,
|
||||
resolve_dynamic, walk_node,
|
||||
};
|
||||
use crate::dsl::grammar::{Node, Word};
|
||||
use crate::dsl::walker::context::WalkContext;
|
||||
use crate::dsl::walker::outcome::MatchedPath;
|
||||
|
||||
// Recursive test grammar for the `Subgrammar` node
|
||||
// (ADR-0026 §2): `x` | `( <self> )`. `NESTED_GROUP` reaches
|
||||
// back to `NESTED` through `Subgrammar(&NESTED)` — the cycle
|
||||
// a by-value `Seq` slice could not express.
|
||||
static NESTED_GROUP: &[Node] = &[
|
||||
Node::Punct('('),
|
||||
Node::Subgrammar(&NESTED),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
static NESTED_CHOICES: &[Node] = &[
|
||||
Node::Seq(NESTED_GROUP),
|
||||
Node::Word(Word::keyword("x")),
|
||||
];
|
||||
static NESTED: Node = Node::Choice(NESTED_CHOICES);
|
||||
|
||||
fn walk_nested(input: &str) -> NodeWalkResult {
|
||||
let mut ctx = WalkContext::new();
|
||||
let mut path = MatchedPath::new();
|
||||
let mut per_byte = Vec::new();
|
||||
let result =
|
||||
walk_node(input, 0, &NESTED, &mut ctx, &mut path, &mut per_byte);
|
||||
assert_eq!(
|
||||
ctx.subgrammar_depth, 0,
|
||||
"subgrammar_depth must be restored to 0 after the walk",
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subgrammar_walks_a_recursive_grammar() {
|
||||
for input in ["x", "(x)", "((x))", "(((x)))"] {
|
||||
assert!(
|
||||
matches!(walk_nested(input), NodeWalkResult::Matched { .. }),
|
||||
"{input:?} should match the recursive Subgrammar grammar",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subgrammar_depth_cap_allows_exactly_the_limit() {
|
||||
let input = format!(
|
||||
"{}x{}",
|
||||
"(".repeat(MAX_SUBGRAMMAR_DEPTH),
|
||||
")".repeat(MAX_SUBGRAMMAR_DEPTH),
|
||||
);
|
||||
assert!(
|
||||
matches!(walk_nested(&input), NodeWalkResult::Matched { .. }),
|
||||
"exactly MAX_SUBGRAMMAR_DEPTH nested groups should still walk",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subgrammar_depth_cap_rejects_pathological_nesting() {
|
||||
let over = MAX_SUBGRAMMAR_DEPTH + 1;
|
||||
let input = format!("{}x{}", "(".repeat(over), ")".repeat(over));
|
||||
match walk_nested(&input) {
|
||||
NodeWalkResult::Failed {
|
||||
kind: FailureKind::Validation(err),
|
||||
..
|
||||
} => assert_eq!(err.message_key, "parse.custom.expression_too_deep"),
|
||||
other => {
|
||||
panic!("expected an expression_too_deep failure, got {other:?}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trivial factory — ignores the context. The memo behaviour
|
||||
/// is keyed on the context, not the factory's output, so a
|
||||
|
||||
Reference in New Issue
Block a user