walker: add Subgrammar node + recursion-depth cap (ADR-0026 step 1)
New `Node::Subgrammar(&'static Node)` variant lets a named static grammar fragment recurse through a reference — `Seq` / `Choice` embed children by value and cannot close a cycle, but a `&'static Node` can point back at an enclosing fragment. This is the mechanism the stratified WHERE-expression grammar (ADR-0026 §2) recurses through. The walker counts active Subgrammar frames in `WalkContext::subgrammar_depth` and refuses past `MAX_SUBGRAMMAR_DEPTH` (64), surfacing a friendly `parse.custom.expression_too_deep` error instead of a stack overflow. Depth is saved/restored per frame so a speculatively-walked-then-rolled-back Choice branch leaves no residue. No grammar references the node yet; covered by walker unit tests with a small recursive `( x )` test grammar.
This commit is contained in:
@@ -292,6 +292,28 @@ pub enum Node {
|
||||
separator: Option<&'static Self>,
|
||||
min: usize,
|
||||
},
|
||||
/// Walks the referenced `&'static Node` once, mandatory
|
||||
/// (ADR-0026 §2). The reference indirection is what lets a
|
||||
/// named `static` grammar fragment appear inside its own
|
||||
/// subtree: a `Seq` / `Choice` embeds its children by value
|
||||
/// and so cannot close a cycle, but a `&'static Node`
|
||||
/// reference can point back at an enclosing fragment. This
|
||||
/// is the mechanism the stratified WHERE-expression grammar
|
||||
/// recurses through — the `( or_expr )` branch and the
|
||||
/// `not_expr` self-reference.
|
||||
///
|
||||
/// The walker counts active `Subgrammar` frames in
|
||||
/// `WalkContext::subgrammar_depth` and refuses past
|
||||
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
|
||||
/// nested input (`((((…))))`) fails with a friendly error
|
||||
/// rather than overflowing the parser stack.
|
||||
///
|
||||
/// The static counterpart of `DynamicSubgrammar`: that one
|
||||
/// builds a fresh node from the `WalkContext` at walk time;
|
||||
/// this one references a fixed fragment already in the
|
||||
/// grammar tree.
|
||||
#[allow(dead_code)]
|
||||
Subgrammar(&'static Self),
|
||||
/// Resolves at walk time using the active `WalkContext`.
|
||||
/// Phase D+ uses this for `column_value_list`. The factory
|
||||
/// is pure in `ctx`, so the walker memoizes the resolution
|
||||
|
||||
@@ -75,6 +75,13 @@ pub struct WalkContext<'a> {
|
||||
/// skipped from the value list because the dispatch path
|
||||
/// auto-fills them).
|
||||
pub user_listed_columns: Option<Vec<String>>,
|
||||
/// Count of active `Node::Subgrammar` frames on the walk
|
||||
/// stack (ADR-0026 §2). The walker increments on entry to a
|
||||
/// `Subgrammar`, restores the saved value on exit, and
|
||||
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
|
||||
/// pathologically nested expression fails with a friendly
|
||||
/// error instead of overflowing the process stack.
|
||||
pub subgrammar_depth: usize,
|
||||
}
|
||||
|
||||
impl<'a> WalkContext<'a> {
|
||||
@@ -100,6 +107,7 @@ impl<'a> WalkContext<'a> {
|
||||
pending_value_column: None,
|
||||
pending_hint_mode: None,
|
||||
user_listed_columns: None,
|
||||
subgrammar_depth: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+128
-1
@@ -35,6 +35,18 @@ use crate::dsl::walker::outcome::{
|
||||
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
|
||||
};
|
||||
|
||||
/// Maximum nesting of `Node::Subgrammar` frames (ADR-0026 §1).
|
||||
///
|
||||
/// The stratified WHERE-expression grammar descends one
|
||||
/// `Subgrammar` hop per precedence tier, plus a tier-stack per
|
||||
/// parenthesised group, so this bounds real expression nesting
|
||||
/// many parentheses deep — far past any hand-written filter.
|
||||
/// Its purpose is purely a stack-overflow guard: input nested
|
||||
/// past the cap (`((((…))))`) fails with a friendly
|
||||
/// `expression_too_deep` error instead of recursing until the
|
||||
/// process stack is exhausted.
|
||||
///
/// The cap is inclusive: `walk_subgrammar` refuses a frame only
/// when entering it would push the active-frame count above this
/// value, so exactly `MAX_SUBGRAMMAR_DEPTH` nested frames still
/// walk and the next one is rejected.
pub const MAX_SUBGRAMMAR_DEPTH: usize = 64;
|
||||
|
||||
/// Memo cache for `Node::DynamicSubgrammar` resolution.
|
||||
///
|
||||
/// A factory builds a `Node` from the active `WalkContext`; the
|
||||
@@ -199,6 +211,9 @@ fn walk_node_inner(
|
||||
kind: FailureKind::Mismatch { expected: vec![] },
|
||||
}
|
||||
}
|
||||
Node::Subgrammar(inner) => {
|
||||
walk_subgrammar(source, pos, inner, ctx, path, per_byte)
|
||||
}
|
||||
Node::DynamicSubgrammar(factory) => {
|
||||
// ADR-0024 §sub-grammars: resolve the inner Node at
|
||||
// walk time from the active `WalkContext`, then walk
|
||||
@@ -884,6 +899,48 @@ fn walk_optional(
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk a `&'static Node` reference once (ADR-0026 §2).
|
||||
///
|
||||
/// The reference indirection is what lets a named `static`
|
||||
/// grammar fragment recurse: `Seq` / `Choice` embed children by
|
||||
/// value and so cannot close a cycle, but a `Subgrammar` node
|
||||
/// holding a `&'static Node` can point back into an enclosing
|
||||
/// fragment. The stratified WHERE-expression grammar's
|
||||
/// `( or_expr )` branch and `not_expr` self-reference both
|
||||
/// recurse this way.
|
||||
///
|
||||
/// `WalkContext::subgrammar_depth` counts active frames. Past
|
||||
/// `MAX_SUBGRAMMAR_DEPTH` the walk fails with a friendly
|
||||
/// `expression_too_deep` validation error rather than
|
||||
/// overflowing the process stack. The depth is saved on entry
|
||||
/// and restored on exit unconditionally, so a speculatively-
|
||||
/// walked branch that a `Choice` later rolls back leaves the
|
||||
/// counter clean.
|
||||
fn walk_subgrammar(
|
||||
source: &str,
|
||||
pos: usize,
|
||||
inner: &'static Node,
|
||||
ctx: &mut WalkContext,
|
||||
path: &mut MatchedPath,
|
||||
per_byte: &mut Vec<ByteClass>,
|
||||
) -> NodeWalkResult {
|
||||
let saved_depth = ctx.subgrammar_depth;
|
||||
ctx.subgrammar_depth += 1;
|
||||
if ctx.subgrammar_depth > MAX_SUBGRAMMAR_DEPTH {
|
||||
ctx.subgrammar_depth = saved_depth;
|
||||
return NodeWalkResult::Failed {
|
||||
position: pos,
|
||||
kind: FailureKind::Validation(ValidationError {
|
||||
message_key: "parse.custom.expression_too_deep",
|
||||
args: Vec::new(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
let result = walk_node(source, pos, inner, ctx, path, per_byte);
|
||||
ctx.subgrammar_depth = saved_depth;
|
||||
result
|
||||
}
|
||||
|
||||
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
for e in src {
|
||||
if !dst.contains(&e) {
|
||||
@@ -894,9 +951,79 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{DYNAMIC_CACHE, resolve_dynamic};
|
||||
use super::{
|
||||
DYNAMIC_CACHE, FailureKind, MAX_SUBGRAMMAR_DEPTH, NodeWalkResult,
|
||||
resolve_dynamic, walk_node,
|
||||
};
|
||||
use crate::dsl::grammar::{Node, Word};
|
||||
use crate::dsl::walker::context::WalkContext;
|
||||
use crate::dsl::walker::outcome::MatchedPath;
|
||||
|
||||
// Recursive test grammar for the `Subgrammar` node
|
||||
// (ADR-0026 §2): `x` | `( <self> )`. `NESTED_GROUP` reaches
|
||||
// back to `NESTED` through `Subgrammar(&NESTED)` — the cycle
|
||||
// a by-value `Seq` slice could not express.
|
||||
// `NESTED_GROUP`: the parenthesised form — `(`, then the whole
// grammar again via the `Subgrammar` reference, then `)`.
static NESTED_GROUP: &[Node] = &[
|
||||
Node::Punct('('),
|
||||
Node::Subgrammar(&NESTED),
|
||||
Node::Punct(')'),
|
||||
];
|
||||
// `NESTED_CHOICES`: the two alternatives — the parenthesised
// group above, or the bare keyword `x` that ends the recursion.
static NESTED_CHOICES: &[Node] = &[
|
||||
Node::Seq(NESTED_GROUP),
|
||||
Node::Word(Word::keyword("x")),
|
||||
];
|
||||
// `NESTED`: root of the test grammar and the target of the
// `Subgrammar` back-reference inside `NESTED_GROUP`.
static NESTED: Node = Node::Choice(NESTED_CHOICES);
|
||||
|
||||
fn walk_nested(input: &str) -> NodeWalkResult {
|
||||
let mut ctx = WalkContext::new();
|
||||
let mut path = MatchedPath::new();
|
||||
let mut per_byte = Vec::new();
|
||||
let result =
|
||||
walk_node(input, 0, &NESTED, &mut ctx, &mut path, &mut per_byte);
|
||||
assert_eq!(
|
||||
ctx.subgrammar_depth, 0,
|
||||
"subgrammar_depth must be restored to 0 after the walk",
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
#[test]
fn subgrammar_walks_a_recursive_grammar() {
    // Zero through three levels of parentheses around the `x` leaf.
    let samples = ["x", "(x)", "((x))", "(((x)))"];
    for input in samples {
        let outcome = walk_nested(input);
        assert!(
            matches!(outcome, NodeWalkResult::Matched { .. }),
            "{input:?} should match the recursive Subgrammar grammar",
        );
    }
}
|
||||
|
||||
#[test]
fn subgrammar_depth_cap_allows_exactly_the_limit() {
    // Each `(` in the test grammar is followed by one `Subgrammar`
    // hop, so this input peaks at exactly the cap.
    let opens = "(".repeat(MAX_SUBGRAMMAR_DEPTH);
    let closes = ")".repeat(MAX_SUBGRAMMAR_DEPTH);
    let input = format!("{}x{}", opens, closes);

    let outcome = walk_nested(&input);
    assert!(
        matches!(outcome, NodeWalkResult::Matched { .. }),
        "exactly MAX_SUBGRAMMAR_DEPTH nested groups should still walk",
    );
}
|
||||
|
||||
#[test]
fn subgrammar_depth_cap_rejects_pathological_nesting() {
    // One group more than the cap permits.
    let groups = MAX_SUBGRAMMAR_DEPTH + 1;
    let opens = "(".repeat(groups);
    let closes = ")".repeat(groups);
    let input = format!("{}x{}", opens, closes);

    // Anything other than a validation failure is a test failure;
    // for a validation failure, pull out its message key to check.
    let message_key = match walk_nested(&input) {
        NodeWalkResult::Failed {
            kind: FailureKind::Validation(err),
            ..
        } => err.message_key,
        other => panic!("expected an expression_too_deep failure, got {other:?}"),
    };
    assert_eq!(message_key, "parse.custom.expression_too_deep");
}
|
||||
|
||||
/// Trivial factory — ignores the context. The memo behaviour
|
||||
/// is keyed on the context, not the factory's output, so a
|
||||
|
||||
@@ -183,6 +183,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
||||
("parse.custom.bind_type_mismatch", &["found", "expected"]),
|
||||
("parse.custom.change_column_flags_exclusive", &[]),
|
||||
("parse.custom.create_table_needs_pk", &[]),
|
||||
("parse.custom.expression_too_deep", &[]),
|
||||
("parse.custom.insert_form_a_missing_values", &["columns"]),
|
||||
("parse.custom.on_action_specified_twice", &["target"]),
|
||||
("parse.custom.replay_path_expected", &[]),
|
||||
|
||||
@@ -372,6 +372,11 @@ parse:
|
||||
replay_path_expected: "expected a path after `replay`"
|
||||
create_table_needs_pk: |-
|
||||
tables need at least one column. Add `with pk` for a default `id INTEGER PRIMARY KEY`, or `with pk <name>(<type>)` to choose. Use a comma-separated list for compound primary keys.
|
||||
# ADR-0026 §1: the recursion-depth guard on the
|
||||
# WHERE-expression grammar. Input nested past the cap
|
||||
# (`((((…))))`) stops here with a friendly error instead
|
||||
# of overflowing the parser stack.
|
||||
expression_too_deep: "expression nested too deeply"
|
||||
on_action_specified_twice: "`on {target}` specified twice"
|
||||
change_column_flags_exclusive: "`--force-conversion` and `--dont-convert` are mutually exclusive — pick one."
|
||||
unknown_type: "unknown type '{found}' (expected one of: {expected})"
|
||||
|
||||
Reference in New Issue
Block a user