walker: add Subgrammar node + recursion-depth cap (ADR-0026 step 1)
New `Node::Subgrammar(&'static Node)` variant lets a named static grammar fragment recurse through a reference — `Seq` / `Choice` embed children by value and cannot close a cycle, but a `&'static Node` can point back at an enclosing fragment. This is the mechanism the stratified WHERE-expression grammar (ADR-0026 §2) recurses through. The walker counts active Subgrammar frames in `WalkContext::subgrammar_depth` and refuses past `MAX_SUBGRAMMAR_DEPTH` (64), surfacing a friendly `parse.custom.expression_too_deep` error instead of a stack overflow. Depth is saved/restored per frame so a speculatively-walked-then-rolled-back Choice branch leaves no residue. No grammar references the node yet; covered by walker unit tests with a small recursive `( x )` test grammar.
This commit is contained in:
@@ -292,6 +292,28 @@ pub enum Node {
|
|||||||
separator: Option<&'static Self>,
|
separator: Option<&'static Self>,
|
||||||
min: usize,
|
min: usize,
|
||||||
},
|
},
|
||||||
|
/// Walks the referenced `&'static Node` once, mandatory
|
||||||
|
/// (ADR-0026 §2). The reference indirection is what lets a
|
||||||
|
/// named `static` grammar fragment appear inside its own
|
||||||
|
/// subtree: a `Seq` / `Choice` embeds its children by value
|
||||||
|
/// and so cannot close a cycle, but a `&'static Node`
|
||||||
|
/// reference can point back at an enclosing fragment. This
|
||||||
|
/// is the mechanism the stratified WHERE-expression grammar
|
||||||
|
/// recurses through — the `( or_expr )` branch and the
|
||||||
|
/// `not_expr` self-reference.
|
||||||
|
///
|
||||||
|
/// The walker counts active `Subgrammar` frames in
|
||||||
|
/// `WalkContext::subgrammar_depth` and refuses past
|
||||||
|
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
|
||||||
|
/// nested input (`((((…))))`) fails with a friendly error
|
||||||
|
/// rather than overflowing the parser stack.
|
||||||
|
///
|
||||||
|
/// The static counterpart of `DynamicSubgrammar`: that one
|
||||||
|
/// builds a fresh node from the `WalkContext` at walk time;
|
||||||
|
/// this one references a fixed fragment already in the
|
||||||
|
/// grammar tree.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
Subgrammar(&'static Self),
|
||||||
/// Resolves at walk time using the active `WalkContext`.
|
/// Resolves at walk time using the active `WalkContext`.
|
||||||
/// Phase D+ uses this for `column_value_list`. The factory
|
/// Phase D+ uses this for `column_value_list`. The factory
|
||||||
/// is pure in `ctx`, so the walker memoizes the resolution
|
/// is pure in `ctx`, so the walker memoizes the resolution
|
||||||
|
|||||||
@@ -75,6 +75,13 @@ pub struct WalkContext<'a> {
|
|||||||
/// skipped from the value list because the dispatch path
|
/// skipped from the value list because the dispatch path
|
||||||
/// auto-fills them).
|
/// auto-fills them).
|
||||||
pub user_listed_columns: Option<Vec<String>>,
|
pub user_listed_columns: Option<Vec<String>>,
|
||||||
|
/// Count of active `Node::Subgrammar` frames on the walk
|
||||||
|
/// stack (ADR-0026 §2). The walker increments on entry to a
|
||||||
|
/// `Subgrammar`, restores the saved value on exit, and
|
||||||
|
/// refuses past `driver::MAX_SUBGRAMMAR_DEPTH` so a
|
||||||
|
/// pathologically nested expression fails with a friendly
|
||||||
|
/// error instead of overflowing the process stack.
|
||||||
|
pub subgrammar_depth: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> WalkContext<'a> {
|
impl<'a> WalkContext<'a> {
|
||||||
@@ -100,6 +107,7 @@ impl<'a> WalkContext<'a> {
|
|||||||
pending_value_column: None,
|
pending_value_column: None,
|
||||||
pending_hint_mode: None,
|
pending_hint_mode: None,
|
||||||
user_listed_columns: None,
|
user_listed_columns: None,
|
||||||
|
subgrammar_depth: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+128
-1
@@ -35,6 +35,18 @@ use crate::dsl::walker::outcome::{
|
|||||||
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
|
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Maximum nesting of `Node::Subgrammar` frames (ADR-0026 §1).
|
||||||
|
///
|
||||||
|
/// The stratified WHERE-expression grammar descends one
|
||||||
|
/// `Subgrammar` hop per precedence tier, plus a tier-stack per
|
||||||
|
/// parenthesised group, so this bounds real expression nesting
|
||||||
|
/// many parentheses deep — far past any hand-written filter.
|
||||||
|
/// Its purpose is purely a stack-overflow guard: input nested
|
||||||
|
/// past the cap (`((((…))))`) fails with a friendly
|
||||||
|
/// `expression_too_deep` error instead of recursing until the
|
||||||
|
/// process stack is exhausted.
|
||||||
|
pub const MAX_SUBGRAMMAR_DEPTH: usize = 64;
|
||||||
|
|
||||||
/// Memo cache for `Node::DynamicSubgrammar` resolution.
|
/// Memo cache for `Node::DynamicSubgrammar` resolution.
|
||||||
///
|
///
|
||||||
/// A factory builds a `Node` from the active `WalkContext`; the
|
/// A factory builds a `Node` from the active `WalkContext`; the
|
||||||
@@ -199,6 +211,9 @@ fn walk_node_inner(
|
|||||||
kind: FailureKind::Mismatch { expected: vec![] },
|
kind: FailureKind::Mismatch { expected: vec![] },
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Node::Subgrammar(inner) => {
|
||||||
|
walk_subgrammar(source, pos, inner, ctx, path, per_byte)
|
||||||
|
}
|
||||||
Node::DynamicSubgrammar(factory) => {
|
Node::DynamicSubgrammar(factory) => {
|
||||||
// ADR-0024 §sub-grammars: resolve the inner Node at
|
// ADR-0024 §sub-grammars: resolve the inner Node at
|
||||||
// walk time from the active `WalkContext`, then walk
|
// walk time from the active `WalkContext`, then walk
|
||||||
@@ -884,6 +899,48 @@ fn walk_optional(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Walk a `&'static Node` reference once (ADR-0026 §2).
|
||||||
|
///
|
||||||
|
/// The reference indirection is what lets a named `static`
|
||||||
|
/// grammar fragment recurse: `Seq` / `Choice` embed children by
|
||||||
|
/// value and so cannot close a cycle, but a `Subgrammar` node
|
||||||
|
/// holding a `&'static Node` can point back into an enclosing
|
||||||
|
/// fragment. The stratified WHERE-expression grammar's
|
||||||
|
/// `( or_expr )` branch and `not_expr` self-reference both
|
||||||
|
/// recurse this way.
|
||||||
|
///
|
||||||
|
/// `WalkContext::subgrammar_depth` counts active frames. Past
|
||||||
|
/// `MAX_SUBGRAMMAR_DEPTH` the walk fails with a friendly
|
||||||
|
/// `expression_too_deep` validation error rather than
|
||||||
|
/// overflowing the process stack. The depth is saved on entry
|
||||||
|
/// and restored on exit unconditionally, so a speculatively-
|
||||||
|
/// walked branch that a `Choice` later rolls back leaves the
|
||||||
|
/// counter clean.
|
||||||
|
fn walk_subgrammar(
|
||||||
|
source: &str,
|
||||||
|
pos: usize,
|
||||||
|
inner: &'static Node,
|
||||||
|
ctx: &mut WalkContext,
|
||||||
|
path: &mut MatchedPath,
|
||||||
|
per_byte: &mut Vec<ByteClass>,
|
||||||
|
) -> NodeWalkResult {
|
||||||
|
let saved_depth = ctx.subgrammar_depth;
|
||||||
|
ctx.subgrammar_depth += 1;
|
||||||
|
if ctx.subgrammar_depth > MAX_SUBGRAMMAR_DEPTH {
|
||||||
|
ctx.subgrammar_depth = saved_depth;
|
||||||
|
return NodeWalkResult::Failed {
|
||||||
|
position: pos,
|
||||||
|
kind: FailureKind::Validation(ValidationError {
|
||||||
|
message_key: "parse.custom.expression_too_deep",
|
||||||
|
args: Vec::new(),
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let result = walk_node(source, pos, inner, ctx, path, per_byte);
|
||||||
|
ctx.subgrammar_depth = saved_depth;
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||||
for e in src {
|
for e in src {
|
||||||
if !dst.contains(&e) {
|
if !dst.contains(&e) {
|
||||||
@@ -894,9 +951,79 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{DYNAMIC_CACHE, resolve_dynamic};
|
use super::{
|
||||||
|
DYNAMIC_CACHE, FailureKind, MAX_SUBGRAMMAR_DEPTH, NodeWalkResult,
|
||||||
|
resolve_dynamic, walk_node,
|
||||||
|
};
|
||||||
use crate::dsl::grammar::{Node, Word};
|
use crate::dsl::grammar::{Node, Word};
|
||||||
use crate::dsl::walker::context::WalkContext;
|
use crate::dsl::walker::context::WalkContext;
|
||||||
|
use crate::dsl::walker::outcome::MatchedPath;
|
||||||
|
|
||||||
|
// Recursive test grammar for the `Subgrammar` node
|
||||||
|
// (ADR-0026 §2): `x` | `( <self> )`. `NESTED_GROUP` reaches
|
||||||
|
// back to `NESTED` through `Subgrammar(&NESTED)` — the cycle
|
||||||
|
// a by-value `Seq` slice could not express.
|
||||||
|
static NESTED_GROUP: &[Node] = &[
|
||||||
|
Node::Punct('('),
|
||||||
|
Node::Subgrammar(&NESTED),
|
||||||
|
Node::Punct(')'),
|
||||||
|
];
|
||||||
|
static NESTED_CHOICES: &[Node] = &[
|
||||||
|
Node::Seq(NESTED_GROUP),
|
||||||
|
Node::Word(Word::keyword("x")),
|
||||||
|
];
|
||||||
|
static NESTED: Node = Node::Choice(NESTED_CHOICES);
|
||||||
|
|
||||||
|
fn walk_nested(input: &str) -> NodeWalkResult {
|
||||||
|
let mut ctx = WalkContext::new();
|
||||||
|
let mut path = MatchedPath::new();
|
||||||
|
let mut per_byte = Vec::new();
|
||||||
|
let result =
|
||||||
|
walk_node(input, 0, &NESTED, &mut ctx, &mut path, &mut per_byte);
|
||||||
|
assert_eq!(
|
||||||
|
ctx.subgrammar_depth, 0,
|
||||||
|
"subgrammar_depth must be restored to 0 after the walk",
|
||||||
|
);
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn subgrammar_walks_a_recursive_grammar() {
|
||||||
|
for input in ["x", "(x)", "((x))", "(((x)))"] {
|
||||||
|
assert!(
|
||||||
|
matches!(walk_nested(input), NodeWalkResult::Matched { .. }),
|
||||||
|
"{input:?} should match the recursive Subgrammar grammar",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn subgrammar_depth_cap_allows_exactly_the_limit() {
|
||||||
|
let input = format!(
|
||||||
|
"{}x{}",
|
||||||
|
"(".repeat(MAX_SUBGRAMMAR_DEPTH),
|
||||||
|
")".repeat(MAX_SUBGRAMMAR_DEPTH),
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
matches!(walk_nested(&input), NodeWalkResult::Matched { .. }),
|
||||||
|
"exactly MAX_SUBGRAMMAR_DEPTH nested groups should still walk",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn subgrammar_depth_cap_rejects_pathological_nesting() {
|
||||||
|
let over = MAX_SUBGRAMMAR_DEPTH + 1;
|
||||||
|
let input = format!("{}x{}", "(".repeat(over), ")".repeat(over));
|
||||||
|
match walk_nested(&input) {
|
||||||
|
NodeWalkResult::Failed {
|
||||||
|
kind: FailureKind::Validation(err),
|
||||||
|
..
|
||||||
|
} => assert_eq!(err.message_key, "parse.custom.expression_too_deep"),
|
||||||
|
other => {
|
||||||
|
panic!("expected an expression_too_deep failure, got {other:?}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Trivial factory — ignores the context. The memo behaviour
|
/// Trivial factory — ignores the context. The memo behaviour
|
||||||
/// is keyed on the context, not the factory's output, so a
|
/// is keyed on the context, not the factory's output, so a
|
||||||
|
|||||||
@@ -183,6 +183,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
|||||||
("parse.custom.bind_type_mismatch", &["found", "expected"]),
|
("parse.custom.bind_type_mismatch", &["found", "expected"]),
|
||||||
("parse.custom.change_column_flags_exclusive", &[]),
|
("parse.custom.change_column_flags_exclusive", &[]),
|
||||||
("parse.custom.create_table_needs_pk", &[]),
|
("parse.custom.create_table_needs_pk", &[]),
|
||||||
|
("parse.custom.expression_too_deep", &[]),
|
||||||
("parse.custom.insert_form_a_missing_values", &["columns"]),
|
("parse.custom.insert_form_a_missing_values", &["columns"]),
|
||||||
("parse.custom.on_action_specified_twice", &["target"]),
|
("parse.custom.on_action_specified_twice", &["target"]),
|
||||||
("parse.custom.replay_path_expected", &[]),
|
("parse.custom.replay_path_expected", &[]),
|
||||||
|
|||||||
@@ -372,6 +372,11 @@ parse:
|
|||||||
replay_path_expected: "expected a path after `replay`"
|
replay_path_expected: "expected a path after `replay`"
|
||||||
create_table_needs_pk: |-
|
create_table_needs_pk: |-
|
||||||
tables need at least one column. Add `with pk` for a default `id INTEGER PRIMARY KEY`, or `with pk <name>(<type>)` to choose. Use a comma-separated list for compound primary keys.
|
tables need at least one column. Add `with pk` for a default `id INTEGER PRIMARY KEY`, or `with pk <name>(<type>)` to choose. Use a comma-separated list for compound primary keys.
|
||||||
|
# ADR-0026 §1: the recursion-depth guard on the
|
||||||
|
# WHERE-expression grammar. Input nested past the cap
|
||||||
|
# (`((((…))))`) stops here with a friendly error instead
|
||||||
|
# of overflowing the parser stack.
|
||||||
|
expression_too_deep: "expression nested too deeply"
|
||||||
on_action_specified_twice: "`on {target}` specified twice"
|
on_action_specified_twice: "`on {target}` specified twice"
|
||||||
change_column_flags_exclusive: "`--force-conversion` and `--dont-convert` are mutually exclusive — pick one."
|
change_column_flags_exclusive: "`--force-conversion` and `--dont-convert` are mutually exclusive — pick one."
|
||||||
unknown_type: "unknown type '{found}' (expected one of: {expected})"
|
unknown_type: "unknown type '{found}' (expected one of: {expected})"
|
||||||
|
|||||||
Reference in New Issue
Block a user