Walker: memoize DynamicSubgrammar resolution to bound the Box::leak

Node::DynamicSubgrammar factories build a Node from the WalkContext and
must Box::leak it (the Node enum's combinator children are &'static).
Leaking per walk grew unbounded under per-keystroke completion
(handoff-12 §2.1).

resolve_dynamic now memoizes on the schema state a factory reads
(table columns, current column, user-listed columns) keyed by factory
fn-pointer. Each distinct value-list shape leaks exactly once — total
leak bounded by distinct (schema × form) combinations, not keystroke
count. TableColumn gains Hash for the cache key.

The handoff's original arena sketch needed a lifetime-generic Node
(major refactor); memoization gets the same bound without it.
This commit is contained in:
claude@clouddev1
2026-05-15 22:06:33 +00:00
parent 911a537a83
commit 9bbb96e735
2 changed files with 122 additions and 7 deletions
+1 -1
View File
@@ -49,7 +49,7 @@ pub struct SchemaCache {
/// One column's user-facing type info, scoped to a table
/// (ADR-0024 §Phase D, §WalkContext).
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TableColumn {
pub name: String,
pub user_type: crate::dsl::types::Type,
+121 -6
View File
@@ -21,6 +21,10 @@
//! mid-shape. Surfaces as `WalkOutcome::Mismatch` or
//! `WalkOutcome::ValidationFailed` at the top level.
use std::collections::HashMap;
use std::sync::{LazyLock, Mutex};
use crate::completion::TableColumn;
use crate::dsl::grammar::{HighlightClass, Node, ValidationError};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::lex_helpers::{
@@ -31,6 +35,57 @@ use crate::dsl::walker::outcome::{
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
};
/// Memo cache for `Node::DynamicSubgrammar` resolution.
///
/// A factory builds a `Node` from the active `WalkContext`; the
/// resolved Node's combinator children are `&'static`, so it
/// must be `Box::leak`ed. Leaking per walk grows unbounded
/// under per-keystroke completion. Memoizing on the schema
/// state the factory reads means each *distinct* value-list
/// shape leaks exactly once — the total leak is bounded by the
/// number of distinct (table-columns × form) combinations, not
/// by keystroke count (handoff-13 §memoization).
#[derive(PartialEq, Eq, Hash)]
struct DynamicKey {
/// The factory's function-pointer address. Distinguishes
/// `column_value_list` from `current_column_value`.
factory: usize,
/// Every `WalkContext` field a dynamic factory may read.
/// A superset of any single factory's true dependencies —
/// sound (never returns a wrong shape), at worst slightly
/// over-keyed (an extra leak when an unread field differs).
current_table_columns: Option<Vec<TableColumn>>,
current_column: Option<TableColumn>,
user_listed_columns: Option<Vec<String>>,
}
static DYNAMIC_CACHE: LazyLock<Mutex<HashMap<DynamicKey, &'static Node>>> =
LazyLock::new(|| Mutex::new(HashMap::new()));
/// Resolve a `DynamicSubgrammar` factory to a `&'static Node`,
/// reusing a previously-leaked Node when the factory's inputs
/// match a cached entry.
fn resolve_dynamic(
factory: fn(&WalkContext) -> Node,
ctx: &WalkContext,
) -> &'static Node {
let key = DynamicKey {
factory: factory as usize,
current_table_columns: ctx.current_table_columns.clone(),
current_column: ctx.current_column.clone(),
user_listed_columns: ctx.user_listed_columns.clone(),
};
let mut cache = DYNAMIC_CACHE
.lock()
.expect("dynamic-subgrammar cache mutex poisoned");
if let Some(&node) = cache.get(&key) {
return node;
}
let resolved: &'static Node = Box::leak(Box::new(factory(ctx)));
cache.insert(key, resolved);
resolved
}
#[derive(Debug, Clone)]
pub enum NodeWalkResult {
Matched {
@@ -146,12 +201,15 @@ fn walk_node_inner(
}
Node::DynamicSubgrammar(factory) => {
// ADR-0024 §sub-grammars: resolve the inner Node at
// walk time using the active `WalkContext`, then
// recursively walk it. `Box::leak` per-walk gives the
// inner static-slice fields (Choice/Seq) the lifetime
// they require; the leak is bounded by command-shape
// complexity per walk.
let resolved: &'static Node = Box::leak(Box::new(factory(ctx)));
// walk time from the active `WalkContext`, then walk
// it. The resolved Node's combinator children are
// `&'static`, so a runtime-built Node has to be
// leaked. `resolve_dynamic` memoizes on the schema
// state the factory reads, so each *distinct*
// value-list shape leaks once — the total leak is
// bounded by schema size, not by keystroke count
// (handoff-13 §memoization).
let resolved = resolve_dynamic(*factory, ctx);
walk_node(source, pos, resolved, ctx, path, per_byte)
}
Node::TypedValueSlot {
@@ -819,3 +877,60 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
}
}
}
#[cfg(test)]
mod tests {
use super::{DYNAMIC_CACHE, resolve_dynamic};
use crate::dsl::grammar::{Node, Word};
use crate::dsl::walker::context::WalkContext;
/// Trivial factory — ignores the context. The memo behaviour
/// is keyed on the context, not the factory's output, so a
/// constant factory is enough to exercise the cache.
fn const_factory(_ctx: &WalkContext) -> Node {
Node::Word(Word::keyword("memo_probe"))
}
#[test]
fn resolve_dynamic_memoizes_identical_context() {
let ctx = WalkContext::new();
let first = resolve_dynamic(const_factory, &ctx);
let second = resolve_dynamic(const_factory, &ctx);
// Same factory + same context → the leaked Node is
// reused (pointer identity), so repeated per-keystroke
// walks against an unchanged schema leak only once.
assert!(
std::ptr::eq(first, second),
"identical context should hit the memo cache",
);
}
#[test]
fn resolve_dynamic_distinct_context_does_not_share() {
let ctx_a = WalkContext::new();
let mut ctx_b = WalkContext::new();
ctx_b.user_listed_columns = Some(vec!["Col".to_string()]);
let a = resolve_dynamic(const_factory, &ctx_a);
let b = resolve_dynamic(const_factory, &ctx_b);
// Different context state → different cache key → a
// separate entry (and a separate one-time leak).
assert!(
!std::ptr::eq(a, b),
"distinct context must not collide in the memo cache",
);
}
#[test]
fn resolve_dynamic_cache_is_populated() {
let ctx = WalkContext::new();
let _ = resolve_dynamic(const_factory, &ctx);
let populated = !DYNAMIC_CACHE
.lock()
.expect("cache lock")
.is_empty();
assert!(
populated,
"resolve_dynamic should populate the memo cache",
);
}
}