Walker: memoize DynamicSubgrammar resolution to bound the Box::leak
Node::DynamicSubgrammar factories build a Node from the WalkContext and must Box::leak it (the Node enum's combinator children are &'static). Leaking per walk grew unbounded under per-keystroke completion (handoff-12 §2.1). resolve_dynamic now memoizes on the schema state a factory reads (table columns, current column, user-listed columns) keyed by factory fn-pointer. Each distinct value-list shape leaks exactly once — total leak bounded by distinct (schema × form) combinations, not keystroke count. TableColumn gains Hash for the cache key. The handoff's original arena sketch needed a lifetime-generic Node (major refactor); memoization gets the same bound without it.
This commit is contained in:
+1
-1
@@ -49,7 +49,7 @@ pub struct SchemaCache {
|
||||
|
||||
/// One column's user-facing type info, scoped to a table
|
||||
/// (ADR-0024 §Phase D, §WalkContext).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct TableColumn {
|
||||
pub name: String,
|
||||
pub user_type: crate::dsl::types::Type,
|
||||
|
||||
+121
-6
@@ -21,6 +21,10 @@
|
||||
//! mid-shape. Surfaces as `WalkOutcome::Mismatch` or
|
||||
//! `WalkOutcome::ValidationFailed` at the top level.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{LazyLock, Mutex};
|
||||
|
||||
use crate::completion::TableColumn;
|
||||
use crate::dsl::grammar::{HighlightClass, Node, ValidationError};
|
||||
use crate::dsl::walker::context::WalkContext;
|
||||
use crate::dsl::walker::lex_helpers::{
|
||||
@@ -31,6 +35,57 @@ use crate::dsl::walker::outcome::{
|
||||
ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath,
|
||||
};
|
||||
|
||||
/// Memo cache for `Node::DynamicSubgrammar` resolution.
|
||||
///
|
||||
/// A factory builds a `Node` from the active `WalkContext`; the
|
||||
/// resolved Node's combinator children are `&'static`, so it
|
||||
/// must be `Box::leak`ed. Leaking per walk grows unbounded
|
||||
/// under per-keystroke completion. Memoizing on the schema
|
||||
/// state the factory reads means each *distinct* value-list
|
||||
/// shape leaks exactly once — the total leak is bounded by the
|
||||
/// number of distinct (table-columns × form) combinations, not
|
||||
/// by keystroke count (handoff-13 §memoization).
|
||||
#[derive(PartialEq, Eq, Hash)]
|
||||
struct DynamicKey {
|
||||
/// The factory's function-pointer address. Distinguishes
|
||||
/// `column_value_list` from `current_column_value`.
|
||||
factory: usize,
|
||||
/// Every `WalkContext` field a dynamic factory may read.
|
||||
/// A superset of any single factory's true dependencies —
|
||||
/// sound (never returns a wrong shape), at worst slightly
|
||||
/// over-keyed (an extra leak when an unread field differs).
|
||||
current_table_columns: Option<Vec<TableColumn>>,
|
||||
current_column: Option<TableColumn>,
|
||||
user_listed_columns: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
static DYNAMIC_CACHE: LazyLock<Mutex<HashMap<DynamicKey, &'static Node>>> =
|
||||
LazyLock::new(|| Mutex::new(HashMap::new()));
|
||||
|
||||
/// Resolve a `DynamicSubgrammar` factory to a `&'static Node`,
|
||||
/// reusing a previously-leaked Node when the factory's inputs
|
||||
/// match a cached entry.
|
||||
fn resolve_dynamic(
|
||||
factory: fn(&WalkContext) -> Node,
|
||||
ctx: &WalkContext,
|
||||
) -> &'static Node {
|
||||
let key = DynamicKey {
|
||||
factory: factory as usize,
|
||||
current_table_columns: ctx.current_table_columns.clone(),
|
||||
current_column: ctx.current_column.clone(),
|
||||
user_listed_columns: ctx.user_listed_columns.clone(),
|
||||
};
|
||||
let mut cache = DYNAMIC_CACHE
|
||||
.lock()
|
||||
.expect("dynamic-subgrammar cache mutex poisoned");
|
||||
if let Some(&node) = cache.get(&key) {
|
||||
return node;
|
||||
}
|
||||
let resolved: &'static Node = Box::leak(Box::new(factory(ctx)));
|
||||
cache.insert(key, resolved);
|
||||
resolved
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum NodeWalkResult {
|
||||
Matched {
|
||||
@@ -146,12 +201,15 @@ fn walk_node_inner(
|
||||
}
|
||||
Node::DynamicSubgrammar(factory) => {
|
||||
// ADR-0024 §sub-grammars: resolve the inner Node at
|
||||
// walk time using the active `WalkContext`, then
|
||||
// recursively walk it. `Box::leak` per-walk gives the
|
||||
// inner static-slice fields (Choice/Seq) the lifetime
|
||||
// they require; the leak is bounded by command-shape
|
||||
// complexity per walk.
|
||||
let resolved: &'static Node = Box::leak(Box::new(factory(ctx)));
|
||||
// walk time from the active `WalkContext`, then walk
|
||||
// it. The resolved Node's combinator children are
|
||||
// `&'static`, so a runtime-built Node has to be
|
||||
// leaked. `resolve_dynamic` memoizes on the schema
|
||||
// state the factory reads, so each *distinct*
|
||||
// value-list shape leaks once — the total leak is
|
||||
// bounded by schema size, not by keystroke count
|
||||
// (handoff-13 §memoization).
|
||||
let resolved = resolve_dynamic(*factory, ctx);
|
||||
walk_node(source, pos, resolved, ctx, path, per_byte)
|
||||
}
|
||||
Node::TypedValueSlot {
|
||||
@@ -819,3 +877,60 @@ fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{DYNAMIC_CACHE, resolve_dynamic};
|
||||
use crate::dsl::grammar::{Node, Word};
|
||||
use crate::dsl::walker::context::WalkContext;
|
||||
|
||||
/// Trivial factory — ignores the context. The memo behaviour
|
||||
/// is keyed on the context, not the factory's output, so a
|
||||
/// constant factory is enough to exercise the cache.
|
||||
fn const_factory(_ctx: &WalkContext) -> Node {
|
||||
Node::Word(Word::keyword("memo_probe"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_dynamic_memoizes_identical_context() {
|
||||
let ctx = WalkContext::new();
|
||||
let first = resolve_dynamic(const_factory, &ctx);
|
||||
let second = resolve_dynamic(const_factory, &ctx);
|
||||
// Same factory + same context → the leaked Node is
|
||||
// reused (pointer identity), so repeated per-keystroke
|
||||
// walks against an unchanged schema leak only once.
|
||||
assert!(
|
||||
std::ptr::eq(first, second),
|
||||
"identical context should hit the memo cache",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_dynamic_distinct_context_does_not_share() {
|
||||
let ctx_a = WalkContext::new();
|
||||
let mut ctx_b = WalkContext::new();
|
||||
ctx_b.user_listed_columns = Some(vec!["Col".to_string()]);
|
||||
let a = resolve_dynamic(const_factory, &ctx_a);
|
||||
let b = resolve_dynamic(const_factory, &ctx_b);
|
||||
// Different context state → different cache key → a
|
||||
// separate entry (and a separate one-time leak).
|
||||
assert!(
|
||||
!std::ptr::eq(a, b),
|
||||
"distinct context must not collide in the memo cache",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_dynamic_cache_is_populated() {
|
||||
let ctx = WalkContext::new();
|
||||
let _ = resolve_dynamic(const_factory, &ctx);
|
||||
let populated = !DYNAMIC_CACHE
|
||||
.lock()
|
||||
.expect("cache lock")
|
||||
.is_empty();
|
||||
assert!(
|
||||
populated,
|
||||
"resolve_dynamic should populate the memo cache",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user