From 4e16d97fe07320d1fc89c27b692d4ee4a50ab0fe Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Thu, 21 May 2026 18:18:50 +0000 Subject: [PATCH] =?UTF-8?q?walker:=203a=20=E2=80=94=20category-grouped=20m?= =?UTF-8?q?ode-aware=20dispatch=20(ADR-0033=20Amendment=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces ADR-0033 §2's original Node::Guard + Choice(SQL,DSL) mechanism, which was found during 3a to be unworkable: any guard-in-Choice approach forces a walk_choice change (walk_choice falls through only on NoMatch, so simple-mode valid-DSL would wrongly surface "this is SQL"), and walk_seq treats a NoMatch past idx 0 as a hard Failed, breaking advanced-mode DSL fall-through. Mechanism (Amendment 1): each REGISTRY entry is tagged CommandCategory::{Simple, Advanced}, generalising the whole-command is_advanced_only gate. walk() becomes a thin dispatcher over decide() (mode-aware candidate selection: simple commits the DSL node or emits the "this is SQL" hint; advanced tries SQL first, DSL as a full-line fallback) and an extracted walk_one_command(); speculative match-testing runs on a scratch WalkContext so the caller's context is only touched by the committed walk. No Node::Guard, no walk_choice/walk_seq change. 6 dispatch smoke tests on a shared-entry-word smoke registry; 1446 baseline green; clippy clean. --- docs/adr/0033-sql-dml-grammar.md | 146 +++++++++ docs/adr/README.md | 2 +- src/app.rs | 2 +- src/dsl/grammar/mod.rs | 159 +++++++--- src/dsl/walker/mod.rs | 503 ++++++++++++++++++++++++++++--- 5 files changed, 720 insertions(+), 92 deletions(-) diff --git a/docs/adr/0033-sql-dml-grammar.md b/docs/adr/0033-sql-dml-grammar.md index c266513..a36f71b 100644 --- a/docs/adr/0033-sql-dml-grammar.md +++ b/docs/adr/0033-sql-dml-grammar.md @@ -1030,6 +1030,152 @@ grammar on top. - **No `EXPLAIN` of SQL DML** (ADR-0030 §13 OOS-2 — the DSL `explain` still wraps what it already wraps). +## Amendment 1 — Dispatch mechanism: category-grouped dispatch supersedes `Node::Guard` (2026-05-21) + +This amendment **supersedes §2's "Mode-gating mechanism" +subsection** and **revises sub-phase 3a's scope**. It was +written during 3a implementation, after tracing the proposed +mechanism against the actual walker code, and is recorded with +explicit user approval before any code landed. + +### The finding — option (a) does not work as §2 frames it + +§2 selected option (a): a standalone zero-byte `Node::Guard(fn)` +placed as the first element of the SQL branch's `Seq`, inside a +`Choice(SQL_shape, DSL_shape)`. §2 framed this as the "smallest +mechanism change" that "reuses the existing validator path" with +the failure surfacing as the same `WalkOutcome::ValidationFailed` +as the entry-word gate — implying **no `walk_choice` change**. + +Tracing the four required behaviours against +`src/dsl/walker/driver.rs` shows that framing is inaccurate, and +that **any** guard-in-`Choice` mechanism is forced to modify +`walk_choice`: + +1. **`walk_choice` returns immediately on a branch `Failed`** + (`driver.rs` `walk_choice`) — it only falls through to the + next branch on `NoMatch`. So in **Simple mode**, + `delete from t where id = 1` (which is *valid DSL*) would hit + the SQL branch's guard, the guard would fail, and the user + would see the "this is SQL" hint instead of the statement + running as DSL. That contradicts §2's own "Simple mode → DSL + only" promise. Fixing it requires a `walk_choice` change + regardless of how the guard node is shaped. + +2. **`walk_seq` treats a `NoMatch` at `idx > 0` as a hard + `Failed`** (`driver.rs` `walk_seq`). A zero-byte Guard at + `idx 0` still advances `idx` to 1, pushing the SQL branch's + first real token to `idx 1`. So in **Advanced mode**, + `delete from t --all-rows` (DSL-only) cannot fall through to + DSL — the SQL branch returns `Failed`, and `walk_choice` + won't try the DSL branch. The literal form needs an + *additional* `walk_seq` semantics change (`idx == 0` → "no + bytes consumed yet"). + +This is exactly the R1 risk the ADR budgeted for ("if `walk_seq` +commits the Seq before the validator runs … the Choice can't +fall through"). + +### The replacement — category-grouped, mode-aware dispatch + +Rather than gate a `Choice` branch from inside the recursive +grammar, the dispatch decision moves up to the command +dispatcher (`walker::walk`), which is **where mode gating +already lives**: the existing whole-command gate +(`is_advanced_only` + the Simple-mode short-circuit in `walk`) +is precisely this pattern, hardcoded for `select` / `with`. The +amendment generalises that existing code instead of inventing a +parallel in-grammar mechanism. + +Concretely: + +- **Each registry entry carries a category** — + `CommandCategory::{Simple, Advanced}`. The category lives at + the registration site (the `REGISTRY` table), not as a field + on every `CommandNode` literal, because category is a + dispatcher concern, not intrinsic to a command's grammar. + `select` / `with` (and, from 3b, the SQL `insert` / `update` / + `delete` nodes) are `Advanced`; everything else is `Simple`. + This subsumes `ADVANCED_ONLY_ENTRIES`: `is_advanced_only(entry)` + becomes "every registry candidate for this entry word is + `Advanced`". + +- **A shared entry word has a node in both groups** — e.g. a + `Simple` DSL `insert` node and an `Advanced` SQL `insert` + node, both with entry word `insert`. The entry-word lookup + returns *all* candidates; the dispatcher selects by mode. + +- **Mode-aware selection in `walk` (no combinator changes):** + - **Simple mode** considers `Simple` candidates. If a Simple + candidate exists, it is committed. If none exists but an + `Advanced` candidate does (a pure-SQL entry word like + `select`), the dispatcher emits the + `advanced_mode.sql_in_simple` `ValidationFailed` — identical + to today's whole-command gate. + - **Advanced mode** tries `Advanced` candidate(s) first, then + the `Simple` candidate as a fallback. The first candidate + that fully matches wins (`delete from t where id = 1` → + SQL; `delete from t --all-rows` → falls through to DSL + because the SQL shape doesn't match `--all-rows`). + +- **No `Node::Guard`, no `walk_choice` / `walk_seq` change.** The + recursive combinators are untouched; each candidate is walked + cleanly and independently. Speculative evaluation runs on a + scratch `WalkContext`; only the committed candidate is walked + into the caller's context, so post-walk context state + (completion / hint accumulators) always reflects the chosen + candidate. + +### The two details (user-approved) + +1. **"This is SQL" hint in Simple mode for SQL-shaped input.** + For SQL-only input in Simple mode (e.g. + `delete … returning *`), the Simple candidate won't match. + To keep parity with `select` / `with`, the dispatcher + speculatively checks whether the `Advanced` candidate *would* + match; if so, it emits the `advanced_mode.sql_in_simple` + hint rather than a generic DSL parse error. Lives entirely + in `walk`. + +2. **Advanced-mode completion is SQL-first, DSL as full-line + fallback.** The `Choice` approach would have unioned SQL and + DSL continuations mid-typing for free; the grouping approach + surfaces one candidate's walk for completion / hints. The + decision is to show SQL completions primarily in Advanced + mode and fall to DSL only on a full DSL-shaped line — simpler + and matching the "SQL-first, DSL as familiar fallback" + intent. No unioned completion. + +### Consequences of the amendment + +- **`Node::Guard` is NOT added.** §2's mechanism caveat, + Consequences bullet 1 ("the walker gains a new node variant + `Node::Guard`"), and the original 3a scope are withdrawn. +- **`REGISTRY`'s element type changes** to carry the category + alongside each `&CommandNode`. The handful of registry + iteration sites (entry-word listing, `command_for_entry_word`, + the completion entry-word filters, `note_help`) destructure the + tuple; no behavioural change for the current single-candidate + registry. +- **The `Command` variants (§10), worker handlers, RETURNING + flag, cascade-summary plan, shortid auto-fill, and diagnostics + are unaffected** — this amendment is purely about how the + dispatcher routes a shared entry word to the right grammar. +- **Sub-phase 3a now validates the dispatch mechanism**, not the + guard: a smoke registry with a shared entry word (a `Simple` + and an `Advanced` node using distinguishable tokens) exercises + the Simple/Advanced selection, the fallback, and the + "this is SQL" path. The 3a exit gate's four cases map onto the + new mechanism unchanged in spirit. +- **3b adds the first real shared entry words** to `REGISTRY` + (the SQL `insert` / `update` / `delete` nodes). At that point + the completion entry-word *lists* will need to de-duplicate + the shared primary (two candidates, one entry word) — tracked + for 3b, not 3a. +- The mechanism remains reusable for Phase 4 DDL's analogous + shared-entry problem (`create`, …): tag the SQL DDL nodes + `Advanced`; the dispatcher handles the rest. + ## See also - ADR-0005 — the ten-type vocabulary INSERT works with. diff --git a/docs/adr/README.md b/docs/adr/README.md index 71a4b52..4f1f1e0 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -38,4 +38,4 @@ This directory contains the project's ADRs, recorded per - [ADR-0030 — Advanced mode: the standard-SQL surface](0030-advanced-mode-sql-surface.md) — **Accepted**, SQL added as grammar *within the unified grammar tree* (ADR-0024), not a separate batch parser — so SQL gets the same completion / highlighting / hints / parse-errors as the DSL; mode gates the SQL forms; DDL routes through the typed `Command` executor (metadata + type vocabulary preserved), DML and `SELECT` execute as validated SQL; engine-neutral posture, the DSL→SQL teaching echo; supersedes ADR-0001's `sqlparser-rs` reservation; phased plan (`Q1` / `Q2` / `Q4`) - [ADR-0031 — The SQL expression grammar](0031-sql-expression-grammar.md) — **Accepted**, the stratified SQL expression grammar fragment commissioned by ADR-0030 §3: a single precedence ladder (`OR`/`AND`/`NOT`, the comparison/`LIKE`/`IN`/`BETWEEN`/`IS NULL` predicate set, arithmetic incl. `||`, function calls, `CASE`) — the superset of ADR-0026's DSL `WHERE` grammar, authored as a parallel fragment so simple mode is untouched; pure validation, builds **no** AST (consumers run/store SQL as text per ADR-0030 §4/§6); reuses ADR-0026's `Subgrammar` recursion + depth cap unchanged; subquery expressions and qualified column refs deferred to ADR-0030 Phase 2 - [ADR-0032 — The full SQL `SELECT` grammar](0032-sql-select-grammar.md) — **Accepted**, the Phase-2 grammar commissioned by ADR-0030 §3: full `SELECT` with `INNER`/`LEFT`/`RIGHT`/`FULL OUTER`/`CROSS` joins, `GROUP BY`/`HAVING`, all four set ops (`UNION`/`UNION ALL`/`INTERSECT`/`EXCEPT`), `WITH` and `WITH RECURSIVE` CTEs, `LIMIT … OFFSET`, `DISTINCT`, `t.*`, and bare-alias projection (lifting Phase-1 §4.2); additive extensions to ADR-0031's `sql_expr` for scalar subqueries, `IN (SELECT …)`, `[NOT] EXISTS`, and qualified column refs (redeeming ADR-0031 §7 OOS-1/OOS-2); grammar-recursion via `Subgrammar(&SQL_SELECT_COMPOUND)` reuses ADR-0026's `MAX_SUBGRAMMAR_DEPTH = 64` cap unchanged; **softens ADR-0030 §8's "ambient assistance comes for free" claim**: completion scope needs new `WalkContext` accumulators (a `from_scope_stack` of `ScopeFrame`s holding `from_scope` / `cte_bindings` / `projection_aliases`), a **new walker node variant `Node::ScopedSubgrammar(&Node)`** as the push/pop trigger (existing `Node::Subgrammar` unchanged so DSL `Expr` and `sql_expr` recursion are unaffected), qualified-prefix completion narrowing, body-projection-derived CTE column resolution (so `SELECT *` and explicit-projection CTE bodies both yield real column completion past `cte_alias.|`), and a **post-walk fixup pass** that re-resolves projection-list identifier highlighting/validity once `FROM` is parsed (the projection-before-FROM problem); classifies every Phase-2 validation case against ADR-0027's ERROR/WARNING guideline (§11): five new `diagnostic.*` keys for parse-time-detectable cases (unknown qualifier, ambiguous column, projection-alias misplaced, CTE/compound arity mismatch) plus eight `engine.*` translation keys; a MatchedPath-walking predicate-warnings variant that closes the Phase-1 gap where SQL `WHERE` expressions emitted no `LIKE`-on-numeric / `= NULL` / type-mismatch warnings (ADR-0027 Amendment 1 finally extends to the SQL surface); adds a worker-side post-prepare type-resolution pass via engine column-origin metadata so bare column refs recover their playground type (partially lifting Phase-1 §4.5, the bool→0/1 case) — `Cargo.toml` gains `column_metadata` to rusqlite features (verified against pinned 0.39.0); `__rdbms_*` rejection extended to every new table-source slot; Amendment 1 narrows §12's resolution rule from a grammar-side structural classification to "trust the engine's column-origin metadata verbatim" after an empirical probe showed origin metadata follows through non-recursive CTEs, scalar subqueries, derived tables, set ops, and joins — the one structural exception is recursive CTE result columns, which return None and stay typeless; Amendment 2 records that §10.6's "rewrite the highlight class" prescription is realised via the two-pass schema-existence diagnostic + the renderer's diagnostic-overlay path (no separate per-byte rewrite step needed; no new HighlightClass variant), and that the projection-before-FROM completion narrowing has been improved by an `src/completion.rs` look-ahead probe when the leading walk's `from_scope` is empty but the full input parses -- [ADR-0033 — The full SQL DML grammar (`INSERT` / `UPDATE` / `DELETE`)](0033-sql-dml-grammar.md) — **Proposed**, the Phase-3 grammar commissioned by ADR-0030 §3: single- and multi-row `INSERT` (incl. `INSERT … SELECT` recursing through ADR-0032's `SQL_SELECT_COMPOUND`), `UPDATE` with `SET` assignment list, `DELETE`, all three optionally followed by `RETURNING projection_list`, plus full `ON CONFLICT … DO NOTHING / DO UPDATE` UPSERT on INSERT; **fixes the DSL-vs-SQL dispatch architecture for shared entry words (`insert`/`update`/`delete`)**: SQL-first / DSL-fallback in Advanced mode via a `Choice(SQL_shape, DSL_shape)` per shape, gated by a new walker capability `Node::Guard(fn)` — a zero-byte-consumption gating node that fails the enclosing Seq with a `ValidationError`; carries `Command::SqlInsert` / `SqlUpdate` / `SqlDelete` variants and `do_sql_*` worker handlers each of which knows the target table (for re-persistence) and the `returning: bool` flag (for DataResult routing); `shortid` auto-fill mirrors the DSL `do_insert` mechanism via worker post-fill; SQL DELETE produces the same per-relationship cascade summary the DSL DELETE does (ADR-0014 parity), with the WHERE-clause source bytes re-injected into per-child pre-count subqueries; three new walker diagnostics (`insert_arity_mismatch` ERROR, `auto_column_overridden` WARNING, `not_null_missing` WARNING) with positive + negative tests each; OOS list explicitly carves out `DEFAULT VALUES` (the project's planned seed feature), SQLite-specific `OR REPLACE` / `OR IGNORE` / `OR ABORT` / `OR FAIL` / `OR ROLLBACK` prefixes, `UPDATE FROM` multi-table updates, and WITH-prefixed DML; the `excluded` keyword inside `ON CONFLICT DO UPDATE` is a deliberate carve-out from ADR-0030 §7's engine-neutral posture (no standard-SQL UPSERT spelling exists that SQLite and PostgreSQL share); eleven phased sub-phases each with explicit exit gates + written DA gate, opening with `Node::Guard` mechanism scaffolding before any DML grammar lands; initial DA review recorded seven critiques that were resolved before status moved to Proposed +- [ADR-0033 — The full SQL DML grammar (`INSERT` / `UPDATE` / `DELETE`)](0033-sql-dml-grammar.md) — **Proposed**, the Phase-3 grammar commissioned by ADR-0030 §3: single- and multi-row `INSERT` (incl. `INSERT … SELECT` recursing through ADR-0032's `SQL_SELECT_COMPOUND`), `UPDATE` with `SET` assignment list, `DELETE`, all three optionally followed by `RETURNING projection_list`, plus full `ON CONFLICT … DO NOTHING / DO UPDATE` UPSERT on INSERT; **fixes the DSL-vs-SQL dispatch architecture for shared entry words (`insert`/`update`/`delete`)**: SQL-first / DSL-fallback in Advanced mode via a `Choice(SQL_shape, DSL_shape)` per shape, gated by a new walker capability `Node::Guard(fn)` — a zero-byte-consumption gating node that fails the enclosing Seq with a `ValidationError`; carries `Command::SqlInsert` / `SqlUpdate` / `SqlDelete` variants and `do_sql_*` worker handlers each of which knows the target table (for re-persistence) and the `returning: bool` flag (for DataResult routing); `shortid` auto-fill mirrors the DSL `do_insert` mechanism via worker post-fill; SQL DELETE produces the same per-relationship cascade summary the DSL DELETE does (ADR-0014 parity), with the WHERE-clause source bytes re-injected into per-child pre-count subqueries; three new walker diagnostics (`insert_arity_mismatch` ERROR, `auto_column_overridden` WARNING, `not_null_missing` WARNING) with positive + negative tests each; OOS list explicitly carves out `DEFAULT VALUES` (the project's planned seed feature), SQLite-specific `OR REPLACE` / `OR IGNORE` / `OR ABORT` / `OR FAIL` / `OR ROLLBACK` prefixes, `UPDATE FROM` multi-table updates, and WITH-prefixed DML; the `excluded` keyword inside `ON CONFLICT DO UPDATE` is a deliberate carve-out from ADR-0030 §7's engine-neutral posture (no standard-SQL UPSERT spelling exists that SQLite and PostgreSQL share); eleven phased sub-phases each with explicit exit gates + written DA gate, opening with the dispatch mechanism before any DML grammar lands; initial DA review recorded seven critiques that were resolved before status moved to Proposed; **Amendment 1 supersedes §2's dispatch mechanism**: the originally-chosen `Node::Guard(fn)` + `Choice(SQL_shape, DSL_shape)` was found during 3a to be unworkable as framed (any guard-in-`Choice` mechanism forces a `walk_choice` change — `walk_choice` only falls through on `NoMatch`, so Simple-mode valid-DSL would wrongly surface "this is SQL", and `walk_seq` treats a `NoMatch` past `idx 0` as a hard `Failed`, breaking Advanced-mode DSL fall-through); replaced by **category-grouped, mode-aware dispatch** in `walker::walk` (each `REGISTRY` entry tagged `CommandCategory::{Simple, Advanced}`, generalising the existing whole-command `is_advanced_only` gate), shared entry words carrying a node in both groups, no `Node::Guard` and no `walk_choice`/`walk_seq` change, advanced-mode completion SQL-first with DSL as a full-line fallback diff --git a/src/app.rs b/src/app.rs index d55e13c..ea0df02 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1792,7 +1792,7 @@ impl App { // "DSL data commands" sub-header at the first command // whose help_id leaves the `app.` namespace. let mut dsl_header_done = false; - for command in REGISTRY { + for (command, _category) in REGISTRY { let Some(help_id) = command.help_id else { continue; }; diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index a8d8dae..6a7563c 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -410,6 +410,32 @@ pub enum Node { }, } +/// Which mode group a registered command belongs to (ADR-0030 +/// §2, ADR-0033 Amendment 1). +/// +/// Category is a *dispatcher* concern, not intrinsic to a +/// command's grammar, so it is attached at the `REGISTRY` +/// registration site rather than as a field on every +/// `CommandNode`. The dispatcher (`walker::walk`) uses it to +/// route a given input by the active input mode: +/// +/// - `Simple` commands are the DSL surface; available in both +/// simple and advanced mode. +/// - `Advanced` commands are the SQL surface; available only in +/// advanced mode. In simple mode an advanced-only entry word +/// yields the "this is SQL" hint (`advanced_mode.sql_in_simple`). +/// +/// A *shared* entry word (e.g. `insert`, from Phase 3 sub-phase +/// 3b on) carries a node in *both* groups — a `Simple` DSL node +/// and an `Advanced` SQL node. The dispatcher tries the SQL node +/// first in advanced mode and falls back to the DSL node when the +/// SQL shape does not match. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CommandCategory { + Simple, + Advanced, +} + /// Top-level entry record. One per command. The `entry` keyword /// alone identifies which command the walker dispatches to; /// `shape` is what follows the entry word. @@ -503,72 +529,105 @@ pub fn usage_key_for_input(source: &str) -> Option<&'static str> { /// which read the same data through the legacy `usage::REGISTRY`. #[must_use] pub fn entry_words_alphabetised() -> Vec<&'static str> { - let mut words: Vec<&'static str> = REGISTRY.iter().map(|c| c.entry.primary).collect(); + let mut words: Vec<&'static str> = + REGISTRY.iter().map(|(c, _)| c.entry.primary).collect(); words.sort_unstable(); + words.dedup(); words } -/// The active grammar registry. Phase A: the eleven app-lifecycle -/// commands. Migrated commands route through this; everything -/// else falls through to the chumsky path in `dsl::parser`. -pub static REGISTRY: &[&CommandNode] = &[ - &app::QUIT, - &app::HELP, - &app::REBUILD, - &app::SAVE, - &app::NEW, - &app::LOAD, - &app::EXPORT, - &app::IMPORT, - &app::MODE, - &app::MESSAGES, - &ddl::DROP, - &ddl::ADD, - &ddl::RENAME, - &ddl::CHANGE, - &ddl::CREATE, - &data::SHOW, - &data::INSERT, - &data::UPDATE, - &data::DELETE, - &data::REPLAY, - &data::EXPLAIN, - &data::SELECT, - &data::WITH, +/// The active grammar registry, each command paired with its +/// dispatch [`CommandCategory`] (ADR-0033 Amendment 1). +/// +/// Migrated commands route through this; everything else falls +/// through to the chumsky path in `dsl::parser`. `Advanced` +/// commands (`select`, `with`, and — from sub-phase 3b — the SQL +/// `insert` / `update` / `delete` nodes) are the SQL surface; +/// the rest are the DSL surface (`Simple`). A shared entry word +/// will appear twice (one `Simple`, one `Advanced` node); the +/// dispatcher selects by mode. +pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[ + (&app::QUIT, CommandCategory::Simple), + (&app::HELP, CommandCategory::Simple), + (&app::REBUILD, CommandCategory::Simple), + (&app::SAVE, CommandCategory::Simple), + (&app::NEW, CommandCategory::Simple), + (&app::LOAD, CommandCategory::Simple), + (&app::EXPORT, CommandCategory::Simple), + (&app::IMPORT, CommandCategory::Simple), + (&app::MODE, CommandCategory::Simple), + (&app::MESSAGES, CommandCategory::Simple), + (&ddl::DROP, CommandCategory::Simple), + (&ddl::ADD, CommandCategory::Simple), + (&ddl::RENAME, CommandCategory::Simple), + (&ddl::CHANGE, CommandCategory::Simple), + (&ddl::CREATE, CommandCategory::Simple), + (&data::SHOW, CommandCategory::Simple), + (&data::INSERT, CommandCategory::Simple), + (&data::UPDATE, CommandCategory::Simple), + (&data::DELETE, CommandCategory::Simple), + (&data::REPLAY, CommandCategory::Simple), + (&data::EXPLAIN, CommandCategory::Simple), + (&data::SELECT, CommandCategory::Advanced), + (&data::WITH, CommandCategory::Advanced), ]; -/// Entry words for commands available only in advanced mode -/// (ADR-0030 §2). Phase 1: `select`. In simple mode the walker -/// gates these out — typing one yields the precise "this is SQL" -/// hint rather than a normal parse or an "unknown command" error. -/// -/// This is whole-command gating keyed on the entry word, which -/// suffices while every SQL form is its own command. ADR-0030 §2's -/// finer-grained per-`Choice`-branch tagging arrives with the -/// shared DSL/SQL entry words (`create`, `insert`, …) in a later -/// phase. -const ADVANCED_ONLY_ENTRIES: &[&str] = &["select", "with"]; - /// Whether `entry` names an advanced-mode-only command (ADR-0030 -/// §2). Case-insensitive, matching keyword-matching elsewhere. +/// §2, ADR-0033 Amendment 1). Case-insensitive, matching +/// keyword-matching elsewhere. +/// +/// True when the entry word is registered and *every* candidate +/// for it is `Advanced` — i.e. there is no DSL (`Simple`) command +/// to fall back to. A shared entry word (a Simple DSL node plus +/// an Advanced SQL node) is therefore *not* advanced-only: it is +/// available in simple mode as DSL. #[must_use] pub fn is_advanced_only(entry: &str) -> bool { - ADVANCED_ONLY_ENTRIES - .iter() - .any(|e| e.eq_ignore_ascii_case(entry)) + let mut found = false; + for (c, category) in REGISTRY { + if c.entry.matches(entry) { + found = true; + if *category == CommandCategory::Simple { + return false; + } + } + } + found } -/// Look up a `CommandNode` by entry word, case-insensitively. +/// Look up the first `CommandNode` registered for an entry word, +/// case-insensitively. Returns the index into `REGISTRY` so +/// callers can use it as a `WalkOutcome::Match { command_idx }`. /// -/// Used by the router to decide whether the walker owns this -/// input. Returns the index into `REGISTRY` so callers can -/// later use it as a `WalkOutcome::Match { command_idx }`. +/// For shared entry words this returns whichever node is listed +/// first in `REGISTRY`; callers that must distinguish the Simple +/// from the Advanced candidate use [`commands_for_entry_word`]. pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> { REGISTRY .iter() .enumerate() - .find(|(_, c)| c.entry.matches(word)) - .map(|(i, c)| (i, *c)) + .find(|(_, (c, _))| c.entry.matches(word)) + .map(|(i, (c, _))| (i, *c)) +} + +/// Every `CommandNode` registered for an entry word, with its +/// `REGISTRY` index and [`CommandCategory`], case-insensitively +/// (ADR-0033 Amendment 1). +/// +/// A non-shared entry word returns a single candidate; a shared +/// entry word (`insert` / `update` / `delete` from sub-phase 3b) +/// returns its `Simple` DSL node and `Advanced` SQL node. The +/// dispatcher picks among them by the active input mode. +#[must_use] +pub fn commands_for_entry_word( + word: &str, +) -> Vec<(usize, &'static CommandNode, CommandCategory)> { + REGISTRY + .iter() + .enumerate() + .filter(|(_, (c, _))| c.entry.matches(word)) + .map(|(i, (c, category))| (i, *c, *category)) + .collect() } #[cfg(test)] diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index a6a7443..f323752 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -280,11 +280,11 @@ pub fn completion_probe_in_mode( let mode_filtered_entries = || -> Vec { REGISTRY .iter() - .filter(|c| { + .filter(|(c, _)| { mode == crate::mode::Mode::Advanced || !is_advanced_only(c.entry.primary) }) - .map(|c| outcome::Expectation::Word(c.entry.primary)) + .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; @@ -1529,11 +1529,11 @@ pub fn expected_at_input_in_mode( let mode_filtered = || -> Vec { REGISTRY .iter() - .filter(|c| { + .filter(|(c, _)| { mode == crate::mode::Mode::Advanced || !is_advanced_only(c.entry.primary) }) - .map(|c| outcome::Expectation::Word(c.entry.primary)) + .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; @@ -1614,7 +1614,7 @@ fn expected_for_hint_snapshot( let entry_words = || -> Vec { REGISTRY .iter() - .map(|c| outcome::Expectation::Word(c.entry.primary)) + .map(|(c, _)| outcome::Expectation::Word(c.entry.primary)) .collect() }; @@ -1690,11 +1690,267 @@ pub fn walk<'a>( return (None, None); }; let entry_text = &effective_source[kw_start..kw_end]; - let Some((command_idx, command_node)) = grammar::command_for_entry_word(entry_text) - else { + let candidates = grammar::commands_for_entry_word(entry_text); + if candidates.is_empty() { + // First token isn't a registered entry word — yield to + // the chumsky path. return (None, None); - }; + } + // ADR-0033 Amendment 1 — category-grouped, mode-aware + // dispatch. `decide` chooses which registered candidate to + // commit (or emits the "this is SQL" hint), running any + // speculative match-testing on scratch contexts so the + // caller's `ctx` is only ever touched by the committed walk. + match decide( + effective_source, + kw_start, + kw_end, + &candidates, + ctx.mode, + ctx.schema, + ) { + Decision::Commit { idx, node } => { + let (result, cmd) = + walk_one_command(effective_source, source, kw_start, kw_end, idx, node, ctx); + (Some(result), cmd) + } + Decision::ThisIsSql { primary } => ( + Some(this_is_sql_result(entry_text, primary, kw_start, kw_end)), + None, + ), + } +} + +/// The dispatcher's choice for a given input (ADR-0033 +/// Amendment 1): commit a specific registered candidate, or emit +/// the simple-mode "this is SQL" hint. +enum Decision { + /// Walk this candidate into the caller's `WalkContext`. + Commit { + idx: usize, + node: &'static crate::dsl::grammar::CommandNode, + }, + /// Simple mode with SQL-shaped input: emit + /// `advanced_mode.sql_in_simple`, carrying this entry literal. + ThisIsSql { primary: &'static str }, +} + +/// Category-grouped, mode-aware dispatch decision (ADR-0033 +/// Amendment 1). +/// +/// Pure with respect to the caller's context: any speculative +/// match-testing runs on a fresh scratch `WalkContext` (see +/// `scratch_outcome`), so `decide` never mutates the caller's +/// accumulators. +/// +/// - **Simple mode** commits the DSL (`Simple`) candidate. With +/// no DSL candidate (a SQL-only entry word) it emits the +/// "this is SQL" hint. For a shared entry word whose DSL shape +/// does not match but whose SQL shape does, it also emits the +/// hint — so `delete … returning *` in simple mode points the +/// user at advanced mode rather than at a bare DSL parse error. +/// - **Advanced mode** tries `Advanced` candidates first, then +/// the `Simple` candidate as a fallback; the first full match +/// wins. When none fully match it commits the candidate that +/// progressed furthest (advanced-first on ties) so the surfaced +/// error is the most informative. +fn decide( + effective_source: &str, + kw_start: usize, + kw_end: usize, + candidates: &[( + usize, + &'static crate::dsl::grammar::CommandNode, + crate::dsl::grammar::CommandCategory, + )], + mode: crate::mode::Mode, + schema: Option<&crate::completion::SchemaCache>, +) -> Decision { + use crate::dsl::grammar::CommandCategory; + + let advanced: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = candidates + .iter() + .filter(|(_, _, cat)| *cat == CommandCategory::Advanced) + .map(|(i, n, _)| (*i, *n)) + .collect(); + let simple: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = candidates + .iter() + .filter(|(_, _, cat)| *cat == CommandCategory::Simple) + .map(|(i, n, _)| (*i, *n)) + .collect(); + + match mode { + crate::mode::Mode::Simple => { + let Some(&(sidx, snode)) = simple.first() else { + // No DSL candidate — the entry word is SQL-only. + let primary = candidates.first().map_or("", |(_, n, _)| n.entry.primary); + return Decision::ThisIsSql { primary }; + }; + if advanced.is_empty() { + return Decision::Commit { idx: sidx, node: snode }; + } + // Shared entry word: prefer the DSL node; only point + // at advanced mode when the DSL shape does not match + // but the SQL shape does. + if scratch_full_match(effective_source, kw_start, kw_end, snode, mode, schema) { + return Decision::Commit { idx: sidx, node: snode }; + } + let (_, anode) = advanced[0]; + if scratch_full_match(effective_source, kw_start, kw_end, anode, mode, schema) { + return Decision::ThisIsSql { + primary: anode.entry.primary, + }; + } + Decision::Commit { idx: sidx, node: snode } + } + crate::mode::Mode::Advanced => { + // Advanced candidates first, DSL as the fallback. + let ordered: Vec<(usize, &'static crate::dsl::grammar::CommandNode)> = + advanced.iter().chain(simple.iter()).copied().collect(); + // `candidates` is non-empty (the caller checked), so + // `ordered` is non-empty too. + if ordered.len() == 1 { + let (idx, node) = ordered[0]; + return Decision::Commit { idx, node }; + } + for &(idx, node) in &ordered { + if scratch_full_match(effective_source, kw_start, kw_end, node, mode, schema) { + return Decision::Commit { idx, node }; + } + } + // None fully matched — commit the furthest-progress + // candidate, keeping the first (advanced) on ties. + let mut best = ordered[0]; + let mut best_progress = + scratch_progress(effective_source, kw_start, kw_end, best.1, mode, schema); + for &(idx, node) in &ordered[1..] { + let progress = + scratch_progress(effective_source, kw_start, kw_end, node, mode, schema); + if progress > best_progress { + best = (idx, node); + best_progress = progress; + } + } + Decision::Commit { + idx: best.0, + node: best.1, + } + } + } +} + +/// Build the `advanced_mode.sql_in_simple` result for a SQL entry +/// word typed in simple mode (ADR-0030 §2, ADR-0033 Amendment 1). +/// The entry word stays highlighted as a keyword; the input +/// carries an ERROR verdict (it will not run here). +fn this_is_sql_result( + entry_text: &str, + primary: &'static str, + kw_start: usize, + kw_end: usize, +) -> WalkResult { + let mut path = MatchedPath::new(); + let mut per_byte = Vec::new(); + path.push(crate::dsl::walker::outcome::MatchedItem { + kind: crate::dsl::walker::outcome::MatchedKind::Word(primary), + text: entry_text.to_string(), + span: (kw_start, kw_end), + }); + per_byte.push(crate::dsl::walker::outcome::ByteClass { + start: kw_start, + end: kw_end, + class: grammar::HighlightClass::Keyword, + }); + WalkResult { + outcome: WalkOutcome::ValidationFailed { + position: kw_start, + error: crate::dsl::grammar::ValidationError { + message_key: "advanced_mode.sql_in_simple", + args: vec![("command", primary.to_string())], + }, + }, + matched_path: path, + per_byte_class: per_byte, + diagnostics: Vec::new(), + tail_expected: Vec::new(), + } +} + +/// Run `walk_one_command` on a fresh scratch `WalkContext` so the +/// dispatcher can test a candidate without disturbing the +/// caller's accumulators (ADR-0033 Amendment 1). +fn scratch_outcome( + effective_source: &str, + kw_start: usize, + kw_end: usize, + node: &'static crate::dsl::grammar::CommandNode, + mode: crate::mode::Mode, + schema: Option<&crate::completion::SchemaCache>, +) -> WalkOutcome { + let mut sctx = schema.map_or_else(context::WalkContext::new, context::WalkContext::with_schema); + sctx.mode = mode; + let (result, _cmd) = + walk_one_command(effective_source, effective_source, kw_start, kw_end, 0, node, &mut sctx); + result.outcome +} + +/// Whether a candidate fully matches the input (a clean +/// `WalkOutcome::Match`), tested on a scratch context. +fn scratch_full_match( + effective_source: &str, + kw_start: usize, + kw_end: usize, + node: &'static crate::dsl::grammar::CommandNode, + mode: crate::mode::Mode, + schema: Option<&crate::completion::SchemaCache>, +) -> bool { + matches!( + scratch_outcome(effective_source, kw_start, kw_end, node, mode, schema), + WalkOutcome::Match { .. } + ) +} + +/// How far (byte position) a candidate's walk progressed. A full +/// match scores the whole input; a failure scores its failure +/// position. Used only to tie-break when no candidate fully +/// matches. +fn scratch_progress( + effective_source: &str, + kw_start: usize, + kw_end: usize, + node: &'static crate::dsl::grammar::CommandNode, + mode: crate::mode::Mode, + schema: Option<&crate::completion::SchemaCache>, +) -> usize { + match scratch_outcome(effective_source, kw_start, kw_end, node, mode, schema) { + WalkOutcome::Match { .. } => effective_source.len(), + WalkOutcome::Incomplete { position, .. } + | WalkOutcome::Mismatch { position, .. } + | WalkOutcome::ValidationFailed { position, .. } => position, + } +} + +/// Walk a *single* committed command's shape and produce its +/// `WalkResult` + optional `Command` (ADR-0033 Amendment 1). +/// +/// Factored out of `walk` so the dispatcher's speculative +/// match-testing (`scratch_outcome`) reuses the exact same walk + +/// outcome-mapping + AST-builder + diagnostic path on a scratch +/// context, while the committed walk runs into the caller's +/// context. `source` is the full (unbounded) input the AST +/// builder reads for SQL command text; `effective_source` is the +/// bound-trimmed slice the walker matches against. +fn walk_one_command<'a>( + effective_source: &str, + source: &str, + kw_start: usize, + kw_end: usize, + command_idx: usize, + command_node: &'static crate::dsl::grammar::CommandNode, + ctx: &mut WalkContext<'a>, +) -> (WalkResult, Option) { + let entry_text = &effective_source[kw_start..kw_end]; let mut path = MatchedPath::new(); let mut per_byte = Vec::new(); @@ -1710,37 +1966,6 @@ pub fn walk<'a>( class: grammar::HighlightClass::Keyword, }); - // Mode gate (ADR-0030 §2): an advanced-only command (a SQL - // form) typed in simple mode is *recognised as SQL* and - // yields a precise hint — "this is SQL; switch with `mode - // advanced`, or prefix the line with `:`" — rather than - // being walked normally or rejected as an unknown command. - // The entry word stays highlighted as a keyword (it is one); - // the input carries an ERROR verdict (it will not run here). - if ctx.mode == crate::mode::Mode::Simple - && grammar::is_advanced_only(command_node.entry.primary) - { - return ( - Some(WalkResult { - outcome: WalkOutcome::ValidationFailed { - position: kw_start, - error: crate::dsl::grammar::ValidationError { - message_key: "advanced_mode.sql_in_simple", - args: vec![( - "command", - command_node.entry.primary.to_string(), - )], - }, - }, - matched_path: path, - per_byte_class: per_byte, - diagnostics: Vec::new(), - tail_expected: Vec::new(), - }), - None, - ); - } - let mut tail_expected: Vec = Vec::new(); let outcome = match walk_node( effective_source, @@ -1879,7 +2104,7 @@ pub fn walk<'a>( tail_expected, diagnostics, }; - (Some(result), cmd) + (result, cmd) } #[cfg(test)] @@ -4361,3 +4586,201 @@ mod projection_before_from_tests { ); } } + +/// Sub-phase 3a — category-grouped, mode-aware dispatch +/// (ADR-0033 Amendment 1). +/// +/// These exercise the dispatch mechanism end-to-end on a *smoke* +/// registry: a single shared entry word (`smk`) carrying a +/// `Simple` DSL node and an `Advanced` SQL node with +/// distinguishable tails (`dsltail` / `sqltail`). The dispatch +/// functions (`decide`, `walk_one_command`, `this_is_sql_result`) +/// are module-private; this child module reaches them via +/// `super::*`. The smoke nodes never enter the real `REGISTRY`, +/// so production dispatch is unaffected. +#[cfg(test)] +mod dispatch_3a_tests { + use super::*; + use crate::dsl::command::{AppCommand, Command}; + use crate::dsl::grammar::{ + CommandCategory, CommandNode, Node, ValidationError, Word, + }; + use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace}; + use crate::dsl::walker::outcome::MatchedPath; + use crate::mode::Mode; + + // Distinct dummy commands so a test can tell which node a walk + // committed to (the outcome alone doesn't distinguish them). + fn dsl_builder(_: &MatchedPath, _: &str) -> Result { + Ok(Command::App(AppCommand::Help)) + } + fn sql_builder(_: &MatchedPath, _: &str) -> Result { + Ok(Command::App(AppCommand::Quit)) + } + + static SMOKE_DSL: CommandNode = CommandNode { + entry: Word::keyword("smk"), + shape: Node::Word(Word::keyword("dsltail")), + ast_builder: dsl_builder, + help_id: None, + usage_ids: &[], + }; + static SMOKE_SQL: CommandNode = CommandNode { + entry: Word::keyword("smk"), + shape: Node::Word(Word::keyword("sqltail")), + ast_builder: sql_builder, + help_id: None, + usage_ids: &[], + }; + + type Candidates = Vec<(usize, &'static CommandNode, CommandCategory)>; + + /// A shared entry word: both a DSL and a SQL node under `smk`. + /// Listed SQL-first to prove `decide` partitions by category + /// rather than relying on registry order. + fn shared() -> Candidates { + vec![ + (0, &SMOKE_SQL, CommandCategory::Advanced), + (1, &SMOKE_DSL, CommandCategory::Simple), + ] + } + + /// A SQL-only entry word (no DSL fallback) — models `select`. + fn sql_only() -> Candidates { + vec![(0, &SMOKE_SQL, CommandCategory::Advanced)] + } + + fn kw(input: &str) -> (usize, usize) { + let start = skip_whitespace(input, 0); + consume_ident(input, start).expect("entry word") + } + + fn run_decide(input: &str, mode: Mode, cands: &Candidates) -> Decision { + let (ks, ke) = kw(input); + decide(input, ks, ke, cands, mode, None) + } + + /// Mirror `walk`'s dispatch: decide, then either walk the + /// committed node or build the "this is SQL" result. Returns + /// the resulting outcome plus the committed command (if any). + fn dispatch(input: &str, mode: Mode, cands: &Candidates) -> (WalkOutcome, Option) { + let (ks, ke) = kw(input); + let entry_text = &input[ks..ke]; + match decide(input, ks, ke, cands, mode, None) { + Decision::Commit { idx, node } => { + let mut ctx = context::WalkContext::new(); + ctx.mode = mode; + let (res, cmd) = walk_one_command(input, input, ks, ke, idx, node, &mut ctx); + (res.outcome, cmd) + } + Decision::ThisIsSql { primary } => { + (this_is_sql_result(entry_text, primary, ks, ke).outcome, None) + } + } + } + + fn committed_node(input: &str, mode: Mode, cands: &Candidates) -> &'static CommandNode { + match run_decide(input, mode, cands) { + Decision::Commit { node, .. } => node, + Decision::ThisIsSql { .. } => panic!("expected Commit, got ThisIsSql for {input:?}"), + } + } + + // ---- Exit-gate case 1: Simple + DSL input → DSL match ------ + + #[test] + fn simple_mode_dsl_input_matches_dsl() { + let cands = shared(); + assert!( + std::ptr::eq(committed_node("smk dsltail", Mode::Simple, &cands), &SMOKE_DSL), + "simple mode must commit the DSL node for DSL input", + ); + let (outcome, cmd) = dispatch("smk dsltail", Mode::Simple, &cands); + assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); + assert_eq!(cmd, Some(Command::App(AppCommand::Help))); + } + + // ---- Exit-gate case 2: Advanced + SQL input → SQL match ---- + + #[test] + fn advanced_mode_sql_input_matches_sql() { + let cands = shared(); + assert!( + std::ptr::eq(committed_node("smk sqltail", Mode::Advanced, &cands), &SMOKE_SQL), + "advanced mode must commit the SQL node for SQL input", + ); + let (outcome, cmd) = dispatch("smk sqltail", Mode::Advanced, &cands); + assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); + assert_eq!(cmd, Some(Command::App(AppCommand::Quit))); + } + + // ---- Exit-gate case 3: Simple + SQL-only input → + // ValidationFailed advanced_mode.sql_in_simple ---------- + + #[test] + fn simple_mode_sql_only_input_is_this_is_sql() { + // Shared word, but the input matches only the SQL tail. + let cands = shared(); + match run_decide("smk sqltail", Mode::Simple, &cands) { + Decision::ThisIsSql { primary } => assert_eq!(primary, "smk"), + Decision::Commit { idx, .. } => { + panic!("expected ThisIsSql, got Commit {{ idx: {idx} }}") + } + } + let (outcome, cmd) = dispatch("smk sqltail", Mode::Simple, &cands); + match outcome { + WalkOutcome::ValidationFailed { error, .. } => { + assert_eq!(error.message_key, "advanced_mode.sql_in_simple"); + } + other => panic!("expected ValidationFailed, got {other:?}"), + } + assert_eq!(cmd, None); + } + + /// A pure SQL-only entry word (no DSL node, like `select`) in + /// simple mode also yields the "this is SQL" hint — the + /// behaviour the old whole-command `is_advanced_only` gate + /// produced, now via `decide`. + #[test] + fn simple_mode_sql_only_entry_word_is_this_is_sql() { + let cands = sql_only(); + let (outcome, _) = dispatch("smk sqltail", Mode::Simple, &cands); + match outcome { + WalkOutcome::ValidationFailed { error, .. } => { + assert_eq!(error.message_key, "advanced_mode.sql_in_simple"); + } + other => panic!("expected ValidationFailed, got {other:?}"), + } + } + + // ---- Exit-gate case 4 / 5: Advanced + DSL-only input → + // DSL match via fallback (the R1-equivalent invariant) -- + + #[test] + fn advanced_mode_dsl_input_falls_back_to_dsl() { + // `dsltail` matches the DSL node but NOT the SQL node. + // Advanced mode tries SQL first; it must fall back to the + // DSL node rather than surfacing the SQL node's failure. + let cands = shared(); + assert!( + std::ptr::eq(committed_node("smk dsltail", Mode::Advanced, &cands), &SMOKE_DSL), + "advanced mode must fall back to DSL when SQL doesn't match", + ); + let (outcome, cmd) = dispatch("smk dsltail", Mode::Advanced, &cands); + assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); + assert_eq!(cmd, Some(Command::App(AppCommand::Help))); + } + + /// In advanced mode a non-shared DSL entry word (no Advanced + /// candidate) still commits the single DSL node. + #[test] + fn advanced_mode_dsl_only_entry_word_commits_dsl() { + let cands: Candidates = vec![(0, &SMOKE_DSL, CommandCategory::Simple)]; + assert!(std::ptr::eq( + committed_node("smk dsltail", Mode::Advanced, &cands), + &SMOKE_DSL, + )); + let (outcome, _) = dispatch("smk dsltail", Mode::Advanced, &cands); + assert!(matches!(outcome, WalkOutcome::Match { .. }), "got {outcome:?}"); + } +}