//! Unified declarative grammar tree (ADR-0024). //! //! The grammar tree is the single source of truth for the DSL — //! parsing, completion, syntax highlighting, parse-error usage //! rendering, and hint-panel content all derive from this same //! data structure (ADR-0023 institutional context). //! //! Phase A scope (ADR-0024 §migration): the framework lands //! alongside the eleven app-lifecycle commands (quit, help, //! rebuild, save, save as, new, load, export, import, mode, //! messages). The chumsky parser still owns every other //! command; the router in `dsl::parser` decides which path to //! take per first-token. Schema-aware nodes (`IdentSource::Tables` //! and friends) and `DynamicSubgrammar` are declared here but //! not exercised until Phase B-D. //! //! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one //! pragmatic addition for Phase A: each `Ident` carries an //! optional content validator, used today by the `mode ` //! / `messages ` slots to surface friendly catalog //! wording (`mode.unknown`, `messages.unknown`) on out-of-set //! identifiers. The same hook generalises naturally to typed //! value slots in Phase D. pub mod app; pub mod data; pub mod ddl; pub mod expr; pub mod shared; pub mod sql_expr; pub mod sql_create_table; pub mod sql_delete; pub mod sql_insert; pub mod sql_select; pub mod sql_update; use crate::dsl::command::Command; use crate::dsl::walker::context::WalkContext; use crate::dsl::walker::outcome::MatchedPath; /// Highlight class assigned to a matched terminal. /// /// Recorded on the `WalkResult::per_byte_class` slice and surfaced /// by `walker::highlight_runs` to the input/echo-line renderers /// (ADR-0024 §architecture). #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HighlightClass { Keyword, Identifier, /// Column data-type keyword (`int`, `serial`, `text`, …). 
/// Distinct from `Keyword` and `Identifier` so learners can /// tell "this is a type" from a clause keyword or a name they /// invented (ADR-0022 Amendment 4). Assigned via a type slot's /// `highlight_override`, not by byte shape. Type, Number, String, Punct, Flag, Error, } /// Where an `Ident` slot's candidates come from at completion time. /// /// Drives both the walker's `Expectation::Ident { source }` (which /// the parse-error bridge maps to a human label) and the /// `SchemaCache` lookup the completion engine uses for Tab /// candidates. The `Free` and `NewName` variants do not query the /// schema — `NewName` is for slots where the user invents the /// identifier, `Free` is the catch-all branch in `mode`/`messages` /// that funnels unknown values into a friendly validator. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum IdentSource { /// User invents this name. No schema lookup; no completion /// candidates beyond the identifier shape itself. NewName, /// Existing table name. Tables, /// Existing column in the current table. Columns, /// Existing relationship name. Relationships, /// Existing index name. Indexes, /// Closed set from `Type::all()` — surfaced by the walker's /// content validator on column-type slots; not user-listable /// from the schema. Types, /// Any identifier shape; used by synthetic catch-all branches /// (e.g., the unknown-value branch of `mode `). Free, } impl IdentSource { /// Whether this source can be completed from the schema /// cache (i.e. the candidate list comes from existing /// entities rather than user invention or a closed set). #[must_use] pub const fn completes_from_schema(self) -> bool { matches!( self, Self::Tables | Self::Columns | Self::Relationships | Self::Indexes ) } /// Human-facing label used in parse-error wording /// ("expected table name") and in the completion engine's /// round-trip from a textual `expected` entry back to a /// source kind. 
`Free` and `Types` collapse to "identifier" /// and "type" respectively. #[must_use] pub const fn expected_label(self) -> &'static str { match self { Self::NewName | Self::Free => "identifier", Self::Tables => "table name", Self::Columns => "column name", Self::Relationships => "relationship name", Self::Indexes => "index name", Self::Types => "type", } } /// Inverse of `expected_label`. Used by the completion engine /// to recover the source kind from the `ParseError::Invalid:: /// expected` strings the walker bridge produces. `"identifier"` /// maps to `NewName` (the only writeable label that uses that /// wording in production grammars today). #[must_use] pub fn from_expected_label(label: &str) -> Option { match label { "identifier" => Some(Self::NewName), "table name" => Some(Self::Tables), "column name" => Some(Self::Columns), "relationship name" => Some(Self::Relationships), "index name" => Some(Self::Indexes), "type" => Some(Self::Types), _ => None, } } } /// Hint-panel mode for an expected node (ADR-0024 §HintMode-per-node). /// /// `Default` (today's behaviour) shows candidates if any, falls /// back to a prose ladder otherwise. The other variants /// override at slot positions where the candidate list would be /// actively misleading or where the user benefits from format /// guidance: /// /// - `ProseOnly(catalog_key)` — show only prose from the /// catalog; suppress Tab candidates. Used today by the /// value-literal slot at empty prefix (the "null/true/false" /// candidate trio is misleading at a slot that more often /// takes a number / quoted text / date). /// - `ForceProse(catalog_key)` — force this prose at the /// catalog key regardless of candidates. Used today by /// `NewName` ident slots ("Type a name, then `(`"). 
/// - `IntroProse(catalog_key)` — show prose at slot entry to /// *introduce* a position whose first-class candidate is an /// ident slot (which would be invisible in a pure-candidate /// render) but whose keyword alternatives are also available. /// Unlike `ProseOnly`, Tab candidates remain available — the /// user still cycles through the keyword set. Used at the /// advanced-mode CREATE TABLE element slot, where the /// column-name `NewName` slot would otherwise be invisible /// alongside the table-level constraint keywords (issue #4). /// - `SuppressProse` — show only candidates; never fall back /// to a prose ladder. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HintMode { Default, ForceProse(&'static str), ProseOnly(&'static str), IntroProse(&'static str), SuppressProse, } /// A keyword node literal. /// /// The `aliases` slice is empty for the app-lifecycle commands /// today; the round-5 `q` removal remains intentional, and any /// future re-introduction would be a one-line `aliases: &["q"]` /// addition (ADR-0024 §aliases). #[derive(Debug, Clone, Copy)] pub struct Word { pub primary: &'static str, pub aliases: &'static [&'static str], pub highlight_override: Option, } impl Word { pub const fn keyword(primary: &'static str) -> Self { Self { primary, aliases: &[], highlight_override: None, } } /// A keyword that highlights as a column **type** rather than a /// clause keyword (ADR-0022 Amendment 4). The one user today is /// the two-word `double precision` SQL alias (ADR-0035 §3): it /// is matched as keyword tokens, not an `IdentSource::Types` /// `Ident`, so without this it would render keyword-coloured /// while its single-word synonyms (`float`, `real`) render as /// types. pub const fn type_keyword(primary: &'static str) -> Self { Self { primary, aliases: &[], highlight_override: Some(HighlightClass::Type), } } /// Case-insensitive match against the primary or any alias. 
pub fn matches(&self, candidate: &str) -> bool { if candidate.eq_ignore_ascii_case(self.primary) { return true; } self.aliases .iter() .any(|a| candidate.eq_ignore_ascii_case(a)) } } /// Content-level validator for an `Ident` slot. Returns the /// catalog key + arg list to surface as `WalkOutcome::ValidationFailed` /// on mismatch. pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>; /// Content-level validator for a `NumberLit` slot. Same shape /// as `IdentValidator`; surfaces as `ValidationFailed` on Err. pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ValidationError { pub message_key: &'static str, pub args: Vec<(&'static str, String)>, } /// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy). /// /// Some variants carry data (`Word` literal, `Punct` char, /// `Ident` source/role/validator); combinators reference their /// children through `&'static [Node]` / `&'static Node` slices, /// which lets the entire registry live in `const`s — no runtime /// allocation, every command is one declaration block in its /// grammar file. pub enum Node { /// A keyword token. Case-insensitive match (ADR-0009). Word(Word), /// A single punctuation character. The exact set comes from /// the migrated commands' usage — Phase A only needs none of /// these (app-lifecycle commands are pure keyword + ident + /// path), but the variant is declared for Phase B+ use. #[allow(dead_code)] Punct(char), /// An identifier slot. `source` drives completion candidates; /// `role` names the slot for error wording / completion-engine /// dispatch; `validator` runs after a successful identifier- /// shape match and may reject the value with a catalog-driven /// message. /// /// `writes_table` (Phase D): when `true` and `source == /// Tables`, the walker writes the matched ident to /// `WalkContext::current_table` and resolves /// `current_table_columns` from the schema cache (if any). 
/// `writes_column` (Phase D): when `true` and `source == /// Columns`, the walker writes the matched ident's /// `TableColumn` to `WalkContext::current_column` (resolved /// against `current_table_columns`). Subsequent value slots /// dispatch on the column's type. Ident { source: IdentSource, role: &'static str, validator: Option, #[allow(dead_code)] highlight_override: Option, writes_table: bool, writes_column: bool, /// Append the matched text to /// `WalkContext::user_listed_columns` (Phase D). Used by /// the `insert into (col1, col2, …)` column-list /// idents — when the walker sees these, the form is /// "Form A" and the inner values slot list mirrors the /// user's explicit selection instead of the /// auto-filtered schema default. writes_user_listed_column: bool, /// Set the matched text as the alias of the most- /// recently-pushed `TableBinding` on the top /// `ScopeFrame`'s `from_scope` (ADR-0032 §10.1). Used by /// the `[ AS ] alias` slot on `from_clause` / /// `join_clause` table sources in `sql_select.rs`; a /// no-op on `IdentSource::NewName` slots that do not /// follow a table-name push, or when the top frame's /// `from_scope` is empty. writes_table_alias: bool, /// Push a placeholder `CteBinding` (name only, empty /// columns) onto the top `ScopeFrame`'s `cte_bindings` /// (ADR-0032 §10.3 stage 1). Used by the CTE-name slot /// in `with_clause`; the placeholder is rewritten with /// derived output columns at the body's frame exit /// (§10.3 stage 2; harvest derivation rules pending). writes_cte_name: bool, /// Append the matched text to the top `ScopeFrame`'s /// `projection_aliases` (ADR-0032 §10.4). Used by the /// projection-list alias slot (both the bare and `AS` /// forms) so `ORDER BY` completion can offer aliases as /// candidates. writes_projection_alias: bool, }, /// A number literal. The optional `validator` runs against /// the matched text (used by Phase D value slots to enforce /// per-type integer/decimal rules). 
NumberLit { validator: Option, }, /// A literal byte sequence at this position — matches /// bytes verbatim (whitespace-skipped) with a lookahead so /// `1` doesn't half-match `12` and `n` doesn't half-match /// `name`. Used by Phase B's `add 1:n …` for the literal /// `1`. Surfaces in the expected-set as `` `` ``, /// matching chumsky's labelled-token rendering. Literal(&'static str), #[allow(dead_code)] StringLit, #[allow(dead_code)] BlobLit, /// A `--name` flag. Walker matches the flag shape and /// asserts the name matches the expected literal. Flag(&'static str), /// A non-whitespace run consumed verbatim from source. Per /// ADR-0024's path-bearing-commands UX change, paths with /// spaces use the quoted form (`StringLit`); `BarePath` /// terminates at the first whitespace byte. BarePath, /// Try each child in order. The first one that matches a /// non-empty prefix wins; if none match, the choice fails /// with the union of expectations. Choice(&'static [Self]), /// All children must match in order. Whitespace is implicitly /// allowed between siblings. Seq(&'static [Self]), /// The inner node may match or be skipped. Optional(&'static Self), /// `inner` matches at least `min` times, separated by /// `separator` (if any). Phase C+ uses this for `with pk` /// column lists. #[allow(dead_code)] Repeated { inner: &'static Self, separator: Option<&'static Self>, min: usize, }, /// Walks the referenced `&'static Node` once, mandatory /// (ADR-0026 §2). The reference indirection is what lets a /// named `static` grammar fragment appear inside its own /// subtree: a `Seq` / `Choice` embeds its children by value /// and so cannot close a cycle, but a `&'static Node` /// reference can point back at an enclosing fragment. This /// is the mechanism the stratified WHERE-expression grammar /// recurses through — the `( or_expr )` branch and the /// `not_expr` self-reference. 
/// /// The walker counts active `Subgrammar` frames in /// `WalkContext::subgrammar_depth` and refuses past /// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically /// nested input (`((((…))))`) fails with a friendly error /// rather than overflowing the parser stack. /// /// The static counterpart of `DynamicSubgrammar`: that one /// builds a fresh node from the `WalkContext` at walk time; /// this one references a fixed fragment already in the /// grammar tree. Subgrammar(&'static Self), /// Like `Subgrammar`, but the walker additionally **pushes a /// new `ScopeFrame`** onto `WalkContext::from_scope_stack` on /// entry and pops it on exit (ADR-0032 §10.2). The /// `subgrammar_depth` counter increments uniformly across /// both variants — the depth cap applies the same way — so /// this variant introduces no new walker capability for /// grammar recursion; it only layers lexical-scope discipline /// on top. /// /// Used at every SQL `SELECT` recursion point: subqueries /// in `sql_expr.rs` (scalar `(SELECT …)`, `IN (SELECT …)`, /// `[NOT] EXISTS (SELECT …)`) and CTE bodies in /// `sql_select.rs` reference the compound-SELECT through /// `Node::ScopedSubgrammar(&SQL_SELECT_COMPOUND)`. DSL `Expr` /// recursion (ADR-0026) and the `sql_expr.rs` precedence- /// ladder recursion (ADR-0031) keep using the plain /// `Subgrammar` variant and never push a scope. ScopedSubgrammar(&'static Self), /// Resolves at walk time using the active `WalkContext`. /// Phase D+ uses this for `column_value_list`. The factory /// is pure in `ctx`, so the walker memoizes the resolution /// (one leak per distinct schema shape). #[allow(dead_code)] DynamicSubgrammar(fn(&WalkContext) -> Self), /// Like `DynamicSubgrammar` but the factory also sees the /// source and the current byte position, so it can look /// ahead. 
Used by the insert first-paren to discriminate /// Form A (`(cols) values (...)`) from Form C (`(vals)`) /// before walking the contents — Form C then routes through /// the typed `column_value_list` (ADR-0024 §Phase D, Form C /// type-awareness). Not memoized: the output depends on the /// source, not just `ctx`. Lookahead(fn(&WalkContext, &str, usize) -> Self), /// Zero-width node that *establishes the active column* for the /// value slot that follows it (ADR-0036 Phase 3b). Matches the /// empty string and, as a side effect, sets /// `WalkContext::current_column` to the referenced column and /// `pending_value_column` to its name — exactly as an /// `Ident { writes_column: true }` does, but without consuming a /// column identifier from the input. /// /// This is the primitive that gives `INSERT … VALUES (…)` /// positions a per-position column identity: the positions are /// positional (no per-position column ident to write /// `current_column`), so a `DynamicSubgrammar` factory /// (`sql_insert::sql_value_list`) emits `SetColumn(colᵢ)` before /// each value position, then the shared boundary-aware `SET_VALUE` /// slot routes a lone literal to that column's typed slot and any /// expression to `sql_expr`. The referenced `TableColumn` is /// leaked by the factory (bounded by the column count, like the /// `DynamicSubgrammar` `Box::leak`). SetColumn(&'static crate::completion::TableColumn), /// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots). /// /// Walks `inner` to consume the literal but records the /// column type in `WalkContext::pending_value_type` so the /// hint resolver can emit per-type catalog prose ("Type an /// integer", "Type a date as 'YYYY-MM-DD'", …) at empty /// prefix at this slot. When `column_name` is `Some`, the /// walker also writes `pending_value_column` so the hint /// can be rendered with the actual column name (e.g. "for /// `Email`: Type a quoted string …") rather than a generic /// type hint. 
The recorded values clear on a successful /// inner match — so positions BETWEEN typed slots /// (`insert into T values (1` mid-input) don't carry stale /// hint state. TypedValueSlot { ty: crate::dsl::types::Type, column_name: Option<&'static str>, inner: &'static Self, }, /// Annotates `inner` with a hint-panel `HintMode` (ADR-0024 /// §HintMode-per-node). On entry the walker records `mode` /// in `WalkContext::pending_hint_mode`; on a successful /// inner match the record clears (so positions past the /// slot don't carry stale hint state). Transparent to /// matching, highlighting and the expected-set otherwise — /// it walks `inner` and returns its result verbatim. /// /// This is the node-attached replacement for the hint /// resolver's earlier signature-matching: the grammar tree /// declares the hint mode at the slot, the walker /// propagates it, the resolver reads it. Used by the /// value-literal fallback slot (`ProseOnly`) and `NewName` /// ident slots (`ForceProse`). Hinted { mode: HintMode, inner: &'static Self, }, } /// Which mode group a registered command belongs to (ADR-0030 /// §2, ADR-0033 Amendment 1). /// /// Category is a *dispatcher* concern, not intrinsic to a /// command's grammar, so it is attached at the `REGISTRY` /// registration site rather than as a field on every /// `CommandNode`. The dispatcher (`walker::walk`) uses it to /// route a given input by the active input mode: /// /// - `Simple` commands are the DSL surface; available in both /// simple and advanced mode. /// - `Advanced` commands are the SQL surface; available only in /// advanced mode. In simple mode an advanced-only entry word /// yields the "this is SQL" hint (`advanced_mode.sql_in_simple`). /// /// A *shared* entry word (e.g. `insert`, from Phase 3 sub-phase /// 3b on) carries a node in *both* groups — a `Simple` DSL node /// and an `Advanced` SQL node. 
The dispatcher tries the SQL node /// first in advanced mode and falls back to the DSL node when the /// SQL shape does not match. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CommandCategory { Simple, Advanced, } /// Top-level entry record. One per command. The `entry` keyword /// alone identifies which command the walker dispatches to; /// `shape` is what follows the entry word. pub struct CommandNode { pub entry: Word, pub shape: Node, /// Builds the typed `Command` AST from the matched terminal /// path. May fail with a `ValidationError` for content-level /// rejections that are easier to express imperatively than /// as a per-node validator (Phase A: none — every app /// command's ast_builder is infallible). /// /// `source` is the full input line being parsed. Most builders /// reconstruct the `Command` from the matched `MatchedPath` /// alone and ignore it; SQL builders whose `Command` carries /// the validated SQL text (ADR-0030 §4/§6, ADR-0031 §2) read /// it. pub ast_builder: fn(&MatchedPath, &str) -> Result, /// Catalog key (`help.`) for this command's in-app /// `help` entry. Consumed by `App::note_help`, which /// iterates the REGISTRY and translates each `help_id` — /// so a newly-registered command appears in `help` /// automatically (ADR-0024 §help_id). pub help_id: Option<&'static str>, /// Catalog keys under `parse.usage.*` to render in the /// "usage:" block when a parse error fires for this command /// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families /// like `drop` (drop table / drop column / drop relationship) /// carry every variant so the user sees the full family on a /// generic-entry-word failure. pub usage_ids: &'static [&'static str], } /// Look up the usage catalog keys for the entry word at the start /// of `source`. /// /// Case-insensitive, whitespace-tolerant. Replaces /// `dsl::usage::matched_entry` — the walker is the single source /// of truth for which command a given input belongs to. 
/// /// Returns the canonical (primary-form) entry literal and the /// `usage_ids` list, or `None` if no entry word matches. #[must_use] pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, Vec<&'static str>)> { usage_keys_for_input_in_mode(source, crate::mode::Mode::Simple) } /// Mode-aware variant of [`usage_keys_for_input`] (ADR-0042 G3). /// /// A shared entry word (`create`, `drop`, `insert`, …) registers a /// `Simple` DSL node *and* one or more `Advanced` SQL nodes. The /// usage block must reflect the surface the user is actually typing: /// the SQL forms in `Advanced` mode, the DSL forms in `Simple` mode /// — otherwise advanced-mode `create` shows the DSL `create table … /// with pk …` template, which is not valid SQL. /// /// Selection prefers candidates whose [`CommandCategory`] matches /// the mode; if the entry word has none in that category (an /// app-lifecycle command is `Simple`-only yet usable in both modes), /// every candidate is used. The returned keys are the union of the /// selected nodes' `usage_ids`, de-duplicated in registry order — so /// advanced `create` shows both `sql_create_table` and /// `sql_create_index`. 
#[must_use]
pub fn usage_keys_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<(&'static str, Vec<&'static str>)> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    // One registry hit: (REGISTRY index, node, dispatch category).
    type Candidate = (usize, &'static CommandNode, CommandCategory);

    // The entry word is the first identifier after leading whitespace.
    let (word_start, word_end) = consume_ident(source, skip_whitespace(source, 0))?;
    let all = commands_for_entry_word(&source[word_start..word_end]);
    if all.is_empty() {
        return None;
    }

    // Advanced mode: every candidate form is reachable — the SQL
    // nodes are primary and the DSL nodes remain valid via fallback —
    // so keep them all, SQL (Advanced) first, and never hide input
    // that works. Any other mode: only the DSL (Simple) forms; the
    // SQL-only forms hit the "this is SQL" rail there. (ADR-0042 G3.)
    let preferred: Vec<Candidate> = if mode == crate::mode::Mode::Advanced {
        let (mut sql_first, dsl): (Vec<Candidate>, Vec<Candidate>) = all
            .iter()
            .copied()
            .partition(|&(_, _, category)| category == CommandCategory::Advanced);
        sql_first.extend(dsl);
        sql_first
    } else {
        all.iter()
            .copied()
            .filter(|&(_, _, category)| category == CommandCategory::Simple)
            .collect()
    };

    // Degenerate guard: an advanced-only word outside advanced mode
    // leaves `preferred` empty; use every candidate so a usage block
    // still renders rather than the available-commands fallback.
    let chosen = if preferred.is_empty() { all } else { preferred };

    // Union of the chosen nodes' usage keys, first occurrence wins.
    let mut keys: Vec<&'static str> = Vec::new();
    for &(_, node, _) in &chosen {
        for &key in node.usage_ids {
            if !keys.contains(&key) {
                keys.push(key);
            }
        }
    }
    if keys.is_empty() {
        return None;
    }

    // `chosen` is non-empty here: it is either `all` (checked above)
    // or a non-empty `preferred`.
    let &(_, leading_node, _) = chosen.first()?;
    Some((leading_node.entry.primary, keys))
}

/// Resolve `source` to the one usage template that best fits it,
/// if that can be decided.
///
/// Single-form commands yield their only usage key. Multi-form
/// commands (`add`, `drop`) are disambiguated by the form word that
/// follows the entry keyword, so a parse error in `add index …`
/// reports the `add index` usage and not the first-listed
/// `add column`. A bare multi-form entry word (`add` with nothing
/// after it) yields `None`: no form has been chosen, and the caller
/// decides whether to show the whole family or nothing.
///
/// Equivalent to [`usage_key_for_input_in_mode`] with `Mode::Simple`.
#[must_use]
pub fn usage_key_for_input(source: &str) -> Option<&'static str> {
    usage_key_for_input_in_mode(source, crate::mode::Mode::Simple)
}

/// Mode-aware variant of [`usage_key_for_input`] (ADR-0042 G3) —
/// picks the single most relevant usage key out of the key set that
/// [`usage_keys_for_input_in_mode`] selects for `mode`.
#[must_use]
pub fn usage_key_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<&'static str> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    let (_, keys) = usage_keys_for_input_in_mode(source, mode)?;
    match keys.as_slice() {
        [] => return None,
        // Single-form command: nothing to disambiguate.
        [only] => return Some(*only),
        _ => {}
    }

    // First key (in selection order) whose text ends with `suffix`.
    let first_with_suffix =
        |suffix: &str| keys.iter().copied().find(|key| key.ends_with(suffix));

    // Multi-form: the form is named by whatever follows the entry
    // keyword, so step over the keyword and the whitespace after it.
    let (_, entry_end) = consume_ident(source, skip_whitespace(source, 0))?;
    let form_start = skip_whitespace(source, entry_end);
    let tail = &source[form_start..];

    // `add 1:n relationship` is the form that opens with a digit.
    if tail.as_bytes().first().is_some_and(u8::is_ascii_digit) {
        return first_with_suffix("relationship");
    }
    // `create m:n relationship` (ADR-0045) opens with the letter-led
    // `m:n`, which the digit test misses, and its usage key ends in
    // `m2n` rather than `relationship`.
    if tail
        .get(..3)
        .is_some_and(|lead| lead.eq_ignore_ascii_case("m:n"))
    {
        return first_with_suffix("m2n");
    }

    // Everything else names its form with an identifier — `column`,
    // `index`, `table`, `relationship` — compared, lower-cased,
    // against the tail of each usage key.
    let (form_from, form_to) = consume_ident(source, form_start)?;
    let form = source[form_from..form_to].to_ascii_lowercase();
    first_with_suffix(&form)
}

/// The distinct command-entry words of the registry (primary
/// literals only), in ascending order. Replaces
/// `dsl::usage::entry_keywords_alphabetised`, which read the same
/// data through the legacy `usage::REGISTRY`.
#[must_use]
pub fn entry_words_alphabetised() -> Vec<&'static str> {
    // An ordered set sorts and de-duplicates shared entry words
    // (registered once per category) in a single pass.
    let distinct: std::collections::BTreeSet<&'static str> = REGISTRY
        .iter()
        .map(|(command, _)| command.entry.primary)
        .collect();
    distinct.into_iter().collect()
}

/// The active grammar registry, each command paired with its
/// dispatch [`CommandCategory`] (ADR-0033 Amendment 1).
///
/// Migrated commands route through this; everything else falls
/// through to the chumsky path in `dsl::parser`. `Advanced`
/// commands (`select`, `with`, and — from sub-phase 3b — the SQL
/// `insert` / `update` / `delete` nodes) are the SQL surface;
/// the rest are the DSL surface (`Simple`). A shared entry word
/// appears more than once (a `Simple` node plus one or more
/// `Advanced` nodes); the dispatcher selects by mode.
pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[ (&app::QUIT, CommandCategory::Simple), (&app::HELP, CommandCategory::Simple), (&app::REBUILD, CommandCategory::Simple), (&app::SAVE, CommandCategory::Simple), (&app::NEW, CommandCategory::Simple), (&app::LOAD, CommandCategory::Simple), (&app::EXPORT, CommandCategory::Simple), (&app::IMPORT, CommandCategory::Simple), (&app::MODE, CommandCategory::Simple), (&app::MESSAGES, CommandCategory::Simple), (&app::UNDO, CommandCategory::Simple), (&app::REDO, CommandCategory::Simple), (&app::COPY, CommandCategory::Simple), (&ddl::DROP, CommandCategory::Simple), (&ddl::ADD, CommandCategory::Simple), (&ddl::RENAME, CommandCategory::Simple), (&ddl::CHANGE, CommandCategory::Simple), (&ddl::CREATE, CommandCategory::Simple), (&ddl::CREATE_M2N, CommandCategory::Simple), (&data::SHOW, CommandCategory::Simple), (&data::SEED, CommandCategory::Simple), (&data::INSERT, CommandCategory::Simple), (&data::UPDATE, CommandCategory::Simple), (&data::DELETE, CommandCategory::Simple), (&data::REPLAY, CommandCategory::Simple), (&data::EXPLAIN, CommandCategory::Simple), (&data::SELECT, CommandCategory::Advanced), (&data::WITH, CommandCategory::Advanced), // Shared entry words (sub-phase 3j, ADR-0033 §2 / Amendment 1): // `insert` / `update` / `delete` each appear twice — the // `Simple` DSL node above and this `Advanced` SQL node. The // dispatcher tries the SQL node first in Advanced mode and falls // back to the DSL node when the SQL shape does not match. (&data::SQL_INSERT, CommandCategory::Advanced), (&data::SQL_UPDATE, CommandCategory::Advanced), (&data::SQL_DELETE, CommandCategory::Advanced), // Shared entry word `explain` (ADR-0039): the `Simple` DSL // `data::EXPLAIN` (above) wraps `show data` / `update` / `delete`; // this `Advanced` node wraps the SQL `select` / `with` / `insert` // / `update` / `delete`. 
SQL-first / DSL-fallback in advanced mode // (so `explain show data …` and DSL-only `--all-rows` still reach // the DSL node); DSL-only in simple mode. (&data::EXPLAIN_SQL, CommandCategory::Advanced), // Shared entry word `create` (ADR-0035 §2): the simple // `ddl::CREATE` (above) and these advanced SQL nodes. The // dispatcher tries the advanced candidates first in advanced mode // and falls back to the `create table … with pk …` DSL node when no // SQL shape matches — the `insert` precedent. 4d adds // SQL_CREATE_INDEX, so `create` now has *two* advanced nodes; // `decide` tries both (`create table …` → SQL_CREATE_TABLE, // `create [unique] index …` → SQL_CREATE_INDEX). (&ddl::SQL_CREATE_TABLE, CommandCategory::Advanced), (&ddl::SQL_CREATE_INDEX, CommandCategory::Advanced), // `alter` is a new advanced-*only* DDL entry word (ADR-0035 §2/§4e), // like `select`/`with` — no simple node, so `is_advanced_only` is // true and simple-mode `alter …` gets the "this is SQL" hint. (&ddl::SQL_ALTER_TABLE, CommandCategory::Advanced), // Shared `drop` entry word: `ddl::DROP` (simple) and these advanced // SQL nodes. SQL-first in advanced mode; `drop table [if exists] T` // → SQL_DROP_TABLE, `drop index [if exists] ` → SQL_DROP_INDEX // (4d — `drop` now has *two* advanced nodes; the dispatcher's // `decide` tries all advanced candidates). `drop column`/`drop // relationship`/`drop index on T(…)` fall back to the simple `drop` // node. (&ddl::SQL_DROP_TABLE, CommandCategory::Advanced), (&ddl::SQL_DROP_INDEX, CommandCategory::Advanced), ]; /// Whether `entry` names an advanced-mode-only command (ADR-0030 /// §2, ADR-0033 Amendment 1). Case-insensitive, matching /// keyword-matching elsewhere. /// /// True when the entry word is registered and *every* candidate /// for it is `Advanced` — i.e. there is no DSL (`Simple`) command /// to fall back to. 
A shared entry word (a Simple DSL node plus /// an Advanced SQL node) is therefore *not* advanced-only: it is /// available in simple mode as DSL. #[must_use] pub fn is_advanced_only(entry: &str) -> bool { let mut found = false; for (c, category) in REGISTRY { if c.entry.matches(entry) { found = true; if *category == CommandCategory::Simple { return false; } } } found } /// Look up the first `CommandNode` registered for an entry word, /// case-insensitively. Returns the index into `REGISTRY` so /// callers can use it as a `WalkOutcome::Match { command_idx }`. /// /// For shared entry words this returns whichever node is listed /// first in `REGISTRY`; callers that must distinguish the Simple /// from the Advanced candidate use [`commands_for_entry_word`]. pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> { REGISTRY .iter() .enumerate() .find(|(_, (c, _))| c.entry.matches(word)) .map(|(i, (c, _))| (i, *c)) } /// Every `CommandNode` registered for an entry word, with its /// `REGISTRY` index and [`CommandCategory`], case-insensitively /// (ADR-0033 Amendment 1). /// /// A non-shared entry word returns a single candidate; a shared /// entry word (`insert` / `update` / `delete` from sub-phase 3b) /// returns its `Simple` DSL node and `Advanced` SQL node. The /// dispatcher picks among them by the active input mode. #[must_use] pub fn commands_for_entry_word( word: &str, ) -> Vec<(usize, &'static CommandNode, CommandCategory)> { REGISTRY .iter() .enumerate() .filter(|(_, (c, _))| c.entry.matches(word)) .map(|(i, (c, category))| (i, *c, *category)) .collect() } #[cfg(test)] mod usage_key_tests { use super::usage_key_for_input; /// Every multi-form command resolves a typed form to its /// own usage key — a parse error in one form must never /// show another form's usage (the handoff-18 `151ed08` fix; /// regression-locked here, including the `add 1:n /// relationship` digit-led form). 
#[test] fn multi_form_commands_resolve_to_the_typed_form() { let cases = [ ("add column to T: c (int)", "parse.usage.add_column"), ("add index on T (c)", "parse.usage.add_index"), ( "add constraint unique to T.c", "parse.usage.add_constraint", ), ( "drop constraint check from T.c", "parse.usage.drop_constraint", ), ( "add 1:n relationship from A.x to B.y", "parse.usage.add_relationship", ), // Trailing junk must not change the resolved form. ( "add 1:n relationship from A.x to B.y --", "parse.usage.add_relationship", ), ("drop table T", "parse.usage.drop_table"), ("drop column from table T: c", "parse.usage.drop_column"), ("drop index i", "parse.usage.drop_index"), ( "drop relationship r", "parse.usage.drop_relationship", ), ("show data T", "parse.usage.show_data"), ("show table T", "parse.usage.show_table"), // `create` is multi-form (table vs m:n, ADR-0045): each typed // form resolves to its own usage key. ("create table T with pk id(int)", "parse.usage.create_table"), ( "create m:n relationship from A to B", "parse.usage.create_m2n", ), ]; for (input, expected) in cases { assert_eq!( usage_key_for_input(input), Some(expected), "usage key for {input:?}", ); } } #[test] fn a_bare_multi_form_entry_word_resolves_to_no_single_form() { // `add` / `drop` alone — no form chosen; the caller // shows the whole family rather than guessing. assert_eq!(usage_key_for_input("add "), None); assert_eq!(usage_key_for_input("drop "), None); } #[test] fn a_single_form_command_resolves_to_its_one_key() { assert_eq!( usage_key_for_input("create table T with pk"), Some("parse.usage.create_table"), ); } #[test] fn no_two_registered_commands_share_a_help_id() { // `note_help` emits one help block per `help_id: Some(_)` // with no dedup, so a duplicate help_id prints the same // command twice in `help`. Shared-entry-word `Advanced` // nodes (SQL_INSERT, …, EXPLAIN_SQL) therefore carry // `help_id: None` and defer to their `Simple` sibling. 
let mut seen = std::collections::HashSet::new(); for (command, _category) in super::REGISTRY { if let Some(id) = command.help_id { assert!( seen.insert(id), "duplicate help_id `{id}` in REGISTRY would print twice in `help`", ); } } } }