//! Unified declarative grammar tree (ADR-0024). //! //! The grammar tree is the single source of truth for the DSL — //! parsing, completion, syntax highlighting, parse-error usage //! rendering, and hint-panel content all derive from this same //! data structure (ADR-0023 institutional context). //! //! Phase A scope (ADR-0024 §migration): the framework lands //! alongside the eleven app-lifecycle commands (quit, help, //! rebuild, save, save as, new, load, export, import, mode, //! messages). The chumsky parser still owns every other //! command; the router in `dsl::parser` decides which path to //! take per first-token. Schema-aware nodes (`IdentSource::Tables` //! and friends) and `DynamicSubgrammar` are declared here but //! not exercised until Phase B-D. //! //! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one //! pragmatic addition for Phase A: each `Ident` carries an //! optional content validator, used today by the `mode ` //! / `messages ` slots to surface friendly catalog //! wording (`mode.unknown`, `messages.unknown`) on out-of-set //! identifiers. The same hook generalises naturally to typed //! value slots in Phase D. pub mod app; pub mod data; pub mod ddl; pub mod shared; use crate::dsl::command::Command; use crate::dsl::walker::context::WalkContext; use crate::dsl::walker::outcome::MatchedPath; /// Highlight class assigned to a matched terminal. /// /// Recorded on the `WalkResult::per_byte_class` slice and surfaced /// by `walker::highlight_runs` to the input/echo-line renderers /// (ADR-0024 §architecture). #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HighlightClass { Keyword, Identifier, Number, String, Punct, Flag, Error, } /// Where an `Ident` slot's candidates come from at completion time. /// /// Drives both the walker's `Expectation::Ident { source }` (which /// the parse-error bridge maps to a human label) and the /// `SchemaCache` lookup the completion engine uses for Tab /// candidates. The `Free` and `NewName` variants do not query the /// schema — `NewName` is for slots where the user invents the /// identifier, `Free` is the catch-all branch in `mode`/`messages` /// that funnels unknown values into a friendly validator. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum IdentSource { /// User invents this name. No schema lookup; no completion /// candidates beyond the identifier shape itself. NewName, /// Existing table name. Tables, /// Existing column in the current table. Columns, /// Existing relationship name. Relationships, /// Closed set from `Type::all()` — surfaced by the walker's /// content validator on column-type slots; not user-listable /// from the schema. Types, /// Any identifier shape; used by synthetic catch-all branches /// (e.g., the unknown-value branch of `mode `). Free, } impl IdentSource { /// Whether this source can be completed from the schema /// cache (i.e. the candidate list comes from existing /// entities rather than user invention or a closed set). #[must_use] pub const fn completes_from_schema(self) -> bool { matches!(self, Self::Tables | Self::Columns | Self::Relationships) } /// Human-facing label used in parse-error wording /// ("expected table name") and in the completion engine's /// round-trip from a textual `expected` entry back to a /// source kind. `Free` and `Types` collapse to "identifier" /// and "type" respectively. #[must_use] pub const fn expected_label(self) -> &'static str { match self { Self::NewName | Self::Free => "identifier", Self::Tables => "table name", Self::Columns => "column name", Self::Relationships => "relationship name", Self::Types => "type", } } /// Inverse of `expected_label`. Used by the completion engine /// to recover the source kind from the `ParseError::Invalid:: /// expected` strings the walker bridge produces. `"identifier"` /// maps to `NewName` (the only writeable label that uses that /// wording in production grammars today). #[must_use] pub fn from_expected_label(label: &str) -> Option { match label { "identifier" => Some(Self::NewName), "table name" => Some(Self::Tables), "column name" => Some(Self::Columns), "relationship name" => Some(Self::Relationships), "type" => Some(Self::Types), _ => None, } } } /// Hint-panel mode for an expected node (ADR-0024 §HintMode-per-node). /// /// `Default` (today's behaviour) shows candidates if any, falls /// back to a prose ladder otherwise. The other variants /// override at slot positions where the candidate list would be /// actively misleading or where the user benefits from format /// guidance: /// /// - `ProseOnly(catalog_key)` — show only prose from the /// catalog; suppress Tab candidates. Used today by the /// value-literal slot at empty prefix (the "null/true/false" /// candidate trio is misleading at a slot that more often /// takes a number / quoted text / date). /// - `ForceProse(catalog_key)` — force this prose at the /// catalog key regardless of candidates. Used today by /// `NewName` ident slots ("Type a name, then `(`"). /// - `SuppressProse` — show only candidates; never fall back /// to a prose ladder. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum HintMode { Default, ForceProse(&'static str), ProseOnly(&'static str), SuppressProse, } /// A keyword node literal. /// /// The `aliases` slice is empty for the app-lifecycle commands /// today; the round-5 `q` removal remains intentional, and any /// future re-introduction would be a one-line `aliases: &["q"]` /// addition (ADR-0024 §aliases). #[derive(Debug, Clone, Copy)] pub struct Word { pub primary: &'static str, pub aliases: &'static [&'static str], pub highlight_override: Option, } impl Word { pub const fn keyword(primary: &'static str) -> Self { Self { primary, aliases: &[], highlight_override: None, } } /// Case-insensitive match against the primary or any alias. pub fn matches(&self, candidate: &str) -> bool { if candidate.eq_ignore_ascii_case(self.primary) { return true; } self.aliases .iter() .any(|a| candidate.eq_ignore_ascii_case(a)) } } /// Content-level validator for an `Ident` slot. Returns the /// catalog key + arg list to surface as `WalkOutcome::ValidationFailed` /// on mismatch. pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>; /// Content-level validator for a `NumberLit` slot. Same shape /// as `IdentValidator`; surfaces as `ValidationFailed` on Err. pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ValidationError { pub message_key: &'static str, pub args: Vec<(&'static str, String)>, } /// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy). /// /// Some variants carry data (`Word` literal, `Punct` char, /// `Ident` source/role/validator); combinators reference their /// children through `&'static [Node]` / `&'static Node` slices, /// which lets the entire registry live in `const`s — no runtime /// allocation, every command is one declaration block in its /// grammar file. pub enum Node { /// A keyword token. Case-insensitive match (ADR-0009). Word(Word), /// A single punctuation character. The exact set comes from /// the migrated commands' usage — Phase A only needs none of /// these (app-lifecycle commands are pure keyword + ident + /// path), but the variant is declared for Phase B+ use. #[allow(dead_code)] Punct(char), /// An identifier slot. `source` drives completion candidates; /// `role` names the slot for error wording / completion-engine /// dispatch; `validator` runs after a successful identifier- /// shape match and may reject the value with a catalog-driven /// message. /// /// `writes_table` (Phase D): when `true` and `source == /// Tables`, the walker writes the matched ident to /// `WalkContext::current_table` and resolves /// `current_table_columns` from the schema cache (if any). /// `writes_column` (Phase D): when `true` and `source == /// Columns`, the walker writes the matched ident's /// `TableColumn` to `WalkContext::current_column` (resolved /// against `current_table_columns`). Subsequent value slots /// dispatch on the column's type. Ident { source: IdentSource, role: &'static str, validator: Option, #[allow(dead_code)] highlight_override: Option, writes_table: bool, writes_column: bool, /// Append the matched text to /// `WalkContext::user_listed_columns` (Phase D). Used by /// the `insert into (col1, col2, …)` column-list /// idents — when the walker sees these, the form is /// "Form A" and the inner values slot list mirrors the /// user's explicit selection instead of the /// auto-filtered schema default. writes_user_listed_column: bool, }, /// A number literal. The optional `validator` runs against /// the matched text (used by Phase D value slots to enforce /// per-type integer/decimal rules). NumberLit { validator: Option, }, /// A literal byte sequence at this position — matches /// bytes verbatim (whitespace-skipped) with a lookahead so /// `1` doesn't half-match `12` and `n` doesn't half-match /// `name`. Used by Phase B's `add 1:n …` for the literal /// `1`. Surfaces in the expected-set as `` `` ``, /// matching chumsky's labelled-token rendering. Literal(&'static str), #[allow(dead_code)] StringLit, #[allow(dead_code)] BlobLit, /// A `--name` flag. Walker matches the flag shape and /// asserts the name matches the expected literal. Flag(&'static str), /// A non-whitespace run consumed verbatim from source. Per /// ADR-0024's path-bearing-commands UX change, paths with /// spaces use the quoted form (`StringLit`); `BarePath` /// terminates at the first whitespace byte. BarePath, /// Try each child in order. The first one that matches a /// non-empty prefix wins; if none match, the choice fails /// with the union of expectations. Choice(&'static [Self]), /// All children must match in order. Whitespace is implicitly /// allowed between siblings. Seq(&'static [Self]), /// The inner node may match or be skipped. Optional(&'static Self), /// `inner` matches at least `min` times, separated by /// `separator` (if any). Phase C+ uses this for `with pk` /// column lists. #[allow(dead_code)] Repeated { inner: &'static Self, separator: Option<&'static Self>, min: usize, }, /// Resolves at walk time using the active `WalkContext`. /// Phase D+ uses this for `column_value_list`. #[allow(dead_code)] DynamicSubgrammar(fn(&WalkContext) -> Self), /// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots). /// /// Walks `inner` to consume the literal but records the /// column type in `WalkContext::pending_value_type` so the /// hint resolver can emit per-type catalog prose ("Type an /// integer", "Type a date as 'YYYY-MM-DD'", …) at empty /// prefix at this slot. When `column_name` is `Some`, the /// walker also writes `pending_value_column` so the hint /// can be rendered with the actual column name (e.g. "for /// `Email`: Type a quoted string …") rather than a generic /// type hint. The recorded values clear on a successful /// inner match — so positions BETWEEN typed slots /// (`insert into T values (1` mid-input) don't carry stale /// hint state. TypedValueSlot { ty: crate::dsl::types::Type, column_name: Option<&'static str>, inner: &'static Self, }, } /// Top-level entry record. One per command. The `entry` keyword /// alone identifies which command the walker dispatches to; /// `shape` is what follows the entry word. pub struct CommandNode { pub entry: Word, pub shape: Node, /// Builds the typed `Command` AST from the matched terminal /// path. May fail with a `ValidationError` for content-level /// rejections that are easier to express imperatively than /// as a per-node validator (Phase A: none — every app /// command's ast_builder is infallible). pub ast_builder: fn(&MatchedPath) -> Result, #[allow(dead_code)] pub help_id: Option<&'static str>, /// Catalog keys under `parse.usage.*` to render in the /// "usage:" block when a parse error fires for this command /// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families /// like `drop` (drop table / drop column / drop relationship) /// carry every variant so the user sees the full family on a /// generic-entry-word failure. pub usage_ids: &'static [&'static str], #[allow(dead_code)] pub hint_mode: Option, } /// Look up the usage catalog keys for the entry word at the start /// of `source`. /// /// Case-insensitive, whitespace-tolerant. Replaces /// `dsl::usage::matched_entry` — the walker is the single source /// of truth for which command a given input belongs to. /// /// Returns the canonical (primary-form) entry literal and the /// `usage_ids` list, or `None` if no entry word matches. #[must_use] pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, &'static [&'static str])> { use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace}; let start = skip_whitespace(source, 0); let (kw_start, kw_end) = consume_ident(source, start)?; let word = &source[kw_start..kw_end]; let (_, node) = command_for_entry_word(word)?; Some((node.entry.primary, node.usage_ids)) } /// Every command-entry word in the registry, sorted alphabetically /// by primary literal. Replaces `dsl::usage::entry_keywords_alphabetised` /// which read the same data through the legacy `usage::REGISTRY`. #[must_use] pub fn entry_words_alphabetised() -> Vec<&'static str> { let mut words: Vec<&'static str> = REGISTRY.iter().map(|c| c.entry.primary).collect(); words.sort_unstable(); words } /// The active grammar registry. Phase A: the eleven app-lifecycle /// commands. Migrated commands route through this; everything /// else falls through to the chumsky path in `dsl::parser`. pub static REGISTRY: &[&CommandNode] = &[ &app::QUIT, &app::HELP, &app::REBUILD, &app::SAVE, &app::NEW, &app::LOAD, &app::EXPORT, &app::IMPORT, &app::MODE, &app::MESSAGES, &ddl::DROP, &ddl::ADD, &ddl::RENAME, &ddl::CHANGE, &ddl::CREATE, &data::SHOW, &data::INSERT, &data::UPDATE, &data::DELETE, &data::REPLAY, ]; /// Look up a `CommandNode` by entry word, case-insensitively. /// /// Used by the router to decide whether the walker owns this /// input. Returns the index into `REGISTRY` so callers can /// later use it as a `WalkOutcome::Match { command_idx }`. pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> { REGISTRY .iter() .enumerate() .find(|(_, c)| c.entry.matches(word)) .map(|(i, c)| (i, *c)) }