// rdbms-playground/src/dsl/grammar/mod.rs

//! Unified declarative grammar tree (ADR-0024).
//!
//! The grammar tree is the single source of truth for the DSL —
//! parsing, completion, syntax highlighting, parse-error usage
//! rendering, and hint-panel content all derive from this same
//! data structure (ADR-0023 institutional context).
//!
//! Phase A scope (ADR-0024 §migration): the framework lands
//! alongside the eleven app-lifecycle commands (quit, help,
//! rebuild, save, save as, new, load, export, import, mode,
//! messages). The chumsky parser still owns every other
//! command; the router in `dsl::parser` decides which path to
//! take per first-token. Schema-aware nodes (`IdentSource::Tables`
//! and friends) and `DynamicSubgrammar` are declared here but
//! not exercised until Phase B-D.
//!
//! NOTE(review): `REGISTRY` in this file also lists the `ddl`
//! and `data` command families, so the Phase A description
//! above may be out of date — confirm which commands the
//! chumsky path still owns.
//!
//! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one
//! pragmatic addition for Phase A: each `Ident` carries an
//! optional content validator, used today by the `mode <value>`
//! / `messages <value>` slots to surface friendly catalog
//! wording (`mode.unknown`, `messages.unknown`) on out-of-set
//! identifiers. The same hook generalises naturally to typed
//! value slots in Phase D.

pub mod app;
pub mod data;
pub mod ddl;
pub mod shared;

use crate::dsl::command::Command;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::outcome::MatchedPath;

/// Highlight class assigned to a matched terminal.
///
/// Recorded on the `WalkResult::per_byte_class` slice and surfaced
/// by `walker::highlight_runs` to the input/echo-line renderers
/// (ADR-0024 §architecture). It is also the payload type of the
/// `highlight_override` fields on `Word` and `Node::Ident`.
///
/// The variants name token categories only; which terminal
/// receives which class is not decided in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightClass {
    Keyword,
    Identifier,
    Number,
    String,
    Punct,
    Flag,
    Error,
}

/// Where an `Ident` slot's candidates come from at completion time.
///
/// Drives both the walker's `Expectation::Ident { source }` (which
/// the parse-error bridge maps to a human label) and the
/// `SchemaCache` lookup the completion engine uses for Tab
/// candidates. The `Free` and `NewName` variants do not query the
/// schema — `NewName` is for slots where the user invents the
/// identifier, `Free` is the catch-all branch in `mode`/`messages`
/// that funnels unknown values into a friendly validator.
///
/// Only `Tables`, `Columns` and `Relationships` report `true`
/// from `completes_from_schema`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IdentSource {
    /// User invents this name. No schema lookup; no completion
    /// candidates beyond the identifier shape itself. Labelled
    /// "identifier" in parse errors.
    NewName,
    /// Existing table name. Labelled "table name".
    Tables,
    /// Existing column in the current table. Labelled
    /// "column name".
    Columns,
    /// Existing relationship name. Labelled "relationship name".
    Relationships,
    /// Closed set from `Type::all()` — surfaced by the walker's
    /// content validator on column-type slots; not user-listable
    /// from the schema. Labelled "type".
    Types,
    /// Any identifier shape; used by synthetic catch-all branches
    /// (e.g., the unknown-value branch of `mode <value>`). Shares
    /// the "identifier" label with `NewName`, so
    /// `from_expected_label` never yields this variant.
    Free,
}

impl IdentSource {
    /// Whether this source can be completed from the schema
    /// cache (i.e. the candidate list comes from existing
    /// entities rather than user invention or a closed set).
    #[must_use]
    pub const fn completes_from_schema(self) -> bool {
        matches!(self, Self::Tables | Self::Columns | Self::Relationships)
    }

    /// Human-facing label used in parse-error wording
    /// ("expected table name") and in the completion engine's
    /// round-trip from a textual `expected` entry back to a
    /// source kind. `Free` and `Types` collapse to "identifier"
    /// and "type" respectively.
    #[must_use]
    pub const fn expected_label(self) -> &'static str {
        match self {
            Self::NewName | Self::Free => "identifier",
            Self::Tables => "table name",
            Self::Columns => "column name",
            Self::Relationships => "relationship name",
            Self::Types => "type",
        }
    }

    /// Inverse of `expected_label`. Used by the completion engine
    /// to recover the source kind from the `ParseError::Invalid::
    /// expected` strings the walker bridge produces. `"identifier"`
    /// maps to `NewName` (the only writeable label that uses that
    /// wording in production grammars today).
    #[must_use]
    pub fn from_expected_label(label: &str) -> Option<Self> {
        match label {
            "identifier" => Some(Self::NewName),
            "table name" => Some(Self::Tables),
            "column name" => Some(Self::Columns),
            "relationship name" => Some(Self::Relationships),
            "type" => Some(Self::Types),
            _ => None,
        }
    }
}

/// Hint-panel mode for an expected node (ADR-0024 §HintMode-per-node).
///
/// `Default` (today's behaviour) shows candidates if any, falls
/// back to a prose ladder otherwise. The other variants
/// override at slot positions where the candidate list would be
/// actively misleading or where the user benefits from format
/// guidance:
///
/// - `ProseOnly(catalog_key)` — show only prose from the
///   catalog; suppress Tab candidates. Used today by the
///   value-literal slot at empty prefix (the "null/true/false"
///   candidate trio is misleading at a slot that more often
///   takes a number / quoted text / date).
/// - `ForceProse(catalog_key)` — force this prose at the
///   catalog key regardless of candidates. Used today by
///   `NewName` ident slots ("Type a name, then `(`").
/// - `SuppressProse` — show only candidates; never fall back
///   to a prose ladder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
    /// Candidates if any, otherwise the prose ladder.
    Default,
    /// Always show the prose stored under this catalog key,
    /// whether or not candidates exist.
    ForceProse(&'static str),
    /// Show only the prose stored under this catalog key; Tab
    /// candidates are suppressed.
    ProseOnly(&'static str),
    /// Show only candidates; never fall back to prose.
    SuppressProse,
}

/// A keyword node literal.
///
/// The `aliases` slice is empty for the app-lifecycle commands
/// today; the round-5 `q` removal remains intentional, and any
/// future re-introduction would be a one-line `aliases: &["q"]`
/// addition (ADR-0024 §aliases).
#[derive(Debug, Clone, Copy)]
pub struct Word {
    /// Canonical spelling. `usage_keys_for_input` reports this
    /// form for a command's entry word regardless of how the
    /// user typed it.
    pub primary: &'static str,
    /// Alternative spellings accepted by `Word::matches`,
    /// compared ASCII-case-insensitively just like `primary`.
    pub aliases: &'static [&'static str],
    /// Optional highlight class for this word.
    /// NOTE(review): presumably replaces the default keyword
    /// class when `Some` — confirm at the walker's use site.
    pub highlight_override: Option<HighlightClass>,
}

impl Word {
    /// Builds a plain keyword: `primary` as the only spelling, no
    /// aliases and no highlight override. `const` so grammar
    /// declarations can use it in `static`/`const` initialisers.
    #[must_use]
    pub const fn keyword(primary: &'static str) -> Self {
        Self {
            primary,
            aliases: &[],
            highlight_override: None,
        }
    }

    /// Returns `true` when `candidate` equals the primary literal
    /// or any alias, ignoring ASCII case. Non-ASCII characters
    /// must match exactly.
    #[must_use]
    pub fn matches(&self, candidate: &str) -> bool {
        candidate.eq_ignore_ascii_case(self.primary)
            || self
                .aliases
                .iter()
                .any(|alias| candidate.eq_ignore_ascii_case(alias))
    }
}

/// Content-level validator for an `Ident` slot.
///
/// Receives the matched identifier text. `Ok(())` accepts the
/// value; `Err` carries the catalog key + arg list to surface as
/// `WalkOutcome::ValidationFailed` on mismatch.
pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>;

/// Content-level validator for a `NumberLit` slot.
///
/// Same function-pointer shape as `IdentValidator`: receives the
/// matched text, and an `Err` surfaces as `ValidationFailed`.
pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>;

/// Content-level rejection returned by an `IdentValidator`, a
/// `NumberValidator`, or a `CommandNode::ast_builder`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Catalog key of the message to show (the module docs cite
    /// `mode.unknown` and `messages.unknown` as examples).
    pub message_key: &'static str,
    /// Arguments accompanying the message, as `(name, value)`
    /// pairs.
    /// NOTE(review): presumably interpolated into the catalog
    /// template — confirm at the rendering site.
    pub args: Vec<(&'static str, String)>,
}

/// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy).
///
/// Some variants carry data (`Word` literal, `Punct` char,
/// `Ident` source/role/validator); combinators reference their
/// children through `&'static [Node]` / `&'static Node` slices,
/// which lets the entire registry live in `const`s — no runtime
/// allocation, every command is one declaration block in its
/// grammar file.
pub enum Node {
    /// A keyword token. Case-insensitive match (ADR-0009).
    Word(Word),
    /// A single punctuation character. The exact set comes from
    /// the migrated commands' usage — Phase A needs none of
    /// these (app-lifecycle commands are pure keyword + ident +
    /// path), but the variant is declared for Phase B+ use.
    #[allow(dead_code)]
    Punct(char),
    /// An identifier slot. `source` drives completion candidates;
    /// `role` names the slot for error wording / completion-engine
    /// dispatch; `validator` runs after a successful identifier-
    /// shape match and may reject the value with a catalog-driven
    /// message.
    ///
    /// `writes_table` (Phase D): when `true` and `source ==
    /// Tables`, the walker writes the matched ident to
    /// `WalkContext::current_table` and resolves
    /// `current_table_columns` from the schema cache (if any).
    /// `writes_column` (Phase D): when `true` and `source ==
    /// Columns`, the walker writes the matched ident's
    /// `TableColumn` to `WalkContext::current_column` (resolved
    /// against `current_table_columns`). Subsequent value slots
    /// dispatch on the column's type.
    Ident {
        /// Where completion candidates for this slot come from.
        source: IdentSource,
        /// Slot name used for error wording and completion-engine
        /// dispatch.
        role: &'static str,
        /// Optional content check run after the identifier shape
        /// has matched.
        validator: Option<IdentValidator>,
        /// Optional highlight class for the matched identifier.
        /// NOTE(review): the `allow(dead_code)` suggests nothing
        /// reads this field yet — confirm before relying on it.
        #[allow(dead_code)]
        highlight_override: Option<HighlightClass>,
        /// See the variant docs: records the matched table in the
        /// walk context when `source == Tables`.
        writes_table: bool,
        /// See the variant docs: records the matched column in the
        /// walk context when `source == Columns`.
        writes_column: bool,
        /// Append the matched text to
        /// `WalkContext::user_listed_columns` (Phase D). Used by
        /// the `insert into <T> (col1, col2, …)` column-list
        /// idents — when the walker sees these, the form is
        /// "Form A" and the inner values slot list mirrors the
        /// user's explicit selection instead of the
        /// auto-filtered schema default.
        writes_user_listed_column: bool,
    },
    /// A number literal. The optional `validator` runs against
    /// the matched text (used by Phase D value slots to enforce
    /// per-type integer/decimal rules).
    NumberLit {
        validator: Option<NumberValidator>,
    },
    /// A literal byte sequence at this position — matches
    /// bytes verbatim (whitespace-skipped) with a lookahead so
    /// `1` doesn't half-match `12` and `n` doesn't half-match
    /// `name`. Used by Phase B's `add 1:n …` for the literal
    /// `1`. Surfaces in the expected-set as `` `<literal>` ``,
    /// matching chumsky's labelled-token rendering.
    Literal(&'static str),
    /// A quoted string literal. Per the `BarePath` docs, paths
    /// containing spaces use this quoted form.
    /// NOTE(review): quoting and escape rules are not shown in
    /// this file.
    #[allow(dead_code)]
    StringLit,
    /// A blob literal.
    /// NOTE(review): the accepted lexical form is not shown in
    /// this file.
    #[allow(dead_code)]
    BlobLit,
    /// A `--name` flag. Walker matches the flag shape and
    /// asserts the name matches the expected literal.
    Flag(&'static str),
    /// A non-whitespace run consumed verbatim from source. Per
    /// ADR-0024's path-bearing-commands UX change, paths with
    /// spaces use the quoted form (`StringLit`); `BarePath`
    /// terminates at the first whitespace byte.
    BarePath,
    /// Try each child in order. The first one that matches a
    /// non-empty prefix wins; if none match, the choice fails
    /// with the union of expectations.
    Choice(&'static [Self]),
    /// All children must match in order. Whitespace is implicitly
    /// allowed between siblings.
    Seq(&'static [Self]),
    /// The inner node may match or be skipped.
    Optional(&'static Self),
    /// `inner` matches at least `min` times, separated by
    /// `separator` (if any). Phase C+ uses this for `with pk`
    /// column lists.
    #[allow(dead_code)]
    Repeated {
        inner: &'static Self,
        separator: Option<&'static Self>,
        min: usize,
    },
    /// Resolves at walk time using the active `WalkContext`.
    /// Phase D+ uses this for `column_value_list`.
    #[allow(dead_code)]
    DynamicSubgrammar(fn(&WalkContext) -> Self),
    /// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots).
    ///
    /// Walks `inner` to consume the literal but records the
    /// column type in `WalkContext::pending_value_type` so the
    /// hint resolver can emit per-type catalog prose ("Type an
    /// integer", "Type a date as 'YYYY-MM-DD'", …) at empty
    /// prefix at this slot. When `column_name` is `Some`, the
    /// walker also writes `pending_value_column` so the hint
    /// can be rendered with the actual column name (e.g. "for
    /// `Email`: Type a quoted string …") rather than a generic
    /// type hint. The recorded values clear on a successful
    /// inner match — so positions BETWEEN typed slots
    /// (`insert into T values (1` mid-input) don't carry stale
    /// hint state.
    TypedValueSlot {
        ty: crate::dsl::types::Type,
        column_name: Option<&'static str>,
        inner: &'static Self,
    },
}

/// Top-level entry record. One per command. The `entry` keyword
/// alone identifies which command the walker dispatches to;
/// `shape` is what follows the entry word.
pub struct CommandNode {
    /// Entry keyword. `command_for_entry_word` selects a command
    /// by testing the input's first word against this via
    /// `Word::matches`.
    pub entry: Word,
    /// Grammar for everything after the entry word.
    pub shape: Node,
    /// Builds the typed `Command` AST from the matched terminal
    /// path. May fail with a `ValidationError` for content-level
    /// rejections that are easier to express imperatively than
    /// as a per-node validator (Phase A: none — every app
    /// command's ast_builder is infallible).
    pub ast_builder: fn(&MatchedPath) -> Result<Command, ValidationError>,
    /// Optional help identifier.
    /// NOTE(review): presumably a key into the help catalog; the
    /// `allow(dead_code)` suggests nothing reads it yet.
    #[allow(dead_code)]
    pub help_id: Option<&'static str>,
    /// Catalog keys under `parse.usage.*` to render in the
    /// "usage:" block when a parse error fires for this command
    /// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families
    /// like `drop` (drop table / drop column / drop relationship)
    /// carry every variant so the user sees the full family on a
    /// generic-entry-word failure.
    pub usage_ids: &'static [&'static str],
    /// Optional command-level `HintMode`.
    /// NOTE(review): how this interacts with per-node hint
    /// behaviour is not shown here; the `allow(dead_code)`
    /// suggests it is not consumed yet.
    #[allow(dead_code)]
    pub hint_mode: Option<HintMode>,
}

/// Resolve the usage catalog keys for whichever command the first
/// word of `source` names.
///
/// Leading whitespace is skipped and the entry word is compared
/// case-insensitively. Replaces `dsl::usage::matched_entry` — the
/// walker is the single source of truth for which command a given
/// input belongs to.
///
/// Returns the canonical (primary-form) entry literal together
/// with that command's `usage_ids`. Yields `None` when `source`
/// does not start with an identifier or the identifier is not a
/// registered entry word.
#[must_use]
pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, &'static [&'static str])> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
    // Locate the first identifier after any leading whitespace.
    let (begin, end) = consume_ident(source, skip_whitespace(source, 0))?;
    // The registry index is irrelevant here; only the command's
    // canonical entry literal and usage keys are reported.
    command_for_entry_word(&source[begin..end])
        .map(|(_, command)| (command.entry.primary, command.usage_ids))
}

/// Every command-entry word in the registry, sorted alphabetically
/// by primary literal. Replaces `dsl::usage::entry_keywords_alphabetised`
/// which read the same data through the legacy `usage::REGISTRY`.
#[must_use]
pub fn entry_words_alphabetised() -> Vec<&'static str> {
    let mut words: Vec<&'static str> = REGISTRY.iter().map(|c| c.entry.primary).collect();
    words.sort_unstable();
    words
}

/// The active grammar registry.
///
/// Lists the app-lifecycle commands declared in `app`, followed
/// by the `ddl` and `data` command families. Commands listed here
/// route through the walker; per the module docs, anything else
/// falls through to the chumsky path in `dsl::parser`.
///
/// Entry order is significant: `command_for_entry_word` returns
/// positions in this slice and picks the first entry whose word
/// matches, so reordering changes both.
pub static REGISTRY: &[&CommandNode] = &[
    &app::QUIT,
    &app::HELP,
    &app::REBUILD,
    &app::SAVE,
    &app::NEW,
    &app::LOAD,
    &app::EXPORT,
    &app::IMPORT,
    &app::MODE,
    &app::MESSAGES,
    &ddl::DROP,
    &ddl::ADD,
    &ddl::RENAME,
    &ddl::CHANGE,
    &ddl::CREATE,
    &data::SHOW,
    &data::INSERT,
    &data::UPDATE,
    &data::DELETE,
    &data::REPLAY,
];

/// Look up a `CommandNode` by entry word, case-insensitively.
///
/// Used by the router to decide whether the walker owns this
/// input. Returns the index into `REGISTRY` so callers can
/// later use it as a `WalkOutcome::Match { command_idx }`,
/// together with the node itself. Yields `None` when no
/// registered entry word (primary or alias) matches `word`.
///
/// When several entries would match, the one listed first in
/// `REGISTRY` wins.
#[must_use]
pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> {
    REGISTRY
        .iter()
        .enumerate()
        .find(|(_, command)| command.entry.matches(word))
        // `iter()` yields `&&CommandNode`; copy out the inner
        // `&'static` reference for the caller.
        .map(|(index, command)| (index, *command))
}