Files
rdbms-playground/src/dsl/grammar/mod.rs
T
claude@clouddev1 6429b56443 feat(hint): H2 Phase C batch 2 — DDL tier-3 hints (ADR-0053)
Per-form hints for the schema-shaping commands: create table, create
m:n, add column/index/constraint, drop table/column/relationship/
index/constraint, rename column, change column (add_relationship was
the Phase-B exemplar). Examples verified against the canonical usage
templates. hint_ids wired on CREATE/CREATE_M2N/DROP/RENAME/CHANGE;
catalogue + keys.rs registered. +2 spot tests (incl. multi-form DROP
disambiguation); 2491 pass / 1 ignored, clippy clean.
2026-06-15 16:05:41 +00:00

1021 lines
43 KiB
Rust

//! Unified declarative grammar tree (ADR-0024).
//!
//! The grammar tree is the single source of truth for the DSL —
//! parsing, completion, syntax highlighting, parse-error usage
//! rendering, and hint-panel content all derive from this same
//! data structure (ADR-0023 institutional context).
//!
//! Phase A scope (ADR-0024 §migration): the framework lands
//! alongside the eleven app-lifecycle commands (quit, help,
//! rebuild, save, save as, new, load, export, import, mode,
//! messages). The chumsky parser still owns every other
//! command; the router in `dsl::parser` decides which path to
//! take per first-token. Schema-aware nodes (`IdentSource::Tables`
//! and friends) and `DynamicSubgrammar` are declared here but
//! not exercised until Phase B-D.
//!
//! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one
//! pragmatic addition for Phase A: each `Ident` carries an
//! optional content validator, used today by the `mode <value>`
//! / `messages <value>` slots to surface friendly catalog
//! wording (`mode.unknown`, `messages.unknown`) on out-of-set
//! identifiers. The same hook generalises naturally to typed
//! value slots in Phase D.
pub mod app;
pub mod data;
pub mod ddl;
pub mod expr;
pub mod shared;
pub mod sql_expr;
pub mod sql_create_table;
pub mod sql_delete;
pub mod sql_insert;
pub mod sql_select;
pub mod sql_update;
use crate::dsl::command::Command;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::outcome::MatchedPath;
/// Highlight class assigned to a matched terminal.
///
/// Recorded on the `WalkResult::per_byte_class` slice and surfaced
/// by `walker::highlight_runs` to the input/echo-line renderers
/// (ADR-0024 §architecture).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightClass {
/// A clause keyword.
Keyword,
/// A name — e.g. one the user invented.
Identifier,
/// Column data-type keyword (`int`, `serial`, `text`, …).
/// Distinct from `Keyword` and `Identifier` so learners can
/// tell "this is a type" from a clause keyword or a name they
/// invented (ADR-0022 Amendment 4). Assigned via a type slot's
/// `highlight_override`, not by byte shape.
Type,
Number,
String,
Punct,
Flag,
/// A curated function-vocabulary name — the `seed … set <col> as
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
/// the existing `tok_function` colour (ADR-0022 Amendment 6 blue — no
/// new theme colour), assigned via a generator slot's
/// `highlight_override`, not by byte shape.
Function,
Error,
}
/// Origin of the completion candidates for an `Ident` slot.
///
/// The walker reports the source through `Expectation::Ident { source }`
/// (the parse-error bridge turns it into a human label), and the
/// completion engine uses it to pick the `SchemaCache` lookup behind
/// Tab candidates. Neither `Free` nor `NewName` consults the schema:
/// `NewName` marks a slot where the user invents the identifier, and
/// `Free` is the catch-all branch in `mode`/`messages` that routes an
/// unknown value into a friendly validator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IdentSource {
    /// A name the user makes up. No schema lookup and no completion
    /// candidates beyond the identifier shape itself.
    NewName,
    /// The name of an existing table.
    Tables,
    /// The name of an existing column in the current table.
    Columns,
    /// The name of an existing relationship.
    Relationships,
    /// The name of an existing index.
    Indexes,
    /// The closed set from `Type::all()` — surfaced by the walker's
    /// content validator on column-type slots; it cannot be listed
    /// from the schema.
    Types,
    /// The closed, curated set of fake-data generator names (ADR-0048
    /// D9), used by the `seed … set <col> as <generator>` slot. As with
    /// `Types`, the schema cannot list it; the vocabulary lives in
    /// `src/seed` and the completion engine offers it. The grammar slot
    /// is purely structural (any identifier matches); an unknown name
    /// is flagged live (validity) and rejected at execution.
    Generators,
    /// Any identifier shape; used by synthetic catch-all branches
    /// (e.g., the unknown-value branch of `mode <value>`).
    Free,
}

impl IdentSource {
    /// `true` when candidates for this source are existing schema
    /// entities (tables, columns, relationships, indexes) rather than
    /// user-invented names or a closed vocabulary.
    #[must_use]
    pub const fn completes_from_schema(self) -> bool {
        match self {
            Self::Tables | Self::Columns | Self::Relationships | Self::Indexes => true,
            Self::NewName | Self::Types | Self::Generators | Self::Free => false,
        }
    }

    /// The wording used for this source in parse errors ("expected
    /// table name"); the completion engine also round-trips it back to
    /// a source kind. `NewName` and `Free` share the label
    /// "identifier".
    #[must_use]
    pub const fn expected_label(self) -> &'static str {
        match self {
            Self::Tables => "table name",
            Self::Columns => "column name",
            Self::Relationships => "relationship name",
            Self::Indexes => "index name",
            Self::Types => "type",
            Self::Generators => "generator name",
            Self::NewName | Self::Free => "identifier",
        }
    }

    /// Recovers the source kind from a label produced by
    /// [`Self::expected_label`]; any other string yields `None`.
    ///
    /// `"identifier"` resolves to `NewName` — `Free` is deliberately
    /// never returned (`NewName` is the only writeable slot using that
    /// wording in production grammars today).
    #[must_use]
    pub fn from_expected_label(label: &str) -> Option<Self> {
        // `NewName` is listed first and `Free` is left out, so the
        // shared "identifier" label always maps to `NewName`.
        let recoverable = [
            Self::NewName,
            Self::Tables,
            Self::Columns,
            Self::Relationships,
            Self::Indexes,
            Self::Types,
            Self::Generators,
        ];
        recoverable
            .iter()
            .copied()
            .find(|source| source.expected_label() == label)
    }
}
/// Hint-panel mode for an expected node (ADR-0024 §HintMode-per-node).
///
/// `Default` (today's behaviour) shows candidates if any, falls
/// back to a prose ladder otherwise. The other variants
/// override at slot positions where the candidate list would be
/// actively misleading or where the user benefits from format
/// guidance:
///
/// - `ProseOnly(catalog_key)` — show only prose from the
/// catalog; suppress Tab candidates. Used today by the
/// value-literal slot at empty prefix (the "null/true/false"
/// candidate trio is misleading at a slot that more often
/// takes a number / quoted text / date).
/// - `ForceProse(catalog_key)` — force this prose at the
/// catalog key regardless of candidates. Used today by
/// `NewName` ident slots ("Type a name, then `(`").
/// - `IntroProse(catalog_key)` — show prose at slot entry to
/// *introduce* a position whose first-class candidate is an
/// ident slot (which would be invisible in a pure-candidate
/// render) but whose keyword alternatives are also available.
/// Unlike `ProseOnly`, Tab candidates remain available — the
/// user still cycles through the keyword set. Used at the
/// advanced-mode CREATE TABLE element slot, where the
/// column-name `NewName` slot would otherwise be invisible
/// alongside the table-level constraint keywords (issue #4).
/// - `SuppressProse` — show only candidates; never fall back
/// to a prose ladder.
///
/// The `&'static str` payload of the prose variants is the catalog
/// key of the prose to render.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
/// Candidates if any, otherwise the prose ladder.
Default,
/// Always render the prose at this catalog key, candidates or not.
ForceProse(&'static str),
/// Render only the prose at this catalog key; no Tab candidates.
ProseOnly(&'static str),
/// Render introductory prose at slot entry; Tab candidates stay
/// available.
IntroProse(&'static str),
/// Candidates only; never fall back to prose.
SuppressProse,
}
/// A keyword node literal.
///
/// The `aliases` slice is empty for the app-lifecycle commands
/// today; the round-5 `q` removal remains intentional, and any
/// future re-introduction would be a one-line `aliases: &["q"]`
/// addition (ADR-0024 §aliases).
#[derive(Debug, Clone, Copy)]
pub struct Word {
/// Canonical spelling of the keyword.
pub primary: &'static str,
/// Alternative spellings accepted in place of `primary`; both are
/// compared case-insensitively by `Word::matches`.
pub aliases: &'static [&'static str],
/// Highlight class to use instead of the default for this word;
/// `Word::type_keyword` sets it to `HighlightClass::Type`, and
/// `Word::keyword` leaves it `None`.
pub highlight_override: Option<HighlightClass>,
}
impl Word {
    /// Builds an ordinary keyword: no aliases and no highlight
    /// override.
    pub const fn keyword(primary: &'static str) -> Self {
        Self {
            highlight_override: None,
            aliases: &[],
            primary,
        }
    }

    /// Builds a keyword that is highlighted as a column **type**
    /// instead of a clause keyword (ADR-0022 Amendment 4).
    ///
    /// Today its only user is the two-word `double precision` SQL
    /// alias (ADR-0035 §3). That alias is matched as keyword tokens
    /// rather than as an `IdentSource::Types` `Ident`, so without the
    /// override it would be keyword-coloured while its single-word
    /// synonyms (`float`, `real`) are type-coloured.
    pub const fn type_keyword(primary: &'static str) -> Self {
        Self {
            highlight_override: Some(HighlightClass::Type),
            aliases: &[],
            primary,
        }
    }

    /// Reports whether `candidate` equals the primary spelling or any
    /// alias, ignoring ASCII case.
    pub fn matches(&self, candidate: &str) -> bool {
        std::iter::once(self.primary)
            .chain(self.aliases.iter().copied())
            .any(|spelling| candidate.eq_ignore_ascii_case(spelling))
    }
}
/// Content-level validator for an `Ident` slot. Returns the
/// catalog key + arg list to surface as `WalkOutcome::ValidationFailed`
/// on mismatch.
///
/// `matched` is the identifier text the slot matched; `Ok(())`
/// accepts it.
pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>;
/// Content-level validator for a `NumberLit` slot. Same shape
/// as `IdentValidator`; surfaces as `ValidationFailed` on Err.
///
/// `matched` is the number text the slot matched.
pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>;
/// A content-level rejection: the error type returned by
/// `IdentValidator` / `NumberValidator` and by
/// `CommandNode::ast_builder`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
/// Catalog key of the message to surface.
pub message_key: &'static str,
/// Arguments accompanying the message, as `(name, value)` pairs.
/// NOTE(review): presumably the name is the catalog placeholder the
/// value is substituted for — confirm against the catalog renderer.
pub args: Vec<(&'static str, String)>,
}
/// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy).
///
/// Some variants carry data (`Word` literal, `Punct` char,
/// `Ident` source/role/validator); combinators reference their
/// children through `&'static [Node]` / `&'static Node` slices,
/// which lets the entire registry live in `const`s — no runtime
/// allocation, every command is one declaration block in its
/// grammar file.
pub enum Node {
/// A keyword token. Case-insensitive match (ADR-0009).
Word(Word),
/// A single punctuation character. The exact set comes from
/// the migrated commands' usage — Phase A needs none of
/// these (app-lifecycle commands are pure keyword + ident +
/// path), but the variant is declared for Phase B+ use.
#[allow(dead_code)]
Punct(char),
/// An identifier slot. `source` drives completion candidates;
/// `role` names the slot for error wording / completion-engine
/// dispatch; `validator` runs after a successful identifier-
/// shape match and may reject the value with a catalog-driven
/// message.
///
/// `writes_table` (Phase D): when `true` and `source ==
/// Tables`, the walker writes the matched ident to
/// `WalkContext::current_table` and resolves
/// `current_table_columns` from the schema cache (if any).
/// `writes_column` (Phase D): when `true` and `source ==
/// Columns`, the walker writes the matched ident's
/// `TableColumn` to `WalkContext::current_column` (resolved
/// against `current_table_columns`). Subsequent value slots
/// dispatch on the column's type.
Ident {
source: IdentSource,
role: &'static str,
validator: Option<IdentValidator>,
/// Highlight class to assign to this slot instead of the
/// default — the mechanism by which `HighlightClass::Type` and
/// `HighlightClass::Function` are assigned to type / generator
/// slots.
#[allow(dead_code)]
highlight_override: Option<HighlightClass>,
writes_table: bool,
writes_column: bool,
/// Append the matched text to
/// `WalkContext::user_listed_columns` (Phase D). Used by
/// the `insert into <T> (col1, col2, …)` column-list
/// idents — when the walker sees these, the form is
/// "Form A" and the inner values slot list mirrors the
/// user's explicit selection instead of the
/// auto-filtered schema default.
writes_user_listed_column: bool,
/// Set the matched text as the alias of the most-
/// recently-pushed `TableBinding` on the top
/// `ScopeFrame`'s `from_scope` (ADR-0032 §10.1). Used by
/// the `[ AS ] alias` slot on `from_clause` /
/// `join_clause` table sources in `sql_select.rs`; a
/// no-op on `IdentSource::NewName` slots that do not
/// follow a table-name push, or when the top frame's
/// `from_scope` is empty.
writes_table_alias: bool,
/// Push a placeholder `CteBinding` (name only, empty
/// columns) onto the top `ScopeFrame`'s `cte_bindings`
/// (ADR-0032 §10.3 stage 1). Used by the CTE-name slot
/// in `with_clause`; the placeholder is rewritten with
/// derived output columns at the body's frame exit
/// (§10.3 stage 2; harvest derivation rules pending).
writes_cte_name: bool,
/// Append the matched text to the top `ScopeFrame`'s
/// `projection_aliases` (ADR-0032 §10.4). Used by the
/// projection-list alias slot (both the bare and `AS`
/// forms) so `ORDER BY` completion can offer aliases as
/// candidates.
writes_projection_alias: bool,
},
/// A number literal. The optional `validator` runs against
/// the matched text (used by Phase D value slots to enforce
/// per-type integer/decimal rules).
NumberLit {
validator: Option<NumberValidator>,
},
/// A literal byte sequence at this position — matches
/// bytes verbatim (whitespace-skipped) with a lookahead so
/// `1` doesn't half-match `12` and `n` doesn't half-match
/// `name`. Used by Phase B's `add 1:n …` for the literal
/// `1`. Surfaces in the expected-set as `` `<literal>` ``,
/// matching chumsky's labelled-token rendering.
Literal(&'static str),
/// A quoted string literal. Path-bearing commands use this form
/// for paths that contain spaces (see `BarePath`).
#[allow(dead_code)]
StringLit,
/// A blob literal. NOTE(review): the accepted lexical shape is
/// decided by the walker and is not visible from this file.
#[allow(dead_code)]
BlobLit,
/// A `--name` flag. Walker matches the flag shape and
/// asserts the name matches the expected literal.
Flag(&'static str),
/// A non-whitespace run consumed verbatim from source. Per
/// ADR-0024's path-bearing-commands UX change, paths with
/// spaces use the quoted form (`StringLit`); `BarePath`
/// terminates at the first whitespace byte.
BarePath,
/// Try each child in order. The first one that matches a
/// non-empty prefix wins; if none match, the choice fails
/// with the union of expectations.
Choice(&'static [Self]),
/// All children must match in order. Whitespace is implicitly
/// allowed between siblings.
Seq(&'static [Self]),
/// The inner node may match or be skipped.
Optional(&'static Self),
/// `inner` matches at least `min` times, separated by
/// `separator` (if any). Phase C+ uses this for `with pk`
/// column lists.
#[allow(dead_code)]
Repeated {
inner: &'static Self,
separator: Option<&'static Self>,
min: usize,
},
/// Walks the referenced `&'static Node` once, mandatory
/// (ADR-0026 §2). The reference indirection is what lets a
/// named `static` grammar fragment appear inside its own
/// subtree: a `Seq` / `Choice` embeds its children by value
/// and so cannot close a cycle, but a `&'static Node`
/// reference can point back at an enclosing fragment. This
/// is the mechanism the stratified WHERE-expression grammar
/// recurses through — the `( or_expr )` branch and the
/// `not_expr` self-reference.
///
/// The walker counts active `Subgrammar` frames in
/// `WalkContext::subgrammar_depth` and refuses past
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
/// nested input (`((((…))))`) fails with a friendly error
/// rather than overflowing the parser stack.
///
/// The static counterpart of `DynamicSubgrammar`: that one
/// builds a fresh node from the `WalkContext` at walk time;
/// this one references a fixed fragment already in the
/// grammar tree.
Subgrammar(&'static Self),
/// Like `Subgrammar`, but the walker additionally **pushes a
/// new `ScopeFrame`** onto `WalkContext::from_scope_stack` on
/// entry and pops it on exit (ADR-0032 §10.2). The
/// `subgrammar_depth` counter increments uniformly across
/// both variants — the depth cap applies the same way — so
/// this variant introduces no new walker capability for
/// grammar recursion; it only layers lexical-scope discipline
/// on top.
///
/// Used at every SQL `SELECT` recursion point: subqueries
/// in `sql_expr.rs` (scalar `(SELECT …)`, `IN (SELECT …)`,
/// `[NOT] EXISTS (SELECT …)`) and CTE bodies in
/// `sql_select.rs` reference the compound-SELECT through
/// `Node::ScopedSubgrammar(&SQL_SELECT_COMPOUND)`. DSL `Expr`
/// recursion (ADR-0026) and the `sql_expr.rs` precedence-
/// ladder recursion (ADR-0031) keep using the plain
/// `Subgrammar` variant and never push a scope.
ScopedSubgrammar(&'static Self),
/// Resolves at walk time using the active `WalkContext`.
/// Phase D+ uses this for `column_value_list`. The factory
/// is pure in `ctx`, so the walker memoizes the resolution
/// (one leak per distinct schema shape).
#[allow(dead_code)]
DynamicSubgrammar(fn(&WalkContext) -> Self),
/// Like `DynamicSubgrammar` but the factory also sees the
/// source and the current byte position, so it can look
/// ahead. Used by the insert first-paren to discriminate
/// Form A (`(cols) values (...)`) from Form C (`(vals)`)
/// before walking the contents — Form C then routes through
/// the typed `column_value_list` (ADR-0024 §Phase D, Form C
/// type-awareness). Not memoized: the output depends on the
/// source, not just `ctx`.
Lookahead(fn(&WalkContext, &str, usize) -> Self),
/// Zero-width node that *establishes the active column* for the
/// value slot that follows it (ADR-0036 Phase 3b). Matches the
/// empty string and, as a side effect, sets
/// `WalkContext::current_column` to the referenced column and
/// `pending_value_column` to its name — exactly as an
/// `Ident { writes_column: true }` does, but without consuming a
/// column identifier from the input.
///
/// This is the primitive that gives `INSERT … VALUES (…)`
/// positions a per-position column identity: the positions are
/// positional (no per-position column ident to write
/// `current_column`), so a `DynamicSubgrammar` factory
/// (`sql_insert::sql_value_list`) emits `SetColumn(colᵢ)` before
/// each value position, then the shared boundary-aware `SET_VALUE`
/// slot routes a lone literal to that column's typed slot and any
/// expression to `sql_expr`. The referenced `TableColumn` is
/// leaked by the factory (bounded by the column count, like the
/// `DynamicSubgrammar` `Box::leak`).
SetColumn(&'static crate::completion::TableColumn),
/// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots).
///
/// Walks `inner` to consume the literal but records the
/// column type in `WalkContext::pending_value_type` so the
/// hint resolver can emit per-type catalog prose ("Type an
/// integer", "Type a date as 'YYYY-MM-DD'", …) at empty
/// prefix at this slot. When `column_name` is `Some`, the
/// walker also writes `pending_value_column` so the hint
/// can be rendered with the actual column name (e.g. "for
/// `Email`: Type a quoted string …") rather than a generic
/// type hint. The recorded values clear on a successful
/// inner match — so positions BETWEEN typed slots
/// (`insert into T values (1` mid-input) don't carry stale
/// hint state.
TypedValueSlot {
ty: crate::dsl::types::Type,
column_name: Option<&'static str>,
inner: &'static Self,
},
/// Annotates `inner` with a hint-panel `HintMode` (ADR-0024
/// §HintMode-per-node). On entry the walker records `mode`
/// in `WalkContext::pending_hint_mode`; on a successful
/// inner match the record clears (so positions past the
/// slot don't carry stale hint state). Transparent to
/// matching, highlighting and the expected-set otherwise —
/// it walks `inner` and returns its result verbatim.
///
/// This is the node-attached replacement for the hint
/// resolver's earlier signature-matching: the grammar tree
/// declares the hint mode at the slot, the walker
/// propagates it, the resolver reads it. Used by the
/// value-literal fallback slot (`ProseOnly`) and `NewName`
/// ident slots (`ForceProse`).
Hinted {
mode: HintMode,
inner: &'static Self,
},
}
/// Which mode group a registered command belongs to (ADR-0030
/// §2, ADR-0033 Amendment 1).
///
/// Category is a *dispatcher* concern, not intrinsic to a
/// command's grammar, so it is attached at the `REGISTRY`
/// registration site rather than as a field on every
/// `CommandNode`. The dispatcher (`walker::walk`) uses it to
/// route a given input by the active input mode:
///
/// - `Simple` commands are the DSL surface; available in both
/// simple and advanced mode.
/// - `Advanced` commands are the SQL surface; available only in
/// advanced mode. In simple mode an advanced-only entry word
/// yields the "this is SQL" hint (`advanced_mode.sql_in_simple`).
///
/// A *shared* entry word (e.g. `insert`, from Phase 3 sub-phase
/// 3b on) carries a node in *both* groups — a `Simple` DSL node
/// and an `Advanced` SQL node. The dispatcher tries the SQL node
/// first in advanced mode and falls back to the DSL node when the
/// SQL shape does not match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandCategory {
/// DSL surface — available in both simple and advanced mode.
Simple,
/// SQL surface — available only in advanced mode.
Advanced,
}
/// Top-level entry record. One per command. The `entry` keyword
/// alone identifies which command the walker dispatches to;
/// `shape` is what follows the entry word.
pub struct CommandNode {
/// The command's entry keyword; the walker dispatches on it.
pub entry: Word,
/// Grammar for everything that follows the entry word.
pub shape: Node,
/// Builds the typed `Command` AST from the matched terminal
/// path. May fail with a `ValidationError` for content-level
/// rejections that are easier to express imperatively than
/// as a per-node validator (Phase A: none — every app
/// command's ast_builder is infallible).
///
/// `source` is the full input line being parsed. Most builders
/// reconstruct the `Command` from the matched `MatchedPath`
/// alone and ignore it; SQL builders whose `Command` carries
/// the validated SQL text (ADR-0030 §4/§6, ADR-0031 §2) read
/// it.
pub ast_builder: fn(&MatchedPath, &str) -> Result<Command, ValidationError>,
/// Catalog key (`help.<id>`) for this command's in-app
/// `help` entry. Consumed by `App::note_help`, which
/// iterates the REGISTRY and translates each `help_id` —
/// so a newly-registered command appears in `help`
/// automatically (ADR-0024 §help_id).
pub help_id: Option<&'static str>,
/// Catalog key stems (`hint.cmd.<id>`) for this command's
/// **tier-3** contextual hints (ADR-0053 / H2), **one per form**,
/// mirroring `usage_ids`. A single-form command carries one; a
/// multi-form command (`add`, `drop`, `show`, `create`) carries
/// one per form so a live-input hint can be specific to the form
/// being typed (`hint.cmd.add_relationship`, not a shared `add`
/// block). `hint_key_for_input_in_mode` disambiguates by the form
/// word, reusing `usage_key_for_input_in_mode`'s logic. Empty
/// until a form's tier-3 block is authored (the surface falls back
/// to tier-2 ambient/error text). Distinct from `help_id` (which is
/// `None` on advanced-SQL forms purely to dedup the `help` list).
pub hint_ids: &'static [&'static str],
/// Catalog keys under `parse.usage.*` to render in the
/// "usage:" block when a parse error fires for this command
/// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families
/// like `drop` (drop table / drop column / drop relationship)
/// carry every variant so the user sees the full family on a
/// generic-entry-word failure.
pub usage_ids: &'static [&'static str],
}
/// Look up the usage catalog keys for the entry word at the start
/// of `source`.
///
/// Case-insensitive, whitespace-tolerant. Replaces
/// `dsl::usage::matched_entry` — the walker is the single source
/// of truth for which command a given input belongs to.
///
/// Returns the canonical (primary-form) entry literal and the
/// `usage_ids` list, or `None` if no entry word matches.
#[must_use]
pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, Vec<&'static str>)> {
usage_keys_for_input_in_mode(source, crate::mode::Mode::Simple)
}
/// Mode-aware variant of [`usage_keys_for_input`] (ADR-0042 G3).
///
/// A shared entry word (`create`, `drop`, `insert`, …) registers a
/// `Simple` DSL node *and* one or more `Advanced` SQL nodes, and the
/// usage block has to reflect the surface the user is actually typing
/// — otherwise advanced-mode `create` would lead with the DSL `create
/// table … with pk …` template, which is not valid SQL.
///
/// Which nodes contribute, and in what order, is decided by
/// `selected_nodes_for_input_in_mode`. The result is the union of
/// those nodes' `usage_ids`, de-duplicated and kept in selection
/// order, paired with the primary entry literal of the first selected
/// node. `None` when no entry word matches or the selected nodes carry
/// no usage keys.
#[must_use]
pub fn usage_keys_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<(&'static str, Vec<&'static str>)> {
    let selected = selected_nodes_for_input_in_mode(source, mode);
    // An empty selection means no entry word matched.
    let (_, lead, _) = selected.first()?;

    let mut usage_keys: Vec<&'static str> = Vec::new();
    let every_key = selected
        .iter()
        .flat_map(|(_, node, _)| node.usage_ids.iter().copied());
    for key in every_key {
        // Keep the first occurrence only, preserving order.
        if !usage_keys.contains(&key) {
            usage_keys.push(key);
        }
    }

    if usage_keys.is_empty() {
        None
    } else {
        Some((lead.entry.primary, usage_keys))
    }
}
/// Resolves the one tier-3 hint key (`hint.cmd.<id>` stem) for the
/// command **form** that `source` is typing in `mode` (H2 / ADR-0053).
///
/// Works like [`usage_key_for_input_in_mode`], but over `hint_ids`:
/// the mode-selected nodes' hint keys are unioned (first occurrence
/// wins, selection order kept) and [`pick_form_key`] narrows them to
/// the typed form — so `add 1:n relationship` resolves to the
/// relationship hint. Returns `None` when no entry word matches or
/// the form has no tier-3 block yet, in which case the caller falls
/// back to tier-2.
#[must_use]
pub fn hint_key_for_input_in_mode(source: &str, mode: crate::mode::Mode) -> Option<&'static str> {
    let mut stems: Vec<&'static str> = Vec::new();
    for (_, node, _) in selected_nodes_for_input_in_mode(source, mode) {
        for &stem in node.hint_ids {
            if !stems.contains(&stem) {
                stems.push(stem);
            }
        }
    }
    // No matching entry word leaves `stems` empty, which
    // `pick_form_key` reports as `None`.
    pick_form_key(source, &stems)
}
/// Selects, for the entry word that opens `source`, the registered
/// command nodes relevant to `mode`.
///
/// Both the usage-key and the hint-id lookups go through this, so
/// they always agree on which forms are in play.
///
/// - **Advanced mode** keeps every candidate, `Advanced` nodes first
///   and the rest after them (each group in candidate order). The SQL
///   nodes are primary, while the DSL nodes stay reachable through the
///   dispatcher's fallback (`create table … with pk` and `drop column
///   …` both run in advanced mode), so a hint never hides input that
///   works.
/// - **Any other mode** keeps only the `Simple` (DSL) nodes — the
///   SQL-only forms hit the "this is SQL" rail and are unreachable
///   (ADR-0042 G3).
///
/// Degenerate guard: when that filter leaves nothing (an
/// advanced-only word typed in simple mode), all candidates are
/// returned instead. An empty result means `source` does not start
/// with a known entry word.
fn selected_nodes_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Vec<(usize, &'static CommandNode, CommandCategory)> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    let word = match consume_ident(source, skip_whitespace(source, 0)) {
        Some((from, to)) => &source[from..to],
        None => return Vec::new(),
    };
    let candidates = commands_for_entry_word(word);
    if candidates.is_empty() {
        return Vec::new();
    }

    let selected: Vec<(usize, &'static CommandNode, CommandCategory)> =
        if mode == crate::mode::Mode::Advanced {
            // Stable split: SQL nodes lead, everything else follows.
            let (sql_first, fallback): (Vec<_>, Vec<_>) = candidates
                .iter()
                .copied()
                .partition(|(_, _, category)| *category == CommandCategory::Advanced);
            sql_first.into_iter().chain(fallback).collect()
        } else {
            candidates
                .iter()
                .copied()
                .filter(|(_, _, category)| *category == CommandCategory::Simple)
                .collect()
        };

    if selected.is_empty() {
        candidates
    } else {
        selected
    }
}
/// The single usage template most relevant to `source`, when
/// one is determinable.
///
/// A single-form command resolves to its one usage key. A
/// multi-form command (`add`, `drop`) disambiguates by the
/// form word after the entry keyword — so a parse error in
/// `add index …` resolves to the `add index` usage rather than
/// the first-listed `add column`. Returns `None` for a bare
/// multi-form entry word (`add` with nothing after it), where
/// no form has been chosen — the caller decides whether to
/// show the whole family or nothing.
#[must_use]
pub fn usage_key_for_input(source: &str) -> Option<&'static str> {
usage_key_for_input_in_mode(source, crate::mode::Mode::Simple)
}
/// Mode-aware variant of [`usage_key_for_input`] (ADR-0042 G3).
///
/// Fetches the mode-selected usage key set for `source` and narrows
/// it to the single most relevant key via [`pick_form_key`]. `None`
/// when no entry word matches or no single form can be determined.
#[must_use]
pub fn usage_key_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<&'static str> {
    usage_keys_for_input_in_mode(source, mode)
        .and_then(|(_, family)| pick_form_key(source, &family))
}
/// From the form word after the entry keyword, pick the single `keys`
/// entry for the form `source` names.
///
/// Resolution order:
///
/// 1. An empty `keys` list yields `None`; a single-entry list resolves
///    to its one key without inspecting the form word.
/// 2. A form that opens with an ASCII digit (`add 1:n relationship`)
///    resolves to the first key ending in `relationship`.
/// 3. A form that opens with `m:n`, case-insensitively (`create m:n
///    …`, ADR-0045), resolves to the first key ending in `m2n`.
/// 4. Otherwise the form word is an identifier (`column`, `index`,
///    `table`, …) and resolves to the first key whose **final
///    segment** is exactly that word, lower-cased.
///
/// Step 4 is anchored to a segment boundary: the word must be the
/// whole key or be preceded in the key by a non-alphanumeric character
/// (`_`, `.`). A bare suffix test would let a short or mistyped form
/// word select an unrelated form — `drop e` would pick `…drop_table`
/// and `add x` would pick `…add_index`.
///
/// Returns `None` when no form word follows the entry keyword or when
/// no key matches. Shared by the usage-template and tier-3-hint
/// single-key lookups so they agree.
fn pick_form_key<'a>(source: &str, keys: &[&'a str]) -> Option<&'a str> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    /// Whether `form` is the complete trailing segment of `key`.
    fn names_form(key: &str, form: &str) -> bool {
        key.strip_suffix(form).is_some_and(|head| {
            head.is_empty() || head.ends_with(|c: char| !c.is_ascii_alphanumeric())
        })
    }

    let first = *keys.first()?;
    if keys.len() == 1 {
        return Some(first);
    }

    let start = skip_whitespace(source, 0);
    let (_, entry_end) = consume_ident(source, start)?;
    let after = skip_whitespace(source, entry_end);

    // The `add 1:n relationship` form opens with a digit.
    if source.as_bytes().get(after).is_some_and(u8::is_ascii_digit) {
        return keys.iter().copied().find(|k| k.ends_with("relationship"));
    }

    // The `create m:n relationship` form (ADR-0045) opens with `m:n`
    // — a letter, so the digit branch misses it; its key ends `…m2n`.
    // Checked `get` calls: no panic if `after` is out of range or the
    // three-byte window is not on a char boundary.
    let opens_with_m2n = source
        .get(after..)
        .and_then(|rest| rest.get(..3))
        .is_some_and(|s| s.eq_ignore_ascii_case("m:n"));
    if opens_with_m2n {
        return keys.iter().copied().find(|k| k.ends_with("m2n"));
    }

    // Otherwise the form word is an identifier — `column`, `index`,
    // `table`, `relationship` — matched against each key's final
    // segment.
    let (s, e) = consume_ident(source, after)?;
    let form = source[s..e].to_ascii_lowercase();
    keys.iter().copied().find(|k| names_form(k, &form))
}
/// Lists every command-entry word in the registry exactly once, in
/// ascending order of its primary literal.
///
/// Supersedes `dsl::usage::entry_keywords_alphabetised`, which read
/// the same data through the legacy `usage::REGISTRY`. A shared entry
/// word that is registered more than once appears a single time.
#[must_use]
pub fn entry_words_alphabetised() -> Vec<&'static str> {
    // An ordered set sorts and de-duplicates in one pass.
    let unique: std::collections::BTreeSet<&'static str> = REGISTRY
        .iter()
        .map(|(command, _)| command.entry.primary)
        .collect();
    unique.into_iter().collect()
}
/// The active grammar registry, each command paired with its
/// dispatch [`CommandCategory`] (ADR-0033 Amendment 1).
///
/// Migrated commands route through this; everything else falls
/// through to the chumsky path in `dsl::parser`. `Advanced`
/// entries are the SQL surface (`select`, `with`, the SQL
/// `insert` / `update` / `delete`, the SQL `explain`, and the SQL
/// `create` / `alter` / `drop` DDL nodes); the rest are the DSL
/// surface (`Simple`).
///
/// A shared entry word is listed more than once: its `Simple` DSL
/// node plus one or more `Advanced` SQL nodes (`create` and `drop`
/// each have two advanced nodes). The dispatcher selects among
/// them by mode.
///
/// Entry order is significant: [`command_for_entry_word`] returns
/// the first entry matching a word, and positions in this slice
/// are handed to callers as registry indices, so reordering or
/// inserting entries changes both.
pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
// Commands from the `app` module — all DSL (`Simple`).
(&app::QUIT, CommandCategory::Simple),
(&app::HELP, CommandCategory::Simple),
(&app::HINT, CommandCategory::Simple),
(&app::REBUILD, CommandCategory::Simple),
(&app::SAVE, CommandCategory::Simple),
(&app::NEW, CommandCategory::Simple),
(&app::LOAD, CommandCategory::Simple),
(&app::EXPORT, CommandCategory::Simple),
(&app::IMPORT, CommandCategory::Simple),
(&app::MODE, CommandCategory::Simple),
(&app::MESSAGES, CommandCategory::Simple),
(&app::UNDO, CommandCategory::Simple),
(&app::REDO, CommandCategory::Simple),
(&app::COPY, CommandCategory::Simple),
// DSL schema commands from the `ddl` module.
(&ddl::DROP, CommandCategory::Simple),
(&ddl::ADD, CommandCategory::Simple),
(&ddl::RENAME, CommandCategory::Simple),
(&ddl::CHANGE, CommandCategory::Simple),
(&ddl::CREATE, CommandCategory::Simple),
(&ddl::CREATE_M2N, CommandCategory::Simple),
// DSL data commands from the `data` module.
(&data::SHOW, CommandCategory::Simple),
(&data::SEED, CommandCategory::Simple),
(&data::INSERT, CommandCategory::Simple),
(&data::UPDATE, CommandCategory::Simple),
(&data::DELETE, CommandCategory::Simple),
(&data::REPLAY, CommandCategory::Simple),
(&data::EXPLAIN, CommandCategory::Simple),
// Advanced-only SQL entry words: `select` / `with` have no
// `Simple` sibling node.
(&data::SELECT, CommandCategory::Advanced),
(&data::WITH, CommandCategory::Advanced),
// Shared entry words (sub-phase 3j, ADR-0033 §2 / Amendment 1):
// `insert` / `update` / `delete` each appear twice — the
// `Simple` DSL node above and this `Advanced` SQL node. The
// dispatcher tries the SQL node first in Advanced mode and falls
// back to the DSL node when the SQL shape does not match.
(&data::SQL_INSERT, CommandCategory::Advanced),
(&data::SQL_UPDATE, CommandCategory::Advanced),
(&data::SQL_DELETE, CommandCategory::Advanced),
// Shared entry word `explain` (ADR-0039): the `Simple` DSL
// `data::EXPLAIN` (above) wraps `show data` / `update` / `delete`;
// this `Advanced` node wraps the SQL `select` / `with` / `insert`
// / `update` / `delete`. SQL-first / DSL-fallback in advanced mode
// (so `explain show data …` and DSL-only `--all-rows` still reach
// the DSL node); DSL-only in simple mode.
(&data::EXPLAIN_SQL, CommandCategory::Advanced),
// Shared entry word `create` (ADR-0035 §2): the simple
// `ddl::CREATE` (above) and these advanced SQL nodes. The
// dispatcher tries the advanced candidates first in advanced mode
// and falls back to the `create table … with pk …` DSL node when no
// SQL shape matches — the `insert` precedent. 4d adds
// SQL_CREATE_INDEX, so `create` now has *two* advanced nodes;
// `decide` tries both (`create table …` → SQL_CREATE_TABLE,
// `create [unique] index …` → SQL_CREATE_INDEX).
(&ddl::SQL_CREATE_TABLE, CommandCategory::Advanced),
(&ddl::SQL_CREATE_INDEX, CommandCategory::Advanced),
// `alter` is a new advanced-*only* DDL entry word (ADR-0035 §2/§4e),
// like `select`/`with` — no simple node, so `is_advanced_only` is
// true and simple-mode `alter …` gets the "this is SQL" hint.
(&ddl::SQL_ALTER_TABLE, CommandCategory::Advanced),
// Shared `drop` entry word: `ddl::DROP` (simple) and these advanced
// SQL nodes. SQL-first in advanced mode; `drop table [if exists] T`
// → SQL_DROP_TABLE, `drop index [if exists] <name>` → SQL_DROP_INDEX
// (4d — `drop` now has *two* advanced nodes; the dispatcher's
// `decide` tries all advanced candidates). `drop column`/`drop
// relationship`/`drop index on T(…)` fall back to the simple `drop`
// node.
(&ddl::SQL_DROP_TABLE, CommandCategory::Advanced),
(&ddl::SQL_DROP_INDEX, CommandCategory::Advanced),
];
/// Reports whether `entry` is an entry word that exists only on the
/// advanced (SQL) surface (ADR-0030 §2, ADR-0033 Amendment 1).
/// Matching is case-insensitive, like keyword matching elsewhere.
///
/// Returns `true` only when at least one `REGISTRY` node matches
/// `entry` and none of the matching nodes is `Simple`. An
/// unregistered word yields `false`, and so does a shared entry
/// word (a `Simple` DSL node alongside `Advanced` SQL nodes),
/// because simple mode can still run it as DSL.
#[must_use]
pub fn is_advanced_only(entry: &str) -> bool {
    // Categories of every node registered under this entry word, in
    // registry order; evaluated lazily so the scan stops at the first
    // `Simple` candidate.
    let mut categories = REGISTRY
        .iter()
        .filter(|(node, _)| node.entry.matches(entry))
        .map(|(_, category)| *category)
        .peekable();

    // No candidates at all means the word is simply unknown, which is
    // not the same thing as "advanced only".
    categories.peek().is_some()
        && categories.all(|category| category != CommandCategory::Simple)
}
/// Finds the first `CommandNode` in `REGISTRY` whose entry word
/// matches `word` (case-insensitively), together with its position
/// in `REGISTRY`. The position is suitable for use as a
/// `WalkOutcome::Match { command_idx }`.
///
/// Returns `None` when no registered node matches. For a shared
/// entry word only the earliest-listed node is returned; use
/// [`commands_for_entry_word`] when the Simple and Advanced
/// candidates must be told apart.
pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> {
    REGISTRY
        .iter()
        .enumerate()
        .find_map(|(idx, (node, _))| node.entry.matches(word).then(|| (idx, *node)))
}
/// Collects every `REGISTRY` candidate for an entry word: each
/// matching `CommandNode` with its `REGISTRY` index and
/// [`CommandCategory`], matched case-insensitively (ADR-0033
/// Amendment 1).
///
/// Candidates come back in `REGISTRY` order. A non-shared entry
/// word yields one candidate and an unregistered word yields an
/// empty vector. A shared entry word yields all of its nodes — for
/// example `insert` has a `Simple` DSL node and an `Advanced` SQL
/// node, while `drop` has one `Simple` and two `Advanced` nodes.
/// The dispatcher chooses among them by the active input mode.
#[must_use]
pub fn commands_for_entry_word(
    word: &str,
) -> Vec<(usize, &'static CommandNode, CommandCategory)> {
    REGISTRY
        .iter()
        .enumerate()
        .filter_map(|(idx, (node, category))| {
            node.entry
                .matches(word)
                .then(|| (idx, *node, *category))
        })
        .collect()
}
#[cfg(test)]
mod hint_key_tests {
    use super::hint_key_for_input_in_mode;
    use crate::mode::Mode;

    /// Per-form hint keying (ADR-0053 D3): for a multi-form command
    /// the hint key follows the form that was actually *typed*, not
    /// the command node — `add 1:n relationship` resolves to the
    /// relationship hint and `add column` to the column hint, never
    /// to a sibling form's key.
    #[test]
    fn hint_key_resolves_the_typed_form() {
        let expectations = [
            (
                "add 1:n relationship from A.x to B.y",
                Some("add_relationship"),
            ),
            ("add column Note text to T", Some("add_column")),
            ("insert into T values (1)", Some("insert")),
            // Multi-form DROP disambiguates to the typed form too.
            ("drop table T", Some("drop_table")),
            // Unknown entry word → None (tier-2 fallback).
            ("zzz", None),
        ];
        for (input, expected) in expectations {
            assert_eq!(
                hint_key_for_input_in_mode(input, Mode::Simple),
                expected,
                "hint key for {input:?}",
            );
        }
    }
}
#[cfg(test)]
mod usage_key_tests {
    use super::usage_key_for_input;

    /// A typed form of a multi-form command must resolve to that
    /// form's own usage key, so a parse error in one form never
    /// displays a sibling form's usage (the handoff-18 `151ed08`
    /// fix, regression-locked here — including the digit-led
    /// `add 1:n relationship` form).
    #[test]
    fn multi_form_commands_resolve_to_the_typed_form() {
        const CASES: &[(&str, &str)] = &[
            ("add column to T: c (int)", "parse.usage.add_column"),
            ("add index on T (c)", "parse.usage.add_index"),
            ("add constraint unique to T.c", "parse.usage.add_constraint"),
            (
                "drop constraint check from T.c",
                "parse.usage.drop_constraint",
            ),
            (
                "add 1:n relationship from A.x to B.y",
                "parse.usage.add_relationship",
            ),
            // Trailing junk must not change the resolved form.
            (
                "add 1:n relationship from A.x to B.y --",
                "parse.usage.add_relationship",
            ),
            ("drop table T", "parse.usage.drop_table"),
            ("drop column from table T: c", "parse.usage.drop_column"),
            ("drop index i", "parse.usage.drop_index"),
            ("drop relationship r", "parse.usage.drop_relationship"),
            ("show data T", "parse.usage.show_data"),
            ("show table T", "parse.usage.show_table"),
            // `create` is multi-form (table vs m:n, ADR-0045): each typed
            // form resolves to its own usage key.
            ("create table T with pk id(int)", "parse.usage.create_table"),
            (
                "create m:n relationship from A to B",
                "parse.usage.create_m2n",
            ),
        ];
        for &(input, expected) in CASES {
            assert_eq!(
                usage_key_for_input(input),
                Some(expected),
                "usage key for {input:?}",
            );
        }
    }

    #[test]
    fn a_bare_multi_form_entry_word_resolves_to_no_single_form() {
        // With only the entry word typed no form has been chosen yet,
        // so no key is resolved and the caller shows the whole family
        // rather than guessing.
        for bare_entry in ["add ", "drop "] {
            assert_eq!(usage_key_for_input(bare_entry), None);
        }
    }

    #[test]
    fn a_single_form_command_resolves_to_its_one_key() {
        // NOTE(review): `create` is treated as multi-form (table vs
        // m:n) by the cases in this module, so this test's name may
        // predate that — confirm whether it should be renamed. The
        // assertion itself pins that an incomplete `create table …`
        // still resolves to the table form's key.
        let resolved = usage_key_for_input("create table T with pk");
        assert_eq!(resolved, Some("parse.usage.create_table"));
    }

    #[test]
    fn no_two_registered_commands_share_a_help_id() {
        // `note_help` prints one help block for each `help_id: Some(_)`
        // and does not dedup, so a repeated help_id would list the same
        // command twice in `help`. That is why the shared-entry-word
        // `Advanced` nodes (SQL_INSERT, …, EXPLAIN_SQL) carry
        // `help_id: None` and defer to their `Simple` sibling.
        let mut seen = std::collections::HashSet::new();
        let help_ids = super::REGISTRY
            .iter()
            .filter_map(|(command, _)| command.help_id);
        for id in help_ids {
            assert!(
                seen.insert(id),
                "duplicate help_id `{id}` in REGISTRY would print twice in `help`",
            );
        }
    }
}