Files
rdbms-playground/src/dsl/grammar/mod.rs
T
claude@clouddev1 c30a6114b9 feat(cli): --version/-V + in-app version command + release guard (ADR-0054)
Cargo.toml version is the single source of truth, surfaced by a
--version/-V CLI flag and an in-app `version` command (both via
cli::version_text -> cli.version_line). release.yaml gains a guard that
fails the release unless the v* tag equals v<CARGO_PKG_VERSION>, keeping
--version, the release name, and the asset in lockstep. New app command
wired across grammar/REGISTRY/dispatch/usage/help/hint-corpus/keys; 6
test-first tests. Also fixes a stale "macOS deferred" comment in
release.yaml. ADR-0054 + README index + plan-doc step 1.
2026-06-16 15:57:54 +00:00

1178 lines
49 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Unified declarative grammar tree (ADR-0024).
//!
//! The grammar tree is the single source of truth for the DSL —
//! parsing, completion, syntax highlighting, parse-error usage
//! rendering, and hint-panel content all derive from this same
//! data structure (ADR-0023 institutional context).
//!
//! Phase A scope (ADR-0024 §migration): the framework lands
//! alongside the eleven app-lifecycle commands (quit, help,
//! rebuild, save, save as, new, load, export, import, mode,
//! messages). The chumsky parser still owns every other
//! command; the router in `dsl::parser` decides which path to
//! take per first-token. Schema-aware nodes (`IdentSource::Tables`
//! and friends) and `DynamicSubgrammar` are declared here but
//! not exercised until Phase B-D.
//!
//! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one
//! pragmatic addition for Phase A: each `Ident` carries an
//! optional content validator, used today by the `mode <value>`
//! / `messages <value>` slots to surface friendly catalog
//! wording (`mode.unknown`, `messages.unknown`) on out-of-set
//! identifiers. The same hook generalises naturally to typed
//! value slots in Phase D.
pub mod app;
pub mod data;
pub mod ddl;
pub mod expr;
pub mod shared;
pub mod sql_expr;
pub mod sql_create_table;
pub mod sql_delete;
pub mod sql_insert;
pub mod sql_select;
pub mod sql_update;
use crate::dsl::command::Command;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::outcome::MatchedPath;
/// Highlight class assigned to a matched terminal.
///
/// Recorded on the `WalkResult::per_byte_class` slice and surfaced
/// by `walker::highlight_runs` to the input/echo-line renderers
/// (ADR-0024 §architecture). Grammar nodes that carry a
/// `highlight_override` field can name one of these classes
/// explicitly instead of relying on the default for their shape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightClass {
/// Clause / command keyword.
Keyword,
/// A name (for example one the user invented).
Identifier,
/// Column data-type keyword (`int`, `serial`, `text`, …).
/// Distinct from `Keyword` and `Identifier` so learners can
/// tell "this is a type" from a clause keyword or a name they
/// invented (ADR-0022 Amendment 4). Assigned via a type slot's
/// `highlight_override`, not by byte shape.
Type,
/// Number literal.
Number,
/// String literal.
String,
/// Punctuation.
Punct,
/// Flag token (presumably the `--name` shape matched by
/// `Node::Flag` — confirm against the walker).
Flag,
/// A curated function-vocabulary name — the `seed … set <col> as
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
/// the existing `tok_function` colour (ADR-0022 Amendment 6 blue — no
/// new theme colour), assigned via a generator slot's
/// `highlight_override`, not by byte shape.
Function,
/// NOTE(review): presumably marks input the walker could not
/// match — the assignment site is not visible here; confirm.
Error,
}
/// Origin of the completion candidates for an `Ident` slot.
///
/// The walker reports this through `Expectation::Ident { source }`
/// (which the parse-error bridge turns into a human label), and the
/// completion engine uses it to choose the `SchemaCache` lookup for
/// Tab candidates. `NewName` and `Free` never consult the schema:
/// `NewName` marks a slot where the user invents the identifier,
/// `Free` is the catch-all branch in `mode`/`messages` that funnels
/// unknown values into a friendly validator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IdentSource {
    /// The user makes this name up. No schema lookup and no
    /// completion candidates beyond the identifier shape itself.
    NewName,
    /// Name of an existing table.
    Tables,
    /// Name of an existing column in the current table.
    Columns,
    /// Name of an existing relationship.
    Relationships,
    /// Name of an existing index.
    Indexes,
    /// Closed set from `Type::all()` — surfaced by the walker's
    /// content validator on column-type slots; not user-listable
    /// from the schema.
    Types,
    /// Closed, curated set of fake-data generator names (ADR-0048
    /// D9) — the `seed … set <col> as <generator>` slot. Like
    /// `Types`, not user-listable from the schema; the vocabulary
    /// lives in `src/seed` and the completion engine offers it. The
    /// grammar slot is purely structural (matches any identifier);
    /// an unknown name is flagged live (validity) and rejected at
    /// execution.
    Generators,
    /// Any identifier shape; used by synthetic catch-all branches
    /// (e.g., the unknown-value branch of `mode <value>`).
    Free,
}

impl IdentSource {
    /// Reports whether candidates for this source are existing
    /// schema entities (tables, columns, relationships, indexes)
    /// rather than user-invented names or a closed vocabulary.
    #[must_use]
    pub const fn completes_from_schema(self) -> bool {
        // Exhaustive on purpose: a new variant must decide which
        // side it belongs to.
        match self {
            Self::Tables | Self::Columns | Self::Relationships | Self::Indexes => true,
            Self::NewName | Self::Types | Self::Generators | Self::Free => false,
        }
    }

    /// Human-facing label used in parse-error wording ("expected
    /// table name") and by the completion engine when it maps a
    /// textual `expected` entry back to a source kind. `NewName`
    /// and `Free` share the label "identifier".
    #[must_use]
    pub const fn expected_label(self) -> &'static str {
        match self {
            Self::Tables => "table name",
            Self::Columns => "column name",
            Self::Relationships => "relationship name",
            Self::Indexes => "index name",
            Self::Types => "type",
            Self::Generators => "generator name",
            Self::NewName | Self::Free => "identifier",
        }
    }

    /// Inverse of [`Self::expected_label`]: recovers the source kind
    /// from one of the `ParseError::Invalid::expected` strings the
    /// walker bridge produces. Returns `None` for any other text.
    ///
    /// `"identifier"` resolves to `NewName`, never `Free`, even
    /// though both render that label.
    #[must_use]
    pub fn from_expected_label(label: &str) -> Option<Self> {
        // `Free` is deliberately left out so the shared "identifier"
        // label maps back to `NewName` only.
        let reversible = [
            Self::NewName,
            Self::Tables,
            Self::Columns,
            Self::Relationships,
            Self::Indexes,
            Self::Types,
            Self::Generators,
        ];
        reversible
            .iter()
            .copied()
            .find(|source| source.expected_label() == label)
    }
}
/// Hint-panel mode for an expected node (ADR-0024 §HintMode-per-node).
///
/// `Default` (today's behaviour) shows candidates if any, falls
/// back to a prose ladder otherwise. The other variants
/// override at slot positions where the candidate list would be
/// actively misleading or where the user benefits from format
/// guidance:
///
/// - `ProseOnly(catalog_key)` — show only prose from the
/// catalog; suppress Tab candidates. Used today by the
/// value-literal slot at empty prefix (the "null/true/false"
/// candidate trio is misleading at a slot that more often
/// takes a number / quoted text / date).
/// - `ForceProse(catalog_key)` — force this prose at the
/// catalog key regardless of candidates. Used today by
/// `NewName` ident slots ("Type a name, then `(`").
/// - `IntroProse(catalog_key)` — show prose at slot entry to
/// *introduce* a position whose first-class candidate is an
/// ident slot (which would be invisible in a pure-candidate
/// render) but whose keyword alternatives are also available.
/// Unlike `ProseOnly`, Tab candidates remain available — the
/// user still cycles through the keyword set. Used at the
/// advanced-mode CREATE TABLE element slot, where the
/// column-name `NewName` slot would otherwise be invisible
/// alongside the table-level constraint keywords (issue #4).
/// - `SuppressProse` — show only candidates; never fall back
/// to a prose ladder.
///
/// The `&'static str` payload of the prose variants is the catalog
/// key of the prose to render.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
/// Candidates if any, otherwise the prose ladder.
Default,
/// Always show the prose at this catalog key, regardless of
/// candidates.
ForceProse(&'static str),
/// Show only the prose at this catalog key; Tab candidates are
/// suppressed.
ProseOnly(&'static str),
/// Show the prose at this catalog key on slot entry while keeping
/// Tab candidates available.
IntroProse(&'static str),
/// Show only candidates; never fall back to prose.
SuppressProse,
}
/// A keyword node literal.
///
/// The `aliases` slice is empty for the app-lifecycle commands
/// today; the round-5 `q` removal remains intentional, and any
/// future re-introduction would be a one-line `aliases: &["q"]`
/// addition (ADR-0024 §aliases).
#[derive(Debug, Clone, Copy)]
pub struct Word {
/// Canonical spelling of the keyword.
pub primary: &'static str,
/// Alternative spellings accepted in place of `primary`;
/// `Word::matches` compares them ASCII-case-insensitively too.
pub aliases: &'static [&'static str],
/// When `Some`, the highlight class this keyword requests in
/// place of its default — e.g. `HighlightClass::Type` for
/// `Word::type_keyword`.
pub highlight_override: Option<HighlightClass>,
}
impl Word {
pub const fn keyword(primary: &'static str) -> Self {
Self {
primary,
aliases: &[],
highlight_override: None,
}
}
/// A keyword that highlights as a column **type** rather than a
/// clause keyword (ADR-0022 Amendment 4). The one user today is
/// the two-word `double precision` SQL alias (ADR-0035 §3): it
/// is matched as keyword tokens, not an `IdentSource::Types`
/// `Ident`, so without this it would render keyword-coloured
/// while its single-word synonyms (`float`, `real`) render as
/// types.
pub const fn type_keyword(primary: &'static str) -> Self {
Self {
primary,
aliases: &[],
highlight_override: Some(HighlightClass::Type),
}
}
/// Case-insensitive match against the primary or any alias.
pub fn matches(&self, candidate: &str) -> bool {
if candidate.eq_ignore_ascii_case(self.primary) {
return true;
}
self.aliases
.iter()
.any(|a| candidate.eq_ignore_ascii_case(a))
}
}
/// Content-level validator for an `Ident` slot.
///
/// Receives the matched identifier text. On mismatch it returns a
/// [`ValidationError`] — the catalog key + arg list to surface as
/// `WalkOutcome::ValidationFailed`.
pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>;
/// Content-level validator for a `NumberLit` slot. Same shape
/// as `IdentValidator` (it receives the matched literal text);
/// surfaces as `ValidationFailed` on Err.
pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>;
/// A content-level rejection produced by an `IdentValidator`, a
/// `NumberValidator`, or a command's `ast_builder`: a message-catalog
/// key plus the arguments that go with it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
/// Catalog key of the message to surface.
pub message_key: &'static str,
/// `(name, value)` pairs accompanying the message — presumably
/// the named substitutions for the catalog entry; confirm against
/// the catalog renderer.
pub args: Vec<(&'static str, String)>,
}
/// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy).
///
/// Some variants carry data (`Word` literal, `Punct` char,
/// `Ident` source/role/validator); combinators reference their
/// children through `&'static [Node]` / `&'static Node` slices,
/// which lets the entire registry live in `const`s — no runtime
/// allocation, every command is one declaration block in its
/// grammar file.
pub enum Node {
/// A keyword token. Case-insensitive match (ADR-0009).
Word(Word),
/// A single punctuation character. The exact set comes from
/// the migrated commands' usage — Phase A needs none of
/// these (app-lifecycle commands are pure keyword + ident +
/// path), but the variant is declared for Phase B+ use.
#[allow(dead_code)]
Punct(char),
/// An identifier slot. `source` drives completion candidates;
/// `role` names the slot for error wording / completion-engine
/// dispatch; `validator` runs after a successful identifier-
/// shape match and may reject the value with a catalog-driven
/// message.
///
/// `writes_table` (Phase D): when `true` and `source ==
/// Tables`, the walker writes the matched ident to
/// `WalkContext::current_table` and resolves
/// `current_table_columns` from the schema cache (if any).
/// `writes_column` (Phase D): when `true` and `source ==
/// Columns`, the walker writes the matched ident's
/// `TableColumn` to `WalkContext::current_column` (resolved
/// against `current_table_columns`). Subsequent value slots
/// dispatch on the column's type.
Ident {
/// Where this slot's completion candidates come from.
source: IdentSource,
/// Name of the slot, used for error wording and
/// completion-engine dispatch.
role: &'static str,
/// Optional content check run after the identifier shape
/// has matched.
validator: Option<IdentValidator>,
/// Highlight class to use for the matched identifier in
/// place of the default (type and generator slots set
/// this, per the `HighlightClass` docs).
#[allow(dead_code)]
highlight_override: Option<HighlightClass>,
/// See the variant docs: record the match as the current
/// table (effective when `source == Tables`).
writes_table: bool,
/// See the variant docs: record the match as the current
/// column (effective when `source == Columns`).
writes_column: bool,
/// Append the matched text to
/// `WalkContext::user_listed_columns` (Phase D). Used by
/// the `insert into <T> (col1, col2, …)` column-list
/// idents — when the walker sees these, the form is
/// "Form A" and the inner values slot list mirrors the
/// user's explicit selection instead of the
/// auto-filtered schema default.
writes_user_listed_column: bool,
/// Set the matched text as the alias of the most-
/// recently-pushed `TableBinding` on the top
/// `ScopeFrame`'s `from_scope` (ADR-0032 §10.1). Used by
/// the `[ AS ] alias` slot on `from_clause` /
/// `join_clause` table sources in `sql_select.rs`; a
/// no-op on `IdentSource::NewName` slots that do not
/// follow a table-name push, or when the top frame's
/// `from_scope` is empty.
writes_table_alias: bool,
/// Push a placeholder `CteBinding` (name only, empty
/// columns) onto the top `ScopeFrame`'s `cte_bindings`
/// (ADR-0032 §10.3 stage 1). Used by the CTE-name slot
/// in `with_clause`; the placeholder is rewritten with
/// derived output columns at the body's frame exit
/// (§10.3 stage 2; harvest derivation rules pending).
writes_cte_name: bool,
/// Append the matched text to the top `ScopeFrame`'s
/// `projection_aliases` (ADR-0032 §10.4). Used by the
/// projection-list alias slot (both the bare and `AS`
/// forms) so `ORDER BY` completion can offer aliases as
/// candidates.
writes_projection_alias: bool,
},
/// A number literal. The optional `validator` runs against
/// the matched text (used by Phase D value slots to enforce
/// per-type integer/decimal rules).
NumberLit {
/// Optional content check applied to the matched literal text.
validator: Option<NumberValidator>,
},
/// A literal byte sequence at this position — matches
/// bytes verbatim (whitespace-skipped) with a lookahead so
/// `1` doesn't half-match `12` and `n` doesn't half-match
/// `name`. Used by Phase B's `add 1:n …` for the literal
/// `1`. Surfaces in the expected-set as `` `<literal>` ``,
/// matching chumsky's labelled-token rendering.
Literal(&'static str),
/// A quoted string literal. Per the `BarePath` notes, paths
/// that contain spaces use this quoted form.
#[allow(dead_code)]
StringLit,
/// A blob literal. NOTE(review): the accepted literal syntax
/// is defined by the walker and is not visible in this file.
#[allow(dead_code)]
BlobLit,
/// A `--name` flag. Walker matches the flag shape and
/// asserts the name matches the expected literal.
Flag(&'static str),
/// A non-whitespace run consumed verbatim from source. Per
/// ADR-0024's path-bearing-commands UX change, paths with
/// spaces use the quoted form (`StringLit`); `BarePath`
/// terminates at the first whitespace byte.
BarePath,
/// Try each child in order. The first one that matches a
/// non-empty prefix wins; if none match, the choice fails
/// with the union of expectations.
Choice(&'static [Self]),
/// All children must match in order. Whitespace is implicitly
/// allowed between siblings.
Seq(&'static [Self]),
/// The inner node may match or be skipped.
Optional(&'static Self),
/// `inner` matches at least `min` times, separated by
/// `separator` (if any). Phase C+ uses this for `with pk`
/// column lists.
#[allow(dead_code)]
Repeated {
/// The node matched on each repetition.
inner: &'static Self,
/// Node required between consecutive repetitions, if any.
separator: Option<&'static Self>,
/// Minimum number of repetitions that must match.
min: usize,
},
/// Walks the referenced `&'static Node` once, mandatory
/// (ADR-0026 §2). The reference indirection is what lets a
/// named `static` grammar fragment appear inside its own
/// subtree: a `Seq` / `Choice` embeds its children by value
/// and so cannot close a cycle, but a `&'static Node`
/// reference can point back at an enclosing fragment. This
/// is the mechanism the stratified WHERE-expression grammar
/// recurses through — the `( or_expr )` branch and the
/// `not_expr` self-reference.
///
/// The walker counts active `Subgrammar` frames in
/// `WalkContext::subgrammar_depth` and refuses past
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
/// nested input (`((((…))))`) fails with a friendly error
/// rather than overflowing the parser stack.
///
/// The static counterpart of `DynamicSubgrammar`: that one
/// builds a fresh node from the `WalkContext` at walk time;
/// this one references a fixed fragment already in the
/// grammar tree.
Subgrammar(&'static Self),
/// Like `Subgrammar`, but the walker additionally **pushes a
/// new `ScopeFrame`** onto `WalkContext::from_scope_stack` on
/// entry and pops it on exit (ADR-0032 §10.2). The
/// `subgrammar_depth` counter increments uniformly across
/// both variants — the depth cap applies the same way — so
/// this variant introduces no new walker capability for
/// grammar recursion; it only layers lexical-scope discipline
/// on top.
///
/// Used at every SQL `SELECT` recursion point: subqueries
/// in `sql_expr.rs` (scalar `(SELECT …)`, `IN (SELECT …)`,
/// `[NOT] EXISTS (SELECT …)`) and CTE bodies in
/// `sql_select.rs` reference the compound-SELECT through
/// `Node::ScopedSubgrammar(&SQL_SELECT_COMPOUND)`. DSL `Expr`
/// recursion (ADR-0026) and the `sql_expr.rs` precedence-
/// ladder recursion (ADR-0031) keep using the plain
/// `Subgrammar` variant and never push a scope.
ScopedSubgrammar(&'static Self),
/// Resolves at walk time using the active `WalkContext`.
/// Phase D+ uses this for `column_value_list`. The factory
/// is pure in `ctx`, so the walker memoizes the resolution
/// (one leak per distinct schema shape).
#[allow(dead_code)]
DynamicSubgrammar(fn(&WalkContext) -> Self),
/// Like `DynamicSubgrammar` but the factory also sees the
/// source and the current byte position, so it can look
/// ahead. Used by the insert first-paren to discriminate
/// Form A (`(cols) values (...)`) from Form C (`(vals)`)
/// before walking the contents — Form C then routes through
/// the typed `column_value_list` (ADR-0024 §Phase D, Form C
/// type-awareness). Not memoized: the output depends on the
/// source, not just `ctx`.
Lookahead(fn(&WalkContext, &str, usize) -> Self),
/// Zero-width node that *establishes the active column* for the
/// value slot that follows it (ADR-0036 Phase 3b). Matches the
/// empty string and, as a side effect, sets
/// `WalkContext::current_column` to the referenced column and
/// `pending_value_column` to its name — exactly as an
/// `Ident { writes_column: true }` does, but without consuming a
/// column identifier from the input.
///
/// This is the primitive that gives `INSERT … VALUES (…)`
/// positions a per-position column identity: the positions are
/// positional (no per-position column ident to write
/// `current_column`), so a `DynamicSubgrammar` factory
/// (`sql_insert::sql_value_list`) emits `SetColumn(colᵢ)` before
/// each value position, then the shared boundary-aware `SET_VALUE`
/// slot routes a lone literal to that column's typed slot and any
/// expression to `sql_expr`. The referenced `TableColumn` is
/// leaked by the factory (bounded by the column count, like the
/// `DynamicSubgrammar` `Box::leak`).
SetColumn(&'static crate::completion::TableColumn),
/// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots).
///
/// Walks `inner` to consume the literal but records the
/// column type in `WalkContext::pending_value_type` so the
/// hint resolver can emit per-type catalog prose ("Type an
/// integer", "Type a date as 'YYYY-MM-DD'", …) at empty
/// prefix at this slot. When `column_name` is `Some`, the
/// walker also writes `pending_value_column` so the hint
/// can be rendered with the actual column name (e.g. "for
/// `Email`: Type a quoted string …") rather than a generic
/// type hint. The recorded values clear on a successful
/// inner match — so positions BETWEEN typed slots
/// (`insert into T values (1` mid-input) don't carry stale
/// hint state.
TypedValueSlot {
/// Column type recorded in `pending_value_type`.
ty: crate::dsl::types::Type,
/// Column name recorded in `pending_value_column`, when known.
column_name: Option<&'static str>,
/// The node that actually consumes the literal.
inner: &'static Self,
},
/// Annotates `inner` with a hint-panel `HintMode` (ADR-0024
/// §HintMode-per-node). On entry the walker records `mode`
/// in `WalkContext::pending_hint_mode`; on a successful
/// inner match the record clears (so positions past the
/// slot don't carry stale hint state). Transparent to
/// matching, highlighting and the expected-set otherwise —
/// it walks `inner` and returns its result verbatim.
///
/// This is the node-attached replacement for the hint
/// resolver's earlier signature-matching: the grammar tree
/// declares the hint mode at the slot, the walker
/// propagates it, the resolver reads it. Used by the
/// value-literal fallback slot (`ProseOnly`) and `NewName`
/// ident slots (`ForceProse`).
Hinted {
/// Hint mode recorded in `pending_hint_mode` on entry.
mode: HintMode,
/// The annotated node; walked and returned verbatim.
inner: &'static Self,
},
}
/// Which mode group a registered command belongs to (ADR-0030
/// §2, ADR-0033 Amendment 1).
///
/// Category is a *dispatcher* concern, not intrinsic to a
/// command's grammar, so it is attached at the `REGISTRY`
/// registration site rather than as a field on every
/// `CommandNode`. The dispatcher (`walker::walk`) uses it to
/// route a given input by the active input mode:
///
/// - `Simple` commands are the DSL surface; available in both
/// simple and advanced mode.
/// - `Advanced` commands are the SQL surface; available only in
/// advanced mode. In simple mode an advanced-only entry word
/// yields the "this is SQL" hint (`advanced_mode.sql_in_simple`).
///
/// A *shared* entry word (e.g. `insert`, from Phase 3 sub-phase
/// 3b on) carries a node in *both* groups — a `Simple` DSL node
/// and an `Advanced` SQL node. The dispatcher tries the SQL node
/// first in advanced mode and falls back to the DSL node when the
/// SQL shape does not match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandCategory {
/// DSL surface; available in both simple and advanced mode.
Simple,
/// SQL surface; available only in advanced mode.
Advanced,
}
/// Top-level entry record. One per command. The `entry` keyword
/// alone identifies which command the walker dispatches to;
/// `shape` is what follows the entry word.
pub struct CommandNode {
/// The entry keyword that selects this command.
pub entry: Word,
/// Grammar for everything that follows the entry word.
pub shape: Node,
/// Builds the typed `Command` AST from the matched terminal
/// path. May fail with a `ValidationError` for content-level
/// rejections that are easier to express imperatively than
/// as a per-node validator (Phase A: none — every app
/// command's ast_builder is infallible).
///
/// `source` is the full input line being parsed. Most builders
/// reconstruct the `Command` from the matched `MatchedPath`
/// alone and ignore it; SQL builders whose `Command` carries
/// the validated SQL text (ADR-0030 §4/§6, ADR-0031 §2) read
/// it.
pub ast_builder: fn(&MatchedPath, &str) -> Result<Command, ValidationError>,
/// Catalog key (`help.<id>`) for this command's in-app
/// `help` entry. Consumed by `App::note_help`, which
/// iterates the REGISTRY and translates each `help_id` —
/// so a newly-registered command appears in `help`
/// automatically (ADR-0024 §help_id).
pub help_id: Option<&'static str>,
/// Catalog key stems (`hint.cmd.<id>`) for this command's
/// **tier-3** contextual hints (ADR-0053 / H2), **one per form**,
/// mirroring `usage_ids`. A single-form command carries one; a
/// multi-form command (`add`, `drop`, `show`, `create`) carries
/// one per form so a live-input hint can be specific to the form
/// being typed (`hint.cmd.add_relationship`, not a shared `add`
/// block). `hint_key_for_input_in_mode` disambiguates by the form
/// word, reusing `usage_key_for_input_in_mode`'s logic. Empty
/// until a form's tier-3 block is authored (the surface falls back
/// to tier-2 ambient/error text). Distinct from `help_id` (which is
/// `None` on advanced-SQL forms purely to dedup the `help` list).
pub hint_ids: &'static [&'static str],
/// Catalog keys under `parse.usage.*` to render in the
/// "usage:" block when a parse error fires for this command
/// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families
/// like `drop` (drop table / drop column / drop relationship)
/// carry every variant so the user sees the full family on a
/// generic-entry-word failure.
pub usage_ids: &'static [&'static str],
}
/// Resolves the entry word at the start of `source` to its usage
/// catalog keys, using the `Simple`-mode selection.
///
/// Matching is case-insensitive and tolerates leading whitespace.
/// This supersedes `dsl::usage::matched_entry`: the walker is the
/// single source of truth for which command an input belongs to.
///
/// Yields the canonical (primary-form) entry literal together with
/// the `usage_ids` list, or `None` when no entry word matches.
#[must_use]
pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, Vec<&'static str>)> {
    // The mode-less entry point is defined as the simple-mode lookup.
    let simple_mode = crate::mode::Mode::Simple;
    usage_keys_for_input_in_mode(source, simple_mode)
}
/// Mode-aware form of [`usage_keys_for_input`] (ADR-0042 G3).
///
/// A shared entry word (`create`, `drop`, `insert`, …) registers a
/// `Simple` DSL node *and* one or more `Advanced` SQL nodes, and the
/// usage block has to describe the surface the user is typing in:
/// SQL forms in `Advanced` mode, DSL forms in `Simple` mode.
/// Otherwise advanced-mode `create` would show the DSL `create table
/// … with pk …` template, which is not valid SQL.
///
/// Node selection is delegated to
/// `selected_nodes_for_input_in_mode`. The result pairs the first
/// selected node's primary entry literal with the union of the
/// selected nodes' `usage_ids`, de-duplicated in selection order — so
/// advanced `create` shows both `sql_create_table` and
/// `sql_create_index`. Returns `None` when no node is selected or the
/// selected nodes carry no usage keys.
#[must_use]
pub fn usage_keys_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<(&'static str, Vec<&'static str>)> {
    let selection = selected_nodes_for_input_in_mode(source, mode);
    // An empty selection means no entry word matched.
    let (_, lead, _) = selection.first()?;
    let canonical = lead.entry.primary;

    let mut usage: Vec<&'static str> = Vec::new();
    let every_key = selection
        .iter()
        .flat_map(|(_, node, _)| node.usage_ids.iter().copied());
    for key in every_key {
        // Keep first occurrences only, preserving selection order.
        if !usage.contains(&key) {
            usage.push(key);
        }
    }

    if usage.is_empty() {
        None
    } else {
        Some((canonical, usage))
    }
}
/// Resolves the one tier-3 hint key (`hint.cmd.<id>` stem) for the
/// command **form** being typed in `source`, under `mode`
/// (H2 / ADR-0053).
///
/// Mirrors [`usage_key_for_input_in_mode`]: the mode-selected nodes'
/// `hint_ids` are unioned and then narrowed to the typed form by
/// [`pick_form_key`] — so `add 1:n relationship` resolves to the
/// relationship hint, and an advanced-SQL form resolves to its own
/// key rather than its simple sibling's.
///
/// Returns `None` when no entry word matches, when the form has no
/// tier-3 block yet, or when a multi-form entry word is typed with
/// nothing after it (the caller falls back to tier-2).
#[must_use]
pub fn hint_key_for_input_in_mode(source: &str, mode: crate::mode::Mode) -> Option<&'static str> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    let selection = selected_nodes_for_input_in_mode(source, mode);

    // Union in selection order (advanced-primary first in advanced
    // mode), so a shared entry word resolves to the surface the user
    // is in. An empty selection simply yields an empty union.
    let mut stems: Vec<&'static str> = Vec::new();
    let every_stem = selection
        .iter()
        .flat_map(|(_, node, _)| node.hint_ids.iter().copied());
    for stem in every_stem {
        if !stems.contains(&stem) {
            stems.push(stem);
        }
    }

    match stems.as_slice() {
        [] => None,
        [only] => Some(*only),
        [primary, ..] => {
            // A bare multi-form entry word (`add`⏎) has not chosen a
            // form yet: defer to tier-2, which lists the choices.
            let entry_start = skip_whitespace(source, 0);
            let bare_entry_word = consume_ident(source, entry_start)
                .is_some_and(|(_, entry_end)| skip_whitespace(source, entry_end) >= source.len());
            if bare_entry_word {
                return None;
            }
            // A form word picks its form (`drop column` →
            // `drop_column`). When the second token is not a form
            // word (`insert into …`, `update … set`), use the
            // mode-primary key: the SQL form in advanced mode, the
            // DSL form in simple mode.
            pick_form_key(source, &stems).or(Some(*primary))
        }
    }
}
/// Mode-aware selection of the registered command nodes for the
/// entry word at the start of `source`.
///
/// Shared by the usage-key and hint-id lookups so both agree on
/// which forms the user can be typing. Returns an empty vector when
/// `source` does not start with an identifier or when no command is
/// registered under that word.
///
/// Advanced mode: every candidate is reachable — the SQL nodes are
/// primary and the DSL nodes stay valid through fallback (verified:
/// `create table … with pk` and `drop column …` both run in advanced
/// mode) — so the result is all candidates with the `Advanced` ones
/// moved to the front, and a hint never hides input that works.
/// Simple mode: only the `Simple` (DSL) nodes, because SQL-only forms
/// hit the "this is SQL" rail and are unreachable (ADR-0042 G3).
/// Degenerate guard: an advanced-only word in simple mode would leave
/// the selection empty, so the full candidate list is returned.
fn selected_nodes_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Vec<(usize, &'static CommandNode, CommandCategory)> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    let entry_start = skip_whitespace(source, 0);
    let Some((word_start, word_end)) = consume_ident(source, entry_start) else {
        return Vec::new();
    };
    let candidates = commands_for_entry_word(&source[word_start..word_end]);
    if candidates.is_empty() {
        return Vec::new();
    }

    let selected: Vec<(usize, &'static CommandNode, CommandCategory)> =
        if mode == crate::mode::Mode::Advanced {
            // Stable split: each half keeps its registry order.
            let (mut ordered, fallback): (Vec<_>, Vec<_>) = candidates
                .iter()
                .copied()
                .partition(|(_, _, category)| *category == CommandCategory::Advanced);
            ordered.extend(fallback);
            ordered
        } else {
            candidates
                .iter()
                .copied()
                .filter(|(_, _, category)| *category == CommandCategory::Simple)
                .collect()
        };

    if selected.is_empty() {
        candidates
    } else {
        selected
    }
}
/// Resolves `source` to the one usage template that best fits it,
/// when that can be determined (simple-mode selection).
///
/// A single-form command yields its only usage key. A multi-form
/// command (`add`, `drop`) is narrowed by the form word following
/// the entry keyword — a parse error in `add index …` therefore
/// resolves to the `add index` usage, not the first-listed `add
/// column`. A bare multi-form entry word (`add` with nothing after
/// it) yields `None` because no form has been chosen; the caller
/// decides whether to show the whole family or nothing.
#[must_use]
pub fn usage_key_for_input(source: &str) -> Option<&'static str> {
    // The mode-less entry point is defined as the simple-mode lookup.
    let simple_mode = crate::mode::Mode::Simple;
    usage_key_for_input_in_mode(source, simple_mode)
}
/// Mode-aware form of [`usage_key_for_input`] (ADR-0042 G3): takes
/// the mode-selected usage key set and narrows it to the single
/// most relevant key for the form being typed.
///
/// Returns `None` when no usage keys are selected for `source` or
/// when [`pick_form_key`] cannot settle on one.
#[must_use]
pub fn usage_key_for_input_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Option<&'static str> {
    // The canonical entry literal is not needed here; only the keys.
    usage_keys_for_input_in_mode(source, mode)
        .and_then(|(_, candidate_keys)| pick_form_key(source, &candidate_keys))
}
/// Chooses the one entry of `keys` that belongs to the form named
/// after the entry keyword in `source`.
///
/// An empty list yields `None` and a single-entry list yields its
/// only key without looking at `source`. A longer list is narrowed
/// by what follows the entry word:
///
/// - a leading digit (`add 1:n relationship`) → the first key ending
///   in `relationship`;
/// - a leading `m:n`, any ASCII case (`create m:n …`, ADR-0045) → the
///   first key ending in `m2n`;
/// - otherwise the identifier form word (`column`, `index`, `table`,
///   `relationship`), lower-cased → the first key ending in it.
///
/// Shared by the usage-template and tier-3-hint single-key lookups
/// so the two stay in agreement.
///
/// NOTE(review): the form word is compared as a raw suffix, so a
/// short or partial word that happens to end a key (e.g. `x` against
/// a key ending in `index`) also selects it — confirm that is
/// acceptable for live-input hints.
fn pick_form_key<'a>(source: &str, keys: &[&'a str]) -> Option<&'a str> {
    use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};

    match keys {
        [] => return None,
        [only] => return Some(*only),
        _ => {}
    }

    // First key, in list order, whose text ends with `suffix`.
    let first_ending_with =
        |suffix: &str| keys.iter().copied().find(|key| key.ends_with(suffix));

    let entry_start = skip_whitespace(source, 0);
    let (_, entry_end) = consume_ident(source, entry_start)?;
    let form_start = skip_whitespace(source, entry_end);

    // The `add 1:n relationship` form opens with a digit.
    let opens_with_digit = source
        .as_bytes()
        .get(form_start)
        .is_some_and(u8::is_ascii_digit);
    if opens_with_digit {
        return first_ending_with("relationship");
    }

    // The `create m:n relationship` form opens with a letter, so the
    // digit test above misses it; its key ends `…m2n`.
    let opens_with_m2n = source[form_start..]
        .get(..3)
        .is_some_and(|head| head.eq_ignore_ascii_case("m:n"));
    if opens_with_m2n {
        return first_ending_with("m2n");
    }

    // Otherwise the form word is an identifier, compared lower-cased
    // against each key's suffix.
    let (word_start, word_end) = consume_ident(source, form_start)?;
    let form_word = source[word_start..word_end].to_ascii_lowercase();
    first_ending_with(form_word.as_str())
}
/// Lists every command-entry word in the registry once, ordered
/// alphabetically by primary literal. Supersedes
/// `dsl::usage::entry_keywords_alphabetised`, which read the same
/// data through the legacy `usage::REGISTRY`.
#[must_use]
pub fn entry_words_alphabetised() -> Vec<&'static str> {
    // An ordered set sorts and de-duplicates in one step; a shared
    // entry word is registered more than once.
    let unique: std::collections::BTreeSet<&'static str> = REGISTRY
        .iter()
        .map(|(command, _)| command.entry.primary)
        .collect();
    unique.into_iter().collect()
}
/// The active grammar registry, each command paired with its
/// dispatch [`CommandCategory`] (ADR-0033 Amendment 1).
///
/// Migrated commands route through this; everything else falls
/// through to the chumsky path in `dsl::parser`. `Advanced`
/// commands (`select`, `with`, and — from sub-phase 3b — the SQL
/// `insert` / `update` / `delete` nodes) are the SQL surface;
/// the rest are the DSL surface (`Simple`). A shared entry word
/// appears more than once — its `Simple` node plus one or more
/// `Advanced` nodes (`create` and `drop` each carry two `Advanced`
/// nodes); the dispatcher selects by mode.
///
/// Order is significant: [`command_for_entry_word`] returns the
/// *first* entry whose entry word matches, and the indices handed
/// out by the lookup functions are positions in this slice.
pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
// App commands (`app::*`) — all `Simple`.
(&app::QUIT, CommandCategory::Simple),
(&app::HELP, CommandCategory::Simple),
(&app::HINT, CommandCategory::Simple),
(&app::VERSION, CommandCategory::Simple),
(&app::REBUILD, CommandCategory::Simple),
(&app::SAVE, CommandCategory::Simple),
(&app::NEW, CommandCategory::Simple),
(&app::LOAD, CommandCategory::Simple),
(&app::EXPORT, CommandCategory::Simple),
(&app::IMPORT, CommandCategory::Simple),
(&app::MODE, CommandCategory::Simple),
(&app::MESSAGES, CommandCategory::Simple),
(&app::UNDO, CommandCategory::Simple),
(&app::REDO, CommandCategory::Simple),
(&app::COPY, CommandCategory::Simple),
// DSL schema commands (`ddl::*`) — `Simple`.
(&ddl::DROP, CommandCategory::Simple),
(&ddl::ADD, CommandCategory::Simple),
(&ddl::RENAME, CommandCategory::Simple),
(&ddl::CHANGE, CommandCategory::Simple),
(&ddl::CREATE, CommandCategory::Simple),
(&ddl::CREATE_M2N, CommandCategory::Simple),
// DSL data commands (`data::*`) — `Simple`.
(&data::SHOW, CommandCategory::Simple),
(&data::SEED, CommandCategory::Simple),
(&data::INSERT, CommandCategory::Simple),
(&data::UPDATE, CommandCategory::Simple),
(&data::DELETE, CommandCategory::Simple),
(&data::REPLAY, CommandCategory::Simple),
(&data::EXPLAIN, CommandCategory::Simple),
// SQL query entry words — registered as `Advanced` only (no
// `Simple` sibling in this table).
(&data::SELECT, CommandCategory::Advanced),
(&data::WITH, CommandCategory::Advanced),
// Shared entry words (sub-phase 3j, ADR-0033 §2 / Amendment 1):
// `insert` / `update` / `delete` each appear twice — the
// `Simple` DSL node above and this `Advanced` SQL node. The
// dispatcher tries the SQL node first in Advanced mode and falls
// back to the DSL node when the SQL shape does not match.
// NOTE(review): the doc comment on this static says sub-phase 3b
// for the same nodes — confirm which label is current.
(&data::SQL_INSERT, CommandCategory::Advanced),
(&data::SQL_UPDATE, CommandCategory::Advanced),
(&data::SQL_DELETE, CommandCategory::Advanced),
// Shared entry word `explain` (ADR-0039): the `Simple` DSL
// `data::EXPLAIN` (above) wraps `show data` / `update` / `delete`;
// this `Advanced` node wraps the SQL `select` / `with` / `insert`
// / `update` / `delete`. SQL-first / DSL-fallback in advanced mode
// (so `explain show data …` and DSL-only `--all-rows` still reach
// the DSL node); DSL-only in simple mode.
(&data::EXPLAIN_SQL, CommandCategory::Advanced),
// Shared entry word `create` (ADR-0035 §2): the simple
// `ddl::CREATE` (above) and these advanced SQL nodes. The
// dispatcher tries the advanced candidates first in advanced mode
// and falls back to the `create table … with pk …` DSL node when no
// SQL shape matches — the `insert` precedent. 4d adds
// SQL_CREATE_INDEX, so `create` now has *two* advanced nodes;
// `decide` tries both (`create table …` → SQL_CREATE_TABLE,
// `create [unique] index …` → SQL_CREATE_INDEX).
(&ddl::SQL_CREATE_TABLE, CommandCategory::Advanced),
(&ddl::SQL_CREATE_INDEX, CommandCategory::Advanced),
// `alter` is a new advanced-*only* DDL entry word (ADR-0035 §2/§4e),
// like `select`/`with` — no simple node, so `is_advanced_only` is
// true and simple-mode `alter …` gets the "this is SQL" hint.
(&ddl::SQL_ALTER_TABLE, CommandCategory::Advanced),
// Shared `drop` entry word: `ddl::DROP` (simple) and these advanced
// SQL nodes. SQL-first in advanced mode; `drop table [if exists] T`
// → SQL_DROP_TABLE, `drop index [if exists] <name>` → SQL_DROP_INDEX
// (4d — `drop` now has *two* advanced nodes; the dispatcher's
// `decide` tries all advanced candidates). `drop column`/`drop
// relationship`/`drop index on T(…)` fall back to the simple `drop`
// node.
(&ddl::SQL_DROP_TABLE, CommandCategory::Advanced),
(&ddl::SQL_DROP_INDEX, CommandCategory::Advanced),
];
/// Whether `entry` is an advanced-mode-only entry word (ADR-0030 §2,
/// ADR-0033 Amendment 1). Matching is case-insensitive, like keyword
/// matching elsewhere.
///
/// Returns `true` only when at least one [`REGISTRY`] node matches
/// `entry` *and* none of the matching nodes is `Simple` — i.e. there
/// is no DSL command to fall back to. A shared entry word (a `Simple`
/// DSL node alongside `Advanced` SQL nodes) is therefore not
/// advanced-only, and neither is an unregistered word.
#[must_use]
pub fn is_advanced_only(entry: &str) -> bool {
    let mut candidates = REGISTRY
        .iter()
        .filter(|(node, _)| node.entry.matches(entry))
        .peekable();
    // `all` is vacuously true on an empty iterator, so registration is
    // checked first: an unknown word must answer `false`.
    candidates.peek().is_some()
        && candidates.all(|(_, category)| *category != CommandCategory::Simple)
}
/// Find the first `CommandNode` in [`REGISTRY`] whose entry word
/// matches `word` (case-insensitively), together with its `REGISTRY`
/// index — usable as a `WalkOutcome::Match { command_idx }`.
///
/// A shared entry word yields whichever of its nodes is listed first
/// in `REGISTRY`; callers that need to tell the Simple candidate from
/// the Advanced one should use [`commands_for_entry_word`]. Returns
/// `None` when no node matches.
pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> {
    REGISTRY
        .iter()
        .enumerate()
        .find_map(|(idx, (node, _))| node.entry.matches(word).then_some((idx, *node)))
}
/// All `CommandNode`s in [`REGISTRY`] whose entry word matches `word`
/// (case-insensitively), each with its `REGISTRY` index and
/// [`CommandCategory`], in registry order (ADR-0033 Amendment 1).
///
/// A non-shared entry word produces one candidate; a shared entry
/// word (`insert` / `update` / `delete` from sub-phase 3b) produces
/// its `Simple` DSL node and its `Advanced` SQL node. The dispatcher
/// chooses among them by the active input mode. An unregistered word
/// produces an empty vector.
#[must_use]
pub fn commands_for_entry_word(
    word: &str,
) -> Vec<(usize, &'static CommandNode, CommandCategory)> {
    let mut candidates = Vec::new();
    for (idx, (node, category)) in REGISTRY.iter().enumerate() {
        if node.entry.matches(word) {
            candidates.push((idx, *node, *category));
        }
    }
    candidates
}
#[cfg(test)]
mod hint_key_tests {
    use super::hint_key_for_input_in_mode;
    use crate::mode::Mode;

    /// Per-form hint keying (ADR-0053 D3): the hint key follows the
    /// form the user *typed*, not merely the command node — `add 1:n
    /// relationship` gets the relationship hint, `add column` the
    /// column hint, and never another form's.
    #[test]
    fn hint_key_resolves_the_typed_form() {
        let cases = [
            (
                "add 1:n relationship from A.x to B.y",
                Mode::Simple,
                Some("add_relationship"),
            ),
            ("add column Note text to T", Mode::Simple, Some("add_column")),
            ("insert into T values (1)", Mode::Simple, Some("insert")),
            // Multi-form DROP disambiguates to the typed form too.
            ("drop table T", Mode::Simple, Some("drop_table")),
            // Mode picks the surface for a shared entry word whose
            // second token isn't a form word: SQL form in advanced,
            // DSL in simple.
            ("insert into T values (1)", Mode::Advanced, Some("sql_insert")),
            ("insert into T values (1)", Mode::Simple, Some("insert")),
            // `create table` shares a form word — advanced-first
            // ordering resolves it to the SQL form in advanced mode.
            (
                "create table T (id int)",
                Mode::Advanced,
                Some("sql_create_table"),
            ),
            // Unknown entry word → None (tier-2 fallback).
            ("zzz", Mode::Simple, None),
        ];
        for (input, mode, expected) in cases {
            assert_eq!(
                hint_key_for_input_in_mode(input, mode),
                expected,
                "hint key for {input:?} in {mode:?} mode",
            );
        }
    }

    /// Comprehensiveness gate (ADR-0053 D6): each REGISTRY command
    /// declares at least one `hint_id`, and every declared id has a
    /// tier-3 `hint.cmd.<id>` block in the catalog. `keys.rs` checks
    /// that referenced keys resolve; this checks that every command
    /// *has* one.
    #[test]
    fn every_command_form_has_a_tier3_block() {
        let cat = crate::friendly::catalog();
        for (command, _) in super::REGISTRY {
            let entry = command.entry.primary;
            assert!(
                !command.hint_ids.is_empty(),
                "command `{}` has no hint_ids (ADR-0053 D6)",
                entry
            );
            for id in command.hint_ids {
                let key = format!("hint.cmd.{id}.what");
                assert!(
                    cat.get(&key).is_some(),
                    "missing tier-3 block `{key}` for command `{}`",
                    entry
                );
            }
        }
    }

    /// Comprehensiveness gate (ADR-0053 D6): each runtime error class
    /// that `friendly::error_hint_class` can return has a tier-3
    /// `hint.err.<class>` block. The list below must be kept in sync
    /// with `error_hint_class` by hand (its own unit tests pin the
    /// outputs). Diagnostic classes are deferred (issue #38), so they
    /// are not checked here.
    #[test]
    fn every_runtime_error_class_has_a_tier3_block() {
        const CLASSES: &[&str] = &[
            "unique",
            "foreign_key.child_side",
            "foreign_key.parent_side",
            "not_null",
            "check",
            "type_mismatch",
            "not_found",
            "already_exists",
            "generic",
            "invalid_value",
        ];
        let cat = crate::friendly::catalog();
        for class in CLASSES {
            let key = format!("hint.err.{class}.what");
            assert!(cat.get(&key).is_some(), "missing tier-3 error block `{key}`");
        }
    }

    /// Semantic-verification guard (handoff-71): each
    /// `hint.cmd.<form>` **example** has to parse in the mode that
    /// form is taught for. It backstops the bug class found in the H2
    /// corpus pass — an example drifting out of the real grammar (a
    /// typo, a removed clause, or an argument the command never
    /// accepted, e.g. an inline name on `save as`, which opens a modal
    /// instead). It cannot police the *semantics* of an example that
    /// happens to parse (that is the manual pass), but it locks the
    /// syntactic floor so future edits can't ship an unparseable
    /// teaching line.
    ///
    /// The per-form mode mirrors `hint_key_for_input_in_mode`: the
    /// advanced-SQL forms are taught in advanced mode, everything else
    /// (DSL + app commands) in simple mode.
    #[test]
    fn every_cmd_hint_example_parses_in_its_mode() {
        use crate::dsl::parser::parse_command_in_mode;

        // Advanced-mode forms — the SQL surface (ADR-0030 to ADR-0039).
        // Any form not listed here (DSL + app commands) is taught in
        // simple mode, matching the mode split that
        // `hint_key_for_input_in_mode` resolves.
        const ADVANCED: &[&str] = &[
            "sql_create_table",
            "sql_alter_table",
            "sql_create_index",
            "sql_drop_index",
            "sql_drop_table",
            "sql_insert",
            "sql_update",
            "sql_delete",
            "select",
            "with",
            "explain_sql",
        ];

        // Walk the *catalog* (the corpus is the source of truth) rather
        // than the REGISTRY, so blocks not owned by any command node
        // are covered too and an orphaned or mis-keyed example cannot
        // slip past the guard.
        let cat = crate::friendly::catalog();
        let mut checked = 0usize;
        for key in cat.keys() {
            // Only `hint.cmd.<id>.example` keys are of interest.
            let form = key
                .strip_prefix("hint.cmd.")
                .and_then(|rest| rest.strip_suffix(".example"));
            let Some(id) = form else {
                continue;
            };
            let example = cat.get(key).expect("key came from the catalog");
            let mode = if ADVANCED.iter().any(|sql_form| *sql_form == id) {
                Mode::Advanced
            } else {
                Mode::Simple
            };
            let parsed = parse_command_in_mode(example, mode);
            assert!(
                parsed.is_ok(),
                "hint.cmd.{id}.example does not parse in {mode:?} mode: {example:?}",
            );
            checked += 1;
        }

        // Floor guard: the corpus had 49 command forms at the time of
        // writing (ADR-0053). A lower count means a block (and its
        // example coverage) silently vanished.
        assert!(
            checked >= 49,
            "expected at least 49 hint.cmd.* examples, checked {checked}",
        );
    }
}
#[cfg(test)]
mod usage_key_tests {
    use super::usage_key_for_input;

    /// A typed form of a multi-form command must resolve to that
    /// form's own usage key — a parse error in one form must never
    /// show another form's usage (the handoff-18 `151ed08` fix;
    /// regression-locked here, including the digit-led `add 1:n
    /// relationship` form).
    #[test]
    fn multi_form_commands_resolve_to_the_typed_form() {
        const CASES: &[(&str, &str)] = &[
            ("add column to T: c (int)", "parse.usage.add_column"),
            ("add index on T (c)", "parse.usage.add_index"),
            ("add constraint unique to T.c", "parse.usage.add_constraint"),
            (
                "drop constraint check from T.c",
                "parse.usage.drop_constraint",
            ),
            (
                "add 1:n relationship from A.x to B.y",
                "parse.usage.add_relationship",
            ),
            // Trailing junk must not change the resolved form.
            (
                "add 1:n relationship from A.x to B.y --",
                "parse.usage.add_relationship",
            ),
            ("drop table T", "parse.usage.drop_table"),
            ("drop column from table T: c", "parse.usage.drop_column"),
            ("drop index i", "parse.usage.drop_index"),
            ("drop relationship r", "parse.usage.drop_relationship"),
            ("show data T", "parse.usage.show_data"),
            ("show table T", "parse.usage.show_table"),
            // `create` is multi-form (table vs m:n, ADR-0045): each
            // typed form resolves to its own usage key.
            ("create table T with pk id(int)", "parse.usage.create_table"),
            (
                "create m:n relationship from A to B",
                "parse.usage.create_m2n",
            ),
        ];
        for &(input, expected) in CASES {
            assert_eq!(
                usage_key_for_input(input),
                Some(expected),
                "usage key for {input:?}",
            );
        }
    }

    #[test]
    fn a_bare_multi_form_entry_word_resolves_to_no_single_form() {
        // `add` / `drop` on their own choose no form; the caller shows
        // the whole family rather than guessing.
        for bare in ["add ", "drop "] {
            assert_eq!(usage_key_for_input(bare), None);
        }
    }

    #[test]
    fn a_single_form_command_resolves_to_its_one_key() {
        // NOTE(review): the case table in
        // `multi_form_commands_resolve_to_the_typed_form` treats
        // `create` as multi-form (ADR-0045), so this likely exercises
        // form resolution rather than a single-key path — confirm
        // whether the test name is stale.
        let resolved = usage_key_for_input("create table T with pk");
        assert_eq!(resolved, Some("parse.usage.create_table"));
    }

    #[test]
    fn no_two_registered_commands_share_a_help_id() {
        // `note_help` emits one help block per `help_id: Some(_)` and
        // does not de-duplicate, so a repeated help_id would print the
        // same command twice in `help`. Shared-entry-word `Advanced`
        // nodes (SQL_INSERT, …, EXPLAIN_SQL) therefore carry
        // `help_id: None` and defer to their `Simple` sibling.
        let mut seen = std::collections::HashSet::new();
        let help_ids = super::REGISTRY
            .iter()
            .filter_map(|(command, _)| command.help_id);
        for id in help_ids {
            assert!(
                seen.insert(id),
                "duplicate help_id `{id}` in REGISTRY would print twice in `help`",
            );
        }
    }
}