f1e9484af3
End-to-end `seed <table> [count]` path, both modes: - Command::Seed AST + grammar node (show-data table slot + optional positional count) + REGISTRY registration + build_seed. - Runtime dispatch -> Database::seed -> Request::Seed worker arm -> do_seed. - do_seed (Phase-1 skeleton): generates whole rows for non-FK, non-autogen columns via the seed library and inserts them one at a time through do_insert (reusing validation / autogen autofill / FK-error / persistence). One undo step (snapshot_then wraps it) and one history.log line (only the first row carries the source); default count 20. - help (`help seed`) + parse-usage catalog entries. - Reuses CommandOutcome::Insert for the auto-show; a dedicated SeedResult (capped preview + advisory) replaces it in P1.3. 5 Tier-3 integration tests (parse, populate+persist, default-20, reproducible --seed, one history line). 2327 pass / 0 fail / 0 skip, clippy all-targets clean. Deferred to P1.3: FK sampling, identifier/constraint uniqueness, CHECK derivation, block guard, capped preview, advisory, multi-row path. Deferred to P1.4: completion/highlight/hint/validity wiring + --seed flag.
914 lines
38 KiB
Rust
914 lines
38 KiB
Rust
//! Unified declarative grammar tree (ADR-0024).
|
|
//!
|
|
//! The grammar tree is the single source of truth for the DSL —
|
|
//! parsing, completion, syntax highlighting, parse-error usage
|
|
//! rendering, and hint-panel content all derive from this same
|
|
//! data structure (ADR-0023 institutional context).
|
|
//!
|
|
//! Phase A scope (ADR-0024 §migration): the framework lands
|
|
//! alongside the eleven app-lifecycle commands (quit, help,
|
|
//! rebuild, save, save as, new, load, export, import, mode,
|
|
//! messages). The chumsky parser still owns every other
|
|
//! command; the router in `dsl::parser` decides which path to
|
|
//! take per first-token. Schema-aware nodes (`IdentSource::Tables`
|
|
//! and friends) and `DynamicSubgrammar` are declared here but
|
|
//! not exercised until Phase B-D.
|
|
//!
|
|
//! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one
|
|
//! pragmatic addition for Phase A: each `Ident` carries an
|
|
//! optional content validator, used today by the `mode <value>`
|
|
//! / `messages <value>` slots to surface friendly catalog
|
|
//! wording (`mode.unknown`, `messages.unknown`) on out-of-set
|
|
//! identifiers. The same hook generalises naturally to typed
|
|
//! value slots in Phase D.
|
|
|
|
pub mod app;
|
|
pub mod data;
|
|
pub mod ddl;
|
|
pub mod expr;
|
|
pub mod shared;
|
|
pub mod sql_expr;
|
|
pub mod sql_create_table;
|
|
pub mod sql_delete;
|
|
pub mod sql_insert;
|
|
pub mod sql_select;
|
|
pub mod sql_update;
|
|
|
|
use crate::dsl::command::Command;
|
|
use crate::dsl::walker::context::WalkContext;
|
|
use crate::dsl::walker::outcome::MatchedPath;
|
|
|
|
/// Highlight class the walker assigns to each matched terminal.
///
/// The classes are stored in the `WalkResult::per_byte_class` slice;
/// `walker::highlight_runs` hands them on to the input-line and
/// echo-line renderers (ADR-0024 §architecture).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HighlightClass {
    Keyword,
    Identifier,
    /// A column data-type keyword such as `int`, `serial` or `text`.
    /// Kept separate from `Keyword` and `Identifier` so a learner can
    /// distinguish a type from a clause keyword or from a name they
    /// made up (ADR-0022 Amendment 4). A type slot opts in through its
    /// `highlight_override`; the class is never inferred from byte
    /// shape.
    Type,
    Number,
    String,
    Punct,
    Flag,
    Error,
}
|
|
|
|
/// Origin of the completion candidates for an `Ident` slot.
///
/// Two consumers read this: the walker's `Expectation::Ident { source }`
/// (turned into a human label by the parse-error bridge) and the
/// `SchemaCache` lookup the completion engine performs for Tab
/// candidates. `NewName` and `Free` never touch the schema: `NewName`
/// marks a slot where the user makes the identifier up, and `Free` is
/// the catch-all branch in `mode` / `messages` that routes unknown
/// values into a friendly validator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IdentSource {
    /// A name the user invents. No schema lookup and no completion
    /// candidates beyond the identifier shape itself.
    NewName,
    /// Name of an existing table.
    Tables,
    /// Name of an existing column in the current table.
    Columns,
    /// Name of an existing relationship.
    Relationships,
    /// Name of an existing index.
    Indexes,
    /// The closed set from `Type::all()`. Surfaced by the walker's
    /// content validator on column-type slots; it cannot be listed
    /// from the schema.
    Types,
    /// Any identifier shape. Used by synthetic catch-all branches such
    /// as the unknown-value branch of `mode <value>`.
    Free,
}

impl IdentSource {
    /// Reports whether candidates for this source are read from the
    /// schema cache, i.e. they name existing entities rather than
    /// something the user invents or a closed set.
    #[must_use]
    pub const fn completes_from_schema(self) -> bool {
        match self {
            Self::Tables | Self::Columns | Self::Relationships | Self::Indexes => true,
            Self::NewName | Self::Types | Self::Free => false,
        }
    }

    /// Human-readable label for this source, as used in parse-error
    /// wording ("expected table name") and by the completion engine
    /// when it maps a textual `expected` entry back to a source kind.
    /// `NewName` and `Free` share the label "identifier"; `Types` is
    /// labelled "type".
    #[must_use]
    pub const fn expected_label(self) -> &'static str {
        match self {
            Self::Tables => "table name",
            Self::Columns => "column name",
            Self::Relationships => "relationship name",
            Self::Indexes => "index name",
            Self::Types => "type",
            Self::NewName | Self::Free => "identifier",
        }
    }

    /// Reverse of [`Self::expected_label`]: recovers the source kind
    /// from one of the `ParseError::Invalid::expected` strings the
    /// walker bridge emits. The shared label `"identifier"` resolves to
    /// `NewName` (the only writeable label with that wording in
    /// production grammars today), so `Free` is never returned. The
    /// comparison is exact; any other text yields `None`.
    #[must_use]
    pub fn from_expected_label(label: &str) -> Option<Self> {
        let by_label = [
            ("identifier", Self::NewName),
            ("table name", Self::Tables),
            ("column name", Self::Columns),
            ("relationship name", Self::Relationships),
            ("index name", Self::Indexes),
            ("type", Self::Types),
        ];
        by_label
            .iter()
            .find(|(text, _)| *text == label)
            .map(|&(_, source)| source)
    }
}
|
|
|
|
/// How the hint panel renders an expected node
/// (ADR-0024 §HintMode-per-node).
///
/// `Default` keeps today's behaviour: list candidates when there are
/// any, otherwise fall back to a prose ladder. The remaining variants
/// override that at slots where a candidate list would actively
/// mislead, or where format guidance helps the user more:
///
/// - `ProseOnly(catalog_key)`: render only the catalog prose and
///   suppress Tab candidates. Used today by the value-literal slot at
///   empty prefix, where the "null/true/false" candidate trio misleads
///   at a slot that more often takes a number, quoted text or a date.
/// - `ForceProse(catalog_key)`: render the prose at the catalog key no
///   matter what candidates exist. Used today by `NewName` ident slots
///   ("Type a name, then `(`").
/// - `IntroProse(catalog_key)`: render prose on slot entry to
///   *introduce* a position whose first-class candidate is an ident
///   slot (invisible in a pure-candidate render) while keyword
///   alternatives are also on offer. In contrast to `ProseOnly`, Tab
///   candidates stay available, so the user can still cycle through
///   the keyword set. Used at the advanced-mode CREATE TABLE element
///   slot, where the column-name `NewName` slot would otherwise be
///   invisible next to the table-level constraint keywords (issue #4).
/// - `SuppressProse`: render candidates only; never fall back to a
///   prose ladder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HintMode {
    Default,
    ForceProse(&'static str),
    ProseOnly(&'static str),
    IntroProse(&'static str),
    SuppressProse,
}
|
|
|
|
/// A keyword node literal.
|
|
///
|
|
/// The `aliases` slice is empty for the app-lifecycle commands
|
|
/// today; the round-5 `q` removal remains intentional, and any
|
|
/// future re-introduction would be a one-line `aliases: &["q"]`
|
|
/// addition (ADR-0024 §aliases).
|
|
#[derive(Debug, Clone, Copy)]
|
|
pub struct Word {
|
|
pub primary: &'static str,
|
|
pub aliases: &'static [&'static str],
|
|
pub highlight_override: Option<HighlightClass>,
|
|
}
|
|
|
|
impl Word {
|
|
pub const fn keyword(primary: &'static str) -> Self {
|
|
Self {
|
|
primary,
|
|
aliases: &[],
|
|
highlight_override: None,
|
|
}
|
|
}
|
|
|
|
/// A keyword that highlights as a column **type** rather than a
|
|
/// clause keyword (ADR-0022 Amendment 4). The one user today is
|
|
/// the two-word `double precision` SQL alias (ADR-0035 §3): it
|
|
/// is matched as keyword tokens, not an `IdentSource::Types`
|
|
/// `Ident`, so without this it would render keyword-coloured
|
|
/// while its single-word synonyms (`float`, `real`) render as
|
|
/// types.
|
|
pub const fn type_keyword(primary: &'static str) -> Self {
|
|
Self {
|
|
primary,
|
|
aliases: &[],
|
|
highlight_override: Some(HighlightClass::Type),
|
|
}
|
|
}
|
|
|
|
/// Case-insensitive match against the primary or any alias.
|
|
pub fn matches(&self, candidate: &str) -> bool {
|
|
if candidate.eq_ignore_ascii_case(self.primary) {
|
|
return true;
|
|
}
|
|
self.aliases
|
|
.iter()
|
|
.any(|a| candidate.eq_ignore_ascii_case(a))
|
|
}
|
|
}
|
|
|
|
/// Validator run on the content of an `Ident` slot. On a mismatch it
/// returns the catalog key and argument list that get surfaced as
/// `WalkOutcome::ValidationFailed`.
pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>;

/// Validator run on the content of a `NumberLit` slot. It has the same
/// shape as [`IdentValidator`]; an `Err` surfaces as
/// `ValidationFailed`.
pub type NumberValidator = fn(matched: &str) -> Result<(), ValidationError>;

/// Rejection produced by a content validator: the catalog message key
/// plus the named arguments for that message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    pub message_key: &'static str,
    pub args: Vec<(&'static str, String)>,
}
|
|
|
|
/// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy).
|
|
///
|
|
/// Some variants carry data (`Word` literal, `Punct` char,
|
|
/// `Ident` source/role/validator); combinators reference their
|
|
/// children through `&'static [Node]` / `&'static Node` slices,
|
|
/// which lets the entire registry live in `const`s — no runtime
|
|
/// allocation, every command is one declaration block in its
|
|
/// grammar file.
|
|
pub enum Node {
|
|
/// A keyword token. Case-insensitive match (ADR-0009).
|
|
Word(Word),
|
|
/// A single punctuation character. The exact set comes from
|
|
/// the migrated commands' usage — Phase A only needs none of
|
|
/// these (app-lifecycle commands are pure keyword + ident +
|
|
/// path), but the variant is declared for Phase B+ use.
|
|
#[allow(dead_code)]
|
|
Punct(char),
|
|
/// An identifier slot. `source` drives completion candidates;
|
|
/// `role` names the slot for error wording / completion-engine
|
|
/// dispatch; `validator` runs after a successful identifier-
|
|
/// shape match and may reject the value with a catalog-driven
|
|
/// message.
|
|
///
|
|
/// `writes_table` (Phase D): when `true` and `source ==
|
|
/// Tables`, the walker writes the matched ident to
|
|
/// `WalkContext::current_table` and resolves
|
|
/// `current_table_columns` from the schema cache (if any).
|
|
/// `writes_column` (Phase D): when `true` and `source ==
|
|
/// Columns`, the walker writes the matched ident's
|
|
/// `TableColumn` to `WalkContext::current_column` (resolved
|
|
/// against `current_table_columns`). Subsequent value slots
|
|
/// dispatch on the column's type.
|
|
Ident {
|
|
source: IdentSource,
|
|
role: &'static str,
|
|
validator: Option<IdentValidator>,
|
|
#[allow(dead_code)]
|
|
highlight_override: Option<HighlightClass>,
|
|
writes_table: bool,
|
|
writes_column: bool,
|
|
/// Append the matched text to
|
|
/// `WalkContext::user_listed_columns` (Phase D). Used by
|
|
/// the `insert into <T> (col1, col2, …)` column-list
|
|
/// idents — when the walker sees these, the form is
|
|
/// "Form A" and the inner values slot list mirrors the
|
|
/// user's explicit selection instead of the
|
|
/// auto-filtered schema default.
|
|
writes_user_listed_column: bool,
|
|
/// Set the matched text as the alias of the most-
|
|
/// recently-pushed `TableBinding` on the top
|
|
/// `ScopeFrame`'s `from_scope` (ADR-0032 §10.1). Used by
|
|
/// the `[ AS ] alias` slot on `from_clause` /
|
|
/// `join_clause` table sources in `sql_select.rs`; a
|
|
/// no-op on `IdentSource::NewName` slots that do not
|
|
/// follow a table-name push, or when the top frame's
|
|
/// `from_scope` is empty.
|
|
writes_table_alias: bool,
|
|
/// Push a placeholder `CteBinding` (name only, empty
|
|
/// columns) onto the top `ScopeFrame`'s `cte_bindings`
|
|
/// (ADR-0032 §10.3 stage 1). Used by the CTE-name slot
|
|
/// in `with_clause`; the placeholder is rewritten with
|
|
/// derived output columns at the body's frame exit
|
|
/// (§10.3 stage 2; harvest derivation rules pending).
|
|
writes_cte_name: bool,
|
|
/// Append the matched text to the top `ScopeFrame`'s
|
|
/// `projection_aliases` (ADR-0032 §10.4). Used by the
|
|
/// projection-list alias slot (both the bare and `AS`
|
|
/// forms) so `ORDER BY` completion can offer aliases as
|
|
/// candidates.
|
|
writes_projection_alias: bool,
|
|
},
|
|
/// A number literal. The optional `validator` runs against
|
|
/// the matched text (used by Phase D value slots to enforce
|
|
/// per-type integer/decimal rules).
|
|
NumberLit {
|
|
validator: Option<NumberValidator>,
|
|
},
|
|
/// A literal byte sequence at this position — matches
|
|
/// bytes verbatim (whitespace-skipped) with a lookahead so
|
|
/// `1` doesn't half-match `12` and `n` doesn't half-match
|
|
/// `name`. Used by Phase B's `add 1:n …` for the literal
|
|
/// `1`. Surfaces in the expected-set as `` `<literal>` ``,
|
|
/// matching chumsky's labelled-token rendering.
|
|
Literal(&'static str),
|
|
#[allow(dead_code)]
|
|
StringLit,
|
|
#[allow(dead_code)]
|
|
BlobLit,
|
|
/// A `--name` flag. Walker matches the flag shape and
|
|
/// asserts the name matches the expected literal.
|
|
Flag(&'static str),
|
|
/// A non-whitespace run consumed verbatim from source. Per
|
|
/// ADR-0024's path-bearing-commands UX change, paths with
|
|
/// spaces use the quoted form (`StringLit`); `BarePath`
|
|
/// terminates at the first whitespace byte.
|
|
BarePath,
|
|
/// Try each child in order. The first one that matches a
|
|
/// non-empty prefix wins; if none match, the choice fails
|
|
/// with the union of expectations.
|
|
Choice(&'static [Self]),
|
|
/// All children must match in order. Whitespace is implicitly
|
|
/// allowed between siblings.
|
|
Seq(&'static [Self]),
|
|
/// The inner node may match or be skipped.
|
|
Optional(&'static Self),
|
|
/// `inner` matches at least `min` times, separated by
|
|
/// `separator` (if any). Phase C+ uses this for `with pk`
|
|
/// column lists.
|
|
#[allow(dead_code)]
|
|
Repeated {
|
|
inner: &'static Self,
|
|
separator: Option<&'static Self>,
|
|
min: usize,
|
|
},
|
|
/// Walks the referenced `&'static Node` once, mandatory
|
|
/// (ADR-0026 §2). The reference indirection is what lets a
|
|
/// named `static` grammar fragment appear inside its own
|
|
/// subtree: a `Seq` / `Choice` embeds its children by value
|
|
/// and so cannot close a cycle, but a `&'static Node`
|
|
/// reference can point back at an enclosing fragment. This
|
|
/// is the mechanism the stratified WHERE-expression grammar
|
|
/// recurses through — the `( or_expr )` branch and the
|
|
/// `not_expr` self-reference.
|
|
///
|
|
/// The walker counts active `Subgrammar` frames in
|
|
/// `WalkContext::subgrammar_depth` and refuses past
|
|
/// `walker::driver::MAX_SUBGRAMMAR_DEPTH`, so pathologically
|
|
/// nested input (`((((…))))`) fails with a friendly error
|
|
/// rather than overflowing the parser stack.
|
|
///
|
|
/// The static counterpart of `DynamicSubgrammar`: that one
|
|
/// builds a fresh node from the `WalkContext` at walk time;
|
|
/// this one references a fixed fragment already in the
|
|
/// grammar tree.
|
|
Subgrammar(&'static Self),
|
|
/// Like `Subgrammar`, but the walker additionally **pushes a
|
|
/// new `ScopeFrame`** onto `WalkContext::from_scope_stack` on
|
|
/// entry and pops it on exit (ADR-0032 §10.2). The
|
|
/// `subgrammar_depth` counter increments uniformly across
|
|
/// both variants — the depth cap applies the same way — so
|
|
/// this variant introduces no new walker capability for
|
|
/// grammar recursion; it only layers lexical-scope discipline
|
|
/// on top.
|
|
///
|
|
/// Used at every SQL `SELECT` recursion point: subqueries
|
|
/// in `sql_expr.rs` (scalar `(SELECT …)`, `IN (SELECT …)`,
|
|
/// `[NOT] EXISTS (SELECT …)`) and CTE bodies in
|
|
/// `sql_select.rs` reference the compound-SELECT through
|
|
/// `Node::ScopedSubgrammar(&SQL_SELECT_COMPOUND)`. DSL `Expr`
|
|
/// recursion (ADR-0026) and the `sql_expr.rs` precedence-
|
|
/// ladder recursion (ADR-0031) keep using the plain
|
|
/// `Subgrammar` variant and never push a scope.
|
|
ScopedSubgrammar(&'static Self),
|
|
/// Resolves at walk time using the active `WalkContext`.
|
|
/// Phase D+ uses this for `column_value_list`. The factory
|
|
/// is pure in `ctx`, so the walker memoizes the resolution
|
|
/// (one leak per distinct schema shape).
|
|
#[allow(dead_code)]
|
|
DynamicSubgrammar(fn(&WalkContext) -> Self),
|
|
/// Like `DynamicSubgrammar` but the factory also sees the
|
|
/// source and the current byte position, so it can look
|
|
/// ahead. Used by the insert first-paren to discriminate
|
|
/// Form A (`(cols) values (...)`) from Form C (`(vals)`)
|
|
/// before walking the contents — Form C then routes through
|
|
/// the typed `column_value_list` (ADR-0024 §Phase D, Form C
|
|
/// type-awareness). Not memoized: the output depends on the
|
|
/// source, not just `ctx`.
|
|
Lookahead(fn(&WalkContext, &str, usize) -> Self),
|
|
/// Zero-width node that *establishes the active column* for the
|
|
/// value slot that follows it (ADR-0036 Phase 3b). Matches the
|
|
/// empty string and, as a side effect, sets
|
|
/// `WalkContext::current_column` to the referenced column and
|
|
/// `pending_value_column` to its name — exactly as an
|
|
/// `Ident { writes_column: true }` does, but without consuming a
|
|
/// column identifier from the input.
|
|
///
|
|
/// This is the primitive that gives `INSERT … VALUES (…)`
|
|
/// positions a per-position column identity: the positions are
|
|
/// positional (no per-position column ident to write
|
|
/// `current_column`), so a `DynamicSubgrammar` factory
|
|
/// (`sql_insert::sql_value_list`) emits `SetColumn(colᵢ)` before
|
|
/// each value position, then the shared boundary-aware `SET_VALUE`
|
|
/// slot routes a lone literal to that column's typed slot and any
|
|
/// expression to `sql_expr`. The referenced `TableColumn` is
|
|
/// leaked by the factory (bounded by the column count, like the
|
|
/// `DynamicSubgrammar` `Box::leak`).
|
|
SetColumn(&'static crate::completion::TableColumn),
|
|
/// Typed value-literal slot (ADR-0024 §Phase D §typed-value-slots).
|
|
///
|
|
/// Walks `inner` to consume the literal but records the
|
|
/// column type in `WalkContext::pending_value_type` so the
|
|
/// hint resolver can emit per-type catalog prose ("Type an
|
|
/// integer", "Type a date as 'YYYY-MM-DD'", …) at empty
|
|
/// prefix at this slot. When `column_name` is `Some`, the
|
|
/// walker also writes `pending_value_column` so the hint
|
|
/// can be rendered with the actual column name (e.g. "for
|
|
/// `Email`: Type a quoted string …") rather than a generic
|
|
/// type hint. The recorded values clear on a successful
|
|
/// inner match — so positions BETWEEN typed slots
|
|
/// (`insert into T values (1` mid-input) don't carry stale
|
|
/// hint state.
|
|
TypedValueSlot {
|
|
ty: crate::dsl::types::Type,
|
|
column_name: Option<&'static str>,
|
|
inner: &'static Self,
|
|
},
|
|
/// Annotates `inner` with a hint-panel `HintMode` (ADR-0024
|
|
/// §HintMode-per-node). On entry the walker records `mode`
|
|
/// in `WalkContext::pending_hint_mode`; on a successful
|
|
/// inner match the record clears (so positions past the
|
|
/// slot don't carry stale hint state). Transparent to
|
|
/// matching, highlighting and the expected-set otherwise —
|
|
/// it walks `inner` and returns its result verbatim.
|
|
///
|
|
/// This is the node-attached replacement for the hint
|
|
/// resolver's earlier signature-matching: the grammar tree
|
|
/// declares the hint mode at the slot, the walker
|
|
/// propagates it, the resolver reads it. Used by the
|
|
/// value-literal fallback slot (`ProseOnly`) and `NewName`
|
|
/// ident slots (`ForceProse`).
|
|
Hinted {
|
|
mode: HintMode,
|
|
inner: &'static Self,
|
|
},
|
|
}
|
|
|
|
/// Mode group of a registered command (ADR-0030 §2, ADR-0033
/// Amendment 1).
///
/// The category belongs to the *dispatcher*, not to a command's
/// grammar, so it is attached where the command is registered in
/// `REGISTRY` instead of being a field on every `CommandNode`. The
/// dispatcher (`walker::walk`) consults it to route an input according
/// to the active input mode:
///
/// - `Simple` commands form the DSL surface and are available in both
///   simple and advanced mode.
/// - `Advanced` commands form the SQL surface and are available in
///   advanced mode only. Typing an advanced-only entry word in simple
///   mode produces the "this is SQL" hint
///   (`advanced_mode.sql_in_simple`).
///
/// A *shared* entry word (e.g. `insert`, from Phase 3 sub-phase 3b on)
/// has a node in *both* groups: a `Simple` DSL node and an `Advanced`
/// SQL node. In advanced mode the dispatcher tries the SQL node first
/// and falls back to the DSL node when the SQL shape does not match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CommandCategory {
    Simple,
    Advanced,
}
|
|
|
|
/// Top-level entry record. One per command. The `entry` keyword
|
|
/// alone identifies which command the walker dispatches to;
|
|
/// `shape` is what follows the entry word.
|
|
pub struct CommandNode {
|
|
pub entry: Word,
|
|
pub shape: Node,
|
|
/// Builds the typed `Command` AST from the matched terminal
|
|
/// path. May fail with a `ValidationError` for content-level
|
|
/// rejections that are easier to express imperatively than
|
|
/// as a per-node validator (Phase A: none — every app
|
|
/// command's ast_builder is infallible).
|
|
///
|
|
/// `source` is the full input line being parsed. Most builders
|
|
/// reconstruct the `Command` from the matched `MatchedPath`
|
|
/// alone and ignore it; SQL builders whose `Command` carries
|
|
/// the validated SQL text (ADR-0030 §4/§6, ADR-0031 §2) read
|
|
/// it.
|
|
pub ast_builder: fn(&MatchedPath, &str) -> Result<Command, ValidationError>,
|
|
/// Catalog key (`help.<id>`) for this command's in-app
|
|
/// `help` entry. Consumed by `App::note_help`, which
|
|
/// iterates the REGISTRY and translates each `help_id` —
|
|
/// so a newly-registered command appears in `help`
|
|
/// automatically (ADR-0024 §help_id).
|
|
pub help_id: Option<&'static str>,
|
|
/// Catalog keys under `parse.usage.*` to render in the
|
|
/// "usage:" block when a parse error fires for this command
|
|
/// (ADR-0021 §1, ADR-0024 §architecture). Multi-form families
|
|
/// like `drop` (drop table / drop column / drop relationship)
|
|
/// carry every variant so the user sees the full family on a
|
|
/// generic-entry-word failure.
|
|
pub usage_ids: &'static [&'static str],
|
|
}
|
|
|
|
/// Look up the usage catalog keys for the entry word at the start
|
|
/// of `source`.
|
|
///
|
|
/// Case-insensitive, whitespace-tolerant. Replaces
|
|
/// `dsl::usage::matched_entry` — the walker is the single source
|
|
/// of truth for which command a given input belongs to.
|
|
///
|
|
/// Returns the canonical (primary-form) entry literal and the
|
|
/// `usage_ids` list, or `None` if no entry word matches.
|
|
#[must_use]
|
|
pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, Vec<&'static str>)> {
|
|
usage_keys_for_input_in_mode(source, crate::mode::Mode::Simple)
|
|
}
|
|
|
|
/// Mode-aware variant of [`usage_keys_for_input`] (ADR-0042 G3).
|
|
///
|
|
/// A shared entry word (`create`, `drop`, `insert`, …) registers a
|
|
/// `Simple` DSL node *and* one or more `Advanced` SQL nodes. The
|
|
/// usage block must reflect the surface the user is actually typing:
|
|
/// the SQL forms in `Advanced` mode, the DSL forms in `Simple` mode
|
|
/// — otherwise advanced-mode `create` shows the DSL `create table …
|
|
/// with pk …` template, which is not valid SQL.
|
|
///
|
|
/// Selection prefers candidates whose [`CommandCategory`] matches
|
|
/// the mode; if the entry word has none in that category (an
|
|
/// app-lifecycle command is `Simple`-only yet usable in both modes),
|
|
/// every candidate is used. The returned keys are the union of the
|
|
/// selected nodes' `usage_ids`, de-duplicated in registry order — so
|
|
/// advanced `create` shows both `sql_create_table` and
|
|
/// `sql_create_index`.
|
|
#[must_use]
|
|
pub fn usage_keys_for_input_in_mode(
|
|
source: &str,
|
|
mode: crate::mode::Mode,
|
|
) -> Option<(&'static str, Vec<&'static str>)> {
|
|
use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
|
|
let start = skip_whitespace(source, 0);
|
|
let (kw_start, kw_end) = consume_ident(source, start)?;
|
|
let word = &source[kw_start..kw_end];
|
|
let candidates = commands_for_entry_word(word);
|
|
if candidates.is_empty() {
|
|
return None;
|
|
}
|
|
let union = |nodes: &[(usize, &'static CommandNode, CommandCategory)]| -> Vec<&'static str> {
|
|
let mut keys: Vec<&'static str> = Vec::new();
|
|
for (_, node, _) in nodes {
|
|
for k in node.usage_ids {
|
|
if !keys.contains(k) {
|
|
keys.push(*k);
|
|
}
|
|
}
|
|
}
|
|
keys
|
|
};
|
|
// Advanced mode: every candidate form is reachable — the SQL
|
|
// nodes are primary, and the DSL nodes remain valid via fallback
|
|
// (verified: `create table … with pk` and `drop column …` both
|
|
// run in advanced mode). Show them all, mode-primary (Advanced)
|
|
// first, so the usage hint never hides input that works. Simple
|
|
// mode: only the DSL forms — the SQL-only forms hit the "this is
|
|
// SQL" rail and are not reachable. (ADR-0042 G3.)
|
|
let selected: Vec<(usize, &'static CommandNode, CommandCategory)> =
|
|
if mode == crate::mode::Mode::Advanced {
|
|
let mut v: Vec<_> = candidates
|
|
.iter()
|
|
.copied()
|
|
.filter(|(_, _, c)| *c == CommandCategory::Advanced)
|
|
.collect();
|
|
v.extend(
|
|
candidates
|
|
.iter()
|
|
.copied()
|
|
.filter(|(_, _, c)| *c != CommandCategory::Advanced),
|
|
);
|
|
v
|
|
} else {
|
|
candidates
|
|
.iter()
|
|
.copied()
|
|
.filter(|(_, _, c)| *c == CommandCategory::Simple)
|
|
.collect()
|
|
};
|
|
// Degenerate guard: an advanced-only word in simple mode (not
|
|
// normally reachable — it hits the SQL rail first) leaves
|
|
// `selected` empty; fall back to all candidates so a usage block
|
|
// still renders rather than the available-commands fallback.
|
|
let pick = if selected.is_empty() { candidates } else { selected };
|
|
let keys = union(&pick);
|
|
if keys.is_empty() {
|
|
return None;
|
|
}
|
|
let entry = pick[0].1.entry.primary;
|
|
Some((entry, keys))
|
|
}
|
|
|
|
/// The single usage template most relevant to `source`, when
|
|
/// one is determinable.
|
|
///
|
|
/// A single-form command resolves to its one usage key. A
|
|
/// multi-form command (`add`, `drop`) disambiguates by the
|
|
/// form word after the entry keyword — so a parse error in
|
|
/// `add index …` resolves to the `add index` usage rather than
|
|
/// the first-listed `add column`. Returns `None` for a bare
|
|
/// multi-form entry word (`add` with nothing after it), where
|
|
/// no form has been chosen — the caller decides whether to
|
|
/// show the whole family or nothing.
|
|
#[must_use]
|
|
pub fn usage_key_for_input(source: &str) -> Option<&'static str> {
|
|
usage_key_for_input_in_mode(source, crate::mode::Mode::Simple)
|
|
}
|
|
|
|
/// Mode-aware variant of [`usage_key_for_input`] (ADR-0042 G3) —
|
|
/// disambiguates the single most-relevant usage key from the
|
|
/// mode-selected key set.
|
|
#[must_use]
|
|
pub fn usage_key_for_input_in_mode(
|
|
source: &str,
|
|
mode: crate::mode::Mode,
|
|
) -> Option<&'static str> {
|
|
use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
|
|
let (_entry, keys) = usage_keys_for_input_in_mode(source, mode)?;
|
|
let first = *keys.first()?;
|
|
if keys.len() == 1 {
|
|
return Some(first);
|
|
}
|
|
// Multi-form: the form is named by the token right after
|
|
// the entry keyword.
|
|
let start = skip_whitespace(source, 0);
|
|
let (_, entry_end) = consume_ident(source, start)?;
|
|
let after = skip_whitespace(source, entry_end);
|
|
// The `add 1:n relationship` form opens with a digit.
|
|
if source.as_bytes().get(after).is_some_and(u8::is_ascii_digit) {
|
|
return keys.iter().copied().find(|k| k.ends_with("relationship"));
|
|
}
|
|
// The `create m:n relationship` form (ADR-0045) opens with `m:n`
|
|
// — a letter, so the digit branch misses it, and its usage key ends
|
|
// `…create_m2n` (not `relationship`).
|
|
if source[after..].get(..3).is_some_and(|s| s.eq_ignore_ascii_case("m:n")) {
|
|
return keys.iter().copied().find(|k| k.ends_with("m2n"));
|
|
}
|
|
// Otherwise the form word is an identifier — `column`,
|
|
// `index`, `table`, `relationship` — matched against the
|
|
// usage key's suffix.
|
|
let (s, e) = consume_ident(source, after)?;
|
|
let form = source[s..e].to_ascii_lowercase();
|
|
keys.iter().copied().find(|k| k.ends_with(form.as_str()))
|
|
}
|
|
|
|
/// Every command-entry word in the registry, sorted alphabetically
|
|
/// by primary literal. Replaces `dsl::usage::entry_keywords_alphabetised`
|
|
/// which read the same data through the legacy `usage::REGISTRY`.
|
|
#[must_use]
|
|
pub fn entry_words_alphabetised() -> Vec<&'static str> {
|
|
let mut words: Vec<&'static str> =
|
|
REGISTRY.iter().map(|(c, _)| c.entry.primary).collect();
|
|
words.sort_unstable();
|
|
words.dedup();
|
|
words
|
|
}
|
|
|
|
/// The active grammar registry, each command paired with its
|
|
/// dispatch [`CommandCategory`] (ADR-0033 Amendment 1).
|
|
///
|
|
/// Migrated commands route through this; everything else falls
|
|
/// through to the chumsky path in `dsl::parser`. `Advanced`
|
|
/// commands (`select`, `with`, and — from sub-phase 3b — the SQL
|
|
/// `insert` / `update` / `delete` nodes) are the SQL surface;
|
|
/// the rest are the DSL surface (`Simple`). A shared entry word
|
|
/// will appear twice (one `Simple`, one `Advanced` node); the
|
|
/// dispatcher selects by mode.
|
|
pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
|
|
(&app::QUIT, CommandCategory::Simple),
|
|
(&app::HELP, CommandCategory::Simple),
|
|
(&app::REBUILD, CommandCategory::Simple),
|
|
(&app::SAVE, CommandCategory::Simple),
|
|
(&app::NEW, CommandCategory::Simple),
|
|
(&app::LOAD, CommandCategory::Simple),
|
|
(&app::EXPORT, CommandCategory::Simple),
|
|
(&app::IMPORT, CommandCategory::Simple),
|
|
(&app::MODE, CommandCategory::Simple),
|
|
(&app::MESSAGES, CommandCategory::Simple),
|
|
(&app::UNDO, CommandCategory::Simple),
|
|
(&app::REDO, CommandCategory::Simple),
|
|
(&app::COPY, CommandCategory::Simple),
|
|
(&ddl::DROP, CommandCategory::Simple),
|
|
(&ddl::ADD, CommandCategory::Simple),
|
|
(&ddl::RENAME, CommandCategory::Simple),
|
|
(&ddl::CHANGE, CommandCategory::Simple),
|
|
(&ddl::CREATE, CommandCategory::Simple),
|
|
(&ddl::CREATE_M2N, CommandCategory::Simple),
|
|
(&data::SHOW, CommandCategory::Simple),
|
|
(&data::SEED, CommandCategory::Simple),
|
|
(&data::INSERT, CommandCategory::Simple),
|
|
(&data::UPDATE, CommandCategory::Simple),
|
|
(&data::DELETE, CommandCategory::Simple),
|
|
(&data::REPLAY, CommandCategory::Simple),
|
|
(&data::EXPLAIN, CommandCategory::Simple),
|
|
(&data::SELECT, CommandCategory::Advanced),
|
|
(&data::WITH, CommandCategory::Advanced),
|
|
// Shared entry words (sub-phase 3j, ADR-0033 §2 / Amendment 1):
|
|
// `insert` / `update` / `delete` each appear twice — the
|
|
// `Simple` DSL node above and this `Advanced` SQL node. The
|
|
// dispatcher tries the SQL node first in Advanced mode and falls
|
|
// back to the DSL node when the SQL shape does not match.
|
|
(&data::SQL_INSERT, CommandCategory::Advanced),
|
|
(&data::SQL_UPDATE, CommandCategory::Advanced),
|
|
(&data::SQL_DELETE, CommandCategory::Advanced),
|
|
// Shared entry word `explain` (ADR-0039): the `Simple` DSL
|
|
// `data::EXPLAIN` (above) wraps `show data` / `update` / `delete`;
|
|
// this `Advanced` node wraps the SQL `select` / `with` / `insert`
|
|
// / `update` / `delete`. SQL-first / DSL-fallback in advanced mode
|
|
// (so `explain show data …` and DSL-only `--all-rows` still reach
|
|
// the DSL node); DSL-only in simple mode.
|
|
(&data::EXPLAIN_SQL, CommandCategory::Advanced),
|
|
// Shared entry word `create` (ADR-0035 §2): the simple
|
|
// `ddl::CREATE` (above) and these advanced SQL nodes. The
|
|
// dispatcher tries the advanced candidates first in advanced mode
|
|
// and falls back to the `create table … with pk …` DSL node when no
|
|
// SQL shape matches — the `insert` precedent. 4d adds
|
|
// SQL_CREATE_INDEX, so `create` now has *two* advanced nodes;
|
|
// `decide` tries both (`create table …` → SQL_CREATE_TABLE,
|
|
// `create [unique] index …` → SQL_CREATE_INDEX).
|
|
(&ddl::SQL_CREATE_TABLE, CommandCategory::Advanced),
|
|
(&ddl::SQL_CREATE_INDEX, CommandCategory::Advanced),
|
|
// `alter` is a new advanced-*only* DDL entry word (ADR-0035 §2/§4e),
|
|
// like `select`/`with` — no simple node, so `is_advanced_only` is
|
|
// true and simple-mode `alter …` gets the "this is SQL" hint.
|
|
(&ddl::SQL_ALTER_TABLE, CommandCategory::Advanced),
|
|
// Shared `drop` entry word: `ddl::DROP` (simple) and these advanced
|
|
// SQL nodes. SQL-first in advanced mode; `drop table [if exists] T`
|
|
// → SQL_DROP_TABLE, `drop index [if exists] <name>` → SQL_DROP_INDEX
|
|
// (4d — `drop` now has *two* advanced nodes; the dispatcher's
|
|
// `decide` tries all advanced candidates). `drop column`/`drop
|
|
// relationship`/`drop index on T(…)` fall back to the simple `drop`
|
|
// node.
|
|
(&ddl::SQL_DROP_TABLE, CommandCategory::Advanced),
|
|
(&ddl::SQL_DROP_INDEX, CommandCategory::Advanced),
|
|
];
|
|
|
|
/// Whether `entry` names an advanced-mode-only command (ADR-0030
|
|
/// §2, ADR-0033 Amendment 1). Case-insensitive, matching
|
|
/// keyword-matching elsewhere.
|
|
///
|
|
/// True when the entry word is registered and *every* candidate
|
|
/// for it is `Advanced` — i.e. there is no DSL (`Simple`) command
|
|
/// to fall back to. A shared entry word (a Simple DSL node plus
|
|
/// an Advanced SQL node) is therefore *not* advanced-only: it is
|
|
/// available in simple mode as DSL.
|
|
#[must_use]
|
|
pub fn is_advanced_only(entry: &str) -> bool {
|
|
let mut found = false;
|
|
for (c, category) in REGISTRY {
|
|
if c.entry.matches(entry) {
|
|
found = true;
|
|
if *category == CommandCategory::Simple {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
found
|
|
}
|
|
|
|
/// Look up the first `CommandNode` registered for an entry word,
|
|
/// case-insensitively. Returns the index into `REGISTRY` so
|
|
/// callers can use it as a `WalkOutcome::Match { command_idx }`.
|
|
///
|
|
/// For shared entry words this returns whichever node is listed
|
|
/// first in `REGISTRY`; callers that must distinguish the Simple
|
|
/// from the Advanced candidate use [`commands_for_entry_word`].
|
|
pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> {
|
|
REGISTRY
|
|
.iter()
|
|
.enumerate()
|
|
.find(|(_, (c, _))| c.entry.matches(word))
|
|
.map(|(i, (c, _))| (i, *c))
|
|
}
|
|
|
|
/// Every `CommandNode` registered for an entry word, with its
|
|
/// `REGISTRY` index and [`CommandCategory`], case-insensitively
|
|
/// (ADR-0033 Amendment 1).
|
|
///
|
|
/// A non-shared entry word returns a single candidate; a shared
|
|
/// entry word (`insert` / `update` / `delete` from sub-phase 3b)
|
|
/// returns its `Simple` DSL node and `Advanced` SQL node. The
|
|
/// dispatcher picks among them by the active input mode.
|
|
#[must_use]
|
|
pub fn commands_for_entry_word(
|
|
word: &str,
|
|
) -> Vec<(usize, &'static CommandNode, CommandCategory)> {
|
|
REGISTRY
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|(_, (c, _))| c.entry.matches(word))
|
|
.map(|(i, (c, category))| (i, *c, *category))
|
|
.collect()
|
|
}
|
|
|
|
#[cfg(test)]
mod usage_key_tests {
    use std::collections::HashSet;

    use super::usage_key_for_input;

    /// A typed form of a multi-form command must resolve to that form's
    /// own usage key, so a parse error in one form never displays a
    /// sibling form's usage (the handoff-18 `151ed08` fix, locked in
    /// here — including the digit-led `add 1:n relationship` form).
    #[test]
    fn multi_form_commands_resolve_to_the_typed_form() {
        const CASES: &[(&str, &str)] = &[
            ("add column to T: c (int)", "parse.usage.add_column"),
            ("add index on T (c)", "parse.usage.add_index"),
            (
                "add constraint unique to T.c",
                "parse.usage.add_constraint",
            ),
            (
                "drop constraint check from T.c",
                "parse.usage.drop_constraint",
            ),
            (
                "add 1:n relationship from A.x to B.y",
                "parse.usage.add_relationship",
            ),
            // Trailing junk after a complete form must not alter the
            // resolved key.
            (
                "add 1:n relationship from A.x to B.y --",
                "parse.usage.add_relationship",
            ),
            ("drop table T", "parse.usage.drop_table"),
            ("drop column from table T: c", "parse.usage.drop_column"),
            ("drop index i", "parse.usage.drop_index"),
            (
                "drop relationship r",
                "parse.usage.drop_relationship",
            ),
            ("show data T", "parse.usage.show_data"),
            ("show table T", "parse.usage.show_table"),
            // `create` is multi-form too (table vs m:n, ADR-0045); each
            // typed form maps to its own usage key.
            ("create table T with pk id(int)", "parse.usage.create_table"),
            (
                "create m:n relationship from A to B",
                "parse.usage.create_m2n",
            ),
        ];

        for &(input, expected) in CASES {
            assert_eq!(
                usage_key_for_input(input),
                Some(expected),
                "usage key for {input:?}",
            );
        }
    }

    /// With only the entry word typed (`add` / `drop`), no form has been
    /// chosen yet: the resolver returns `None` and the caller shows the
    /// whole family instead of guessing.
    #[test]
    fn a_bare_multi_form_entry_word_resolves_to_no_single_form() {
        assert_eq!(usage_key_for_input("add "), None);
        assert_eq!(usage_key_for_input("drop "), None);
    }

    /// An incomplete `create table …` still resolves to the
    /// `create_table` usage key.
    ///
    /// NOTE(review): the case table in
    /// `multi_form_commands_resolve_to_the_typed_form` treats `create`
    /// as multi-form (ADR-0045), so this test may no longer cover a
    /// genuinely single-form command — confirm and consider a different
    /// entry word.
    #[test]
    fn a_single_form_command_resolves_to_its_one_key() {
        let resolved = usage_key_for_input("create table T with pk");
        assert_eq!(resolved, Some("parse.usage.create_table"));
    }

    /// `note_help` emits one help block per `help_id: Some(_)` without
    /// deduplicating, so a repeated help_id would print the same command
    /// twice in `help`. That is why the shared-entry-word `Advanced`
    /// nodes (SQL_INSERT, …, EXPLAIN_SQL) carry `help_id: None` and
    /// defer to their `Simple` sibling.
    #[test]
    fn no_two_registered_commands_share_a_help_id() {
        let mut seen = HashSet::new();
        let help_ids = super::REGISTRY
            .iter()
            .filter_map(|(command, _category)| command.help_id);
        for id in help_ids {
            assert!(
                seen.insert(id),
                "duplicate help_id `{id}` in REGISTRY would print twice in `help`",
            );
        }
    }
}
|