diff --git a/src/dsl/grammar/app.rs b/src/dsl/grammar/app.rs new file mode 100644 index 0000000..dc37dd1 --- /dev/null +++ b/src/dsl/grammar/app.rs @@ -0,0 +1,262 @@ +//! App-lifecycle command nodes (ADR-0024 §migration Phase A). +//! +//! Eleven commands: quit, help, rebuild, save (+ save as), new, +//! load, export, import, mode, messages. +//! +//! Each block is one `CommandNode`: entry keyword, shape, AST +//! builder, help / usage references. The ast_builders match +//! against the `MatchedPath` items in declaration order. + +use crate::dsl::command::{AppCommand, Command, MessagesValue, ModeValue}; +use crate::dsl::grammar::{ + CommandNode, IdentSource, IdentValidator, Node, ValidationError, Word, +}; +use crate::dsl::walker::outcome::{MatchedKind, MatchedPath}; + +// --- Validators ---------------------------------------------------- +// +// The catch-all `Ident` branches in `mode <value>` / +// `messages <value>` exist solely to convert any out-of-set +// identifier into the friendly `mode.unknown` / `messages.unknown` +// catalog wording. The known values are `Word` siblings in the +// same `Choice`, so the catch-all is never reached on the happy +// path, and these validators always fail.
+ +fn validate_unknown_mode(value: &str) -> Result<(), ValidationError> { + Err(ValidationError { + message_key: "mode.unknown", + args: vec![("value", value.to_string())], + }) +} + +fn validate_unknown_messages(value: &str) -> Result<(), ValidationError> { + Err(ValidationError { + message_key: "messages.unknown", + args: vec![("value", value.to_string())], + }) +} + +const UNKNOWN_MODE_VALIDATOR: IdentValidator = validate_unknown_mode; +const UNKNOWN_MESSAGES_VALIDATOR: IdentValidator = validate_unknown_messages; + +// --- Shapes (constants are referenced by Optional/Choice slices) -- + +const SAVE_AS_WORD: Node = Node::Word(Word::keyword("as")); + +const IMPORT_AS_TARGET: Node = Node::Seq(&[ + Node::Word(Word::keyword("as")), + Node::Ident { + source: IdentSource::NewName, + role: "target", + validator: None, + highlight_override: None, + }, +]); +const IMPORT_AS_TARGET_OPT: Node = Node::Optional(&IMPORT_AS_TARGET); + +const IMPORT_PATH_AND_TARGET: Node = Node::Seq(&[Node::BarePath, IMPORT_AS_TARGET_OPT]); + +const EXPORT_PATH_OPT: Node = Node::Optional(&Node::BarePath); +const IMPORT_BODY_OPT: Node = Node::Optional(&IMPORT_PATH_AND_TARGET); + +// `mode `: known keywords are surfaced as `Word` children +// so they appear in the walker's expected set (and feed the +// completion engine's keyword candidates). The trailing `Ident` +// child catches any other identifier shape and funnels it into +// the friendly `mode.unknown` validator. 
+const MODE_CHOICES: &[Node] = &[ + Node::Word(Word::keyword("simple")), + Node::Word(Word::keyword("advanced")), + Node::Ident { + source: IdentSource::Free, + role: "mode_value", + validator: Some(UNKNOWN_MODE_VALIDATOR), + highlight_override: None, + }, +]; +const MODE_VALUE: Node = Node::Choice(MODE_CHOICES); + +const MESSAGES_CHOICES: &[Node] = &[ + Node::Word(Word::keyword("short")), + Node::Word(Word::keyword("verbose")), + Node::Ident { + source: IdentSource::Free, + role: "messages_value", + validator: Some(UNKNOWN_MESSAGES_VALIDATOR), + highlight_override: None, + }, +]; +const MESSAGES_VALUE: Node = Node::Choice(MESSAGES_CHOICES); +const MESSAGES_VALUE_OPT: Node = Node::Optional(&MESSAGES_VALUE); + +const EMPTY_SEQ: Node = Node::Seq(&[]); +const SAVE_AS_OPT: Node = Node::Optional(&SAVE_AS_WORD); + +// --- AST builders -------------------------------------------------- + +const fn build_quit(_path: &MatchedPath) -> Result { + Ok(Command::App(AppCommand::Quit)) +} + +const fn build_help(_path: &MatchedPath) -> Result { + Ok(Command::App(AppCommand::Help)) +} + +const fn build_rebuild(_path: &MatchedPath) -> Result { + Ok(Command::App(AppCommand::Rebuild)) +} + +fn build_save(path: &MatchedPath) -> Result { + if path.contains_word("as") { + Ok(Command::App(AppCommand::SaveAs)) + } else { + Ok(Command::App(AppCommand::Save)) + } +} + +const fn build_new(_path: &MatchedPath) -> Result { + Ok(Command::App(AppCommand::New)) +} + +const fn build_load(_path: &MatchedPath) -> Result { + Ok(Command::App(AppCommand::Load)) +} + +fn build_export(path: &MatchedPath) -> Result { + let bare = path + .find(|i| matches!(i.kind, MatchedKind::BarePath)) + .map(|i| i.text.clone()); + Ok(Command::App(AppCommand::Export { path: bare })) +} + +fn build_import(path: &MatchedPath) -> Result { + let bare_path = path + .find(|i| matches!(i.kind, MatchedKind::BarePath)) + .map(|i| i.text.clone()) + .unwrap_or_default(); + let target = path + .find(|i| matches!(&i.kind, 
MatchedKind::Ident { role } if *role == "target")) + .map(|i| i.text.clone()); + Ok(Command::App(AppCommand::Import { + path: bare_path, + target, + })) +} + +fn build_mode(path: &MatchedPath) -> Result { + // The Choice surfaces the matched value as either a `Word` + // (known) or an `Ident` (unknown). The unknown branch's + // validator always errors, so reaching the AST builder + // implies one of the Word branches matched. + let value = if path.contains_word("simple") { + ModeValue::Simple + } else if path.contains_word("advanced") { + ModeValue::Advanced + } else { + ModeValue::Simple + }; + Ok(Command::App(AppCommand::Mode { value })) +} + +fn build_messages(path: &MatchedPath) -> Result { + let value = if path.contains_word("short") { + Some(MessagesValue::Short) + } else if path.contains_word("verbose") { + Some(MessagesValue::Verbose) + } else { + None + }; + Ok(Command::App(AppCommand::Messages { value })) +} + +// --- Command nodes ------------------------------------------------- + +pub static QUIT: CommandNode = CommandNode { + entry: Word::keyword("quit"), + shape: EMPTY_SEQ, + ast_builder: build_quit, + help_id: Some("app.quit"), + usage_id: Some("parse.usage.app.quit"), + hint_mode: None, +}; + +pub static HELP: CommandNode = CommandNode { + entry: Word::keyword("help"), + shape: EMPTY_SEQ, + ast_builder: build_help, + help_id: Some("app.help"), + usage_id: Some("parse.usage.app.help"), + hint_mode: None, +}; + +pub static REBUILD: CommandNode = CommandNode { + entry: Word::keyword("rebuild"), + shape: EMPTY_SEQ, + ast_builder: build_rebuild, + help_id: Some("app.rebuild"), + usage_id: Some("parse.usage.app.rebuild"), + hint_mode: None, +}; + +pub static SAVE: CommandNode = CommandNode { + entry: Word::keyword("save"), + shape: SAVE_AS_OPT, + ast_builder: build_save, + help_id: Some("app.save"), + usage_id: Some("parse.usage.app.save"), + hint_mode: None, +}; + +pub static NEW: CommandNode = CommandNode { + entry: Word::keyword("new"), + shape: 
EMPTY_SEQ, + ast_builder: build_new, + help_id: Some("app.new"), + usage_id: Some("parse.usage.app.new"), + hint_mode: None, +}; + +pub static LOAD: CommandNode = CommandNode { + entry: Word::keyword("load"), + shape: EMPTY_SEQ, + ast_builder: build_load, + help_id: Some("app.load"), + usage_id: Some("parse.usage.app.load"), + hint_mode: None, +}; + +pub static EXPORT: CommandNode = CommandNode { + entry: Word::keyword("export"), + shape: EXPORT_PATH_OPT, + ast_builder: build_export, + help_id: Some("app.export"), + usage_id: Some("parse.usage.app.export"), + hint_mode: None, +}; + +pub static IMPORT: CommandNode = CommandNode { + entry: Word::keyword("import"), + shape: IMPORT_BODY_OPT, + ast_builder: build_import, + help_id: Some("app.import"), + usage_id: Some("parse.usage.app.import"), + hint_mode: None, +}; + +pub static MODE: CommandNode = CommandNode { + entry: Word::keyword("mode"), + shape: MODE_VALUE, + ast_builder: build_mode, + help_id: Some("app.mode"), + usage_id: Some("parse.usage.app.mode"), + hint_mode: None, +}; + +pub static MESSAGES: CommandNode = CommandNode { + entry: Word::keyword("messages"), + shape: MESSAGES_VALUE_OPT, + ast_builder: build_messages, + help_id: Some("app.messages"), + usage_id: Some("parse.usage.app.messages"), + hint_mode: None, +}; diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs new file mode 100644 index 0000000..3f21b57 --- /dev/null +++ b/src/dsl/grammar/mod.rs @@ -0,0 +1,247 @@ +//! Unified declarative grammar tree (ADR-0024). +//! +//! The grammar tree is the single source of truth for the DSL — +//! parsing, completion, syntax highlighting, parse-error usage +//! rendering, and hint-panel content all derive from this same +//! data structure (ADR-0023 institutional context). +//! +//! Phase A scope (ADR-0024 §migration): the framework lands +//! alongside the eleven app-lifecycle commands (quit, help, +//! rebuild, save, save as, new, load, export, import, mode, +//! messages). 
The chumsky parser still owns every other +//! command; the router in `dsl::parser` decides which path to +//! take per first-token. Schema-aware nodes (`IdentSource::Tables` +//! and friends) and `DynamicSubgrammar` are declared here but +//! not exercised until Phase B-D. +//! +//! The shape of `Node` mirrors ADR-0024 §node-taxonomy with one +//! pragmatic addition for Phase A: each `Ident` carries an +//! optional content validator, used today by the `mode ` +//! / `messages ` slots to surface friendly catalog +//! wording (`mode.unknown`, `messages.unknown`) on out-of-set +//! identifiers. The same hook generalises naturally to typed +//! value slots in Phase D. + +pub mod app; + +use crate::dsl::command::Command; +use crate::dsl::walker::context::WalkContext; +use crate::dsl::walker::outcome::MatchedPath; + +/// Highlight class assigned to a matched terminal. +/// +/// Phase A records these on the `WalkResult::per_byte_class` +/// slice; the existing input-renderer (chumsky-driven) still +/// owns the user-visible highlight today. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(dead_code)] +pub enum HighlightClass { + Keyword, + Identifier, + Number, + String, + Punct, + Flag, + Error, +} + +/// Where an `Ident` slot's candidates come from at completion time. +/// +/// Phase A only exercises `NewName` (the `import … as ` +/// slot) and `Free` (the catch-all branch in `mode`/`messages` +/// that funnels unknown values into a friendly validator). The +/// schema-aware variants land in Phase B-D. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum IdentSource { + /// User invents this name. No schema lookup; no completion + /// candidates beyond the identifier shape itself. + NewName, + /// Existing table name. Phase B+. + #[allow(dead_code)] + Tables, + /// Existing column in the current table. Phase B+. + #[allow(dead_code)] + Columns, + /// Existing relationship name. Phase B+. 
+ #[allow(dead_code)] + Relationships, + /// Closed set from `Type::all()`. Phase B+. + #[allow(dead_code)] + Types, + /// Any identifier shape; used by synthetic catch-all branches + /// (e.g., the unknown-value branch of `mode `). + Free, +} + +/// Hint-panel mode for an expected node. +/// +/// Phase A defaults to `Default`; the `ProseOnly` variant +/// attaches to typed value slots in Phase D so the hint reads +/// "Type a date as 'YYYY-MM-DD'" rather than candidate-cycling. +#[derive(Debug, Clone, Copy)] +#[allow(dead_code)] +pub enum HintMode { + Default, + ForceProse(&'static str), + ProseOnly(&'static str), + SuppressProse, +} + +/// A keyword node literal. +/// +/// The `aliases` slice is empty for the app-lifecycle commands +/// today; the round-5 `q` removal remains intentional, and any +/// future re-introduction would be a one-line `aliases: &["q"]` +/// addition (ADR-0024 §aliases). +#[derive(Debug, Clone, Copy)] +pub struct Word { + pub primary: &'static str, + pub aliases: &'static [&'static str], + pub highlight_override: Option, +} + +impl Word { + pub const fn keyword(primary: &'static str) -> Self { + Self { + primary, + aliases: &[], + highlight_override: None, + } + } + + /// Case-insensitive match against the primary or any alias. + pub fn matches(&self, candidate: &str) -> bool { + if candidate.eq_ignore_ascii_case(self.primary) { + return true; + } + self.aliases + .iter() + .any(|a| candidate.eq_ignore_ascii_case(a)) + } +} + +/// Content-level validator for an `Ident` slot. Returns the +/// catalog key + arg list to surface as `WalkOutcome::ValidationFailed` +/// on mismatch. +pub type IdentValidator = fn(matched: &str) -> Result<(), ValidationError>; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ValidationError { + pub message_key: &'static str, + pub args: Vec<(&'static str, String)>, +} + +/// The grammar-tree node taxonomy (ADR-0024 §node-taxonomy). 
+/// +/// Some variants carry data (`Word` literal, `Punct` char, +/// `Ident` source/role/validator); combinators reference their +/// children through `&'static [Node]` / `&'static Node` slices, +/// which lets the entire registry live in `const`s — no runtime +/// allocation, every command is one declaration block in its +/// grammar file. +pub enum Node { + /// A keyword token. Case-insensitive match (ADR-0009). + Word(Word), + /// A single punctuation character. The exact set comes from + /// the migrated commands' usage — Phase A only needs none of + /// these (app-lifecycle commands are pure keyword + ident + + /// path), but the variant is declared for Phase B+ use. + #[allow(dead_code)] + Punct(char), + /// An identifier slot. `source` drives completion candidates; + /// `role` names the slot for error wording / completion-engine + /// dispatch; `validator` runs after a successful identifier- + /// shape match and may reject the value with a catalog-driven + /// message. + Ident { + source: IdentSource, + role: &'static str, + validator: Option, + #[allow(dead_code)] + highlight_override: Option, + }, + #[allow(dead_code)] + NumberLit, + #[allow(dead_code)] + StringLit, + #[allow(dead_code)] + BlobLit, + #[allow(dead_code)] + Flag(&'static str), + /// A non-whitespace run consumed verbatim from source. Per + /// ADR-0024's path-bearing-commands UX change, paths with + /// spaces use the quoted form (`StringLit`); `BarePath` + /// terminates at the first whitespace byte. + BarePath, + /// Try each child in order. The first one that matches a + /// non-empty prefix wins; if none match, the choice fails + /// with the union of expectations. + Choice(&'static [Self]), + /// All children must match in order. Whitespace is implicitly + /// allowed between siblings. + Seq(&'static [Self]), + /// The inner node may match or be skipped. + Optional(&'static Self), + /// `inner` matches at least `min` times, separated by + /// `separator` (if any). 
Phase C+ uses this for `with pk` + /// column lists. + #[allow(dead_code)] + Repeated { + inner: &'static Self, + separator: Option<&'static Self>, + min: usize, + }, + /// Resolves at walk time using the active `WalkContext`. + /// Phase D+ uses this for `column_value_list`. + #[allow(dead_code)] + DynamicSubgrammar(fn(&WalkContext) -> Self), +} + +/// Top-level entry record. One per command. The `entry` keyword +/// alone identifies which command the walker dispatches to; +/// `shape` is what follows the entry word. +pub struct CommandNode { + pub entry: Word, + pub shape: Node, + /// Builds the typed `Command` AST from the matched terminal + /// path. May fail with a `ValidationError` for content-level + /// rejections that are easier to express imperatively than + /// as a per-node validator (Phase A: none — every app + /// command's ast_builder is infallible). + pub ast_builder: fn(&MatchedPath) -> Result, + #[allow(dead_code)] + pub help_id: Option<&'static str>, + #[allow(dead_code)] + pub usage_id: Option<&'static str>, + #[allow(dead_code)] + pub hint_mode: Option, +} + +/// The active grammar registry. Phase A: the eleven app-lifecycle +/// commands. Migrated commands route through this; everything +/// else falls through to the chumsky path in `dsl::parser`. +pub static REGISTRY: &[&CommandNode] = &[ + &app::QUIT, + &app::HELP, + &app::REBUILD, + &app::SAVE, + &app::NEW, + &app::LOAD, + &app::EXPORT, + &app::IMPORT, + &app::MODE, + &app::MESSAGES, +]; + +/// Look up a `CommandNode` by entry word, case-insensitively. +/// +/// Used by the router to decide whether the walker owns this +/// input. Returns the index into `REGISTRY` so callers can +/// later use it as a `WalkOutcome::Match { command_idx }`. 
+pub fn command_for_entry_word(word: &str) -> Option<(usize, &'static CommandNode)> { + REGISTRY + .iter() + .enumerate() + .find(|(_, c)| c.entry.matches(word)) + .map(|(i, c)| (i, *c)) +} diff --git a/src/dsl/mod.rs b/src/dsl/mod.rs index c84801b..e30892c 100644 --- a/src/dsl/mod.rs +++ b/src/dsl/mod.rs @@ -11,6 +11,7 @@ pub mod action; pub mod command; +pub mod grammar; pub mod ident_slot; pub mod keyword; pub mod lexer; @@ -19,6 +20,7 @@ pub mod shortid; pub mod types; pub mod usage; pub mod value; +pub mod walker; pub use action::ReferentialAction; pub use command::{ diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index f85c54c..cc8ddb8 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -111,10 +111,16 @@ pub fn parse_tokens(tokens: &[Token], source: &str) -> Result Result Option> { + use crate::dsl::walker::{self, outcome::WalkBound}; + let mut ctx = walker::context::WalkContext::new(); + let (result, command) = walker::walk(source, WalkBound::EndOfInput, &mut ctx); + let result = result?; + Some(walker_outcome_to_parse_result(result, command)) +} + +fn walker_outcome_to_parse_result( + result: crate::dsl::walker::outcome::WalkResult, + command: Option, +) -> Result { + use crate::dsl::walker::outcome::WalkOutcome; + match result.outcome { + WalkOutcome::Match { .. 
} => command.ok_or_else(|| ParseError::Invalid { + message: crate::t!( + "parse.error_wrapper", + detail = String::from("AST builder failed") + ), + position: 0, + at_eof: false, + expected: Vec::new(), + }), + WalkOutcome::Incomplete { position, expected } => Err(ParseError::Invalid { + message: format_walker_error(true, &expected, None), + position, + at_eof: true, + expected: expected.iter().map(format_expectation).collect(), + }), + WalkOutcome::Mismatch { position, expected } => Err(ParseError::Invalid { + message: format_walker_error(false, &expected, Some(position)), + position, + at_eof: false, + expected: expected.iter().map(format_expectation).collect(), + }), + WalkOutcome::ValidationFailed { position, error } => { + // Runtime catalog lookup: walker carries the catalog + // key + args at `Node::Ident` validators (e.g., + // `mode.unknown`). The `t!` macro requires a literal + // key, so we call `friendly::translate` directly. + let arg_refs: Vec<(&str, &dyn std::fmt::Display)> = error + .args + .iter() + .map(|(k, v)| (*k, v as &dyn std::fmt::Display)) + .collect(); + let message = crate::friendly::translate(error.message_key, &arg_refs); + Err(ParseError::Invalid { + message, + position, + at_eof: false, + expected: Vec::new(), + }) + } + } +} + +fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String { + use crate::dsl::walker::outcome::Expectation; + match e { + Expectation::Word(w) => format!("`{w}`"), + Expectation::Ident { role } => (*role).to_string(), + Expectation::Punct(c) => format!("`{c}`"), + Expectation::NumberLit => "number".to_string(), + Expectation::StringLit => "string literal".to_string(), + Expectation::BlobLit => "blob literal".to_string(), + Expectation::Flag(name) => format!("`--{name}`"), + Expectation::BarePath => "path".to_string(), + Expectation::EndOfInput => "end of input".to_string(), + } +} + +fn format_walker_error( + at_eof: bool, + expected: &[crate::dsl::walker::outcome::Expectation], + 
_position: Option, +) -> String { + let parts: Vec = expected.iter().map(format_expectation).collect(); + let joined = oxford_join(&parts); + if at_eof { + if joined.is_empty() { + crate::t!("parse.empty") + } else { + format!("expected {joined}") + } + } else if joined.is_empty() { + "unexpected input".to_string() + } else { + format!("expected {joined}") + } +} + +fn oxford_join(items: &[String]) -> String { + match items.len() { + 0 => String::new(), + 1 => items[0].clone(), + 2 => format!("{} or {}", items[0], items[1]), + _ => { + let last = items.len() - 1; + let head = items[..last].join(", "); + format!("{}, or {}", head, items[last]) + } + } +} + /// `replay` source-slice special case (ADR-0020 §6). /// /// `replay ` lets the user write paths containing @@ -166,64 +280,12 @@ fn try_parse_replay_with_bare_path( })) } -/// `export ` / `import [as ]` source-slice -/// special case. Same rationale as `try_parse_replay_with_bare_path` -/// — bare paths contain `/`, `.`, `~` which the lexer would either -/// split into separate tokens or refuse outright. -/// -/// Returns `None` for the bare-keyword forms (`export`, `import` -/// alone), letting the regular chumsky path handle them and -/// surface the no-arg `Command::App(...)` variant. 
-fn try_parse_app_path_command( - tokens: &[Token], - source: &str, -) -> Option> { - use crate::dsl::command::AppCommand; - let first = tokens.first()?; - let kw = match &first.kind { - TokenKind::Keyword(Keyword::Export) => Keyword::Export, - TokenKind::Keyword(Keyword::Import) => Keyword::Import, - _ => return None, - }; - let after = first.span.1; - let rest = source[after..].trim(); - if rest.is_empty() { - return None; - } - match kw { - Keyword::Export => Some(Ok(Command::App(AppCommand::Export { - path: Some(rest.to_string()), - }))), - Keyword::Import => { - // Trailing `as` with no target is a recognised user - // mistake — surface the usage hint as a parse error - // (catalog wording stays in sync with the existing - // dispatch-time error). - if rest == "as" || rest.ends_with(" as") { - return Some(Err(ParseError::Invalid { - message: crate::t!("project.import_empty_target"), - position: after + rest.len(), - at_eof: true, - expected: Vec::new(), - })); - } - let (path, target) = match rest.split_once(" as ") { - Some((p, t)) => (p.trim().to_string(), Some(t.trim().to_string())), - None => (rest.to_string(), None), - }; - if path.is_empty() { - return Some(Err(ParseError::Invalid { - message: crate::t!("project.import_usage"), - position: after, - at_eof: true, - expected: vec!["path".to_string()], - })); - } - Some(Ok(Command::App(AppCommand::Import { path, target }))) - } - _ => None, - } -} +// ADR-0024 Phase A removed `try_parse_app_path_command`: the +// walker (`crate::dsl::walker`) now owns export / import end-to- +// end (including their path arguments via `BarePath`). The +// chumsky-side bare-keyword branches in `command_parser` +// (`export_no_arg`, `import_no_arg`) are unreachable in practice +// but stay declared until Phase F sweeps the chumsky path. 
// ========================================================= // Token-aware combinator helpers (ADR-0020 §5) diff --git a/src/dsl/walker/context.rs b/src/dsl/walker/context.rs new file mode 100644 index 0000000..c1c148f --- /dev/null +++ b/src/dsl/walker/context.rs @@ -0,0 +1,43 @@ +//! `WalkContext` — per-walk mutable state that flows through the +//! walker (ADR-0024 §WalkContext). +//! +//! Phase A keeps this minimal: app-lifecycle commands have no +//! schema dependency. The `current_table`, `current_table_columns`, +//! and schema-cache pointer become populated as Phase B-D land +//! the schema-aware DDL/data commands. + +/// Per-walk state. Cheap to construct; `default()` is the right +/// shape for app-lifecycle commands. +#[derive(Debug, Default)] +pub struct WalkContext { + /// Table whose name an `Ident { source: Tables, writes_table: + /// true }` matched earlier in the walk. Phase B+ writes this. + pub current_table: Option, + + /// Columns of `current_table`, resolved against the schema + /// cache when the table identifier matched. Phase D+ uses + /// this to drive the dynamic `column_value_list` sub-grammar. + #[allow(dead_code)] + pub current_table_columns: Option>, + + /// For `set col=…` and `where col=…`, the column whose value + /// is about to be consumed. Phase D+ writes this so the value + /// slot picks the right typed sub-grammar. + #[allow(dead_code)] + pub current_column: Option, +} + +impl WalkContext { + pub fn new() -> Self { + Self::default() + } +} + +/// Schema info for a single column. Phase D+ populates this from +/// the schema cache; Phase A leaves it unused. +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ColumnInfo { + pub name: String, + pub user_type: crate::dsl::types::Type, +} diff --git a/src/dsl/walker/driver.rs b/src/dsl/walker/driver.rs new file mode 100644 index 0000000..e9920c0 --- /dev/null +++ b/src/dsl/walker/driver.rs @@ -0,0 +1,330 @@ +//! Per-node-kind walk dispatch (ADR-0024 §architecture). +//! +//! 
`walk_node` is the recursive workhorse that the public +//! `walk()` entry calls into for a `CommandNode`'s `shape`. It +//! tries to match `node` starting at `position`, mutating +//! `path` (matched terminals collected in declaration order) and +//! `per_byte` (highlight class assignments) as it goes. +//! +//! The return value distinguishes four cases: +//! +//! - `Matched { end }` — full match, walker consumed up to `end`. +//! - `NoMatch { … }` — node didn't engage at this position. For +//! `Optional` and `Choice` callers this is benign (try the +//! next branch / skip the optional); for `Seq` it's only +//! benign on the first child. +//! - `Incomplete { … }` — node committed (consumed at least one +//! terminal) but ran out of input. Surfaces as +//! `WalkOutcome::Incomplete` at the top level. +//! - `Failed { … }` — node committed and a content validator +//! rejected the value, or a hard structural failure occurred +//! mid-shape. Surfaces as `WalkOutcome::Mismatch` or +//! `WalkOutcome::ValidationFailed` at the top level. + +use crate::dsl::grammar::{HighlightClass, Node, ValidationError}; +use crate::dsl::walker::context::WalkContext; +use crate::dsl::walker::lex_helpers::{consume_bare_path, consume_ident, skip_whitespace}; +use crate::dsl::walker::outcome::{ + ByteClass, Expectation, MatchedItem, MatchedKind, MatchedPath, +}; + +#[derive(Debug, Clone)] +pub enum NodeWalkResult { + Matched { + end: usize, + }, + /// Did not engage at this position. Caller decides whether + /// this is benign (Optional, Choice fallthrough) or a hard + /// failure (Seq mid-shape). + NoMatch { + position: usize, + expected: Vec, + }, + /// Committed and ran out of input. + Incomplete { + position: usize, + expected: Vec, + }, + /// Committed and hit a hard mismatch or validator failure. 
+ Failed { + position: usize, + kind: FailureKind, + }, +} + +#[derive(Debug, Clone)] +pub enum FailureKind { + Mismatch { expected: Vec }, + Validation(ValidationError), +} + +pub fn walk_node( + source: &str, + position: usize, + node: &Node, + ctx: &mut WalkContext, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let pos = skip_whitespace(source, position); + match node { + Node::Word(word) => walk_word(source, pos, word, path, per_byte), + Node::Punct(ch) => walk_punct(source, pos, *ch, path, per_byte), + Node::Ident { + source: src, + role, + validator, + highlight_override: _, + } => walk_ident(source, pos, *src, role, *validator, path, per_byte), + Node::NumberLit + | Node::StringLit + | Node::BlobLit + | Node::Flag(_) + | Node::Repeated { .. } + | Node::DynamicSubgrammar(_) => { + // Phase A: not exercised by app-lifecycle commands. + // Reaching this branch means a Phase B+ grammar got + // declared without the walker support landing yet — + // surface as a hard failure so the test suite catches + // it loudly instead of silently mis-parsing. + NodeWalkResult::Failed { + position: pos, + kind: FailureKind::Mismatch { expected: vec![] }, + } + } + Node::BarePath => walk_bare_path(source, pos, path, per_byte), + Node::Choice(children) => walk_choice(source, pos, children, ctx, path, per_byte), + Node::Seq(children) => walk_seq(source, pos, children, ctx, path, per_byte), + Node::Optional(child) => walk_optional(source, pos, child, ctx, path, per_byte), + } +} + +fn walk_word( + source: &str, + position: usize, + word: &crate::dsl::grammar::Word, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + // First scan an identifier-shape token at `position`; if + // none, we definitely don't have this keyword. If one, check + // it against the word's primary + aliases. 
+ let Some((start, end)) = consume_ident(source, position) else { + return NodeWalkResult::NoMatch { + position, + expected: vec![Expectation::Word(word.primary)], + }; + }; + let candidate = &source[start..end]; + if word.matches(candidate) { + path.push(MatchedItem { + kind: MatchedKind::Word(word.primary), + text: candidate.to_string(), + span: (start, end), + }); + per_byte.push(ByteClass { + start, + end, + class: HighlightClass::Keyword, + }); + NodeWalkResult::Matched { end } + } else { + NodeWalkResult::NoMatch { + position, + expected: vec![Expectation::Word(word.primary)], + } + } +} + +fn walk_punct( + source: &str, + position: usize, + ch: char, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let bytes = source.as_bytes(); + if position < bytes.len() && bytes[position] == ch as u8 { + path.push(MatchedItem { + kind: MatchedKind::Punct(ch), + text: ch.to_string(), + span: (position, position + 1), + }); + per_byte.push(ByteClass { + start: position, + end: position + 1, + class: HighlightClass::Punct, + }); + NodeWalkResult::Matched { + end: position + 1, + } + } else { + NodeWalkResult::NoMatch { + position, + expected: vec![Expectation::Punct(ch)], + } + } +} + +fn walk_ident( + source: &str, + position: usize, + _src: crate::dsl::grammar::IdentSource, + role: &'static str, + validator: Option, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let Some((start, end)) = consume_ident(source, position) else { + return NodeWalkResult::NoMatch { + position, + expected: vec![Expectation::Ident { role }], + }; + }; + let text = source[start..end].to_string(); + if let Some(v) = validator + && let Err(err) = v(&text) + { + return NodeWalkResult::Failed { + position: start, + kind: FailureKind::Validation(err), + }; + } + path.push(MatchedItem { + kind: MatchedKind::Ident { role }, + text, + span: (start, end), + }); + per_byte.push(ByteClass { + start, + end, + class: HighlightClass::Identifier, + }); + 
NodeWalkResult::Matched { end } +} + +fn walk_bare_path( + source: &str, + position: usize, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let Some((start, end)) = consume_bare_path(source, position) else { + return NodeWalkResult::NoMatch { + position, + expected: vec![Expectation::BarePath], + }; + }; + let text = source[start..end].to_string(); + path.push(MatchedItem { + kind: MatchedKind::BarePath, + text, + span: (start, end), + }); + per_byte.push(ByteClass { + start, + end, + class: HighlightClass::String, + }); + NodeWalkResult::Matched { end } +} + +fn walk_choice( + source: &str, + position: usize, + children: &[Node], + ctx: &mut WalkContext, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let mut all_expected: Vec = Vec::new(); + for child in children { + let saved_path_len = path.items.len(); + let saved_byte_len = per_byte.len(); + match walk_node(source, position, child, ctx, path, per_byte) { + NodeWalkResult::Matched { end } => return NodeWalkResult::Matched { end }, + NodeWalkResult::NoMatch { expected, .. } => { + path.items.truncate(saved_path_len); + per_byte.truncate(saved_byte_len); + merge_expected(&mut all_expected, expected); + } + // Once a choice branch commits, propagate its outcome. + other => return other, + } + } + NodeWalkResult::NoMatch { + position, + expected: all_expected, + } +} + +fn walk_seq( + source: &str, + position: usize, + children: &[Node], + ctx: &mut WalkContext, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let mut cur = position; + let mut idx = 0; + for child in children { + match walk_node(source, cur, child, ctx, path, per_byte) { + NodeWalkResult::Matched { end } => { + cur = end; + idx += 1; + } + NodeWalkResult::NoMatch { position, expected } => { + if idx == 0 { + // Seq didn't even start. + return NodeWalkResult::NoMatch { position, expected }; + } + // Mid-shape: did we run out of input or hit a + // wrong token? 
+ let post_ws = skip_whitespace(source, position); + let kind = if post_ws >= source.len() { + return NodeWalkResult::Incomplete { position: post_ws, expected }; + } else { + FailureKind::Mismatch { expected } + }; + return NodeWalkResult::Failed { position: post_ws, kind }; + } + NodeWalkResult::Incomplete { position, expected } => { + return NodeWalkResult::Incomplete { position, expected }; + } + NodeWalkResult::Failed { position, kind } => { + return NodeWalkResult::Failed { position, kind }; + } + } + } + NodeWalkResult::Matched { end: cur } +} + +fn walk_optional( + source: &str, + position: usize, + child: &Node, + ctx: &mut WalkContext, + path: &mut MatchedPath, + per_byte: &mut Vec, +) -> NodeWalkResult { + let saved_path_len = path.items.len(); + let saved_byte_len = per_byte.len(); + match walk_node(source, position, child, ctx, path, per_byte) { + NodeWalkResult::Matched { end } => NodeWalkResult::Matched { end }, + NodeWalkResult::NoMatch { .. } => { + path.items.truncate(saved_path_len); + per_byte.truncate(saved_byte_len); + NodeWalkResult::Matched { end: position } + } + other => other, + } +} + +fn merge_expected(dst: &mut Vec, src: Vec) { + for e in src { + if !dst.contains(&e) { + dst.push(e); + } + } +} diff --git a/src/dsl/walker/lex_helpers.rs b/src/dsl/walker/lex_helpers.rs new file mode 100644 index 0000000..446c389 --- /dev/null +++ b/src/dsl/walker/lex_helpers.rs @@ -0,0 +1,99 @@ +//! Byte-level helpers for the scannerless walker (ADR-0024 +//! §scannerless). +//! +//! Each helper takes the source string and a byte position, +//! returns either `Some(end_position)` (matched, post-token end) +//! or `None` (didn't match here). Helpers are pure and span- +//! exact; multi-byte UTF-8 within identifiers and string +//! literals is handled byte-correctly. +//! +//! These helpers internally mirror the logic of the legacy +//! `dsl::lexer` module but are invoked per-position by the +//! walker rather than as a pre-pass. 
/// Return the byte index of the first non-whitespace byte at or
/// after `start`. If the rest is all whitespace, returns
/// `source.len()`.
///
/// Only ASCII whitespace is skipped. A `start` beyond the end of
/// `source` is returned unchanged.
pub fn skip_whitespace(source: &str, start: usize) -> usize {
    match source.as_bytes().get(start..) {
        Some(rest) => start + rest.iter().take_while(|b| b.is_ascii_whitespace()).count(),
        None => start,
    }
}

/// Identifier shape: ASCII letter or `_` to start, then ASCII
/// alphanumeric or `_`. Returns `Some((start, end))` on match,
/// where `end` is the byte index just past the identifier.
///
/// Returns `None` when `start` is out of range or the byte there
/// cannot begin an identifier.
pub fn consume_ident(source: &str, start: usize) -> Option<(usize, usize)> {
    let bytes = source.as_bytes();
    let head = *bytes.get(start)?;
    if !(head.is_ascii_alphabetic() || head == b'_') {
        return None;
    }
    // `start + 1 <= bytes.len()` because `bytes.get(start)` succeeded.
    let tail_len = bytes[start + 1..]
        .iter()
        .take_while(|&&b| b.is_ascii_alphanumeric() || b == b'_')
        .count();
    Some((start, start + 1 + tail_len))
}

/// Try to match `keyword` at `position` case-insensitively.
///
/// The match must end at a non-identifier byte (or end-of-input)
/// so that `save` doesn't half-match the prefix of `saved`.
/// Returns the end byte index on match, `None` otherwise
/// (including when `position` lies outside `source`).
pub fn match_keyword(source: &str, position: usize, keyword: &str) -> Option<usize> {
    let bytes = source.as_bytes();
    // Checked arithmetic + `get`: an out-of-range `position` is a plain
    // non-match rather than an overflow or an index panic.
    let end = position.checked_add(keyword.len())?;
    let candidate = bytes.get(position..end)?;
    if !candidate.eq_ignore_ascii_case(keyword.as_bytes()) {
        return None;
    }
    match bytes.get(end) {
        // Keyword is only a prefix of a longer identifier.
        Some(&next) if next.is_ascii_alphanumeric() || next == b'_' => None,
        _ => Some(end),
    }
}
/// Bare-path token: a non-whitespace run.
///
/// Returns `Some((start, end))` covering the run of bytes from `start`
/// up to (not including) the next ASCII whitespace byte or end of input.
/// Returns `None` when `start` is at/after the end of `source` or sits on
/// a whitespace byte.
pub fn consume_bare_path(source: &str, start: usize) -> Option<(usize, usize)> {
    let rest = source.as_bytes().get(start..)?;
    let run_len = rest
        .iter()
        .take_while(|b| !b.is_ascii_whitespace())
        .count();
    (run_len > 0).then_some((start, start + run_len))
}

/// Match a single punctuation character at `position`.
///
/// Returns the byte index just past the character on match. The
/// comparison is done on whole characters, so non-ASCII punctuation is
/// matched correctly and the returned index is always a char boundary.
/// A `position` past the end of `source`, or inside a multi-byte
/// character, never matches.
#[allow(dead_code)]
pub fn match_punct(source: &str, position: usize, ch: char) -> Option<usize> {
    // `ch as u8` would truncate anything above U+00FF and could
    // false-match a UTF-8 lead byte; compare as `char` instead.
    let rest = source.get(position..)?;
    rest.starts_with(ch).then(|| position + ch.len_utf8())
}
`bound` is `EndOfInput` for parse; +/// `Position(cursor)` for completion / hint (Phase A: not yet +/// wired). +/// +/// Returns: +/// - `(Some(WalkResult), Some(Command))` on full match — the +/// AST builder produced a typed Command. +/// - `(Some(WalkResult), None)` on failure where the walker +/// committed (matched the entry word). Caller surfaces the +/// walker's error. +/// - `(None, None)` when the entry word doesn't match any +/// registered command — the router falls through to chumsky. +pub fn walk( + source: &str, + bound: WalkBound, + ctx: &mut WalkContext, +) -> (Option, Option) { + // Phase A only consumes EndOfInput; Position would slice + // the source, which is the same operation. + let effective_source: &str = match bound { + WalkBound::EndOfInput => source, + WalkBound::Position(end) => &source[..end.min(source.len())], + }; + + let start = skip_whitespace(effective_source, 0); + if start >= effective_source.len() { + return (None, None); + } + + // Identify the command by its entry word. If the first + // identifier-shape token isn't a registered entry, the + // walker yields to chumsky. + let Some((kw_start, kw_end)) = consume_ident(effective_source, start) else { + return (None, None); + }; + let entry_text = &effective_source[kw_start..kw_end]; + let Some((command_idx, command_node)) = grammar::command_for_entry_word(entry_text) + else { + return (None, None); + }; + + let mut path = MatchedPath::new(); + let mut per_byte = Vec::new(); + + // Record the entry-word match. 
+ path.push(crate::dsl::walker::outcome::MatchedItem { + kind: crate::dsl::walker::outcome::MatchedKind::Word(command_node.entry.primary), + text: entry_text.to_string(), + span: (kw_start, kw_end), + }); + per_byte.push(crate::dsl::walker::outcome::ByteClass { + start: kw_start, + end: kw_end, + class: grammar::HighlightClass::Keyword, + }); + + let outcome = match walk_node( + effective_source, + kw_end, + &command_node.shape, + ctx, + &mut path, + &mut per_byte, + ) { + NodeWalkResult::Matched { end } => { + let trailing = skip_whitespace(effective_source, end); + if trailing < effective_source.len() { + WalkOutcome::Mismatch { + position: trailing, + expected: vec![Expectation::EndOfInput], + } + } else { + WalkOutcome::Match { command_idx } + } + } + NodeWalkResult::NoMatch { position, expected } => { + // The shape required content the user hasn't typed. + // (Optional/empty-Seq shapes always return Matched + // even when skipped, so reaching NoMatch here means + // the command really wanted something more.) + let post = skip_whitespace(effective_source, position); + if post >= effective_source.len() { + WalkOutcome::Incomplete { position: post, expected } + } else { + WalkOutcome::Mismatch { position: post, expected } + } + } + NodeWalkResult::Incomplete { position, expected } => { + WalkOutcome::Incomplete { position, expected } + } + NodeWalkResult::Failed { position, kind } => match kind { + FailureKind::Mismatch { expected } => { + WalkOutcome::Mismatch { position, expected } + } + FailureKind::Validation(error) => { + WalkOutcome::ValidationFailed { position, error } + } + }, + }; + + let cmd = if matches!(outcome, WalkOutcome::Match { .. }) { + (command_node.ast_builder)(&path).ok() + } else { + None + }; + + let result = WalkResult { + outcome, + matched_path: path, + per_byte_class: per_byte, + }; + (Some(result), cmd) +} + +#[cfg(test)] +mod tests { + //! Walker behaviour tests — Phase A (ADR-0024 §migration). + //! + //! 
#[cfg(test)]
mod tests {
    //! Walker behaviour tests — Phase A (ADR-0024 §migration).
    //!
    //! These cover every app-lifecycle command the walker now
    //! owns. Each input is paired with its expected `Command`
    //! output (the differential-against-chumsky check
    //! materialised as hand-curated expectations — same role
    //! the differential test scaffolding plays per ADR-0024
    //! §test-discipline).
    //!
    //! The handoff document lists these tests as "walker-
    //! specific tests for trie-only features" — they pin down
    //! the walker's contract for the migrated commands so
    //! Phase B-F migrations can refactor without regression.
    use crate::dsl::command::{AppCommand, Command, MessagesValue, ModeValue};
    use crate::dsl::parser::parse_command;

    /// Parses through the public router, so walker-owned commands and
    /// chumsky fall-through are exercised exactly as callers see them.
    fn parse(input: &str) -> Result<Command, crate::dsl::ParseError> {
        parse_command(input)
    }

    /// Asserts that `input` parses to `Command::App(expected)`.
    #[track_caller]
    fn assert_app(input: &str, expected: AppCommand) {
        assert_eq!(
            parse(input).unwrap(),
            Command::App(expected),
            "input: {input:?}"
        );
    }

    // ---- Bare no-arg commands ---------------------------------

    #[test]
    fn walker_parses_quit() {
        assert_app("quit", AppCommand::Quit);
    }

    #[test]
    fn walker_parses_help() {
        assert_app("help", AppCommand::Help);
    }

    #[test]
    fn walker_parses_rebuild() {
        assert_app("rebuild", AppCommand::Rebuild);
    }

    #[test]
    fn walker_parses_new() {
        assert_app("new", AppCommand::New);
    }

    #[test]
    fn walker_parses_load() {
        assert_app("load", AppCommand::Load);
    }

    // ---- Save / save as ---------------------------------------

    #[test]
    fn walker_parses_save() {
        assert_app("save", AppCommand::Save);
    }

    #[test]
    fn walker_parses_save_as() {
        assert_app("save as", AppCommand::SaveAs);
    }

    #[test]
    fn walker_save_keywords_case_insensitive() {
        assert_app("SAVE", AppCommand::Save);
        assert_app("Save AS", AppCommand::SaveAs);
    }

    // ---- Mode -------------------------------------------------

    #[test]
    fn walker_parses_mode_simple() {
        assert_app(
            "mode simple",
            AppCommand::Mode {
                value: ModeValue::Simple,
            },
        );
    }

    #[test]
    fn walker_parses_mode_advanced() {
        assert_app(
            "mode advanced",
            AppCommand::Mode {
                value: ModeValue::Advanced,
            },
        );
    }

    #[test]
    fn walker_mode_unknown_value_emits_friendly_error() {
        let err = parse("mode foo").unwrap_err();
        match err {
            crate::dsl::ParseError::Invalid { message, .. } => {
                // The catalog wording for `mode.unknown` carries
                // the user's value verbatim.
                assert!(message.contains("foo"), "got: {message}");
            }
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    // ---- Messages ---------------------------------------------

    #[test]
    fn walker_parses_messages_bare() {
        assert_app("messages", AppCommand::Messages { value: None });
    }

    #[test]
    fn walker_parses_messages_short() {
        assert_app(
            "messages short",
            AppCommand::Messages {
                value: Some(MessagesValue::Short),
            },
        );
    }

    #[test]
    fn walker_parses_messages_verbose() {
        assert_app(
            "messages verbose",
            AppCommand::Messages {
                value: Some(MessagesValue::Verbose),
            },
        );
    }

    #[test]
    fn walker_messages_unknown_value_emits_friendly_error() {
        let err = parse("messages bogus").unwrap_err();
        match err {
            crate::dsl::ParseError::Invalid { message, .. } => {
                assert!(message.contains("bogus"), "got: {message}");
            }
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    // ---- Export -----------------------------------------------

    #[test]
    fn walker_parses_export_bare() {
        assert_app("export", AppCommand::Export { path: None });
    }

    #[test]
    fn walker_parses_export_with_path() {
        assert_app(
            "export backups/MyExport.zip",
            AppCommand::Export {
                path: Some("backups/MyExport.zip".to_string()),
            },
        );
    }

    #[test]
    fn walker_export_trims_trailing_whitespace() {
        // Pre-migration the source-slice helper trimmed; the
        // walker treats " " after `export` as zero BarePath
        // matches and produces the bare form.
        assert_app("export ", AppCommand::Export { path: None });
    }

    // ---- Import -----------------------------------------------

    #[test]
    fn walker_parses_import_bare() {
        assert_app(
            "import",
            AppCommand::Import {
                path: String::new(),
                target: None,
            },
        );
    }

    #[test]
    fn walker_parses_import_with_path() {
        assert_app(
            "import some/file.zip",
            AppCommand::Import {
                path: "some/file.zip".to_string(),
                target: None,
            },
        );
    }

    #[test]
    fn walker_parses_import_with_path_and_target() {
        assert_app(
            "import some/file.zip as MyImported",
            AppCommand::Import {
                path: "some/file.zip".to_string(),
                target: Some("MyImported".to_string()),
            },
        );
    }

    #[test]
    fn walker_import_keeps_as_inside_path() {
        // The lexer-free walker terminates `BarePath` at the
        // first whitespace byte. `path/asfile.zip` is one
        // token; the `as` *inside* it stays part of the path.
        assert_app(
            "import path/asfile.zip",
            AppCommand::Import {
                path: "path/asfile.zip".to_string(),
                target: None,
            },
        );
    }

    #[test]
    fn walker_import_trailing_as_without_target_errors() {
        let err = parse("import foo.zip as ").unwrap_err();
        match err {
            crate::dsl::ParseError::Invalid { message, expected, .. } => {
                // Phase A: the friendly `project.import_empty_target`
                // wording moves out of the parser; the walker's
                // structural error names the `target` slot.
                assert!(
                    message.contains("target") || expected.iter().any(|e| e == "target"),
                    "expected mention of target slot; got message={message:?}, expected={expected:?}"
                );
            }
            other => panic!("expected Invalid, got {other:?}"),
        }
    }

    // ---- Routing fall-through ---------------------------------

    #[test]
    fn walker_does_not_engage_for_non_app_keywords() {
        // The router falls through to the chumsky path. The
        // existing chumsky parser produces this Command.
        assert!(matches!(
            parse("drop table Customers").unwrap(),
            Command::DropTable { .. }
        ));
    }

    #[test]
    fn walker_does_not_engage_for_unknown_first_token() {
        // Not an entry word — chumsky yields its usual
        // unknown-command error.
        assert!(parse("frobulate").is_err());
    }

    // ---- Trailing-garbage detection ---------------------------

    #[test]
    fn walker_quit_with_trailing_garbage_errors() {
        assert!(parse("quit nonsense").is_err());
    }

    #[test]
    fn walker_save_with_trailing_garbage_errors() {
        assert!(parse("save Customers").is_err());
    }

    // ---- Whitespace tolerance ---------------------------------

    #[test]
    fn walker_tolerates_leading_and_internal_whitespace() {
        // Use runs of whitespace (not single spaces) so the test
        // actually exercises leading, trailing and internal skipping.
        assert_app("   quit   ", AppCommand::Quit);
        assert_app("save   as", AppCommand::SaveAs);
        assert_app(
            "mode\tadvanced",
            AppCommand::Mode {
                value: ModeValue::Advanced,
            },
        );
    }
}
+ #[allow(dead_code)] + Position(usize), +} + +/// Closed shape describing what could legally have continued the +/// walk at its stopping position. Phase A keeps this minimal — +/// only what the router needs to render a parse error. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Expectation { + /// The walker expected one of these literal keywords. + Word(&'static str), + /// The walker expected an identifier of the given role. + Ident { role: &'static str }, + /// The walker expected this exact punctuation character. + Punct(char), + /// The walker expected a number literal. + NumberLit, + /// The walker expected a string literal. + StringLit, + /// The walker expected a blob literal. + #[allow(dead_code)] + BlobLit, + /// The walker expected a flag with this name (without `--`). + #[allow(dead_code)] + Flag(&'static str), + /// The walker expected a bare-path argument (non-whitespace + /// run). + BarePath, + /// The walker expected end of input. + EndOfInput, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum WalkOutcome { + /// Input fully matched a command. `command_idx` is the + /// position of the matched command in the active registry. + Match { command_idx: usize }, + /// Input matched a prefix; more input would have continued + /// the parse. `position` is the byte offset where input ran + /// out (post-whitespace). + Incomplete { + position: usize, + expected: Vec, + }, + /// Input had a token at `position` that no expected node + /// accepts. + Mismatch { + position: usize, + expected: Vec, + }, + /// The walker matched a terminal but a content validator + /// rejected the value (e.g. `mode foo` matched the value + /// slot's identifier shape, then the validator fired + /// `mode.unknown`). + ValidationFailed { + position: usize, + error: ValidationError, + }, +} + +/// One terminal-node match. Combinators (Seq / Choice / Optional / +/// Repeated) shape the order; the AST builder reads the items in +/// declaration order. 
+#[derive(Debug, Clone)] +pub struct MatchedItem { + pub kind: MatchedKind, + pub text: String, + pub span: (usize, usize), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum MatchedKind { + /// A `Word` node matched. Carries the primary literal (not + /// the alias actually typed) so the AST builder can match + /// on it canonically. + Word(&'static str), + Punct(char), + /// An `Ident` matched. The role identifies which slot. + Ident { role: &'static str }, + NumberLit, + StringLit, + BlobLit, + Flag(&'static str), + BarePath, +} + +/// The path of matched terminals, in order. Optional / Repeated +/// nodes that produced no match contribute nothing. +#[derive(Debug, Clone, Default)] +pub struct MatchedPath { + pub items: Vec, +} + +impl MatchedPath { + pub fn new() -> Self { + Self::default() + } + + pub fn push(&mut self, item: MatchedItem) { + self.items.push(item); + } + + /// Convenience: find the first item matching the predicate. + pub fn find bool>(&self, pred: F) -> Option<&MatchedItem> { + self.items.iter().find(|i| pred(i)) + } + + /// Convenience: did any item match this exact word literal + /// (by primary)? Used by Optional-keyword discrimination + /// (e.g., `save` vs `save as`). + pub fn contains_word(&self, primary: &'static str) -> bool { + self.items + .iter() + .any(|i| matches!(&i.kind, MatchedKind::Word(p) if *p == primary)) + } +} + +/// Per-byte highlight class assignment, collected as terminals +/// match. Phase A keeps this for future consumers; not yet used +/// outside walker-internal tests. +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ByteClass { + pub start: usize, + pub end: usize, + pub class: HighlightClass, +} + +#[derive(Debug, Clone)] +pub struct WalkResult { + pub outcome: WalkOutcome, + pub matched_path: MatchedPath, + #[allow(dead_code)] + pub per_byte_class: Vec, +}