diff --git a/Cargo.lock b/Cargo.lock index 778f367..aa6fba7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,15 +29,6 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" -[[package]] -name = "ar_archive_writer" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" -dependencies = [ - "object", -] - [[package]] name = "arbitrary" version = "1.4.2" @@ -164,20 +155,6 @@ dependencies = [ "rand_core 0.10.1", ] -[[package]] -name = "chumsky" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d2bfadce76f963d776feff99db6dc33783829539258314776383b33e2a00f8" -dependencies = [ - "hashbrown 0.15.5", - "regex-automata", - "serde", - "stacker", - "unicode-ident", - "unicode-segmentation", -] - [[package]] name = "compact_str" version = "0.9.0" @@ -657,8 +634,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash 0.1.5", ] @@ -1059,15 +1034,6 @@ dependencies = [ "objc2-core-foundation", ] -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - [[package]] name = "once_cell" version = "1.21.4" @@ -1260,16 +1226,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "psm" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645dbe486e346d9b5de3ef16ede18c26e6c70ad97418f4874b8b1889d6e761ea" -dependencies = [ - "ar_archive_writer", - "cc", -] - [[package]] name = "quote" version = "1.0.45" @@ -1414,7 +1370,6 @@ version = "0.1.0" dependencies = [ "anyhow", "base64", - "chumsky", "crossterm", "csv", "directories", @@ -1722,19 +1677,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "stacker" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "640c8cdd92b6b12f5bcb1803ca3bbf5ab96e5e6b6b96b9ab77dabe9e880b3190" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys", -] - [[package]] name = "static_assertions" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index a1d72cb..1ae7358 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,6 @@ publish = false [dependencies] anyhow = "1.0.102" base64 = "0.22.1" -chumsky = "0.13.0" crossterm = { version = "0.29.0", features = ["event-stream"] } csv = "1.4.0" directories = "6.0.0" diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index deb5db5..932846d 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -10,19 +10,8 @@ //! Errors from chumsky are mapped to the local [`ParseError`] type //! so callers do not depend on chumsky's API surface. -use chumsky::error::{RichPattern, RichReason}; -use chumsky::prelude::*; - -use crate::dsl::action::ReferentialAction; -use crate::dsl::command::{ - AppCommand, ChangeColumnMode, ColumnSpec, Command, MessagesValue, ModeValue, - RelationshipSelector, RowFilter, -}; -use crate::dsl::ident_slot::IdentSlot; -use crate::dsl::keyword::{Keyword, Punct}; -use crate::dsl::lexer::{LexError, Token, TokenKind, lex}; -use crate::dsl::types::Type; -use crate::dsl::value::Value; +use crate::dsl::command::Command; +use crate::dsl::lexer::{Token, lex}; #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParseError { @@ -111,18 +100,44 @@ pub fn parse_tokens(tokens: &[Token], source: &str) -> Result Ok(cmd), - Err(errs) => Err(into_parse_error(&errs, tokens, source)), + Err(unknown_command_error(source)) +} + +/// Synthetic ParseError for inputs whose first identifier-shape +/// token isn't a registered command entry word. Replaces the +/// chumsky-side "expected `create`, `drop`, …" structural error +/// the legacy parser produced for the same case. +fn unknown_command_error(source: &str) -> ParseError { + use crate::dsl::grammar::REGISTRY; + use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace}; + let mut entries: Vec = REGISTRY + .iter() + .map(|c| format!("`{}`", c.entry.primary)) + .collect(); + entries.sort(); + let joined = oxford_join(&entries); + let start = skip_whitespace(source, 0); + let (position, found_word) = consume_ident(source, start).map_or_else( + || (start, None), + |(s, e)| (s, Some(&source[s..e])), + ); + let message = found_word.map_or_else( + || format!("expected one of {joined}"), + |w| format!("expected one of {joined}, found `{w}`"), + ); + ParseError::Invalid { + message, + position, + at_eof: false, + expected: entries, } } @@ -269,848 +284,14 @@ fn oxford_join(items: &[String]) -> String { } } -// ADR-0024 Phase E removed `try_parse_replay_with_bare_path`: -// the walker now owns `replay` end-to-end via -// `Choice(StringLit, BarePath)`. The chumsky-side replay -// branch in `command_parser` is unreachable until Phase F -// sweeps the chumsky path. - -// ADR-0024 Phase A removed `try_parse_app_path_command`: the -// walker (`crate::dsl::walker`) now owns export / import end-to- -// end (including their path arguments via `BarePath`). The -// chumsky-side bare-keyword branches in `command_parser` -// (`export_no_arg`, `import_no_arg`) are unreachable in practice -// but stay declared until Phase F sweeps the chumsky path. - -// ========================================================= -// Token-aware combinator helpers (ADR-0020 §5) -// ========================================================= - -/// Match a specific keyword token. -fn kw<'a>( - target: Keyword, -) -> impl Parser<'a, &'a [Token], (), extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::Keyword(k), .. } if *k == target => () - } - .labelled(format!("`{}`", target.as_str())) - .as_context() -} - -/// Match a specific punctuation token. -fn punct<'a>( - target: Punct, -) -> impl Parser<'a, &'a [Token], (), extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::Punct(p), .. } if *p == target => () - } - .labelled(format!("`{}`", target.as_char())) - .as_context() -} - -/// Match any identifier token, returning its name. Internal — -/// command parsers must use `ident_ctx(slot)` so the -/// completion engine knows what kind of identifier each -/// position expects (ADR-0022 §8). Bare `ident_inner()` calls -/// outside this module would skip the slot annotation. The -/// label is applied by `ident_ctx` (one per call site) — none -/// here. -fn ident_inner<'a>() --> impl Parser<'a, &'a [Token], String, extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::Identifier(s), .. } => s.clone() - } -} - -/// Tag-and-parse an identifier slot. The slot's user-facing -/// label (`IdentSlot::expected_label`) replaces the generic -/// "identifier" in the parser's expected-set machinery, so -/// the error message reads "expected table name" / -/// "expected column name" / "expected relationship name" / -/// "expected identifier" depending on the call site -/// (ADR-0022 stage 8c). The completion engine reverses the -/// mapping via `IdentSlot::from_expected_label` to know what -/// schema list to consult. -fn ident_ctx<'a>( - slot: crate::dsl::ident_slot::IdentSlot, -) -> impl Parser<'a, &'a [Token], String, extra::Err>> + Clone { - ident_inner().labelled(slot.expected_label()).as_context() -} - -/// Match a number-literal token, returning a `Value::Number`. -fn number_literal<'a>() --> impl Parser<'a, &'a [Token], Value, extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::Number(s), .. } => Value::Number(s.clone()) - } - .labelled("number") - .as_context() -} - -/// Match a string-literal token, returning a `Value::Text`. -fn string_literal<'a>() --> impl Parser<'a, &'a [Token], Value, extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::StringLiteral(s), .. } => Value::Text(s.clone()) - } - .labelled("string literal") - .as_context() -} - -/// Match a string-literal token, returning the raw payload -/// (used by the quoted-replay path). -fn string_payload<'a>() --> impl Parser<'a, &'a [Token], String, extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::StringLiteral(s), .. } => s.clone() - } - .labelled("path") - .as_context() -} - -/// Match a flag token whose payload equals `name` (the part -/// after `--`). -fn flag<'a>( - name: &'static str, -) -> impl Parser<'a, &'a [Token], (), extra::Err>> + Clone { - select_ref! { - Token { kind: TokenKind::Flag(s), .. } if s == name => () - } - .labelled(format!("`--{name}`")) - .as_context() -} - -/// Match an identifier and parse it as a `Type`. Surfaces the -/// existing "unknown type 'X' (expected one of: …)" message -/// (ADR-0020 §4) — keyword-shape errors aggregate naturally, -/// content errors keep their hand-written voice. -/// -/// Labelled "type" so the structural-error wording reads as -/// "next: type" rather than the unhelpful "something else" -/// the unlabelled `select_ref!` would otherwise produce. -fn type_keyword<'a>() --> impl Parser<'a, &'a [Token], Type, extra::Err>> + Clone { - // Label is applied to the select-ref alone (before - // try_map) so the unknown-type custom error from try_map - // still surfaces — labelled() on the whole chain would - // replace it with "expected type" and lose the - // "unknown type 'X' (expected one of: …)" wording. - select_ref! { - Token { kind: TokenKind::Identifier(s), .. } = e => (s.clone(), e.span()) - } - .labelled("type") - .try_map(|(name, span): (String, SimpleSpan), _| { - name.parse::() - .map_err(|err| Rich::custom(span, err.to_string())) - }) -} - -// ========================================================= -// Top-level command parser -// ========================================================= - -fn command_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - let create_table = kw(Keyword::Create) - .ignore_then(kw(Keyword::Table)) - .ignore_then(ident_ctx(IdentSlot::NewName)) - .then(with_pk_clause()) - .try_map(|(name, pk_specs), span| { - if pk_specs.is_empty() { - return Err(Rich::custom( - span, - crate::t!("parse.custom.create_table_needs_pk"), - )); - } - let columns: Vec = pk_specs - .iter() - .map(|(n, t)| ColumnSpec { - name: n.clone(), - ty: *t, - }) - .collect(); - let primary_key = pk_specs.into_iter().map(|(n, _)| n).collect(); - Ok(Command::CreateTable { - name, - columns, - primary_key, - }) - }); - - let drop_table = kw(Keyword::Drop) - .ignore_then(kw(Keyword::Table)) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .map(|name| Command::DropTable { name }); - - // `add column [to] [table] : ()`. Both - // prepositions independently optional — bare identifiers - // accepted in the unambiguous position. - let add_column = kw(Keyword::Add) - .ignore_then(kw(Keyword::Column)) - .ignore_then(kw(Keyword::To).or_not()) - .ignore_then(kw(Keyword::Table).or_not()) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then_ignore(punct(Punct::Colon)) - .then(ident_ctx(IdentSlot::NewName)) - .then_ignore(punct(Punct::OpenParen)) - .then(type_keyword()) - .then_ignore(punct(Punct::CloseParen)) - .map(|((table, column), ty)| Command::AddColumn { table, column, ty }); - - let drop_column = kw(Keyword::Drop) - .ignore_then(kw(Keyword::Column)) - .ignore_then(kw(Keyword::From).or_not()) - .ignore_then(kw(Keyword::Table).or_not()) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then_ignore(punct(Punct::Colon)) - .then(ident_ctx(IdentSlot::Column)) - .map(|(table, column)| Command::DropColumn { table, column }); - - let rename_column = kw(Keyword::Rename) - .ignore_then(kw(Keyword::Column)) - .ignore_then(kw(Keyword::In).or_not()) - .ignore_then(kw(Keyword::Table).or_not()) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then_ignore(punct(Punct::Colon)) - .then(ident_ctx(IdentSlot::Column)) - .then_ignore(kw(Keyword::To)) - .then(ident_ctx(IdentSlot::NewName)) - .map(|((table, old), new)| Command::RenameColumn { table, old, new }); - - let change_column = kw(Keyword::Change) - .ignore_then(kw(Keyword::Column)) - .ignore_then(kw(Keyword::In).or_not()) - .ignore_then(kw(Keyword::Table).or_not()) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then_ignore(punct(Punct::Colon)) - .then(ident_ctx(IdentSlot::Column)) - .then_ignore(punct(Punct::OpenParen)) - .then(type_keyword()) - .then_ignore(punct(Punct::CloseParen)) - .then(change_column_flags()) - .map(|(((table, column), ty), mode)| Command::ChangeColumnType { - table, - column, - ty, - mode, - }); - - let add_relationship = add_relationship_parser(); - let drop_relationship = drop_relationship_parser(); - - let show_data = kw(Keyword::Show) - .ignore_then(kw(Keyword::Data)) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .map(|name| Command::ShowData { name }); - - let show_table = kw(Keyword::Show) - .ignore_then(kw(Keyword::Table)) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .map(|name| Command::ShowTable { name }); - - let insert_cmd = insert_parser(); - let update_cmd = update_parser(); - let delete_cmd = delete_parser(); - - // The bare-path replay form is intercepted before chumsky - // sees the tokens (ADR-0020 §6); only the quoted form - // arrives here. - let replay = kw(Keyword::Replay) - .ignore_then(string_payload()) - .map(|path| Command::Replay { path }); - - // ---- App-lifecycle commands ----------------------------- - // No-arg variants and the keyword-value variants. Path- - // bearing variants (`export `, `import [as - // ]`) are handled by `try_parse_app_path_command` - // BEFORE chumsky runs; the bare-keyword forms below - // surface the `Path: None` / no-source variants for - // empty-prompt completion + usage rendering. - let quit_cmd = kw(Keyword::Quit).map(|()| Command::App(AppCommand::Quit)); - let help_cmd = kw(Keyword::Help).map(|()| Command::App(AppCommand::Help)); - let rebuild_cmd = - kw(Keyword::Rebuild).map(|()| Command::App(AppCommand::Rebuild)); - // `save as` must be tried before bare `save` (more specific). - let save_as_cmd = kw(Keyword::Save) - .then_ignore(kw(Keyword::As)) - .map(|()| Command::App(AppCommand::SaveAs)); - let save_cmd = kw(Keyword::Save).map(|()| Command::App(AppCommand::Save)); - let new_cmd = kw(Keyword::New).map(|()| Command::App(AppCommand::New)); - let load_cmd = kw(Keyword::Load).map(|()| Command::App(AppCommand::Load)); - let export_no_arg = - kw(Keyword::Export).map(|()| Command::App(AppCommand::Export { path: None })); - let import_no_arg = kw(Keyword::Import).map(|()| { - Command::App(AppCommand::Import { - path: String::new(), - target: None, - }) - }); - // `mode ` and `messages []` accept either the - // known keyword forms or any identifier — the identifier - // branch funnels through `try_map` into a friendly - // `mode.unknown` / `messages.unknown` error rather than the - // generic structural-error wording. Mirrors the type-name - // pattern in `type_keyword` (ADR-0020 §4). - let known_mode = choice(( - kw(Keyword::Simple).to(ModeValue::Simple), - kw(Keyword::Advanced).to(ModeValue::Advanced), - )); - let unknown_mode = ident_inner().try_map(|s, span| { - Err::(Rich::custom( - span, - crate::t!("mode.unknown", value = s), - )) - }); - let mode_cmd = kw(Keyword::Mode) - .ignore_then(choice((known_mode, unknown_mode))) - .map(|value| Command::App(AppCommand::Mode { value })); - let known_messages = choice(( - kw(Keyword::Short).to(MessagesValue::Short), - kw(Keyword::Verbose).to(MessagesValue::Verbose), - )); - let unknown_messages = ident_inner().try_map(|s, span| { - Err::(Rich::custom( - span, - crate::t!("messages.unknown", value = s), - )) - }); - let messages_cmd = kw(Keyword::Messages) - .ignore_then(choice((known_messages, unknown_messages)).or_not()) - .map(|value| Command::App(AppCommand::Messages { value })); - - choice(( - create_table, - // `drop column` and `drop relationship` come before - // `drop table` because both are more specific — - // chumsky's `choice` tries each in order. - drop_column, - drop_relationship, - drop_table, - add_column, - add_relationship, - rename_column, - change_column, - show_data, - show_table, - insert_cmd, - update_cmd, - delete_cmd, - replay, - // App commands. `save as` before bare `save`; everything - // else order-agnostic. - quit_cmd, - help_cmd, - rebuild_cmd, - save_as_cmd, - save_cmd, - new_cmd, - load_cmd, - export_no_arg, - import_no_arg, - mode_cmd, - messages_cmd, - )) - .then_ignore(end()) -} - -// ========================================================= -// Per-command sub-parsers -// ========================================================= - -fn insert_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - let column_list = punct(Punct::OpenParen) - .ignore_then( - ident_ctx(IdentSlot::Column) - .separated_by(punct(Punct::Comma)) - .at_least(1) - .collect::>(), - ) - .then_ignore(punct(Punct::CloseParen)); - - let value_list = punct(Punct::OpenParen) - .ignore_then( - value_literal() - .separated_by(punct(Punct::Comma)) - .at_least(1) - .collect::>(), - ) - .then_ignore(punct(Punct::CloseParen)); - - let with_columns_and_values = column_list - .clone() - .then_ignore(kw(Keyword::Values)) - .then(value_list.clone()) - .map(|(cols, vals)| (Some(cols), vals)); - - let with_values_keyword_only = kw(Keyword::Values) - .ignore_then(value_list.clone()) - .map(|vals| (None, vals)); - - let bare_value_list = value_list.map(|vals| (None, vals)); - - kw(Keyword::Insert) - .ignore_then(kw(Keyword::Into)) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then(choice(( - with_columns_and_values, - with_values_keyword_only, - bare_value_list, - ))) - .map(|(table, (columns, values))| Command::Insert { - table, - columns, - values, - }) -} - -fn update_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - let assignment = ident_ctx(IdentSlot::Column) - .then_ignore(punct(Punct::Equals)) - .then(value_literal()); - - let assignments = assignment - .separated_by(punct(Punct::Comma)) - .at_least(1) - .collect::>(); - - kw(Keyword::Update) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then_ignore(kw(Keyword::Set)) - .then(assignments) - .then(filter_clause()) - .map(|((table, assignments), filter)| Command::Update { - table, - assignments, - filter, - }) -} - -fn delete_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - kw(Keyword::Delete) - .ignore_then(kw(Keyword::From)) - .ignore_then(ident_ctx(IdentSlot::TableName)) - .then(filter_clause()) - .map(|(table, filter)| Command::Delete { table, filter }) -} - -fn filter_clause<'a>() --> impl Parser<'a, &'a [Token], RowFilter, extra::Err>> + Clone { - let where_clause = kw(Keyword::Where) - .ignore_then(ident_ctx(IdentSlot::Column)) - .then_ignore(punct(Punct::Equals)) - .then(value_literal()) - .map(|(column, value)| RowFilter::Where { column, value }); - - let all_rows = flag("all-rows").to(RowFilter::AllRows); - - // No `.labelled()` wrap here: chumsky's expected-set then - // surfaces the constituent options (`` `where` ``, - // `` `--all-rows` ``) individually instead of collapsing - // them to a single descriptive label. The completion - // engine needs the constituents to offer Tab candidates - // (ADR-0022 §8); the resulting error prose ("expected `,`, - // `where`, or `--all-rows`") reads cleanly enough without - // hand-wrapping. - where_clause.or(all_rows) -} - -fn value_literal<'a>() --> impl Parser<'a, &'a [Token], Value, extra::Err>> + Clone { - choice(( - kw(Keyword::Null).to(Value::Null), - kw(Keyword::True).to(Value::Bool(true)), - kw(Keyword::False).to(Value::Bool(false)), - number_literal(), - string_literal(), - )) -} - -fn add_relationship_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - // `1:n` lexes as Number("1"), Punct(Colon), Identifier("n"). - let one_token = select_ref! { - Token { kind: TokenKind::Number(s), .. } if s == "1" => () - } - .labelled("`1`") - .as_context(); - - let n_ident = select_ref! { - Token { kind: TokenKind::Identifier(s), .. } if s.eq_ignore_ascii_case("n") => () - } - .labelled("`n`") - .as_context(); - - let one_to_n = one_token - .ignore_then(punct(Punct::Colon)) - .ignore_then(n_ident); - - let optional_name = kw(Keyword::As).ignore_then(ident_ctx(IdentSlot::NewName)).or_not(); - - kw(Keyword::Add) - .ignore_then(one_to_n) - .ignore_then(kw(Keyword::Relationship)) - .ignore_then(optional_name) - .then_ignore(kw(Keyword::From)) - .then(qualified_column()) - .then_ignore(kw(Keyword::To)) - .then(qualified_column()) - .then(referential_clauses()) - .then(create_fk_flag()) - .map( - |((((name, parent), child), (on_delete, on_update)), create_fk)| { - Command::AddRelationship { - name, - parent_table: parent.0, - parent_column: parent.1, - child_table: child.0, - child_column: child.1, - on_delete, - on_update, - create_fk, - } - }, - ) -} - -fn drop_relationship_parser<'a>() --> impl Parser<'a, &'a [Token], Command, extra::Err>> + Clone { - let endpoints_form = kw(Keyword::From) - .ignore_then(qualified_column()) - .then_ignore(kw(Keyword::To)) - .then(qualified_column()) - .map(|(parent, child)| RelationshipSelector::Endpoints { - parent_table: parent.0, - parent_column: parent.1, - child_table: child.0, - child_column: child.1, - }); - - let named_form = ident_ctx(IdentSlot::RelationshipName) - .map(|name| RelationshipSelector::Named { name }); - - kw(Keyword::Drop) - .ignore_then(kw(Keyword::Relationship)) - .ignore_then(choice((endpoints_form, named_form))) - .map(|selector| Command::DropRelationship { selector }) -} - -fn qualified_column<'a>() --> impl Parser<'a, &'a [Token], (String, String), extra::Err>> + Clone { - ident_ctx(IdentSlot::TableName) - .then_ignore(punct(Punct::Dot)) - .then(ident_ctx(IdentSlot::Column)) -} - -fn referential_clauses<'a>() -> impl Parser< - 'a, - &'a [Token], - (ReferentialAction, ReferentialAction), - extra::Err>, -> + Clone { - let target = kw(Keyword::Delete) - .to(ReferentialActionTarget::Delete) - .or(kw(Keyword::Update).to(ReferentialActionTarget::Update)); - let clause = kw(Keyword::On) - .ignore_then(target) - .then(action_keyword()) - .map(|(t, a)| (t, a)); - clause - .repeated() - .at_most(2) - .collect::>() - .try_map(|clauses, span| { - let mut on_delete = None; - let mut on_update = None; - for (target, action) in clauses { - let slot = match target { - ReferentialActionTarget::Delete => &mut on_delete, - ReferentialActionTarget::Update => &mut on_update, - }; - if slot.is_some() { - return Err(Rich::custom( - span, - crate::t!( - "parse.custom.on_action_specified_twice", - target = target, - ), - )); - } - *slot = Some(action); - } - Ok(( - on_delete.unwrap_or_else(ReferentialAction::default_action), - on_update.unwrap_or_else(ReferentialAction::default_action), - )) - }) -} - -#[derive(Debug, Clone, Copy)] -enum ReferentialActionTarget { - Delete, - Update, -} - -impl std::fmt::Display for ReferentialActionTarget { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(match self { - Self::Delete => "delete", - Self::Update => "update", - }) - } -} - -fn action_keyword<'a>() --> impl Parser<'a, &'a [Token], ReferentialAction, extra::Err>> + Clone { - choice(( - kw(Keyword::Set) - .ignore_then(kw(Keyword::Null)) - .to(ReferentialAction::SetNull), - kw(Keyword::No) - .ignore_then(kw(Keyword::Action)) - .to(ReferentialAction::NoAction), - kw(Keyword::Cascade).to(ReferentialAction::Cascade), - kw(Keyword::Restrict).to(ReferentialAction::Restrict), - )) -} - -fn create_fk_flag<'a>() --> impl Parser<'a, &'a [Token], bool, extra::Err>> + Clone { - flag("create-fk").or_not().map(|opt| opt.is_some()) -} - -fn change_column_flags<'a>() --> impl Parser<'a, &'a [Token], ChangeColumnMode, extra::Err>> + Clone { - let force = flag("force-conversion").to(ChangeColumnMode::ForceConversion); - let dont = flag("dont-convert").to(ChangeColumnMode::DontConvert); - choice((force, dont)) - .repeated() - .collect::>() - .try_map(|flags, span| match flags.as_slice() { - [] => Ok(ChangeColumnMode::Default), - [single] => Ok(*single), - _ => Err(Rich::custom( - span, - crate::t!("parse.custom.change_column_flags_exclusive"), - )), - }) -} - -fn with_pk_clause<'a>() --> impl Parser<'a, &'a [Token], Vec<(String, Type)>, extra::Err>> + Clone { - // Each PK spec names a NEW column inside the table being - // created. `with_pk_clause` is reached only inside - // `create_table`, where the surrounding context is - // building a new schema entity from scratch. - let single = ident_ctx(IdentSlot::NewName) - .then_ignore(punct(Punct::Colon)) - .then(type_keyword()) - .map(|(name, ty)| (name, ty)); - - let spec_list = single - .clone() - .separated_by(punct(Punct::Comma)) - .at_least(1) - .collect::>(); - - kw(Keyword::With) - .ignore_then(kw(Keyword::Pk)) - .ignore_then(spec_list.or_not()) - .map(|maybe_specs| { - // `with pk` alone defaults to a serial id PK. - maybe_specs.unwrap_or_else(|| vec![("id".to_string(), Type::Serial)]) - }) - .or_not() - .map(Option::unwrap_or_default) -} - -// ========================================================= -// Error humanisation -// ========================================================= - -fn into_parse_error(errs: &[Rich<'_, Token>], tokens: &[Token], source: &str) -> ParseError { - // Prefer custom-reason errors over chumsky's structural - // ones — those carry our hand-tuned messages from `try_map` - // (e.g. "unknown type 'varchar' (expected one of: ...)"). - let chosen = errs - .iter() - .find(|e| matches!(e.reason(), RichReason::Custom(_))) - .unwrap_or_else(|| errs.first().expect("parser failure with no error")); - let chumsky_span = chosen.span(); - let position = source_position_at(tokens, chumsky_span.start, source); - let message = humanise(chosen, tokens, source); - let (at_eof, expected) = match chosen.reason() { - // Structural failures know whether they ran out of - // input — `found = None` ⇔ EOF — and carry the - // expected-pattern set chumsky was looking for. - RichReason::ExpectedFound { expected, found } => { - (found.is_none(), describe_expected(expected)) - } - // Custom errors: see the docstring on - // `ParseError::Invalid::at_eof` for why we err on the - // side of `true` (no live overlay; on-submit error - // still fires). Custom errors have no expected-set. - RichReason::Custom(_) => (true, Vec::new()), - }; - ParseError::Invalid { - message, - position, - at_eof, - expected, - } -} - -/// Render a chumsky expected-pattern set into the same -/// human-readable forms `humanise()` uses, but as discrete -/// items rather than an oxford-joined string. Stable order -/// (sorted, deduplicated) so callers don't have to. -fn describe_expected(expected: &[RichPattern<'_, Token>]) -> Vec { - let has_concrete = expected.iter().any(|p| { - matches!( - p, - RichPattern::Token(_) - | RichPattern::Identifier(_) - | RichPattern::Label(_) - | RichPattern::EndOfInput - ) - }); - let mut items: Vec = expected - .iter() - .filter(|p| { - !(has_concrete && matches!(p, RichPattern::Any | RichPattern::SomethingElse)) - }) - .map(describe_pattern) - .collect(); - // Dedup preserving first occurrence (which reflects - // chumsky's traversal order — typically source order for - // `or_not` / `choice` chains). Empirically this gives a - // grammar-natural ordering: `to` before `table` in - // `add column [to] [table] …`, which alphabetical - // (table, to) would invert. - let mut seen = std::collections::HashSet::new(); - items.retain(|s| seen.insert(s.clone())); - items -} - -/// Translate a chumsky token-slice index into a byte position -/// in the original source. If the index points past the last -/// token (an end-of-input failure), use the last token's end -/// or, if there are no tokens, the source length. -fn source_position_at(tokens: &[Token], slice_index: usize, source: &str) -> usize { - if slice_index < tokens.len() { - tokens[slice_index].span.0 - } else { - tokens.last().map_or(source.len(), |t| t.span.1) - } -} - -fn humanise(err: &Rich<'_, Token>, tokens: &[Token], source: &str) -> String { - if let RichReason::Custom(msg) = err.reason() { - return msg.clone(); - } - let RichReason::ExpectedFound { expected, found } = err.reason() else { - unreachable!("RichReason has only two variants today"); - }; - // `found` is the offending token (or None at end of input). - let found_str = found.as_ref().map_or_else( - || "end of input".to_string(), - |maybe_ref| describe_token(maybe_ref), - ); - - let described = describe_expected(expected); - let expected_str = oxford_or(&described); - - let chumsky_span_start = err.span().start; - let consumed = consumed_context(tokens, chumsky_span_start, source); - - if expected.is_empty() { - if consumed.is_empty() { - format!("unexpected {found_str}") - } else { - format!("after `{consumed}`, unexpected {found_str}") - } - } else if consumed.is_empty() { - format!("expected {expected_str}, found {found_str}") - } else { - format!("after `{consumed}`, expected {expected_str}, found {found_str}") - } -} - -fn describe_pattern(p: &RichPattern<'_, Token>) -> String { - match p { - RichPattern::Token(t) => describe_token(t), - RichPattern::Identifier(s) => format!("`{s}`"), - RichPattern::Label(s) => s.to_string(), - RichPattern::Any => "any token".to_string(), - RichPattern::SomethingElse => "something else".to_string(), - RichPattern::EndOfInput => "end of input".to_string(), - // RichPattern is non_exhaustive; cover the catch-all. - _ => "".to_string(), - } -} - -fn describe_token(t: &Token) -> String { - match &t.kind { - TokenKind::Keyword(k) => format!("`{}`", k.as_str()), - TokenKind::Identifier(s) => format!("`{s}`"), - TokenKind::Number(s) => format!("`{s}`"), - TokenKind::StringLiteral(_) => "string literal".to_string(), - TokenKind::Punct(p) => format!("`{}`", p.as_char()), - TokenKind::Flag(s) => format!("`--{s}`"), - TokenKind::Error(LexError::UnknownChar(c)) => { - format!("unrecognised character `{c}`") - } - TokenKind::Error(LexError::UnterminatedString) => { - "unterminated string literal".to_string() - } - TokenKind::Error(LexError::BadFlag) => "malformed flag (bare `--`)".to_string(), - } -} - -/// "A, B, or C" / "A or B" / "A". -fn oxford_or(items: &[String]) -> String { - match items { - [] => String::new(), - [a] => a.clone(), - [a, b] => format!("{a} or {b}"), - rest => { - let (last, head) = rest.split_last().expect("len >= 3"); - format!("{}, or {last}", head.join(", ")) - } - } -} - -/// Source slice covering all tokens before the failure point, -/// trimmed to a sensible length. -fn consumed_context(tokens: &[Token], chumsky_span_start: usize, source: &str) -> String { - if chumsky_span_start == 0 { - return String::new(); - } - let last_consumed_index = chumsky_span_start - 1; - let Some(last_token) = tokens.get(last_consumed_index) else { - return String::new(); - }; - let prefix = source[..last_token.span.1].trim(); - if prefix.is_empty() { - return String::new(); - } - const MAX: usize = 40; - if prefix.chars().count() <= MAX { - prefix.to_string() - } else { - let tail: String = prefix - .chars() - .rev() - .take(MAX) - .collect::>() - .into_iter() - .rev() - .collect(); - format!("…{tail}") - } -} +// ADR-0024 Phase F: the chumsky-side `command_parser` and its +// per-command sub-parsers (replay, export/import, mode/messages, +// the DDL family, data commands) are deleted. The unified-grammar +// walker in `crate::dsl::walker` is the sole parse path. +// `try_parse_replay_with_bare_path` and `try_parse_app_path_command` +// — the source-slice helpers that handled bare paths before the +// walker existed — are also gone; `BarePath` in the walker +// supersedes them. // ========================================================= // Tests @@ -1119,6 +300,12 @@ fn consumed_context(tokens: &[Token], chumsky_span_start: usize, source: &str) - #[cfg(test)] mod tests { use super::*; + use crate::dsl::action::ReferentialAction; + use crate::dsl::command::{ + ChangeColumnMode, ColumnSpec, RelationshipSelector, RowFilter, + }; + use crate::dsl::types::Type; + use crate::dsl::value::Value; use pretty_assertions::assert_eq; fn ok(input: &str) -> Command {