ADR-0024 Phase F (minimal): drop chumsky from the parse path

Delete the chumsky-side command_parser and its per-command
sub-parsers, error humanisation helpers, and keyword/punct/
ident/value-literal combinators. The unified-grammar walker
in `crate::dsl::walker` is now the sole parse path.

parse_tokens flow (post-Phase F):
1. lex(input) — still produces the Token stream that
   completion / highlighting / echo-line consumers depend on.
2. try_walker_route(source) — walker handles every entry
   keyword in REGISTRY (20 commands across 14 entry words).
3. unknown_command_error(source) — synthetic ParseError for
   inputs whose first identifier-shape token isn't a
   registered entry word. Wording mirrors the chumsky-side
   "expected one of `add`, `change`, …, found `<word>`"
   structural error the legacy top-level Choice produced.

Cargo.toml: chumsky dependency dropped (no remaining uses).
Cargo.lock regenerated; 58 lines removed (the chumsky package
plus the packages nothing else needs any more: stacker, psm,
ar_archive_writer, object).

Scope intentionally deferred (separate follow-up):
- dsl/lexer.rs, dsl/keyword.rs, dsl/ident_slot.rs,
  dsl/usage.rs::REGISTRY: still consumed by completion.rs,
  input_render.rs, app.rs, theme.rs, db.rs, runtime.rs,
  friendly/keys.rs. Removing these requires migrating each
  consumer to the walker's per-byte-class output / grammar
  REGISTRY / IdentSource enum. Substantial blast radius;
  worth a dedicated session.
- parse.token.keyword.* catalog entries (40+): used by
  usage.rs and parse-error rendering for the unmigrated
  consumers above. Collapse follows after the consumer
  migration.

Tests:
- All existing parser tests (`dsl::parser::tests`) ported in
  place; they call `parse_command` which now flows through
  the walker. 844 passed, 0 failed, 1 ignored — same count
  as Phase E (no test additions, no regressions).
- cargo clippy --all-targets -- -D warnings clean.
- cargo build (release-like dev profile) succeeds.
This commit is contained in:
claude@clouddev1
2026-05-15 07:31:43 +00:00
parent dca472f8a5
commit c940ba9cf2
3 changed files with 51 additions and 923 deletions
Generated
-58
View File
@@ -29,15 +29,6 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "ar_archive_writer"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b"
dependencies = [
"object",
]
[[package]] [[package]]
name = "arbitrary" name = "arbitrary"
version = "1.4.2" version = "1.4.2"
@@ -164,20 +155,6 @@ dependencies = [
"rand_core 0.10.1", "rand_core 0.10.1",
] ]
[[package]]
name = "chumsky"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0d2bfadce76f963d776feff99db6dc33783829539258314776383b33e2a00f8"
dependencies = [
"hashbrown 0.15.5",
"regex-automata",
"serde",
"stacker",
"unicode-ident",
"unicode-segmentation",
]
[[package]] [[package]]
name = "compact_str" name = "compact_str"
version = "0.9.0" version = "0.9.0"
@@ -657,8 +634,6 @@ version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [ dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.1.5", "foldhash 0.1.5",
] ]
@@ -1059,15 +1034,6 @@ dependencies = [
"objc2-core-foundation", "objc2-core-foundation",
] ]
[[package]]
name = "object"
version = "0.37.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.21.4" version = "1.21.4"
@@ -1260,16 +1226,6 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "psm"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645dbe486e346d9b5de3ef16ede18c26e6c70ad97418f4874b8b1889d6e761ea"
dependencies = [
"ar_archive_writer",
"cc",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.45" version = "1.0.45"
@@ -1414,7 +1370,6 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"base64", "base64",
"chumsky",
"crossterm", "crossterm",
"csv", "csv",
"directories", "directories",
@@ -1722,19 +1677,6 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "stacker"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "640c8cdd92b6b12f5bcb1803ca3bbf5ab96e5e6b6b96b9ab77dabe9e880b3190"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"windows-sys",
]
[[package]] [[package]]
name = "static_assertions" name = "static_assertions"
version = "1.1.0" version = "1.1.0"
-1
View File
@@ -11,7 +11,6 @@ publish = false
[dependencies] [dependencies]
anyhow = "1.0.102" anyhow = "1.0.102"
base64 = "0.22.1" base64 = "0.22.1"
chumsky = "0.13.0"
crossterm = { version = "0.29.0", features = ["event-stream"] } crossterm = { version = "0.29.0", features = ["event-stream"] }
csv = "1.4.0" csv = "1.4.0"
directories = "6.0.0" directories = "6.0.0"
+51 -864
View File
@@ -10,19 +10,8 @@
//! Errors from chumsky are mapped to the local [`ParseError`] type //! Errors from chumsky are mapped to the local [`ParseError`] type
//! so callers do not depend on chumsky's API surface. //! so callers do not depend on chumsky's API surface.
use chumsky::error::{RichPattern, RichReason}; use crate::dsl::command::Command;
use chumsky::prelude::*; use crate::dsl::lexer::{Token, lex};
use crate::dsl::action::ReferentialAction;
use crate::dsl::command::{
AppCommand, ChangeColumnMode, ColumnSpec, Command, MessagesValue, ModeValue,
RelationshipSelector, RowFilter,
};
use crate::dsl::ident_slot::IdentSlot;
use crate::dsl::keyword::{Keyword, Punct};
use crate::dsl::lexer::{LexError, Token, TokenKind, lex};
use crate::dsl::types::Type;
use crate::dsl::value::Value;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError { pub enum ParseError {
@@ -111,18 +100,44 @@ pub fn parse_tokens(tokens: &[Token], source: &str) -> Result<Command, ParseErro
if tokens.is_empty() { if tokens.is_empty() {
return Err(ParseError::Empty); return Err(ParseError::Empty);
} }
// ADR-0024 Phase A: the unified-grammar walker owns the // ADR-0024 Phase F: the unified-grammar walker now owns
// app-lifecycle commands (quit, help, rebuild, save / save // every command. If the walker doesn't engage (the input's
// as, new, load, export, import, mode, messages). The // first identifier-shape token isn't a registered entry
// walker engages on input whose first identifier-shape // word), produce an "unknown command" error naming the
// token matches a registered entry word; otherwise the // valid entry keywords.
// router falls through to the legacy chumsky path below.
if let Some(result) = try_walker_route(source) { if let Some(result) = try_walker_route(source) {
return result; return result;
} }
match command_parser().parse(tokens).into_result() { Err(unknown_command_error(source))
Ok(cmd) => Ok(cmd), }
Err(errs) => Err(into_parse_error(&errs, tokens, source)),
/// Synthetic ParseError for inputs whose first identifier-shape
/// token isn't a registered command entry word. Replaces the
/// chumsky-side "expected `create`, `drop`, …" structural error
/// the legacy parser produced for the same case.
fn unknown_command_error(source: &str) -> ParseError {
use crate::dsl::grammar::REGISTRY;
use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace};
let mut entries: Vec<String> = REGISTRY
.iter()
.map(|c| format!("`{}`", c.entry.primary))
.collect();
entries.sort();
let joined = oxford_join(&entries);
let start = skip_whitespace(source, 0);
let (position, found_word) = consume_ident(source, start).map_or_else(
|| (start, None),
|(s, e)| (s, Some(&source[s..e])),
);
let message = found_word.map_or_else(
|| format!("expected one of {joined}"),
|w| format!("expected one of {joined}, found `{w}`"),
);
ParseError::Invalid {
message,
position,
at_eof: false,
expected: entries,
} }
} }
@@ -269,848 +284,14 @@ fn oxford_join(items: &[String]) -> String {
} }
} }
// ADR-0024 Phase E removed `try_parse_replay_with_bare_path`: // ADR-0024 Phase F: the chumsky-side `command_parser` and its
// the walker now owns `replay` end-to-end via // per-command sub-parsers (replay, export/import, mode/messages,
// `Choice(StringLit, BarePath)`. The chumsky-side replay // the DDL family, data commands) are deleted. The unified-grammar
// branch in `command_parser` is unreachable until Phase F // walker in `crate::dsl::walker` is the sole parse path.
// sweeps the chumsky path. // `try_parse_replay_with_bare_path` and `try_parse_app_path_command`
// — the source-slice helpers that handled bare paths before the
// ADR-0024 Phase A removed `try_parse_app_path_command`: the // walker existed — are also gone; `BarePath` in the walker
// walker (`crate::dsl::walker`) now owns export / import end-to- // supersedes them.
// end (including their path arguments via `BarePath`). The
// chumsky-side bare-keyword branches in `command_parser`
// (`export_no_arg`, `import_no_arg`) are unreachable in practice
// but stay declared until Phase F sweeps the chumsky path.
// =========================================================
// Token-aware combinator helpers (ADR-0020 §5)
// =========================================================
/// Accept exactly one keyword token equal to `target`, yielding
/// `()`. The parser is labelled with the back-quoted keyword text.
fn kw<'a>(
    target: Keyword,
) -> impl Parser<'a, &'a [Token], (), extra::Err<Rich<'a, Token>>> + Clone {
    let label = format!("`{}`", target.as_str());
    let matcher = select_ref! {
        Token { kind: TokenKind::Keyword(found), .. } if *found == target => ()
    };
    matcher.labelled(label).as_context()
}
/// Accept exactly one punctuation token equal to `target`,
/// yielding `()`. The parser is labelled with the back-quoted
/// punctuation character.
fn punct<'a>(
    target: Punct,
) -> impl Parser<'a, &'a [Token], (), extra::Err<Rich<'a, Token>>> + Clone {
    let label = format!("`{}`", target.as_char());
    let matcher = select_ref! {
        Token { kind: TokenKind::Punct(found), .. } if *found == target => ()
    };
    matcher.labelled(label).as_context()
}
/// Accept any identifier token and yield an owned copy of its
/// name. Deliberately unlabelled and internal: command parsers
/// go through `ident_ctx(slot)`, which attaches the per-slot
/// label the completion engine relies on (ADR-0022 §8). Calling
/// this directly from a command parser would lose that slot
/// annotation.
fn ident_inner<'a>()
-> impl Parser<'a, &'a [Token], String, extra::Err<Rich<'a, Token>>> + Clone {
    select_ref! {
        Token { kind: TokenKind::Identifier(name), .. } => name.clone()
    }
}
/// Parse an identifier for a specific slot. The slot's
/// user-facing label (`IdentSlot::expected_label`) is attached to
/// the parser, so the expected-set reads "expected table name" /
/// "expected column name" / "expected relationship name" /
/// "expected identifier" depending on the call site (ADR-0022
/// stage 8c). The completion engine maps the label back to a
/// slot via `IdentSlot::from_expected_label` to decide which
/// schema list to consult.
fn ident_ctx<'a>(
    slot: crate::dsl::ident_slot::IdentSlot,
) -> impl Parser<'a, &'a [Token], String, extra::Err<Rich<'a, Token>>> + Clone {
    let label = slot.expected_label();
    ident_inner().labelled(label).as_context()
}
/// Accept a number-literal token and wrap its text in
/// `Value::Number`. Labelled "number".
fn number_literal<'a>()
-> impl Parser<'a, &'a [Token], Value, extra::Err<Rich<'a, Token>>> + Clone {
    let matcher = select_ref! {
        Token { kind: TokenKind::Number(digits), .. } => Value::Number(digits.clone())
    };
    matcher.labelled("number").as_context()
}
/// Accept a string-literal token and wrap its payload in
/// `Value::Text`. Labelled "string literal".
fn string_literal<'a>()
-> impl Parser<'a, &'a [Token], Value, extra::Err<Rich<'a, Token>>> + Clone {
    let matcher = select_ref! {
        Token { kind: TokenKind::StringLiteral(text), .. } => Value::Text(text.clone())
    };
    matcher.labelled("string literal").as_context()
}
/// Accept a string-literal token and yield its raw payload as a
/// `String` (used by the quoted-replay path). Labelled "path".
fn string_payload<'a>()
-> impl Parser<'a, &'a [Token], String, extra::Err<Rich<'a, Token>>> + Clone {
    let matcher = select_ref! {
        Token { kind: TokenKind::StringLiteral(text), .. } => text.clone()
    };
    matcher.labelled("path").as_context()
}
/// Accept a flag token whose payload (the text after `--`) equals
/// `name`, yielding `()`. Labelled with the back-quoted `--name`
/// spelling.
fn flag<'a>(
    name: &'static str,
) -> impl Parser<'a, &'a [Token], (), extra::Err<Rich<'a, Token>>> + Clone {
    let label = format!("`--{name}`");
    let matcher = select_ref! {
        Token { kind: TokenKind::Flag(payload), .. } if payload == name => ()
    };
    matcher.labelled(label).as_context()
}
/// Accept an identifier token and parse its text as a `Type`.
/// A failed parse becomes a custom error carrying the `Type`
/// parser's own message ("unknown type 'X' (expected one of: …)",
/// ADR-0020 §4), so content errors keep their hand-written voice
/// while keyword-shape errors aggregate as usual.
///
/// The identifier matcher is labelled "type" so structural
/// errors read "next: type" instead of the "something else" an
/// unlabelled `select_ref!` would produce.
fn type_keyword<'a>()
-> impl Parser<'a, &'a [Token], Type, extra::Err<Rich<'a, Token>>> + Clone {
    // The label sits on the identifier matcher only, before
    // `try_map`. Labelling the whole chain would replace the
    // custom unknown-type error with a bare "expected type".
    let type_name = select_ref! {
        Token { kind: TokenKind::Identifier(text), .. } = ctx => (text.clone(), ctx.span())
    }
    .labelled("type");

    type_name.try_map(|(text, at): (String, SimpleSpan), _| {
        text.parse::<Type>()
            .map_err(|problem| Rich::custom(at, problem.to_string()))
    })
}
// =========================================================
// Top-level command parser
// =========================================================
/// Build the top-level command parser: one sub-parser per command
/// form, combined with `choice` and followed by `end()`, so a
/// command with trailing tokens is rejected. The result is a
/// `Command` on success.
///
/// Alternative order inside the final `choice` matters where two
/// forms share a leading keyword: `drop column` / `drop
/// relationship` precede `drop table`, and `save as` precedes
/// bare `save`.
fn command_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
// `create table <name> [with pk ...]`. An empty PK spec list
// (no `with pk` clause at all) is rejected with a custom error.
let create_table = kw(Keyword::Create)
.ignore_then(kw(Keyword::Table))
.ignore_then(ident_ctx(IdentSlot::NewName))
.then(with_pk_clause())
.try_map(|(name, pk_specs), span| {
if pk_specs.is_empty() {
return Err(Rich::custom(
span,
crate::t!("parse.custom.create_table_needs_pk"),
));
}
// Every PK spec is both a column of the new table and a
// member of its primary key.
let columns: Vec<ColumnSpec> = pk_specs
.iter()
.map(|(n, t)| ColumnSpec {
name: n.clone(),
ty: *t,
})
.collect();
let primary_key = pk_specs.into_iter().map(|(n, _)| n).collect();
Ok(Command::CreateTable {
name,
columns,
primary_key,
})
});
let drop_table = kw(Keyword::Drop)
.ignore_then(kw(Keyword::Table))
.ignore_then(ident_ctx(IdentSlot::TableName))
.map(|name| Command::DropTable { name });
// `add column [to] [table] <T>: <col> (<type>)`. Both
// prepositions independently optional — bare identifiers
// accepted in the unambiguous position.
let add_column = kw(Keyword::Add)
.ignore_then(kw(Keyword::Column))
.ignore_then(kw(Keyword::To).or_not())
.ignore_then(kw(Keyword::Table).or_not())
.ignore_then(ident_ctx(IdentSlot::TableName))
.then_ignore(punct(Punct::Colon))
.then(ident_ctx(IdentSlot::NewName))
.then_ignore(punct(Punct::OpenParen))
.then(type_keyword())
.then_ignore(punct(Punct::CloseParen))
.map(|((table, column), ty)| Command::AddColumn { table, column, ty });
// `drop column [from] [table] <T>: <col>`.
let drop_column = kw(Keyword::Drop)
.ignore_then(kw(Keyword::Column))
.ignore_then(kw(Keyword::From).or_not())
.ignore_then(kw(Keyword::Table).or_not())
.ignore_then(ident_ctx(IdentSlot::TableName))
.then_ignore(punct(Punct::Colon))
.then(ident_ctx(IdentSlot::Column))
.map(|(table, column)| Command::DropColumn { table, column });
// `rename column [in] [table] <T>: <old> to <new>`.
let rename_column = kw(Keyword::Rename)
.ignore_then(kw(Keyword::Column))
.ignore_then(kw(Keyword::In).or_not())
.ignore_then(kw(Keyword::Table).or_not())
.ignore_then(ident_ctx(IdentSlot::TableName))
.then_ignore(punct(Punct::Colon))
.then(ident_ctx(IdentSlot::Column))
.then_ignore(kw(Keyword::To))
.then(ident_ctx(IdentSlot::NewName))
.map(|((table, old), new)| Command::RenameColumn { table, old, new });
// `change column [in] [table] <T>: <col> (<type>) [flags]`.
let change_column = kw(Keyword::Change)
.ignore_then(kw(Keyword::Column))
.ignore_then(kw(Keyword::In).or_not())
.ignore_then(kw(Keyword::Table).or_not())
.ignore_then(ident_ctx(IdentSlot::TableName))
.then_ignore(punct(Punct::Colon))
.then(ident_ctx(IdentSlot::Column))
.then_ignore(punct(Punct::OpenParen))
.then(type_keyword())
.then_ignore(punct(Punct::CloseParen))
.then(change_column_flags())
.map(|(((table, column), ty), mode)| Command::ChangeColumnType {
table,
column,
ty,
mode,
});
let add_relationship = add_relationship_parser();
let drop_relationship = drop_relationship_parser();
let show_data = kw(Keyword::Show)
.ignore_then(kw(Keyword::Data))
.ignore_then(ident_ctx(IdentSlot::TableName))
.map(|name| Command::ShowData { name });
let show_table = kw(Keyword::Show)
.ignore_then(kw(Keyword::Table))
.ignore_then(ident_ctx(IdentSlot::TableName))
.map(|name| Command::ShowTable { name });
let insert_cmd = insert_parser();
let update_cmd = update_parser();
let delete_cmd = delete_parser();
// The bare-path replay form is intercepted before chumsky
// sees the tokens (ADR-0020 §6); only the quoted form
// arrives here.
let replay = kw(Keyword::Replay)
.ignore_then(string_payload())
.map(|path| Command::Replay { path });
// ---- App-lifecycle commands -----------------------------
// No-arg variants and the keyword-value variants. Path-
// bearing variants (`export <path>`, `import <zip> [as
// <target>]`) are handled by `try_parse_app_path_command`
// BEFORE chumsky runs; the bare-keyword forms below
// surface the `Path: None` / no-source variants for
// empty-prompt completion + usage rendering.
let quit_cmd = kw(Keyword::Quit).map(|()| Command::App(AppCommand::Quit));
let help_cmd = kw(Keyword::Help).map(|()| Command::App(AppCommand::Help));
let rebuild_cmd =
kw(Keyword::Rebuild).map(|()| Command::App(AppCommand::Rebuild));
// `save as` must be tried before bare `save` (more specific).
let save_as_cmd = kw(Keyword::Save)
.then_ignore(kw(Keyword::As))
.map(|()| Command::App(AppCommand::SaveAs));
let save_cmd = kw(Keyword::Save).map(|()| Command::App(AppCommand::Save));
let new_cmd = kw(Keyword::New).map(|()| Command::App(AppCommand::New));
let load_cmd = kw(Keyword::Load).map(|()| Command::App(AppCommand::Load));
let export_no_arg =
kw(Keyword::Export).map(|()| Command::App(AppCommand::Export { path: None }));
// Bare `import` carries an empty path and no target.
let import_no_arg = kw(Keyword::Import).map(|()| {
Command::App(AppCommand::Import {
path: String::new(),
target: None,
})
});
// `mode <value>` and `messages [<value>]` accept either the
// known keyword forms or any identifier — the identifier
// branch funnels through `try_map` into a friendly
// `mode.unknown` / `messages.unknown` error rather than the
// generic structural-error wording. Mirrors the type-name
// pattern in `type_keyword` (ADR-0020 §4).
let known_mode = choice((
kw(Keyword::Simple).to(ModeValue::Simple),
kw(Keyword::Advanced).to(ModeValue::Advanced),
));
// Always fails: any other identifier is reported as an
// unknown mode value.
let unknown_mode = ident_inner().try_map(|s, span| {
Err::<ModeValue, _>(Rich::custom(
span,
crate::t!("mode.unknown", value = s),
))
});
let mode_cmd = kw(Keyword::Mode)
.ignore_then(choice((known_mode, unknown_mode)))
.map(|value| Command::App(AppCommand::Mode { value }));
let known_messages = choice((
kw(Keyword::Short).to(MessagesValue::Short),
kw(Keyword::Verbose).to(MessagesValue::Verbose),
));
// Always fails: any other identifier is reported as an
// unknown messages value.
let unknown_messages = ident_inner().try_map(|s, span| {
Err::<MessagesValue, _>(Rich::custom(
span,
crate::t!("messages.unknown", value = s),
))
});
// The value is optional for `messages` (`or_not`), unlike `mode`.
let messages_cmd = kw(Keyword::Messages)
.ignore_then(choice((known_messages, unknown_messages)).or_not())
.map(|value| Command::App(AppCommand::Messages { value }));
choice((
create_table,
// `drop column` and `drop relationship` come before
// `drop table` because both are more specific —
// chumsky's `choice` tries each in order.
drop_column,
drop_relationship,
drop_table,
add_column,
add_relationship,
rename_column,
change_column,
show_data,
show_table,
insert_cmd,
update_cmd,
delete_cmd,
replay,
// App commands. `save as` before bare `save`; everything
// else order-agnostic.
quit_cmd,
help_cmd,
rebuild_cmd,
save_as_cmd,
save_cmd,
new_cmd,
load_cmd,
export_no_arg,
import_no_arg,
mode_cmd,
messages_cmd,
))
// Reject anything left over after a complete command.
.then_ignore(end())
}
// =========================================================
// Per-command sub-parsers
// =========================================================
/// Parser for `insert into <table> ...`, producing
/// `Command::Insert`. Three tails are accepted, tried in this
/// order:
///
/// 1. `(<col>, ...) values (<value>, ...)`: explicit column list.
/// 2. `values (<value>, ...)`: no column list.
/// 3. `(<value>, ...)`: bare value list, no column list.
///
/// Both lists require at least one comma-separated item.
fn insert_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
    let values_in_parens = punct(Punct::OpenParen)
        .ignore_then(
            value_literal()
                .separated_by(punct(Punct::Comma))
                .at_least(1)
                .collect::<Vec<_>>(),
        )
        .then_ignore(punct(Punct::CloseParen));

    let columns_in_parens = punct(Punct::OpenParen)
        .ignore_then(
            ident_ctx(IdentSlot::Column)
                .separated_by(punct(Punct::Comma))
                .at_least(1)
                .collect::<Vec<_>>(),
        )
        .then_ignore(punct(Punct::CloseParen));

    let columns_then_values = columns_in_parens
        .then_ignore(kw(Keyword::Values))
        .then(values_in_parens.clone())
        .map(|(names, row)| (Some(names), row));
    let keyword_then_values = kw(Keyword::Values)
        .ignore_then(values_in_parens.clone())
        .map(|row| (None, row));
    let values_only = values_in_parens.map(|row| (None, row));

    let tail = choice((columns_then_values, keyword_then_values, values_only));

    kw(Keyword::Insert)
        .ignore_then(kw(Keyword::Into))
        .ignore_then(ident_ctx(IdentSlot::TableName))
        .then(tail)
        .map(|(table, (columns, values))| Command::Insert {
            table,
            columns,
            values,
        })
}
/// Parser for `update <table> set <col> = <value>[, ...] <filter>`,
/// producing `Command::Update`. At least one assignment is
/// required, and the trailing filter clause is mandatory.
fn update_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
    let set_pair = ident_ctx(IdentSlot::Column)
        .then_ignore(punct(Punct::Equals))
        .then(value_literal());
    let set_list = set_pair
        .separated_by(punct(Punct::Comma))
        .at_least(1)
        .collect::<Vec<_>>();

    let head = kw(Keyword::Update)
        .ignore_then(ident_ctx(IdentSlot::TableName))
        .then_ignore(kw(Keyword::Set));

    head.then(set_list)
        .then(filter_clause())
        .map(|((table, assignments), filter)| Command::Update {
            table,
            assignments,
            filter,
        })
}
/// Parser for `delete from <table> <filter>`, producing
/// `Command::Delete`. The filter clause is mandatory.
fn delete_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
    let target_table = kw(Keyword::Delete)
        .ignore_then(kw(Keyword::From))
        .ignore_then(ident_ctx(IdentSlot::TableName));

    target_table
        .then(filter_clause())
        .map(|(table, filter)| Command::Delete { table, filter })
}
/// Parser for the row filter that ends `update` / `delete`:
/// either `where <col> = <value>` (`RowFilter::Where`) or the
/// `--all-rows` flag (`RowFilter::AllRows`).
fn filter_clause<'a>()
-> impl Parser<'a, &'a [Token], RowFilter, extra::Err<Rich<'a, Token>>> + Clone {
    let every_row = flag("all-rows").to(RowFilter::AllRows);
    let matching_rows = kw(Keyword::Where)
        .ignore_then(ident_ctx(IdentSlot::Column))
        .then_ignore(punct(Punct::Equals))
        .then(value_literal())
        .map(|(column, value)| RowFilter::Where { column, value });

    // Deliberately not wrapped in `.labelled()`: the expected-set
    // then lists the individual options (`` `where` ``,
    // `` `--all-rows` ``) rather than one descriptive label. The
    // completion engine needs those constituents to offer Tab
    // candidates (ADR-0022 §8), and the resulting prose
    // ("expected `,`, `where`, or `--all-rows`") reads well
    // enough as is.
    matching_rows.or(every_row)
}
/// Parser for a single value literal: the keywords `null`,
/// `true`, `false`, a number literal, or a string literal, tried
/// in that order.
fn value_literal<'a>()
-> impl Parser<'a, &'a [Token], Value, extra::Err<Rich<'a, Token>>> + Clone {
    let null = kw(Keyword::Null).to(Value::Null);
    let yes = kw(Keyword::True).to(Value::Bool(true));
    let no = kw(Keyword::False).to(Value::Bool(false));

    choice((null, yes, no, number_literal(), string_literal()))
}
/// Parser for
/// `add 1:n relationship [as <name>] from <T>.<col> to <T>.<col>
/// [on delete|update <action>]... [--create-fk]`, producing
/// `Command::AddRelationship`. The `from` endpoint is the parent
/// side and the `to` endpoint is the child side.
fn add_relationship_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
    // The cardinality marker `1:n` arrives as three tokens:
    // Number("1"), Punct(Colon), Identifier("n").
    let digit_one = select_ref! {
        Token { kind: TokenKind::Number(text), .. } if text == "1" => ()
    }
    .labelled("`1`")
    .as_context();
    let letter_n = select_ref! {
        Token { kind: TokenKind::Identifier(text), .. } if text.eq_ignore_ascii_case("n") => ()
    }
    .labelled("`n`")
    .as_context();
    let cardinality = digit_one
        .ignore_then(punct(Punct::Colon))
        .ignore_then(letter_n);

    let as_name = kw(Keyword::As)
        .ignore_then(ident_ctx(IdentSlot::NewName))
        .or_not();

    kw(Keyword::Add)
        .ignore_then(cardinality)
        .ignore_then(kw(Keyword::Relationship))
        .ignore_then(as_name)
        .then_ignore(kw(Keyword::From))
        .then(qualified_column())
        .then_ignore(kw(Keyword::To))
        .then(qualified_column())
        .then(referential_clauses())
        .then(create_fk_flag())
        .map(
            |(
                (
                    ((name, (parent_table, parent_column)), (child_table, child_column)),
                    (on_delete, on_update),
                ),
                create_fk,
            )| Command::AddRelationship {
                name,
                parent_table,
                parent_column,
                child_table,
                child_column,
                on_delete,
                on_update,
                create_fk,
            },
        )
}
/// Parser for `drop relationship <selector>`, producing
/// `Command::DropRelationship`. The selector is either
/// `from <T>.<col> to <T>.<col>` (`RelationshipSelector::Endpoints`,
/// tried first) or a relationship name
/// (`RelationshipSelector::Named`).
fn drop_relationship_parser<'a>()
-> impl Parser<'a, &'a [Token], Command, extra::Err<Rich<'a, Token>>> + Clone {
    let by_name = ident_ctx(IdentSlot::RelationshipName)
        .map(|name| RelationshipSelector::Named { name });

    let by_endpoints = kw(Keyword::From)
        .ignore_then(qualified_column())
        .then_ignore(kw(Keyword::To))
        .then(qualified_column())
        .map(
            |((parent_table, parent_column), (child_table, child_column))| {
                RelationshipSelector::Endpoints {
                    parent_table,
                    parent_column,
                    child_table,
                    child_column,
                }
            },
        );

    kw(Keyword::Drop)
        .ignore_then(kw(Keyword::Relationship))
        .ignore_then(choice((by_endpoints, by_name)))
        .map(|selector| Command::DropRelationship { selector })
}
/// Parser for `<table>.<column>`, yielding the pair
/// `(table, column)`.
fn qualified_column<'a>()
-> impl Parser<'a, &'a [Token], (String, String), extra::Err<Rich<'a, Token>>> + Clone {
    let table = ident_ctx(IdentSlot::TableName);
    let column = ident_ctx(IdentSlot::Column);
    table.then_ignore(punct(Punct::Dot)).then(column)
}
/// Parser for up to two `on delete <action>` / `on update
/// <action>` clauses, in either order. Yields
/// `(on_delete, on_update)`; a side that is not mentioned gets
/// `ReferentialAction::default_action()`. Naming the same side
/// twice is rejected with the
/// `parse.custom.on_action_specified_twice` custom error.
fn referential_clauses<'a>() -> impl Parser<
    'a,
    &'a [Token],
    (ReferentialAction, ReferentialAction),
    extra::Err<Rich<'a, Token>>,
> + Clone {
    let target = kw(Keyword::Delete)
        .to(ReferentialActionTarget::Delete)
        .or(kw(Keyword::Update).to(ReferentialActionTarget::Update));
    // `then` already yields the `(target, action)` pair; no
    // further mapping is needed.
    let clause = kw(Keyword::On)
        .ignore_then(target)
        .then(action_keyword());

    clause
        .repeated()
        .at_most(2)
        .collect::<Vec<_>>()
        .try_map(|clauses, span| {
            let mut on_delete = None;
            let mut on_update = None;
            for (target, action) in clauses {
                let slot = match target {
                    ReferentialActionTarget::Delete => &mut on_delete,
                    ReferentialActionTarget::Update => &mut on_update,
                };
                // A filled slot means this side was already given.
                if slot.is_some() {
                    return Err(Rich::custom(
                        span,
                        crate::t!(
                            "parse.custom.on_action_specified_twice",
                            target = target,
                        ),
                    ));
                }
                *slot = Some(action);
            }
            Ok((
                on_delete.unwrap_or_else(ReferentialAction::default_action),
                on_update.unwrap_or_else(ReferentialAction::default_action),
            ))
        })
}
/// Which side of a referential clause an action applies to:
/// `on delete ...` or `on update ...`.
#[derive(Debug, Clone, Copy)]
enum ReferentialActionTarget {
    Delete,
    Update,
}

/// Renders the target as the lower-case keyword it was parsed from.
impl std::fmt::Display for ReferentialActionTarget {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let word = match *self {
            ReferentialActionTarget::Delete => "delete",
            ReferentialActionTarget::Update => "update",
        };
        f.write_str(word)
    }
}
/// Parser for a referential action: `set null`, `no action`,
/// `cascade`, or `restrict`, tried in that order.
fn action_keyword<'a>()
-> impl Parser<'a, &'a [Token], ReferentialAction, extra::Err<Rich<'a, Token>>> + Clone {
    let set_null = kw(Keyword::Set)
        .ignore_then(kw(Keyword::Null))
        .to(ReferentialAction::SetNull);
    let no_action = kw(Keyword::No)
        .ignore_then(kw(Keyword::Action))
        .to(ReferentialAction::NoAction);
    let cascade = kw(Keyword::Cascade).to(ReferentialAction::Cascade);
    let restrict = kw(Keyword::Restrict).to(ReferentialAction::Restrict);

    choice((set_null, no_action, cascade, restrict))
}
/// Parser for the optional `--create-fk` flag: yields `true`
/// when the flag is present and `false` otherwise.
fn create_fk_flag<'a>()
-> impl Parser<'a, &'a [Token], bool, extra::Err<Rich<'a, Token>>> + Clone {
    let maybe_flag = flag("create-fk").or_not();
    maybe_flag.map(|seen: Option<()>| seen.is_some())
}
/// Parser for the conversion flags after `change column`:
/// `--force-conversion` or `--dont-convert`. No flag yields
/// `ChangeColumnMode::Default` and exactly one yields its mode.
/// More than one flag (including a repeat of the same flag) is
/// rejected with the `parse.custom.change_column_flags_exclusive`
/// custom error.
fn change_column_flags<'a>()
-> impl Parser<'a, &'a [Token], ChangeColumnMode, extra::Err<Rich<'a, Token>>> + Clone {
    let any_mode_flag = choice((
        flag("force-conversion").to(ChangeColumnMode::ForceConversion),
        flag("dont-convert").to(ChangeColumnMode::DontConvert),
    ));

    any_mode_flag
        .repeated()
        .collect::<Vec<_>>()
        .try_map(|seen, span| match seen.first() {
            None => Ok(ChangeColumnMode::Default),
            Some(only) if seen.len() == 1 => Ok(*only),
            Some(_) => Err(Rich::custom(
                span,
                crate::t!("parse.custom.change_column_flags_exclusive"),
            )),
        })
}
/// Parser for the optional `with pk [<col>: <type>, ...]` clause
/// of `create table`, yielding the primary-key column specs:
///
/// - clause absent: empty vector;
/// - bare `with pk`: a single `("id", Type::Serial)` spec;
/// - `with pk a: t, b: u`: the listed specs, at least one.
fn with_pk_clause<'a>()
-> impl Parser<'a, &'a [Token], Vec<(String, Type)>, extra::Err<Rich<'a, Token>>> + Clone {
    // Each PK spec names a NEW column of the table being
    // created, hence `IdentSlot::NewName`. `then` already yields
    // the `(name, type)` pair, so no further mapping is needed.
    let single = ident_ctx(IdentSlot::NewName)
        .then_ignore(punct(Punct::Colon))
        .then(type_keyword());
    let spec_list = single
        .separated_by(punct(Punct::Comma))
        .at_least(1)
        .collect::<Vec<_>>();

    kw(Keyword::With)
        .ignore_then(kw(Keyword::Pk))
        .ignore_then(spec_list.or_not())
        .map(|maybe_specs| {
            // `with pk` alone defaults to a serial id PK.
            maybe_specs.unwrap_or_else(|| vec![("id".to_string(), Type::Serial)])
        })
        .or_not()
        .map(Option::unwrap_or_default)
}
// =========================================================
// Error humanisation
// =========================================================
/// Collapse chumsky's error list into one `ParseError::Invalid`.
///
/// The first custom-reason error wins if there is one, because
/// those carry the hand-tuned `try_map` messages (e.g. "unknown
/// type 'varchar' (expected one of: ...)"). Otherwise the first
/// error is used.
///
/// # Panics
///
/// Panics if `errs` is empty: a failed parse is expected to
/// report at least one error.
fn into_parse_error(errs: &[Rich<'_, Token>], tokens: &[Token], source: &str) -> ParseError {
    let first_custom = errs
        .iter()
        .find(|e| matches!(e.reason(), RichReason::Custom(_)));
    let chosen = match first_custom {
        Some(err) => err,
        None => errs.first().expect("parser failure with no error"),
    };

    let (at_eof, expected) = match chosen.reason() {
        // Custom errors carry no expected-set. `at_eof` errs on
        // the side of `true` (no live overlay; the on-submit
        // error still fires). The docstring on
        // `ParseError::Invalid::at_eof` explains why.
        RichReason::Custom(_) => (true, Vec::new()),
        // Structural failures: `found == None` means the parser
        // ran out of input, and `expected` is the pattern set
        // chumsky was looking for.
        RichReason::ExpectedFound { expected, found } => {
            (found.is_none(), describe_expected(expected))
        }
    };

    ParseError::Invalid {
        message: humanise(chosen, tokens, source),
        position: source_position_at(tokens, chosen.span().start, source),
        at_eof,
        expected,
    }
}
/// Render a chumsky expected-pattern set into the same
/// human-readable forms `humanise()` uses, but as discrete
/// items rather than an oxford-joined string.
///
/// The result is deduplicated and keeps first-occurrence order;
/// it is NOT sorted. The generic `Any` / `SomethingElse` patterns
/// are dropped whenever at least one concrete pattern (token,
/// identifier, label, or end of input) is present.
fn describe_expected(expected: &[RichPattern<'_, Token>]) -> Vec<String> {
// True when the set names at least one concrete pattern.
let has_concrete = expected.iter().any(|p| {
matches!(
p,
RichPattern::Token(_)
| RichPattern::Identifier(_)
| RichPattern::Label(_)
| RichPattern::EndOfInput
)
});
let mut items: Vec<String> = expected
.iter()
.filter(|p| {
!(has_concrete && matches!(p, RichPattern::Any | RichPattern::SomethingElse))
})
.map(describe_pattern)
.collect();
// Dedup preserving first occurrence (which reflects
// chumsky's traversal order — typically source order for
// `or_not` / `choice` chains). Empirically this gives a
// grammar-natural ordering: `to` before `table` in
// `add column [to] [table] …`, which alphabetical
// (table, to) would invert.
let mut seen = std::collections::HashSet::new();
// `insert` returns false for a repeat, so `retain` drops it.
items.retain(|s| seen.insert(s.clone()));
items
}
/// Map a chumsky token-slice index to a byte offset in the
/// original source: the start of the token at that index. An
/// index past the last token (an end-of-input failure) maps to
/// the end of the last token, or to `source.len()` when there
/// are no tokens at all.
fn source_position_at(tokens: &[Token], slice_index: usize, source: &str) -> usize {
    match tokens.get(slice_index) {
        Some(token) => token.span.0,
        None => match tokens.last() {
            Some(final_token) => final_token.span.1,
            None => source.len(),
        },
    }
}
/// Turn one chumsky error into the user-facing message.
///
/// Custom-reason errors return their message unchanged.
/// Structural errors are rendered as
/// "[after `<consumed>`, ]expected <A, B, or C>, found <token>",
/// or "[after `<consumed>`, ]unexpected <token>" when chumsky
/// reported no expected patterns. "end of input" stands in for
/// the found token when input ran out.
fn humanise(err: &Rich<'_, Token>, tokens: &[Token], source: &str) -> String {
    if let RichReason::Custom(msg) = err.reason() {
        return msg.clone();
    }
    let RichReason::ExpectedFound { expected, found } = err.reason() else {
        unreachable!("RichReason has only two variants today");
    };

    // `found` is the offending token, or `None` at end of input.
    let found_str = match found.as_ref() {
        Some(maybe_ref) => describe_token(maybe_ref),
        None => "end of input".to_string(),
    };
    let expected_str = oxford_or(&describe_expected(expected));
    let consumed = consumed_context(tokens, err.span().start, source);

    match (expected.is_empty(), consumed.is_empty()) {
        (true, true) => format!("unexpected {found_str}"),
        (true, false) => format!("after `{consumed}`, unexpected {found_str}"),
        (false, true) => format!("expected {expected_str}, found {found_str}"),
        (false, false) => {
            format!("after `{consumed}`, expected {expected_str}, found {found_str}")
        }
    }
}
/// Render one chumsky expected-pattern as display text: tokens
/// via `describe_token`, identifiers back-quoted, labels
/// verbatim, and fixed phrases for the generic patterns.
fn describe_pattern(p: &RichPattern<'_, Token>) -> String {
    match p {
        RichPattern::Label(text) => text.to_string(),
        RichPattern::Identifier(name) => format!("`{name}`"),
        RichPattern::Token(token) => describe_token(token),
        RichPattern::EndOfInput => String::from("end of input"),
        RichPattern::SomethingElse => String::from("something else"),
        RichPattern::Any => String::from("any token"),
        // `RichPattern` is non_exhaustive, so a catch-all arm is
        // required.
        _ => String::from("<other>"),
    }
}
fn describe_token(t: &Token) -> String {
match &t.kind {
TokenKind::Keyword(k) => format!("`{}`", k.as_str()),
TokenKind::Identifier(s) => format!("`{s}`"),
TokenKind::Number(s) => format!("`{s}`"),
TokenKind::StringLiteral(_) => "string literal".to_string(),
TokenKind::Punct(p) => format!("`{}`", p.as_char()),
TokenKind::Flag(s) => format!("`--{s}`"),
TokenKind::Error(LexError::UnknownChar(c)) => {
format!("unrecognised character `{c}`")
}
TokenKind::Error(LexError::UnterminatedString) => {
"unterminated string literal".to_string()
}
TokenKind::Error(LexError::BadFlag) => "malformed flag (bare `--`)".to_string(),
}
}
/// Join alternatives with "or" and an Oxford comma:
/// `[]` gives "", `[A]` gives "A", `[A, B]` gives "A or B", and
/// three or more give "A, B, or C".
fn oxford_or(items: &[String]) -> String {
    match items.split_last() {
        None => String::new(),
        Some((only, [])) => only.clone(),
        Some((second, [first])) => format!("{first} or {second}"),
        Some((last, head)) => format!("{}, or {last}", head.join(", ")),
    }
}
/// Source text covering every token before the failure point,
/// trimmed of surrounding whitespace and capped to its last
/// `MAX` (40) characters.
///
/// `chumsky_span_start` is the token-slice index where the error
/// starts; the token just before it is the last one consumed.
/// Returns an empty string when nothing was consumed (index 0)
/// or when that index is out of range for `tokens`.
fn consumed_context(tokens: &[Token], chumsky_span_start: usize, source: &str) -> String {
    const MAX: usize = 40;

    let last_consumed = chumsky_span_start
        .checked_sub(1)
        .and_then(|index| tokens.get(index));
    let Some(last_token) = last_consumed else {
        return String::new();
    };

    let prefix = source[..last_token.span.1].trim();
    // Byte offset of the MAX-th character from the end, or 0 when
    // the prefix has MAX characters or fewer. Slicing there keeps
    // whole characters, so multi-byte text is never split.
    let tail_start = prefix
        .char_indices()
        .rev()
        .nth(MAX - 1)
        .map_or(0, |(offset, _)| offset);
    prefix[tail_start..].to_string()
}
// ========================================================= // =========================================================
// Tests // Tests
@@ -1119,6 +300,12 @@ fn consumed_context(tokens: &[Token], chumsky_span_start: usize, source: &str) -
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::dsl::action::ReferentialAction;
use crate::dsl::command::{
ChangeColumnMode, ColumnSpec, RelationshipSelector, RowFilter,
};
use crate::dsl::types::Type;
use crate::dsl::value::Value;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
fn ok(input: &str) -> Command { fn ok(input: &str) -> Command {