From 649fdcb38ea8ed4a376d9c22b93a389634bab98f Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 5 Jun 2026 14:57:20 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20H1a=20parse-error=20gaps=20G2=E2=80=93G?= =?UTF-8?q?4=20+=20advanced=20near-miss=20matrix=20(ADR-0042)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the three remaining ADR-0042 triage gaps, each test-first, and lock the advanced-mode near-miss matrix. G2 — bare `select` dumped the 14-item expression first-set. Collapse it to "a projection: `*`, a column, or an expression" in the error message only (parser::format_walker_error), detected by the joint `distinct`+`all` quantifier signature unique to a projection start. Render-only: completion/hints still expand the full set (typing-surface matrix unchanged). G3 — the usage block was mode-blind: advanced `create table` showed the DSL `create table … with pk …` template. usage_key(s)_for_input gain mode-aware `_in_mode` variants selecting candidates by CommandCategory; render_usage_block and the typing-time ambient usage thread the submission mode. Advanced `create` now shows both SQL forms. A fallback covers shared SQL nodes (insert/update/delete) that declare no usage_ids of their own — without it they regressed to the available-commands fallback (caught by the new advanced matrix). G4 — `with` borrowed `select`'s usage template; give it its own parse.usage.with CTE template. Tests: new near_miss_matrix_advanced_mode (12 SQL-surface cases incl. the available-commands regression guard) + per-gap tests; removed the temporary baseline_dump. Full suite green (lib 1578 / it 386 / typing_surface_matrix 192); clippy clean. --- src/app.rs | 14 +- src/dsl/grammar/data.rs | 2 +- src/dsl/grammar/mod.rs | 79 +++++++++++- src/dsl/parser.rs | 27 +++- src/friendly/keys.rs | 2 + src/friendly/strings/en-US.yaml | 13 ++ src/input_render.rs | 4 +- tests/it/parse_error_pedagogy.rs | 211 +++++++++++++++++++------------ 8 files changed, 259 insertions(+), 93 deletions(-) diff --git a/src/app.rs b/src/app.rs index 6e6989c..d2313ae 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1521,7 +1521,7 @@ impl App { for note in notes { self.note_error(note); } - self.note_error(render_usage_block(input)); + self.note_error(render_usage_block(input, mode)); return vec![Action::JournalFailure { source: input.to_string(), }]; @@ -1601,7 +1601,7 @@ impl App { // known command-entry keyword was consumed) or // the available-commands fallback (§5). if let ParseError::Invalid { .. } = &err { - self.note_error(render_usage_block(input)); + self.note_error(render_usage_block(input, mode)); } // ADR-0034 §1/§2: a submitted line that failed to // parse is journalled `err` so it is recallable @@ -2557,16 +2557,18 @@ fn parse_error_message(err: &ParseError) -> String { /// renders every catalog template — multi-form families like /// `drop` show every variant. Otherwise the fallback lists every /// entry keyword alphabetically. -fn render_usage_block(input: &str) -> String { +fn render_usage_block(input: &str, mode: Mode) -> String { // A multi-form command that has committed to a form // (`add index …`) shows only that form's usage; a bare // multi-form entry word (`add`) shows the whole family. + // Mode-aware (ADR-0042 G3): in advanced mode a shared entry + // word shows its SQL forms, not the DSL templates. let catalog_keys: Vec<&'static str> = - crate::dsl::grammar::usage_key_for_input(input) + crate::dsl::grammar::usage_key_for_input_in_mode(input, mode) .map(|key| vec![key]) .or_else(|| { - crate::dsl::grammar::usage_keys_for_input(input) - .map(|(_word, all)| all.to_vec()) + crate::dsl::grammar::usage_keys_for_input_in_mode(input, mode) + .map(|(_word, all)| all) }) .unwrap_or_default(); if !catalog_keys.is_empty() { diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index c33e0ca..81b901a 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -1445,7 +1445,7 @@ pub static WITH: CommandNode = CommandNode { shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL), ast_builder: build_select, help_id: None, - usage_ids: &["parse.usage.select"],}; + usage_ids: &["parse.usage.with"],}; /// SQL `INSERT` — the `Advanced`-category node of the shared /// `insert` entry word (ADR-0033 §2, Amendment 1, sub-phase 3j). diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index f39fdc5..c77ebec 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -533,13 +533,73 @@ pub struct CommandNode { /// Returns the canonical (primary-form) entry literal and the /// `usage_ids` list, or `None` if no entry word matches. #[must_use] -pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, &'static [&'static str])> { +pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, Vec<&'static str>)> { + usage_keys_for_input_in_mode(source, crate::mode::Mode::Simple) +} + +/// Mode-aware variant of [`usage_keys_for_input`] (ADR-0042 G3). +/// +/// A shared entry word (`create`, `drop`, `insert`, …) registers a +/// `Simple` DSL node *and* one or more `Advanced` SQL nodes. The +/// usage block must reflect the surface the user is actually typing: +/// the SQL forms in `Advanced` mode, the DSL forms in `Simple` mode +/// — otherwise advanced-mode `create` shows the DSL `create table … +/// with pk …` template, which is not valid SQL. +/// +/// Selection prefers candidates whose [`CommandCategory`] matches +/// the mode; if the entry word has none in that category (an +/// app-lifecycle command is `Simple`-only yet usable in both modes), +/// every candidate is used. The returned keys are the union of the +/// selected nodes' `usage_ids`, de-duplicated in registry order — so +/// advanced `create` shows both `sql_create_table` and +/// `sql_create_index`. +#[must_use] +pub fn usage_keys_for_input_in_mode( + source: &str, + mode: crate::mode::Mode, +) -> Option<(&'static str, Vec<&'static str>)> { use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace}; let start = skip_whitespace(source, 0); let (kw_start, kw_end) = consume_ident(source, start)?; let word = &source[kw_start..kw_end]; - let (_, node) = command_for_entry_word(word)?; - Some((node.entry.primary, node.usage_ids)) + let candidates = commands_for_entry_word(word); + if candidates.is_empty() { + return None; + } + let want = if mode == crate::mode::Mode::Advanced { + CommandCategory::Advanced + } else { + CommandCategory::Simple + }; + let union = |nodes: &[(usize, &'static CommandNode, CommandCategory)]| -> Vec<&'static str> { + let mut keys: Vec<&'static str> = Vec::new(); + for (_, node, _) in nodes { + for k in node.usage_ids { + if !keys.contains(k) { + keys.push(*k); + } + } + } + keys + }; + let matched: Vec<(usize, &'static CommandNode, CommandCategory)> = + candidates.iter().copied().filter(|(_, _, cat)| *cat == want).collect(); + // Prefer the mode-matching nodes' usage. But a shared SQL node + // (`SQL_INSERT` / `SQL_UPDATE` / `SQL_DELETE`) declares no + // `usage_ids` of its own — it reuses the DSL template. When the + // mode-preferred set yields no usage keys, fall back to every + // candidate so the entry word still shows a usage block rather + // than the available-commands fallback (regression-locked by + // the advanced near-miss matrix). + let mut keys = union(&matched); + if keys.is_empty() { + keys = union(&candidates); + } + if keys.is_empty() { + return None; + } + let entry = candidates[0].1.entry.primary; + Some((entry, keys)) } /// The single usage template most relevant to `source`, when @@ -555,8 +615,19 @@ pub fn usage_keys_for_input(source: &str) -> Option<(&'static str, &'static [&'s /// show the whole family or nothing. #[must_use] pub fn usage_key_for_input(source: &str) -> Option<&'static str> { + usage_key_for_input_in_mode(source, crate::mode::Mode::Simple) +} + +/// Mode-aware variant of [`usage_key_for_input`] (ADR-0042 G3) — +/// disambiguates the single most-relevant usage key from the +/// mode-selected key set. +#[must_use] +pub fn usage_key_for_input_in_mode( + source: &str, + mode: crate::mode::Mode, +) -> Option<&'static str> { use crate::dsl::walker::lex_helpers::{consume_ident, skip_whitespace}; - let (_entry, keys) = usage_keys_for_input(source)?; + let (_entry, keys) = usage_keys_for_input_in_mode(source, mode)?; let first = *keys.first()?; if keys.len() == 1 { return Some(first); diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index d9a99ea..1fd2465 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -296,14 +296,37 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String { } } +/// ADR-0042 G2: a projection start (`select |`, or the projection +/// position inside a subquery / CTE body) expects the full +/// expression first-set — 14 alternatives — plus the SELECT +/// quantifiers `distinct` and `all`. Those two quantifiers are +/// jointly expectable *only* at a projection start, so their joint +/// presence is a precise signature for collapsing the noisy list +/// into one gloss. Render-only: this fires inside +/// `format_walker_error` (the error message), not in the expected +/// set the completion/hint layer consumes. +fn is_select_projection_start(expected: &[crate::dsl::walker::outcome::Expectation]) -> bool { + use crate::dsl::walker::outcome::Expectation; + let has_word = |w: &str| { + expected + .iter() + .any(|e| matches!(e, Expectation::Word(x) if x.eq_ignore_ascii_case(w))) + }; + has_word("distinct") && has_word("all") +} + fn format_walker_error( source: &str, position: usize, at_eof: bool, expected: &[crate::dsl::walker::outcome::Expectation], ) -> String { - let parts: Vec = expected.iter().map(format_expectation).collect(); - let joined = oxford_join(&parts); + let joined = if is_select_projection_start(expected) { + crate::t!("parse.expect.select_projection") + } else { + let parts: Vec = expected.iter().map(format_expectation).collect(); + oxford_join(&parts) + }; // Mirror the chumsky-side wording: "after ``, // expected …" when the parser already consumed something diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 1cae166..0dbc0e6 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -307,6 +307,8 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("parse.usage.show_data", &[]), ("parse.usage.show_table", &[]), ("parse.usage.update", &[]), + ("parse.usage.with", &[]), + ("parse.expect.select_projection", &[]), // ---- Project lifecycle event notes ---- ("project.export_failed", &["error"]), ("project.export_ok", &["path"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 0916eb1..0e37724 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -491,6 +491,15 @@ parse: # command-keyword renderings (each from # `parse.token.keyword.*`). available_commands: "available commands: {commands}" + # ADR-0042 G2: collapse the SELECT projection-start expression + # first-set (14 expression-starters plus the `distinct`/`all` + # quantifiers) into one learner-sized gloss in the error + # message. The detector keys on `distinct` AND `all` being + # jointly expectable, which only happens at a projection start — + # so the raw set is replaced *in the error line only*; + # completion/hints still expand the full first-set. + expect: + select_projection: "a projection: `*`, a column, or an expression" # Per-command usage templates (ADR-0021 §1). Rendered under a # "usage:" prefix when a parse fails after consuming a # known command-entry keyword. The bracket convention `[...]` @@ -550,6 +559,10 @@ parse: replay: "replay | replay ''" # SQL `SELECT` (advanced mode; ADR-0030 / ADR-0031). select: "select (* | [ as ][, ...]) from [where ] [order by [ asc|desc][, ...]] [limit ]" + # SQL `WITH` / CTE (advanced mode; ADR-0032). G4 (ADR-0042): + # its own template — `with` previously borrowed `select`'s, + # which never showed the CTE shape. + with: "with [recursive] [([, ...])] as ()[, ...] select ..." # App-lifecycle commands (per ADR-0003, surfaced through # the parser so they participate in usage templates + # completion). Templates here describe the surface diff --git a/src/input_render.rs b/src/input_render.rs index 6f87742..ec19458 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -866,7 +866,9 @@ fn ambient_hint_core_in_mode( // The form the user has committed to drives the // usage template — `add index …` shows the // `add index` usage, not the first `add` form. - let usage = crate::dsl::grammar::usage_key_for_input(input) + // Mode-aware (ADR-0042 G3): advanced-mode shared + // entry words show their SQL form, not the DSL one. + let usage = crate::dsl::grammar::usage_key_for_input_in_mode(input, mode) .map(|key| crate::friendly::translate(key, &[])); Some(AmbientHint::Prose(match usage { Some(u) => crate::t!( diff --git a/tests/it/parse_error_pedagogy.rs b/tests/it/parse_error_pedagogy.rs index a3350db..c975cfd 100644 --- a/tests/it/parse_error_pedagogy.rs +++ b/tests/it/parse_error_pedagogy.rs @@ -60,85 +60,6 @@ fn dump(input: &str, lines: &[String]) -> String { ) } -/// TEMP baseline-capture (ADR-0042 §1 step 1). Lenient: does not -/// assert pass/fail — just dumps every output line so we can read -/// the current rendering before writing assertions. Run with: -/// cargo test -p ... --test it baseline_dump -- --nocapture --ignored -/// Removed once the matrix assertions land. -#[test] -#[ignore = "baseline capture only; run with --ignored --nocapture"] -fn baseline_dump() { - // (input, advanced?) — salient near-misses across entry words. - let cases: &[(&str, bool)] = &[ - // --- app-lifecycle (simple) --- - ("quit now", false), - ("import", false), - ("mode sideways", false), - ("messages louder", false), - ("copy everything", false), - ("save sideways", false), - // --- DDL bare + missing-slot (simple) --- - ("create", false), - ("create table", false), - ("create table T", false), - ("drop", false), - ("drop table", false), - ("add", false), - ("add column", false), - ("rename", false), - ("rename column", false), - ("change", false), - ("change column", false), - // --- data bare + missing-clause (simple) --- - ("show", false), - ("show data", false), - ("insert", false), - ("insert into", false), - ("insert into T", false), - ("insert into T ('Oli')", false), - ("update", false), - ("update T", false), - ("update T set x=1", false), - ("delete", false), - ("delete from", false), - ("delete from T", false), - ("replay", false), - ("explain", false), - // --- advanced-only entry words --- - ("select", true), - ("select *", true), - ("select * from", true), - ("with", true), - ("alter", true), - ("alter table T", true), - // advanced-only word typed in SIMPLE mode → "this is SQL" hint - ("alter table T add column c int", false), - ("select * from T", false), - // --- advanced SQL variants + genuine gaps --- - ("insert into T", true), - ("update T", true), - ("delete from", true), - ("create", true), - ("create table", true), - ("create index", true), - ("drop", true), - ("drop index", true), - ]; - for (input, advanced) in cases { - let mut app = App::new(); - if *advanced { - app.mode = Mode::Advanced; - } - type_str(&mut app, input); - let actions = submit(&mut app); - let mode = if *advanced { "ADV" } else { "SIM" }; - eprintln!("\n=== [{mode}] {input:?} → actions: {actions:?}"); - for l in &app.output { - eprintln!(" [{:?}] {}", l.kind, l.text); - } - } -} - /// The simple-mode near-miss matrix (ADR-0042 §1). Each row is a /// near-correct input plus substrings that MUST appear across its /// rendered error lines — the structural "name the missing @@ -203,6 +124,138 @@ fn near_miss_matrix_simple_mode() { } } +/// Helper: advanced-mode error lines for `input`. +fn advanced_error_lines_for(input: &str) -> Vec { + let mut app = App::new(); + app.mode = Mode::Advanced; + type_str(&mut app, input); + let _ = submit(&mut app); + app.output + .iter() + .filter(|l| l.kind == OutputKind::Error) + .map(|l| l.text.clone()) + .collect() +} + +#[test] +fn advanced_bare_select_collapses_projection_first_set() { + // ADR-0042 G2: bare `select` dumped the full 14-item + // expression first-set ("`not`, `-`, …, `case`, column name, + // `distinct`, or `all`"). Collapse it to a learner-sized + // projection gloss in the error MESSAGE only — completion + // still expands the raw set (locked by the typing-surface + // matrix). + let lines = advanced_error_lines_for("select"); + let joined = lines.join("\n"); + let dump_msg = dump("select", &lines); + assert!( + joined.contains("a projection: `*`, a column, or an expression"), + "bare `select` should collapse to the projection gloss\n{dump_msg}", + ); + let err_line = lines + .iter() + .find(|l| l.starts_with("parse error")) + .expect("parse error line"); + assert!( + !err_line.contains("`exists`") && !err_line.contains("`case`"), + "projection gloss should replace the raw expression first-set\n{dump_msg}", + ); +} + +#[test] +fn advanced_mode_usage_block_shows_sql_templates_not_dsl() { + // ADR-0042 G3: `render_usage_block` was mode-blind — it + // resolved shared entry words to the first-registered (Simple) + // node, so advanced-mode `create` showed the DSL `create table + // … with pk …` template, which is not valid SQL. Mode-aware + // selection now shows the SQL forms. + let lines = advanced_error_lines_for("create"); + let joined = lines.join("\n"); + let dump_msg = dump("create", &lines); + assert!( + joined.contains("create table [if not exists]"), + "advanced `create` should show the SQL create-table usage\n{dump_msg}", + ); + assert!( + joined.contains("create [unique] index"), + "advanced `create` should show the SQL create-index usage\n{dump_msg}", + ); + assert!( + !joined.contains("with pk"), + "advanced `create` must NOT show the DSL `with pk` template\n{dump_msg}", + ); +} + +/// The advanced-mode near-miss matrix (ADR-0042 §1/§3). Mirrors +/// the simple-mode matrix for the SQL surface. Every row must show +/// a per-command `usage:` block (never the available-commands +/// fallback — that is for unconsumed entry words only). +#[test] +fn near_miss_matrix_advanced_mode() { + let matrix: &[(&str, &[&str])] = &[ + // SQL select / with (G2, G4) + ("select", &["expected a projection: `*`, a column, or an expression", "select (* |"]), + ("select * from", &["after `select * from`, expected table name", "select (* |"]), + ("with", &["after `with`, expected identifier or `recursive`", "with [recursive]", "as ("]), + // create / drop / alter — SQL templates (G3) + ("create", &["after `create`, expected `table`", "create table [if not exists]", "create [unique] index"]), + ("create table", &["after `create table`, expected identifier or `if`", "create table [if not exists]"]), + ("create index", &["after `create index`, expected `on`", "create [unique] index"]), + ("drop", &["after `drop`, expected `table`", "drop table [if exists]"]), + ("alter", &["after `alter`, expected `table`", "alter table
add column"]), + ("alter table T", &["expected `add`, `drop`, `rename`, or `alter`", "alter table
"]), + // shared insert/update/delete — must show usage, not the + // available-commands fallback (regression guard for the + // empty-usage_ids SQL nodes). + ("insert into T", &["after `insert into T`, expected `values`, `with`, `select`, or `(`", "insert into
"]), + ("update T", &["after `update T`, expected `set`", "update
set"]), + ("delete from", &["after `delete from`, expected table name", "delete from
"]), + ]; + for (input, needles) in matrix { + let lines = advanced_error_lines_for(input); + let dump_msg = dump(input, &lines); + assert!( + lines.iter().any(|l| l.starts_with("parse error")), + "missing `parse error` line for {input:?}\n{dump_msg}", + ); + // A consumed entry word must yield a usage block, never the + // available-commands fallback. + assert!( + !lines.iter().any(|l| l.starts_with("available commands:")), + "advanced {input:?} fell back to available-commands instead of a usage block\n{dump_msg}", + ); + let joined = lines.join("\n"); + for needle in *needles { + assert!( + joined.contains(needle), + "advanced near-miss {input:?} missing expected substring {needle:?}\n{dump_msg}", + ); + } + } +} + +#[test] +fn with_alone_renders_cte_usage_not_select() { + // ADR-0042 G4: `with` (advanced-only CTE entry word) borrowed + // the `select` usage template, which never mentions the CTE + // shape. It now carries its own `parse.usage.with`. + let mut app = App::new(); + app.mode = Mode::Advanced; + type_str(&mut app, "with"); + let _ = submit(&mut app); + let lines: Vec = app + .output + .iter() + .filter(|l| l.kind == OutputKind::Error) + .map(|l| l.text.clone()) + .collect(); + let dump_msg = dump("with", &lines); + assert!( + lines.iter().any(|l| l.trim_start().starts_with("with ") && l.contains("as (")), + "missing CTE-specific `with … as (…)` usage template\n{dump_msg}", + ); +} + #[test] fn create_alone_renders_create_table_usage() { let lines = error_lines_for("create");