From 7dfa718c6e342446356f0caa3a3b491217526985 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 8 May 2026 13:21:39 +0000 Subject: [PATCH] parser: structural error rendering, source echo, and caret pointer The old humanise() fell back to chumsky's terse Display for non- custom errors and appended "(near `X`)", which on top of an already-cryptic "found 'i' expected ':'" turned the message into a puzzle. Now humanise() reads the structured RichReason, lists expected RichPatterns in plain prose, and prefixes the message with the consumed context. Before: parse error: found 'i' expected ':' (near `i`) After: parse error: after `change column Rich`, expected `:`, found `i` dispatch_dsl additionally echoes the source line on parse failure (matching the success path's "running: ...") and prints a `^` caret under the failure position, so the user can see what got submitted and where the parser broke without re-reading from scratch. Known limit: keyword_ci's custom-error mismatches don't aggregate across choice alternatives, so messages like "expected DATA or TABLE" (bison-equivalent) aren't yet possible. That's a structural fix to the keyword matcher, deferred to a future parser-affordances ADR. Tests: +2 structural-error regression tests. --- src/app.rs | 28 +++++++++ src/dsl/parser.rs | 152 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 169 insertions(+), 11 deletions(-) diff --git a/src/app.rs b/src/app.rs index b8e30ef..bd6650c 100644 --- a/src/app.rs +++ b/src/app.rs @@ -717,6 +717,22 @@ impl App { } Err(ParseError::Empty) => Vec::new(), Err(err) => { + // Echo the source line so the user can see what + // got submitted (and copy-paste it back to fix). + self.push_output(OutputLine { + text: format!("running: {input}"), + kind: OutputKind::Echo, + mode_at_submission: submission_mode, + }); + // Caret pointer at the failure position, when we + // have one. Aligned to the "running: " prefix so + // the caret sits under the offending character. + if let ParseError::Invalid { position, .. } = &err { + let prefix = "running: "; + let trimmed_offset = leading_trim_offset(input); + let pad = prefix.chars().count() + trimmed_offset + position; + self.note_error(format!("{}^", " ".repeat(pad))); + } self.note_error(format!("parse error: {}", parse_error_message(&err))); Vec::new() } @@ -820,6 +836,9 @@ impl App { // Wrap the command portion in quotes so the message // reads cleanly: "...failed: " rather than the // command running into "failed: ..." with no break. + // `note_error` splits on newlines internally — refusal + // diagnostics from `change column …` (ADR-0017 §7) flow + // through as a multi-line bordered table. self.note_error(format!( "\"{} {}\" failed: {error}", command.verb(), @@ -1154,6 +1173,7 @@ impl App { " drop column [from] [table] : ", " rename column [in] [table] : to ", " change column [in] [table] : ()", + " [--force-conversion | --dont-convert]", " add 1:n relationship [as ] from

. to .", " [on delete ] [on update ] [--create-fk]", " drop relationship ", @@ -1251,6 +1271,14 @@ fn parse_error_message(err: &ParseError) -> String { } } +/// Number of leading whitespace characters in `s`. The parser +/// trims its input before parsing, so a position returned by the +/// parser is relative to the trimmed string. The caret needs the +/// pre-trim offset to align under the user's literal input. +fn leading_trim_offset(s: &str) -> usize { + s.chars().take_while(|c| c.is_whitespace()).count() +} + fn render_cascade_effect(effect: &CascadeEffect) -> String { use crate::dsl::ReferentialAction; let what = match effect.action { diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 4f9bb70..ea5505c 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -10,7 +10,7 @@ //! so callers do not depend on chumsky's API surface — that //! keeps the parser swappable if we ever revisit the choice. -use chumsky::error::RichReason; +use chumsky::error::{RichPattern, RichReason}; use chumsky::prelude::*; use crate::dsl::action::ReferentialAction; @@ -69,21 +69,120 @@ const fn has_custom_reason(reason: &RichReason<'_, T, C>) -> bool { } fn humanise(err: &Rich<'_, char>, input: &str) -> String { - // For custom errors, the underlying message is what we want - // to show, not chumsky's "found ... expected ..." rendering. + // Custom errors carry hand-tuned messages from `try_map` + // (e.g. "unknown type 'varchar'"); show those verbatim. if let Some(msg) = first_custom_message(err.reason()) { return msg; } - let span = err.span(); - let snippet: String = input - .chars() - .skip(span.start) - .take((span.end - span.start).max(1)) + // Otherwise the error is chumsky's structural one: at this + // position, the parser was looking for one of `expected` and + // found `found` instead. Render that in plain prose rather + // than falling back to chumsky's terse Display. + match err.reason() { + RichReason::ExpectedFound { expected, found } => { + format_expected_found(expected, found.as_ref(), err.span().start, input) + } + RichReason::Custom(_) => unreachable!("handled above"), + } +} + +fn format_expected_found( + expected: &[RichPattern<'_, char>], + found: Option<&chumsky::util::MaybeRef<'_, char>>, + pos: usize, + input: &str, +) -> String { + let found_str = found.map_or_else(|| "end of input".to_string(), |c| describe_char(**c)); + if expected.is_empty() { + return format!("unexpected {found_str}"); + } + // If the expected set contains concrete patterns (named + // tokens, identifiers, labels), drop the generic `Any` / + // `SomethingElse` wildcards — they're an artefact of our + // `any().filter(...)` keyword matchers and add noise rather + // than information. + let has_concrete = expected.iter().any(|p| { + matches!( + p, + RichPattern::Token(_) + | RichPattern::Identifier(_) + | RichPattern::Label(_) + | RichPattern::EndOfInput + ) + }); + let mut described: Vec = expected + .iter() + .filter(|p| { + !(has_concrete + && matches!(p, RichPattern::Any | RichPattern::SomethingElse)) + }) + .map(describe_pattern) .collect(); - if snippet.is_empty() { - format!("{err}") + described.sort(); + described.dedup(); + let expected_str = oxford_or(&described); + // Provide a "context" snippet of what successfully parsed + // before the failure point, so the user knows where in the + // command the error sits without re-reading from scratch. + // We trim to a sensible length to avoid wall-of-text errors. + let consumed = consumed_context(input, pos); + if consumed.is_empty() { + format!("expected {expected_str}, found {found_str}") } else { - format!("{err} (near `{snippet}`)") + format!("after `{consumed}`, expected {expected_str}, found {found_str}") + } +} + +fn describe_pattern(p: &RichPattern<'_, char>) -> String { + match p { + RichPattern::Token(c) => format!("`{}`", **c), + RichPattern::Identifier(s) => format!("`{s}`"), + RichPattern::Label(s) => s.to_string(), + RichPattern::Any => "any character".to_string(), + RichPattern::SomethingElse => "something else".to_string(), + RichPattern::EndOfInput => "end of input".to_string(), + // RichPattern is non_exhaustive; cover the catch-all. + _ => "".to_string(), + } +} + +fn describe_char(c: char) -> String { + if c.is_control() { + format!("control character (U+{:04X})", c as u32) + } else { + format!("`{c}`") + } +} + +/// English-style "A, B, or C" / "A or B" / "A". +fn oxford_or(items: &[String]) -> String { + match items { + [] => String::new(), + [a] => a.clone(), + [a, b] => format!("{a} or {b}"), + rest => { + let (last, head) = rest.split_last().expect("len >= 3"); + format!("{}, or {last}", head.join(", ")) + } + } +} + +/// The substring of `input` from the start up to `pos`, trimmed +/// of trailing whitespace. Returns at most the last ~40 chars +/// (with a leading `…`) so a long line still produces a readable +/// "after `…blah blah`" hint. +fn consumed_context(input: &str, pos: usize) -> String { + let prefix: String = input.chars().take(pos).collect(); + let trimmed = prefix.trim_end(); + if trimmed.is_empty() { + return String::new(); + } + const MAX: usize = 40; + if trimmed.chars().count() <= MAX { + trimmed.to_string() + } else { + let tail: String = trimmed.chars().rev().take(MAX).collect::>().into_iter().rev().collect(); + format!("…{tail}") } } @@ -640,6 +739,8 @@ fn identifier<'a>() s }) .padded() + .labelled("identifier") + .as_context() } /// One of the supported type keywords, mapped to `Type`. The @@ -704,6 +805,35 @@ mod tests { parse_command(input).expect_err("expected parse error") } + fn err_message(input: &str) -> String { + match err(input) { + ParseError::Invalid { message, .. } => message, + ParseError::Empty => panic!("unexpected empty error"), + } + } + + #[test] + fn structural_error_for_show_data_without_arg() { + // ADR-0017 follow-up: humanise() surfaces chumsky's + // structural information instead of the terse "found + // end of input expected any" rendering. + let msg = err_message("show data"); + assert!(msg.contains("after `show data`"), "{msg}"); + assert!(msg.contains("expected identifier"), "{msg}"); + assert!(msg.contains("found end of input"), "{msg}"); + } + + #[test] + fn structural_error_for_change_column_with_swapped_args() { + // User wrote column-name-first; parser accepts that + // identifier as the table name and then expects `:`. + // The error message names the consumed prefix and the + // expected continuation. + let msg = err_message("change column Rich in Customers: Rich (text)"); + assert!(msg.contains("after `change column Rich`"), "{msg}"); + assert!(msg.contains("expected `:`"), "{msg}"); + } + fn col(name: &str, ty: Type) -> ColumnSpec { ColumnSpec { name: name.to_string(),