parser: structural error rendering, source echo, and caret pointer

The old humanise() fell back to chumsky's terse Display for non-
custom errors and appended "(near `X`)", which on top of an
already-cryptic "found 'i' expected ':'" turned the message into
a puzzle. Now humanise() reads the structured RichReason, lists
expected RichPatterns in plain prose, and prefixes the message
with the consumed context.

Before:  parse error: found 'i' expected ':' (near `i`)
After:   parse error: after `change column Rich`, expected `:`,
         found `i`

dispatch_dsl additionally echoes the source line on parse failure
(matching the success path's "running: ...") and prints a `^` caret
under the failure position, so the user can see what got submitted
and where the parser broke without re-reading from scratch.

Known limit: keyword_ci's custom-error mismatches don't aggregate
across choice alternatives, so messages like "expected DATA or
TABLE" (bison-equivalent) aren't yet possible. That's a structural
fix to the keyword matcher, deferred to a future parser-affordances
ADR.

Tests: +2 structural-error regression tests.
This commit is contained in:
claude@clouddev1
2026-05-08 13:21:39 +00:00
parent 00947b928c
commit 7dfa718c6e
2 changed files with 169 additions and 11 deletions
+28
View File
@@ -717,6 +717,22 @@ impl App {
}
Err(ParseError::Empty) => Vec::new(),
Err(err) => {
// Echo the source line so the user can see what
// got submitted (and copy-paste it back to fix).
self.push_output(OutputLine {
text: format!("running: {input}"),
kind: OutputKind::Echo,
mode_at_submission: submission_mode,
});
// Caret pointer at the failure position, when we
// have one. Aligned to the "running: " prefix so
// the caret sits under the offending character.
if let ParseError::Invalid { position, .. } = &err {
let prefix = "running: ";
let trimmed_offset = leading_trim_offset(input);
let pad = prefix.chars().count() + trimmed_offset + position;
self.note_error(format!("{}^", " ".repeat(pad)));
}
self.note_error(format!("parse error: {}", parse_error_message(&err)));
Vec::new()
}
@@ -820,6 +836,9 @@ impl App {
// Wrap the command portion in quotes so the message
// reads cleanly: "...failed: <reason>" rather than the
// command running into "failed: ..." with no break.
// `note_error` splits on newlines internally — refusal
// diagnostics from `change column …` (ADR-0017 §7) flow
// through as a multi-line bordered table.
self.note_error(format!(
"\"{} {}\" failed: {error}",
command.verb(),
@@ -1154,6 +1173,7 @@ impl App {
" drop column [from] [table] <T>: <col>",
" rename column [in] [table] <T>: <old> to <new>",
" change column [in] [table] <T>: <col> (<newtype>)",
" [--force-conversion | --dont-convert]",
" add 1:n relationship [as <name>] from <P>.<col> to <C>.<col>",
" [on delete <action>] [on update <action>] [--create-fk]",
" drop relationship <name>",
@@ -1251,6 +1271,14 @@ fn parse_error_message(err: &ParseError) -> String {
}
}
/// Number of leading whitespace characters in `s`. The parser
/// trims its input before parsing, so a position returned by the
/// parser is relative to the trimmed string. The caret needs the
/// pre-trim offset to align under the user's literal input.
fn leading_trim_offset(s: &str) -> usize {
s.chars().take_while(|c| c.is_whitespace()).count()
}
fn render_cascade_effect(effect: &CascadeEffect) -> String {
use crate::dsl::ReferentialAction;
let what = match effect.action {
+141 -11
View File
@@ -10,7 +10,7 @@
//! so callers do not depend on chumsky's API surface — that
//! keeps the parser swappable if we ever revisit the choice.
use chumsky::error::RichReason;
use chumsky::error::{RichPattern, RichReason};
use chumsky::prelude::*;
use crate::dsl::action::ReferentialAction;
@@ -69,21 +69,120 @@ const fn has_custom_reason<T, C>(reason: &RichReason<'_, T, C>) -> bool {
}
fn humanise(err: &Rich<'_, char>, input: &str) -> String {
// For custom errors, the underlying message is what we want
// to show, not chumsky's "found ... expected ..." rendering.
// Custom errors carry hand-tuned messages from `try_map`
// (e.g. "unknown type 'varchar'"); show those verbatim.
if let Some(msg) = first_custom_message(err.reason()) {
return msg;
}
let span = err.span();
let snippet: String = input
.chars()
.skip(span.start)
.take((span.end - span.start).max(1))
// Otherwise the error is chumsky's structural one: at this
// position, the parser was looking for one of `expected` and
// found `found` instead. Render that in plain prose rather
// than falling back to chumsky's terse Display.
match err.reason() {
RichReason::ExpectedFound { expected, found } => {
format_expected_found(expected, found.as_ref(), err.span().start, input)
}
RichReason::Custom(_) => unreachable!("handled above"),
}
}
fn format_expected_found(
expected: &[RichPattern<'_, char>],
found: Option<&chumsky::util::MaybeRef<'_, char>>,
pos: usize,
input: &str,
) -> String {
let found_str = found.map_or_else(|| "end of input".to_string(), |c| describe_char(**c));
if expected.is_empty() {
return format!("unexpected {found_str}");
}
// If the expected set contains concrete patterns (named
// tokens, identifiers, labels), drop the generic `Any` /
// `SomethingElse` wildcards — they're an artefact of our
// `any().filter(...)` keyword matchers and add noise rather
// than information.
let has_concrete = expected.iter().any(|p| {
matches!(
p,
RichPattern::Token(_)
| RichPattern::Identifier(_)
| RichPattern::Label(_)
| RichPattern::EndOfInput
)
});
let mut described: Vec<String> = expected
.iter()
.filter(|p| {
!(has_concrete
&& matches!(p, RichPattern::Any | RichPattern::SomethingElse))
})
.map(describe_pattern)
.collect();
if snippet.is_empty() {
format!("{err}")
described.sort();
described.dedup();
let expected_str = oxford_or(&described);
// Provide a "context" snippet of what successfully parsed
// before the failure point, so the user knows where in the
// command the error sits without re-reading from scratch.
// We trim to a sensible length to avoid wall-of-text errors.
let consumed = consumed_context(input, pos);
if consumed.is_empty() {
format!("expected {expected_str}, found {found_str}")
} else {
format!("{err} (near `{snippet}`)")
format!("after `{consumed}`, expected {expected_str}, found {found_str}")
}
}
fn describe_pattern(p: &RichPattern<'_, char>) -> String {
match p {
RichPattern::Token(c) => format!("`{}`", **c),
RichPattern::Identifier(s) => format!("`{s}`"),
RichPattern::Label(s) => s.to_string(),
RichPattern::Any => "any character".to_string(),
RichPattern::SomethingElse => "something else".to_string(),
RichPattern::EndOfInput => "end of input".to_string(),
// RichPattern is non_exhaustive; cover the catch-all.
_ => "<other>".to_string(),
}
}
fn describe_char(c: char) -> String {
if c.is_control() {
format!("control character (U+{:04X})", c as u32)
} else {
format!("`{c}`")
}
}
/// English-style "A, B, or C" / "A or B" / "A".
fn oxford_or(items: &[String]) -> String {
match items {
[] => String::new(),
[a] => a.clone(),
[a, b] => format!("{a} or {b}"),
rest => {
let (last, head) = rest.split_last().expect("len >= 3");
format!("{}, or {last}", head.join(", "))
}
}
}
/// The substring of `input` from the start up to `pos`, trimmed
/// of trailing whitespace. Returns at most the last ~40 chars
/// (with a leading `…`) so a long line still produces a readable
/// "after `…blah blah`" hint.
fn consumed_context(input: &str, pos: usize) -> String {
let prefix: String = input.chars().take(pos).collect();
let trimmed = prefix.trim_end();
if trimmed.is_empty() {
return String::new();
}
const MAX: usize = 40;
if trimmed.chars().count() <= MAX {
trimmed.to_string()
} else {
let tail: String = trimmed.chars().rev().take(MAX).collect::<Vec<_>>().into_iter().rev().collect();
format!("{tail}")
}
}
@@ -640,6 +739,8 @@ fn identifier<'a>()
s
})
.padded()
.labelled("identifier")
.as_context()
}
/// One of the supported type keywords, mapped to `Type`. The
@@ -704,6 +805,35 @@ mod tests {
parse_command(input).expect_err("expected parse error")
}
fn err_message(input: &str) -> String {
match err(input) {
ParseError::Invalid { message, .. } => message,
ParseError::Empty => panic!("unexpected empty error"),
}
}
#[test]
fn structural_error_for_show_data_without_arg() {
// ADR-0017 follow-up: humanise() surfaces chumsky's
// structural information instead of the terse "found
// end of input expected any" rendering.
let msg = err_message("show data");
assert!(msg.contains("after `show data`"), "{msg}");
assert!(msg.contains("expected identifier"), "{msg}");
assert!(msg.contains("found end of input"), "{msg}");
}
#[test]
fn structural_error_for_change_column_with_swapped_args() {
// User wrote column-name-first; parser accepts that
// identifier as the table name and then expects `:`.
// The error message names the consumed prefix and the
// expected continuation.
let msg = err_message("change column Rich in Customers: Rich (text)");
assert!(msg.contains("after `change column Rich`"), "{msg}");
assert!(msg.contains("expected `:`"), "{msg}");
}
fn col(name: &str, ty: Type) -> ColumnSpec {
ColumnSpec {
name: name.to_string(),