parser: structural error rendering, source echo, and caret pointer
The old humanise() fell back to chumsky's terse Display for non-
custom errors and appended "(near `X`)", which on top of an
already-cryptic "found 'i' expected ':'" turned the message into
a puzzle. Now humanise() reads the structured RichReason, lists
expected RichPatterns in plain prose, and prefixes the message
with the consumed context.
Before: parse error: found 'i' expected ':' (near `i`)
After: parse error: after `change column Rich`, expected `:`,
found `i`
dispatch_dsl additionally echoes the source line on parse failure
(matching the success path's "running: ...") and prints a `^` caret
under the failure position, so the user can see what got submitted
and where the parser broke without re-reading from scratch.
Known limit: keyword_ci's custom-error mismatches don't aggregate
across choice alternatives, so messages like "expected DATA or
TABLE" (bison-equivalent) aren't yet possible. That's a structural
fix to the keyword matcher, deferred to a future parser-affordances
ADR.
Tests: +2 structural-error regression tests.
This commit is contained in:
+28
@@ -717,6 +717,22 @@ impl App {
|
|||||||
}
|
}
|
||||||
Err(ParseError::Empty) => Vec::new(),
|
Err(ParseError::Empty) => Vec::new(),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
// Echo the source line so the user can see what
|
||||||
|
// got submitted (and copy-paste it back to fix).
|
||||||
|
self.push_output(OutputLine {
|
||||||
|
text: format!("running: {input}"),
|
||||||
|
kind: OutputKind::Echo,
|
||||||
|
mode_at_submission: submission_mode,
|
||||||
|
});
|
||||||
|
// Caret pointer at the failure position, when we
|
||||||
|
// have one. Aligned to the "running: " prefix so
|
||||||
|
// the caret sits under the offending character.
|
||||||
|
if let ParseError::Invalid { position, .. } = &err {
|
||||||
|
let prefix = "running: ";
|
||||||
|
let trimmed_offset = leading_trim_offset(input);
|
||||||
|
let pad = prefix.chars().count() + trimmed_offset + position;
|
||||||
|
self.note_error(format!("{}^", " ".repeat(pad)));
|
||||||
|
}
|
||||||
self.note_error(format!("parse error: {}", parse_error_message(&err)));
|
self.note_error(format!("parse error: {}", parse_error_message(&err)));
|
||||||
Vec::new()
|
Vec::new()
|
||||||
}
|
}
|
||||||
@@ -820,6 +836,9 @@ impl App {
|
|||||||
// Wrap the command portion in quotes so the message
|
// Wrap the command portion in quotes so the message
|
||||||
// reads cleanly: "...failed: <reason>" rather than the
|
// reads cleanly: "...failed: <reason>" rather than the
|
||||||
// command running into "failed: ..." with no break.
|
// command running into "failed: ..." with no break.
|
||||||
|
// `note_error` splits on newlines internally — refusal
|
||||||
|
// diagnostics from `change column …` (ADR-0017 §7) flow
|
||||||
|
// through as a multi-line bordered table.
|
||||||
self.note_error(format!(
|
self.note_error(format!(
|
||||||
"\"{} {}\" failed: {error}",
|
"\"{} {}\" failed: {error}",
|
||||||
command.verb(),
|
command.verb(),
|
||||||
@@ -1154,6 +1173,7 @@ impl App {
|
|||||||
" drop column [from] [table] <T>: <col>",
|
" drop column [from] [table] <T>: <col>",
|
||||||
" rename column [in] [table] <T>: <old> to <new>",
|
" rename column [in] [table] <T>: <old> to <new>",
|
||||||
" change column [in] [table] <T>: <col> (<newtype>)",
|
" change column [in] [table] <T>: <col> (<newtype>)",
|
||||||
|
" [--force-conversion | --dont-convert]",
|
||||||
" add 1:n relationship [as <name>] from <P>.<col> to <C>.<col>",
|
" add 1:n relationship [as <name>] from <P>.<col> to <C>.<col>",
|
||||||
" [on delete <action>] [on update <action>] [--create-fk]",
|
" [on delete <action>] [on update <action>] [--create-fk]",
|
||||||
" drop relationship <name>",
|
" drop relationship <name>",
|
||||||
@@ -1251,6 +1271,14 @@ fn parse_error_message(err: &ParseError) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Number of leading whitespace characters in `s`. The parser
|
||||||
|
/// trims its input before parsing, so a position returned by the
|
||||||
|
/// parser is relative to the trimmed string. The caret needs the
|
||||||
|
/// pre-trim offset to align under the user's literal input.
|
||||||
|
fn leading_trim_offset(s: &str) -> usize {
|
||||||
|
s.chars().take_while(|c| c.is_whitespace()).count()
|
||||||
|
}
|
||||||
|
|
||||||
fn render_cascade_effect(effect: &CascadeEffect) -> String {
|
fn render_cascade_effect(effect: &CascadeEffect) -> String {
|
||||||
use crate::dsl::ReferentialAction;
|
use crate::dsl::ReferentialAction;
|
||||||
let what = match effect.action {
|
let what = match effect.action {
|
||||||
|
|||||||
+141
-11
@@ -10,7 +10,7 @@
|
|||||||
//! so callers do not depend on chumsky's API surface — that
|
//! so callers do not depend on chumsky's API surface — that
|
||||||
//! keeps the parser swappable if we ever revisit the choice.
|
//! keeps the parser swappable if we ever revisit the choice.
|
||||||
|
|
||||||
use chumsky::error::RichReason;
|
use chumsky::error::{RichPattern, RichReason};
|
||||||
use chumsky::prelude::*;
|
use chumsky::prelude::*;
|
||||||
|
|
||||||
use crate::dsl::action::ReferentialAction;
|
use crate::dsl::action::ReferentialAction;
|
||||||
@@ -69,21 +69,120 @@ const fn has_custom_reason<T, C>(reason: &RichReason<'_, T, C>) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn humanise(err: &Rich<'_, char>, input: &str) -> String {
|
fn humanise(err: &Rich<'_, char>, input: &str) -> String {
|
||||||
// For custom errors, the underlying message is what we want
|
// Custom errors carry hand-tuned messages from `try_map`
|
||||||
// to show, not chumsky's "found ... expected ..." rendering.
|
// (e.g. "unknown type 'varchar'"); show those verbatim.
|
||||||
if let Some(msg) = first_custom_message(err.reason()) {
|
if let Some(msg) = first_custom_message(err.reason()) {
|
||||||
return msg;
|
return msg;
|
||||||
}
|
}
|
||||||
let span = err.span();
|
// Otherwise the error is chumsky's structural one: at this
|
||||||
let snippet: String = input
|
// position, the parser was looking for one of `expected` and
|
||||||
.chars()
|
// found `found` instead. Render that in plain prose rather
|
||||||
.skip(span.start)
|
// than falling back to chumsky's terse Display.
|
||||||
.take((span.end - span.start).max(1))
|
match err.reason() {
|
||||||
|
RichReason::ExpectedFound { expected, found } => {
|
||||||
|
format_expected_found(expected, found.as_ref(), err.span().start, input)
|
||||||
|
}
|
||||||
|
RichReason::Custom(_) => unreachable!("handled above"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_expected_found(
|
||||||
|
expected: &[RichPattern<'_, char>],
|
||||||
|
found: Option<&chumsky::util::MaybeRef<'_, char>>,
|
||||||
|
pos: usize,
|
||||||
|
input: &str,
|
||||||
|
) -> String {
|
||||||
|
let found_str = found.map_or_else(|| "end of input".to_string(), |c| describe_char(**c));
|
||||||
|
if expected.is_empty() {
|
||||||
|
return format!("unexpected {found_str}");
|
||||||
|
}
|
||||||
|
// If the expected set contains concrete patterns (named
|
||||||
|
// tokens, identifiers, labels), drop the generic `Any` /
|
||||||
|
// `SomethingElse` wildcards — they're an artefact of our
|
||||||
|
// `any().filter(...)` keyword matchers and add noise rather
|
||||||
|
// than information.
|
||||||
|
let has_concrete = expected.iter().any(|p| {
|
||||||
|
matches!(
|
||||||
|
p,
|
||||||
|
RichPattern::Token(_)
|
||||||
|
| RichPattern::Identifier(_)
|
||||||
|
| RichPattern::Label(_)
|
||||||
|
| RichPattern::EndOfInput
|
||||||
|
)
|
||||||
|
});
|
||||||
|
let mut described: Vec<String> = expected
|
||||||
|
.iter()
|
||||||
|
.filter(|p| {
|
||||||
|
!(has_concrete
|
||||||
|
&& matches!(p, RichPattern::Any | RichPattern::SomethingElse))
|
||||||
|
})
|
||||||
|
.map(describe_pattern)
|
||||||
.collect();
|
.collect();
|
||||||
if snippet.is_empty() {
|
described.sort();
|
||||||
format!("{err}")
|
described.dedup();
|
||||||
|
let expected_str = oxford_or(&described);
|
||||||
|
// Provide a "context" snippet of what successfully parsed
|
||||||
|
// before the failure point, so the user knows where in the
|
||||||
|
// command the error sits without re-reading from scratch.
|
||||||
|
// We trim to a sensible length to avoid wall-of-text errors.
|
||||||
|
let consumed = consumed_context(input, pos);
|
||||||
|
if consumed.is_empty() {
|
||||||
|
format!("expected {expected_str}, found {found_str}")
|
||||||
} else {
|
} else {
|
||||||
format!("{err} (near `{snippet}`)")
|
format!("after `{consumed}`, expected {expected_str}, found {found_str}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn describe_pattern(p: &RichPattern<'_, char>) -> String {
|
||||||
|
match p {
|
||||||
|
RichPattern::Token(c) => format!("`{}`", **c),
|
||||||
|
RichPattern::Identifier(s) => format!("`{s}`"),
|
||||||
|
RichPattern::Label(s) => s.to_string(),
|
||||||
|
RichPattern::Any => "any character".to_string(),
|
||||||
|
RichPattern::SomethingElse => "something else".to_string(),
|
||||||
|
RichPattern::EndOfInput => "end of input".to_string(),
|
||||||
|
// RichPattern is non_exhaustive; cover the catch-all.
|
||||||
|
_ => "<other>".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn describe_char(c: char) -> String {
|
||||||
|
if c.is_control() {
|
||||||
|
format!("control character (U+{:04X})", c as u32)
|
||||||
|
} else {
|
||||||
|
format!("`{c}`")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// English-style "A, B, or C" / "A or B" / "A".
|
||||||
|
fn oxford_or(items: &[String]) -> String {
|
||||||
|
match items {
|
||||||
|
[] => String::new(),
|
||||||
|
[a] => a.clone(),
|
||||||
|
[a, b] => format!("{a} or {b}"),
|
||||||
|
rest => {
|
||||||
|
let (last, head) = rest.split_last().expect("len >= 3");
|
||||||
|
format!("{}, or {last}", head.join(", "))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The substring of `input` from the start up to `pos`, trimmed
|
||||||
|
/// of trailing whitespace. Returns at most the last ~40 chars
|
||||||
|
/// (with a leading `…`) so a long line still produces a readable
|
||||||
|
/// "after `…blah blah`" hint.
|
||||||
|
fn consumed_context(input: &str, pos: usize) -> String {
|
||||||
|
let prefix: String = input.chars().take(pos).collect();
|
||||||
|
let trimmed = prefix.trim_end();
|
||||||
|
if trimmed.is_empty() {
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
const MAX: usize = 40;
|
||||||
|
if trimmed.chars().count() <= MAX {
|
||||||
|
trimmed.to_string()
|
||||||
|
} else {
|
||||||
|
let tail: String = trimmed.chars().rev().take(MAX).collect::<Vec<_>>().into_iter().rev().collect();
|
||||||
|
format!("…{tail}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -640,6 +739,8 @@ fn identifier<'a>()
|
|||||||
s
|
s
|
||||||
})
|
})
|
||||||
.padded()
|
.padded()
|
||||||
|
.labelled("identifier")
|
||||||
|
.as_context()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// One of the supported type keywords, mapped to `Type`. The
|
/// One of the supported type keywords, mapped to `Type`. The
|
||||||
@@ -704,6 +805,35 @@ mod tests {
|
|||||||
parse_command(input).expect_err("expected parse error")
|
parse_command(input).expect_err("expected parse error")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn err_message(input: &str) -> String {
|
||||||
|
match err(input) {
|
||||||
|
ParseError::Invalid { message, .. } => message,
|
||||||
|
ParseError::Empty => panic!("unexpected empty error"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn structural_error_for_show_data_without_arg() {
|
||||||
|
// ADR-0017 follow-up: humanise() surfaces chumsky's
|
||||||
|
// structural information instead of the terse "found
|
||||||
|
// end of input expected any" rendering.
|
||||||
|
let msg = err_message("show data");
|
||||||
|
assert!(msg.contains("after `show data`"), "{msg}");
|
||||||
|
assert!(msg.contains("expected identifier"), "{msg}");
|
||||||
|
assert!(msg.contains("found end of input"), "{msg}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn structural_error_for_change_column_with_swapped_args() {
|
||||||
|
// User wrote column-name-first; parser accepts that
|
||||||
|
// identifier as the table name and then expects `:`.
|
||||||
|
// The error message names the consumed prefix and the
|
||||||
|
// expected continuation.
|
||||||
|
let msg = err_message("change column Rich in Customers: Rich (text)");
|
||||||
|
assert!(msg.contains("after `change column Rich`"), "{msg}");
|
||||||
|
assert!(msg.contains("expected `:`"), "{msg}");
|
||||||
|
}
|
||||||
|
|
||||||
fn col(name: &str, ty: Type) -> ColumnSpec {
|
fn col(name: &str, ty: Type) -> ColumnSpec {
|
||||||
ColumnSpec {
|
ColumnSpec {
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
|
|||||||
Reference in New Issue
Block a user