ADR-0022 stage 4/8: render-time parse + error overlay

Add `classify_input(&str) -> InputState` that returns one of {Empty, Valid, IncompleteAtEof, DefiniteErrorAt(byte)}. The renderer uses this to overlay tok_error on the failing token of mid-typed input that can never be valid.

ParseError::Invalid gains an `at_eof: bool` field populated by `into_parse_error`:
- structural failures: at_eof = found.is_none() (chumsky's own "ran out of input" discriminator);
- custom errors from try_map: at_eof = true, conservatively.

The conservative custom-error classification is a deliberate under-highlighting bias. It means three classes of error currently DO NOT get a live red overlay (only on submit):
- "tables need at least one column" (correct: this is genuinely an incomplete state — adding `with pk ...` fixes it);
- "unknown type 'varchar'" (sub-optimal: should overlay);
- "--force-conversion and --dont-convert are mutually exclusive" (sub-optimal: should overlay).

The trade-off is documented inline on the at_eof field. A future refinement could carry an explicit definite/incomplete tag through Custom errors (would change RichReason::Custom's payload from String to a typed value).

render_input_runs now applies the overlay on the failing token's run before injecting the cursor. Tokens after the error keep their lex-class colour — fixes one thing at a time per ADR-0022 §4. Lex errors continue to render in tok_error from stage 2.

Pattern-matches on ParseError::Invalid throughout the codebase use `..` and are unaffected; only the two constructions in parser.rs needed updating.

Tests: 693 passing, 0 failing, 1 ignored (683 baseline → +10: 7 classify + overlay tests, +1 adapted full-command test, +2 valid-vs-incomplete coverage). Clippy clean.

Stage 5 lights up the hint panel as the verbose-feedback surface — needs the InputState classifier from this stage.
2026-05-10 17:37:50 +00:00
parent 39da399add
commit 313d4f8346
2 changed files with 228 additions and 15 deletions
@@ -25,7 +25,27 @@ use crate::dsl::value::Value;
 #[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
 pub enum ParseError {
    #[error("could not parse command: {message}")]
-    Invalid { message: String, position: usize },
+    Invalid {
        message: String,
        position: usize,
        /// True when the parse failed because more input was
        /// expected — i.e. a structural failure with no
        /// next-token to point at. Used by the input renderer
        /// (ADR-0022 §4) to distinguish "incomplete but
        /// plausible" from "definite error" mid-typing.
        ///
        /// Custom errors raised by `try_map` are conservatively
        /// classified as `at_eof = true` because we cannot, at
        /// this layer, tell apart "tables need at least one
        /// column" (incomplete: more input would help) from
        /// "--force-conversion and --dont-convert are mutually
        /// exclusive" (definite: user must remove a token).
        /// Erring on `true` means custom-error inputs do not
        /// get a live error overlay; the parse error still
        /// fires on submit. A future refinement may carry an
        /// explicit `is_definite` tag through custom errors.
        at_eof: bool,
    },
    #[error("empty input")]
    Empty,
 }
@@ -38,6 +58,14 @@ impl ParseError {
            Self::Empty => None,
        }
    }
    /// Reports whether this error should be read as "more input
    /// was expected" rather than "a specific token is wrong".
    ///
    /// `Invalid` answers with its stored `at_eof` flag; `Empty`
    /// always answers `true`.
    #[must_use]
    pub const fn at_eof(&self) -> bool {
        match *self {
            Self::Empty => true,
            // `bool` is `Copy`, so the flag is read straight out
            // of the borrowed value.
            Self::Invalid { at_eof, .. } => at_eof,
        }
    }
 }
 /// Parse a single DSL command end-to-end.
@@ -103,6 +131,7 @@ fn try_parse_replay_with_bare_path(
        return Some(Err(ParseError::Invalid {
            message: "expected a path after `replay`".to_string(),
            position: after_replay,
            at_eof: true,
        }));
    }
    Some(Ok(Command::Replay {
@@ -657,7 +686,21 @@ fn into_parse_error(errs: &[Rich<'_, Token>], tokens: &[Token], source: &str) ->
    let chumsky_span = chosen.span();
    let position = source_position_at(tokens, chumsky_span.start, source);
    let message = humanise(chosen, tokens, source);
-    ParseError::Invalid { message, position }
+    let at_eof = match chosen.reason() {
        // Structural failures know whether they ran out of
        // input — `found = None` ⇔ EOF.
        RichReason::ExpectedFound { found, .. } => found.is_none(),
        // Custom errors: see the docstring on
        // `ParseError::Invalid::at_eof` for why we err on the
        // side of `true` (no live overlay; on-submit error
        // still fires).
        RichReason::Custom(_) => true,
    };
    ParseError::Invalid {
        message,
        position,
        at_eof,
    }
 }
 /// Translate a chumsky token-slice index into a byte position
@@ -25,6 +25,7 @@
 use ratatui::style::{Modifier, Style};
 use crate::dsl::lexer::lex;
 use crate::dsl::{ParseError, parse_command};
 use crate::theme::Theme;
 /// A run of text with its byte range in the source and the
@@ -51,15 +52,79 @@ impl StyledRun {
 /// Build the run sequence for the input panel.
 ///
 /// Lexes `input`, assigns each token its `theme.token_color`,
-/// preserves whitespace gaps as `theme.fg` runs, then injects
+/// applies the parse-error overlay if the input is in the
-/// the cursor at `cursor_byte` (clamped to `input.len()`).
+/// definite-error state (ADR-0022 §1, §4), preserves whitespace
 /// gaps as `theme.fg` runs, then injects the cursor at
 /// `cursor_byte` (clamped to `input.len()`).
 #[must_use]
 pub fn render_input_runs(input: &str, cursor_byte: usize, theme: &Theme) -> Vec<StyledRun> {
    let mut styled = lex_to_runs(input, theme);
    // Only a definite error recolours a token; every other
    // classification leaves the lex-class colours untouched.
    match classify_input(input) {
        InputState::DefiniteErrorAt(error_byte) => overlay_error(&mut styled, error_byte, theme),
        InputState::Empty | InputState::Valid | InputState::IncompleteAtEof => {}
    }
    // The cursor is injected after the overlay has been applied.
    inject_cursor(&mut styled, input, cursor_byte, theme);
    styled
 }
 /// Mid-typing classification of the input line (ADR-0022 §1).
 ///
 /// Distinguishes "the user isn't done yet" from "this token
 /// can never fit", alongside the empty and fully-valid cases.
 /// Drives error overlay (this stage) and the hint panel
 /// ambient mode (stage 5).
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum InputState {
    /// No tokens at all (empty / whitespace-only input).
    /// `classify_input` also returns this when the parser
    /// reports `ParseError::Empty`.
    Empty,
    /// Parses to a complete `Command`. The user can submit.
    Valid,
    /// Parse failed because more input was expected — every
    /// consumed token fits a known command, just not all of
    /// it is here yet.
    ///
    /// `classify_input` returns this whenever the parse
    /// error's `at_eof()` is true, which also covers custom
    /// `try_map` errors (conservatively flagged `at_eof`).
    IncompleteAtEof,
    /// Parse failed at a token strictly inside the input —
    /// no continuation can recover. The byte offset is the
    /// failing token's start (the parse error's `position`);
    /// `render_input_runs` recolours the run starting there.
    DefiniteErrorAt(usize),
 }
 /// Classify `input` as empty, valid, incomplete-at-EOF, or a
 /// definite error at a byte offset — see [`InputState`].
 /// Cheap (lex + parse) per ADR-0022 §13.
 #[must_use]
 pub fn classify_input(input: &str) -> InputState {
    // Blank input is settled without consulting the parser.
    if input.trim().is_empty() {
        return InputState::Empty;
    }
    match parse_command(input) {
        Ok(_) => InputState::Valid,
        Err(ParseError::Empty) => InputState::Empty,
        // `at_eof` is the parser's own verdict: set when more
        // input would (potentially) help. Custom `try_map`
        // failures currently carry `at_eof = true` as well —
        // see the docstring on `ParseError::Invalid::at_eof`.
        Err(ParseError::Invalid { at_eof: true, .. }) => InputState::IncompleteAtEof,
        // Otherwise a specific token is in the wrong place;
        // report where it starts.
        Err(ParseError::Invalid { position, .. }) => InputState::DefiniteErrorAt(position),
    }
 }
 /// Recolour the first run that starts exactly at `error_byte`
 /// with `theme.tok_error`, keeping the rest of its style.
 ///
 /// Does nothing when no run starts at `error_byte`; that would
 /// mean the failure lies past the last token (an EOF failure
 /// misclassified as definite), which `classify_input`'s
 /// contract is meant to rule out.
 fn overlay_error(runs: &mut [StyledRun], error_byte: usize, theme: &Theme) {
    for run in runs {
        if run.byte_range.0 == error_byte {
            // Override only the foreground colour; the other
            // style bits the base run carried are preserved.
            run.style = run.style.fg(theme.tok_error);
            return;
        }
    }
 }
 /// Cursor-less variant: tokenises `input` into styled runs
 /// covering the full byte range, with no inverted cursor.
 /// Used by the echo-line renderer (ADR-0022 §5) where there's
@@ -258,21 +323,126 @@ mod tests {
        assert!(reversed(last));
    }
    // ---- classify_input + error overlay (stage 4) ----
    #[test]
-    fn full_command_lexes_to_each_token_class() {
+    fn classify_empty_input() {
-        let theme = dark();
+        assert_eq!(classify_input(""), InputState::Empty);
-        let runs = render_input_runs(
+        assert_eq!(classify_input("   "), InputState::Empty);
-            "insert into T values (1, 'hi', null) --all-rows",
+    }
-            47,
+
-            &theme,
+    #[test]
    fn classify_complete_command_is_valid() {
        assert_eq!(
            classify_input("create table Customers with pk"),
            InputState::Valid,
        );
-        // Spot-check: there's at least one run of each token class.
+    }
    #[test]
    fn classify_partial_keyword_only_is_incomplete() {
        // A lone `create` runs out of input while the parser is
        // still expecting `table`.
        let state = classify_input("create");
        assert_eq!(state, InputState::IncompleteAtEof);
    }
    #[test]
    fn classify_partial_command_mid_clause_is_incomplete() {
        // Stopping after the table name is still incomplete.
        let state = classify_input("create table Customers");
        assert_eq!(state, InputState::IncompleteAtEof);
    }
    #[test]
    fn classify_unknown_command_is_definite_error_at_zero() {
        // The very first token is not a command, so the error
        // points at byte 0.
        let state = classify_input("frobulate widgets");
        assert_eq!(state, InputState::DefiniteErrorAt(0));
    }
    #[test]
    fn classify_wrong_token_mid_command_is_definite_error_at_token_position() {
        // `create table ` is 13 bytes including the trailing
        // space, so `1Bad` starts at byte 13. It lexes as
        // Number(13)+Identifier(14); the parser expects an
        // identifier at 13, finds a Number, and fails there.
        let state = classify_input("create table 1Bad");
        if let InputState::DefiniteErrorAt(pos) = state {
            assert_eq!(pos, 13);
        } else {
            panic!("expected DefiniteErrorAt(13), got {state:?}");
        }
    }
    #[test]
    fn classify_trailing_whitespace_does_not_create_definite_error() {
        // Padding an incomplete-at-EOF input with spaces must
        // not turn it into a definite error.
        let state = classify_input("create   ");
        assert_eq!(state, InputState::IncompleteAtEof);
    }
    #[test]
    fn render_input_runs_overlays_error_on_failing_token() {
        let theme = dark();
        let runs = render_input_runs("frobulate widgets", 17, &theme);

        // `frobulate` occupies bytes 0..9 and is the failing
        // token, so it carries the definite-error colour.
        let first = &runs[0];
        assert_eq!(first.byte_range, (0, 9));
        assert_eq!(first.style.fg, Some(theme.tok_error));

        // `widgets` (bytes 10..17) comes after the failing
        // token and must not receive the overlay.
        let widgets_fg = runs
            .iter()
            .find(|run| run.byte_range == (10, 17))
            .map(|run| run.style.fg);
        assert!(widgets_fg.is_some());
        assert_eq!(
            widgets_fg.unwrap(),
            Some(theme.tok_identifier),
            "tokens after the error stay in their lex-class colour",
        );
    }
    #[test]
    fn render_input_runs_does_not_overlay_for_incomplete_input() {
        let theme = dark();
        let runs = render_input_runs("create", 6, &theme);
        // Incomplete input is not an error: `create` keeps its
        // keyword colour.
        let first = &runs[0];
        assert_eq!(first.byte_range, (0, 6));
        assert_eq!(first.style.fg, Some(theme.tok_keyword));
    }
    #[test]
    fn render_input_runs_does_not_overlay_for_valid_input() {
        let theme = dark();
        let runs = render_input_runs("create table T with pk", 22, &theme);
        // A valid command must not contain a single run in the
        // error colour.
        let error_fg = Some(theme.tok_error);
        for run in &runs {
            assert_ne!(
                run.style.fg,
                error_fg,
                "no error overlay for valid input: {run:?}",
            );
        }
    }
    #[test]
    fn full_valid_command_lexes_to_each_token_class() {
        // Use a valid command — `update ... --all-rows` —
        // so the error overlay (stage 4) doesn't replace any
        // class colours with tok_error. Tokens: keyword(s),
        // identifier(s), string literal, punct (=), flag.
        let theme = dark();
        let input = "update T set Name='hi' --all-rows";
        let runs = render_input_runs(input, input.len(), &theme);
        let fgs: Vec<_> = runs.iter().filter_map(|r| r.style.fg).collect();
-        assert!(fgs.contains(&theme.tok_keyword)); // insert / into / values / null
+        assert!(fgs.contains(&theme.tok_keyword)); // update / set
-        assert!(fgs.contains(&theme.tok_identifier)); // T / hi (string is separate)
+        assert!(fgs.contains(&theme.tok_identifier)); // T / Name
        assert!(fgs.contains(&theme.tok_number)); // 1
        assert!(fgs.contains(&theme.tok_string)); // 'hi'
-        assert!(fgs.contains(&theme.tok_punct)); // ( , )
+        assert!(fgs.contains(&theme.tok_punct)); // =
        assert!(fgs.contains(&theme.tok_flag)); // --all-rows
        // The valid command must not have any error overlay.
        for r in &runs {
            assert_ne!(r.style.fg, Some(theme.tok_error));
        }
    }
 }