ADR-0022 stage 4/8: render-time parse + error overlay

Add `classify_input(&str) -> InputState` that returns one of {Empty, Valid, IncompleteAtEof, DefiniteErrorAt(byte)}. The renderer uses this to overlay tok_error on the failing token of mid-typed input that can never be valid.

ParseError::Invalid gains an `at_eof: bool` field populated by `into_parse_error`:
- structural failures: at_eof = found.is_none() (chumsky's own "ran out of input" discriminator);
- custom errors from try_map: at_eof = true, conservatively.

The conservative custom-error classification is a deliberate under-highlighting bias. It means three classes of error currently DO NOT get a live red overlay (they are reported only on submit):
- "tables need at least one column" (correct: this is genuinely an incomplete state — adding `with pk ...` fixes it);
- "unknown type 'varchar'" (sub-optimal: should overlay);
- "--force-conversion and --dont-convert are mutually exclusive" (sub-optimal: should overlay).

The trade-off is documented inline on the at_eof field. A future refinement could carry an explicit definite/incomplete tag through Custom errors (this would change RichReason::Custom's payload from String to a typed value).

render_input_runs now applies the overlay on the failing token's run before injecting the cursor. Tokens after the error keep their lex-class colour — the user fixes one thing at a time, per ADR-0022 §4. Lex errors continue to render in tok_error, as they have since stage 2.

Pattern-matches on ParseError::Invalid throughout the codebase use `..` and are unaffected; only the two constructions in parser.rs needed updating.

Tests: 693 passing, 0 failing, 1 ignored (683 baseline → +10: 7 classify + overlay tests, +1 adapted full-command test, +2 valid-vs-incomplete coverage). Clippy clean.

Stage 5 lights up the hint panel as the verbose-feedback surface; it needs the InputState classifier from this stage.
2026-05-10 17:37:50 +00:00
parent 39da399add
commit 313d4f8346
2 changed files with 228 additions and 15 deletions
@@ -25,6 +25,7 @@
 use ratatui::style::{Modifier, Style};

 use crate::dsl::lexer::lex;
+use crate::dsl::{ParseError, parse_command};
 use crate::theme::Theme;

 /// A run of text with its byte range in the source and the
@@ -51,15 +52,79 @@ impl StyledRun {
 /// Build the run sequence for the input panel.
 ///
 /// Lexes `input`, assigns each token its `theme.token_color`,
-/// preserves whitespace gaps as `theme.fg` runs, then injects
-/// the cursor at `cursor_byte` (clamped to `input.len()`).
+/// applies the parse-error overlay if the input is in the
+/// definite-error state (ADR-0022 §1, §4), preserves whitespace
+/// gaps as `theme.fg` runs, then injects the cursor at
+/// `cursor_byte` (clamped to `input.len()`).
 #[must_use]
 pub fn render_input_runs(input: &str, cursor_byte: usize, theme: &Theme) -> Vec<StyledRun> {
    let mut runs = lex_to_runs(input, theme);
+    if let InputState::DefiniteErrorAt(pos) = classify_input(input) {
+        overlay_error(&mut runs, pos, theme);
+    }
    inject_cursor(&mut runs, input, cursor_byte, theme);
    runs
 }

+/// Mid-typing classification of the input (ADR-0022 §1).
+///
+/// Distinguishes "the user isn't done yet" from "this token
+/// can never fit". Drives error overlay (this stage) and the
+/// hint panel ambient mode (stage 5).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum InputState {
+    /// No tokens at all (empty / whitespace-only input).
+    Empty,
+    /// Parses to a complete `Command`. The user can submit.
+    Valid,
+    /// Parse failed because more input was expected — every
+    /// consumed token fits a known command, just not all of
+    /// it is here yet.
+    IncompleteAtEof,
+    /// Parse failed at a token strictly inside the input —
+    /// no continuation can recover. The byte offset is the
+    /// failing token's start.
+    DefiniteErrorAt(usize),
+}
+
+/// Classify `input` into one of the four `InputState` variants.
+/// Cheap (lex + parse) per ADR-0022 §13.
+#[must_use]
+pub fn classify_input(input: &str) -> InputState {
+    if input.trim().is_empty() {
+        return InputState::Empty;
+    }
+    match parse_command(input) {
+        Ok(_) => InputState::Valid,
+        Err(ParseError::Empty) => InputState::Empty,
+        Err(err @ ParseError::Invalid { position, .. }) => {
+            // `at_eof` is the parser's own classification: true
+            // when more input would (potentially) help, false
+            // when a specific token is in the wrong place.
+            // Custom-error inputs (try_map failures) currently
+            // map to `at_eof = true` — see the field docstring
+            // on `ParseError::Invalid::at_eof`.
+            if err.at_eof() {
+                InputState::IncompleteAtEof
+            } else {
+                InputState::DefiniteErrorAt(position)
+            }
+        }
+    }
+}
+
+fn overlay_error(runs: &mut [StyledRun], error_byte: usize, theme: &Theme) {
+    // Failing tokens have their byte_range starting exactly at
+    // `error_byte`. Override the fg colour while preserving any
+    // other style bits the base run carried.
+    if let Some(run) = runs.iter_mut().find(|r| r.byte_range.0 == error_byte) {
+        run.style = run.style.fg(theme.tok_error);
+    }
+    // If no run starts at error_byte, the failure is past the
+    // last token (an EOF failure misclassified as definite —
+    // shouldn't happen given classify_input's contract). No-op.
+}
+
 /// Cursor-less variant: tokenises `input` into styled runs
 /// covering the full byte range, with no inverted cursor.
 /// Used by the echo-line renderer (ADR-0022 §5) where there's
@@ -258,21 +323,126 @@ mod tests {
        assert!(reversed(last));
    }

+    // ---- classify_input + error overlay (stage 4) ----
+
    #[test]
-    fn full_command_lexes_to_each_token_class() {
-        let theme = dark();
-        let runs = render_input_runs(
-            "insert into T values (1, 'hi', null) --all-rows",
-            47,
-            &theme,
+    fn classify_empty_input() {
+        assert_eq!(classify_input(""), InputState::Empty);
+        assert_eq!(classify_input("   "), InputState::Empty);
+    }
+
+    #[test]
+    fn classify_complete_command_is_valid() {
+        assert_eq!(
+            classify_input("create table Customers with pk"),
+            InputState::Valid,
        );
-        // Spot-check: there's at least one run of each token class.
+    }
+
+    #[test]
+    fn classify_partial_keyword_only_is_incomplete() {
+        // `create` alone — parser fails at EOF expecting `table`.
+        assert_eq!(classify_input("create"), InputState::IncompleteAtEof);
+    }
+
+    #[test]
+    fn classify_partial_command_mid_clause_is_incomplete() {
+        assert_eq!(
+            classify_input("create table Customers"),
+            InputState::IncompleteAtEof,
+        );
+    }
+
+    #[test]
+    fn classify_unknown_command_is_definite_error_at_zero() {
+        assert_eq!(
+            classify_input("frobulate widgets"),
+            InputState::DefiniteErrorAt(0),
+        );
+    }
+
+    #[test]
+    fn classify_wrong_token_mid_command_is_definite_error_at_token_position() {
+        // `create table ` consumed (13 bytes inc. the trailing space
+        // skipped by lexer); `1Bad` lexes as Number(13)+Identifier(14).
+        // Parser expects ident at position 13, finds Number — fails.
+        let state = classify_input("create table 1Bad");
+        match state {
+            InputState::DefiniteErrorAt(pos) => assert_eq!(pos, 13),
+            other => panic!("expected DefiniteErrorAt(13), got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn classify_trailing_whitespace_does_not_create_definite_error() {
+        // Trailing whitespace alone shouldn't promote an
+        // incomplete-at-EOF state into a definite error.
+        assert_eq!(
+            classify_input("create   "),
+            InputState::IncompleteAtEof,
+        );
+    }
+
+    #[test]
+    fn render_input_runs_overlays_error_on_failing_token() {
+        let theme = dark();
+        let runs = render_input_runs("frobulate widgets", 17, &theme);
+        // First run is `frobulate` at (0,9). Should be tok_error
+        // colour (definite error overlay).
+        assert_eq!(runs[0].byte_range, (0, 9));
+        assert_eq!(runs[0].style.fg, Some(theme.tok_error));
+        // Second run is whitespace, third is `widgets` — these
+        // don't get the overlay (only the failing token).
+        let widgets = runs.iter().find(|r| r.byte_range == (10, 17));
+        assert!(widgets.is_some());
+        assert_eq!(
+            widgets.unwrap().style.fg,
+            Some(theme.tok_identifier),
+            "tokens after the error stay in their lex-class colour",
+        );
+    }
+
+    #[test]
+    fn render_input_runs_does_not_overlay_for_incomplete_input() {
+        let theme = dark();
+        let runs = render_input_runs("create", 6, &theme);
+        // No error overlay — `create` keeps tok_keyword.
+        assert_eq!(runs[0].byte_range, (0, 6));
+        assert_eq!(runs[0].style.fg, Some(theme.tok_keyword));
+    }
+
+    #[test]
+    fn render_input_runs_does_not_overlay_for_valid_input() {
+        let theme = dark();
+        let runs = render_input_runs("create table T with pk", 22, &theme);
+        // None of the tokens should be tok_error.
+        for r in &runs {
+            assert_ne!(
+                r.style.fg,
+                Some(theme.tok_error),
+                "no error overlay for valid input: {r:?}",
+            );
+        }
+    }
+
+    #[test]
+    fn full_valid_command_lexes_to_each_token_class() {
+        // Use a valid command — `update ... --all-rows` —
+        // so the error overlay (stage 4) doesn't replace any
+        // class colours with tok_error. Tokens: keyword(s),
+        // identifier(s), string literal, punct (=), flag.
+        let theme = dark();
+        let input = "update T set Name='hi' --all-rows";
+        let runs = render_input_runs(input, input.len(), &theme);
        let fgs: Vec<_> = runs.iter().filter_map(|r| r.style.fg).collect();
-        assert!(fgs.contains(&theme.tok_keyword)); // insert / into / values / null
-        assert!(fgs.contains(&theme.tok_identifier)); // T / hi (string is separate)
-        assert!(fgs.contains(&theme.tok_number)); // 1
+        assert!(fgs.contains(&theme.tok_keyword)); // update / set
+        assert!(fgs.contains(&theme.tok_identifier)); // T / Name
        assert!(fgs.contains(&theme.tok_string)); // 'hi'
-        assert!(fgs.contains(&theme.tok_punct)); // ( , )
+        assert!(fgs.contains(&theme.tok_punct)); // =
        assert!(fgs.contains(&theme.tok_flag)); // --all-rows
+        // The valid command must not have any error overlay.
+        for r in &runs {
+            assert_ne!(r.style.fg, Some(theme.tok_error));
+        }
    }
 }