From 313d4f834644eef70237be1a6590c09d9e5c8f88 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Sun, 10 May 2026 17:37:50 +0000 Subject: [PATCH] ADR-0022 stage 4/8: render-time parse + error overlay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `classify_input(&str) -> InputState` that returns one of {Empty, Valid, IncompleteAtEof, DefiniteErrorAt(byte)}. The renderer uses this to overlay tok_error on the failing token of mid-typed input that can never be valid. ParseError::Invalid gains an `at_eof: bool` field populated by `into_parse_error`: - structural failures: at_eof = found.is_none() (chumsky's own "ran out of input" discriminator); - custom errors from try_map: at_eof = true, conservatively. The conservative custom-error classification is a deliberate under-highlighting bias. It means three classes of error currently DO NOT get a live red overlay (only on submit): - "tables need at least one column" (correct: this is genuinely an incomplete state — adding `with pk ...` fixes it); - "unknown type 'varchar'" (sub-optimal: should overlay); - "--force-conversion and --dont-convert are mutually exclusive" (sub-optimal: should overlay). The trade-off is documented inline on the at_eof field. A future refinement could carry an explicit definite/incomplete tag through Custom errors (would change RichReason::Custom's payload from String to a typed value). render_input_runs now applies the overlay on the failing token's run before injecting the cursor. Tokens after the error keep their lex-class colour — fixes one thing at a time per ADR-0022 §4. Lex errors continue to render in tok_error from stage 2. Pattern-matches on ParseError::Invalid throughout the codebase use `..` and are unaffected; only the two constructions in parser.rs needed updating. Tests: 693 passing, 0 failing, 1 ignored (683 baseline → +10: 7 classify + overlay tests, +1 adapted full-command test, +2 valid-vs-incomplete coverage). Clippy clean. 
Stage 5 lights up the hint panel as the verbose-feedback surface — needs the InputState classifier from this stage. --- src/dsl/parser.rs | 47 ++++++++++- src/input_render.rs | 196 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 228 insertions(+), 15 deletions(-) diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 2fd670a..df4aef6 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -25,7 +25,27 @@ use crate::dsl::value::Value; #[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] pub enum ParseError { #[error("could not parse command: {message}")] - Invalid { message: String, position: usize }, + Invalid { + message: String, + position: usize, + /// True when the parse failed because more input was + /// expected — i.e. a structural failure with no + /// next-token to point at. Used by the input renderer + /// (ADR-0022 §4) to distinguish "incomplete but + /// plausible" from "definite error" mid-typing. + /// + /// Custom errors raised by `try_map` are conservatively + /// classified as `at_eof = true` because we cannot, at + /// this layer, tell apart "tables need at least one + /// column" (incomplete: more input would help) from + /// "--force-conversion and --dont-convert are mutually + /// exclusive" (definite: user must remove a token). + /// Erring on `true` means custom-error inputs do not + /// get a live error overlay; the parse error still + /// fires on submit. A future refinement may carry an + /// explicit `is_definite` tag through custom errors. + at_eof: bool, + }, #[error("empty input")] Empty, } @@ -38,6 +58,14 @@ impl ParseError { Self::Empty => None, } } + + #[must_use] + pub const fn at_eof(&self) -> bool { + match self { + Self::Invalid { at_eof, .. } => *at_eof, + Self::Empty => true, + } + } } /// Parse a single DSL command end-to-end. 
@@ -103,6 +131,7 @@ fn try_parse_replay_with_bare_path( return Some(Err(ParseError::Invalid { message: "expected a path after `replay`".to_string(), position: after_replay, + at_eof: true, })); } Some(Ok(Command::Replay { @@ -657,7 +686,21 @@ fn into_parse_error(errs: &[Rich<'_, Token>], tokens: &[Token], source: &str) -> let chumsky_span = chosen.span(); let position = source_position_at(tokens, chumsky_span.start, source); let message = humanise(chosen, tokens, source); - ParseError::Invalid { message, position } + let at_eof = match chosen.reason() { + // Structural failures know whether they ran out of + // input — `found = None` ⇔ EOF. + RichReason::ExpectedFound { found, .. } => found.is_none(), + // Custom errors: see the docstring on + // `ParseError::Invalid::at_eof` for why we err on the + // side of `true` (no live overlay; on-submit error + // still fires). + RichReason::Custom(_) => true, + }; + ParseError::Invalid { + message, + position, + at_eof, + } } /// Translate a chumsky token-slice index into a byte position diff --git a/src/input_render.rs b/src/input_render.rs index d950ec4..310e5ce 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -25,6 +25,7 @@ use ratatui::style::{Modifier, Style}; use crate::dsl::lexer::lex; +use crate::dsl::{ParseError, parse_command}; use crate::theme::Theme; /// A run of text with its byte range in the source and the @@ -51,15 +52,79 @@ impl StyledRun { /// Build the run sequence for the input panel. /// /// Lexes `input`, assigns each token its `theme.token_color`, -/// preserves whitespace gaps as `theme.fg` runs, then injects -/// the cursor at `cursor_byte` (clamped to `input.len()`). +/// applies the parse-error overlay if the input is in the +/// definite-error state (ADR-0022 §1, §4), preserves whitespace +/// gaps as `theme.fg` runs, then injects the cursor at +/// `cursor_byte` (clamped to `input.len()`). 
#[must_use] pub fn render_input_runs(input: &str, cursor_byte: usize, theme: &Theme) -> Vec<StyledRun> { let mut runs = lex_to_runs(input, theme); + if let InputState::DefiniteErrorAt(pos) = classify_input(input) { + overlay_error(&mut runs, pos, theme); + } inject_cursor(&mut runs, input, cursor_byte, theme); runs } +/// One of three mid-typing classifications (ADR-0022 §1). +/// +/// Distinguishes "the user isn't done yet" from "this token +/// can never fit". Drives error overlay (this stage) and the +/// hint panel ambient mode (stage 5). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum InputState { + /// No tokens at all (empty / whitespace-only input). + Empty, + /// Parses to a complete `Command`. The user can submit. + Valid, + /// Parse failed because more input was expected — every + /// consumed token fits a known command, just not all of + /// it is here yet. + IncompleteAtEof, + /// Parse failed at a token strictly inside the input — + /// no continuation can recover. The byte offset is the + /// failing token's start. + DefiniteErrorAt(usize), +} + +/// Classify `input` into one of the three mid-typing states. +/// Cheap (lex + parse) per ADR-0022 §13. +#[must_use] +pub fn classify_input(input: &str) -> InputState { + if input.trim().is_empty() { + return InputState::Empty; + } + match parse_command(input) { + Ok(_) => InputState::Valid, + Err(ParseError::Empty) => InputState::Empty, + Err(err @ ParseError::Invalid { position, .. }) => { + // `at_eof` is the parser's own classification: true + // when more input would (potentially) help, false + // when a specific token is in the wrong place. + // Custom-error inputs (try_map failures) currently + // map to `at_eof = true` — see the field docstring + // on `ParseError::Invalid::at_eof`. 
+ if err.at_eof() { + InputState::IncompleteAtEof + } else { + InputState::DefiniteErrorAt(position) + } + } + } +} + +fn overlay_error(runs: &mut [StyledRun], error_byte: usize, theme: &Theme) { + // Failing tokens have their byte_range starting exactly at + // `error_byte`. Override the fg colour while preserving any + // other style bits the base run carried. + if let Some(run) = runs.iter_mut().find(|r| r.byte_range.0 == error_byte) { + run.style = run.style.fg(theme.tok_error); + } + // If no run starts at error_byte, the failure is past the + // last token (an EOF failure misclassified as definite — + // shouldn't happen given classify_input's contract). No-op. +} + /// Cursor-less variant: tokenises `input` into styled runs /// covering the full byte range, with no inverted cursor. /// Used by the echo-line renderer (ADR-0022 §5) where there's @@ -258,21 +323,126 @@ mod tests { assert!(reversed(last)); } + // ---- classify_input + error overlay (stage 4) ---- + #[test] - fn full_command_lexes_to_each_token_class() { - let theme = dark(); - let runs = render_input_runs( - "insert into T values (1, 'hi', null) --all-rows", - 47, - &theme, + fn classify_empty_input() { + assert_eq!(classify_input(""), InputState::Empty); + assert_eq!(classify_input(" "), InputState::Empty); + } + + #[test] + fn classify_complete_command_is_valid() { + assert_eq!( + classify_input("create table Customers with pk"), + InputState::Valid, ); - // Spot-check: there's at least one run of each token class. + } + + #[test] + fn classify_partial_keyword_only_is_incomplete() { + // `create` alone — parser fails at EOF expecting `table`. 
+ assert_eq!(classify_input("create"), InputState::IncompleteAtEof); + } + + #[test] + fn classify_partial_command_mid_clause_is_incomplete() { + assert_eq!( + classify_input("create table Customers"), + InputState::IncompleteAtEof, + ); + } + + #[test] + fn classify_unknown_command_is_definite_error_at_zero() { + assert_eq!( + classify_input("frobulate widgets"), + InputState::DefiniteErrorAt(0), + ); + } + + #[test] + fn classify_wrong_token_mid_command_is_definite_error_at_token_position() { + // `create table` consumed (12 bytes, plus the trailing + // space skipped by lexer); `1Bad` lexes as Number(13)+Identifier(14). + // Parser expects ident at position 13, finds Number — fails. + let state = classify_input("create table 1Bad"); + match state { + InputState::DefiniteErrorAt(pos) => assert_eq!(pos, 13), + other => panic!("expected DefiniteErrorAt(13), got {other:?}"), + } + } + + #[test] + fn classify_trailing_whitespace_does_not_create_definite_error() { + // Trailing whitespace alone shouldn't promote an + // incomplete-at-EOF state into a definite error. + assert_eq!( + classify_input("create "), + InputState::IncompleteAtEof, + ); + } + + #[test] + fn render_input_runs_overlays_error_on_failing_token() { + let theme = dark(); + let runs = render_input_runs("frobulate widgets", 17, &theme); + // First run is `frobulate` at (0,9). Should be tok_error + // colour (definite error overlay). + assert_eq!(runs[0].byte_range, (0, 9)); + assert_eq!(runs[0].style.fg, Some(theme.tok_error)); + // Second run is whitespace, third is `widgets` — these + // don't get the overlay (only the failing token). 
+ let widgets = runs.iter().find(|r| r.byte_range == (10, 17)); + assert!(widgets.is_some()); + assert_eq!( + widgets.unwrap().style.fg, + Some(theme.tok_identifier), + "tokens after the error stay in their lex-class colour", + ); + } + + #[test] + fn render_input_runs_does_not_overlay_for_incomplete_input() { + let theme = dark(); + let runs = render_input_runs("create", 6, &theme); + // No error overlay — `create` keeps tok_keyword. + assert_eq!(runs[0].byte_range, (0, 6)); + assert_eq!(runs[0].style.fg, Some(theme.tok_keyword)); + } + + #[test] + fn render_input_runs_does_not_overlay_for_valid_input() { + let theme = dark(); + let runs = render_input_runs("create table T with pk", 22, &theme); + // None of the tokens should be tok_error. + for r in &runs { + assert_ne!( + r.style.fg, + Some(theme.tok_error), + "no error overlay for valid input: {r:?}", + ); + } + } + + #[test] + fn full_valid_command_lexes_to_each_token_class() { + // Use a valid command — `update ... --all-rows` — + // so the error overlay (stage 4) doesn't replace any + // class colours with tok_error. Tokens: keyword(s), + // identifier(s), string literal, punct (=), flag. + let theme = dark(); + let input = "update T set Name='hi' --all-rows"; + let runs = render_input_runs(input, input.len(), &theme); let fgs: Vec<_> = runs.iter().filter_map(|r| r.style.fg).collect(); - assert!(fgs.contains(&theme.tok_keyword)); // insert / into / values / null - assert!(fgs.contains(&theme.tok_identifier)); // T / hi (string is separate) - assert!(fgs.contains(&theme.tok_number)); // 1 + assert!(fgs.contains(&theme.tok_keyword)); // update / set + assert!(fgs.contains(&theme.tok_identifier)); // T / Name assert!(fgs.contains(&theme.tok_string)); // 'hi' - assert!(fgs.contains(&theme.tok_punct)); // ( , ) + assert!(fgs.contains(&theme.tok_punct)); // = assert!(fgs.contains(&theme.tok_flag)); // --all-rows + // The valid command must not have any error overlay. 
+ for r in &runs { + assert_ne!(r.style.fg, Some(theme.tok_error)); + } } }