ADR-0022 stage 8e: invalid-identifier detection + hint variant

Per the user's #5: "if our candidate selection works correctly, then entering a character that removes all matches is the same as entering an invalid token." Closes the loop between schema cache (8c/8d) and live error feedback (4). New `completion::invalid_ident_at_cursor(input, cursor, cache)` returns `Some(InvalidIdent { range, found, slot })` when: - the cursor is on a partial identifier-shaped token; - the parser's expected-set at the start of that token contains a known-set IdentSlot (TableName / Column / RelationshipName); - no schema entry across those slots prefix-matches the typed text. `render_input_runs` extended to take a `&SchemaCache` and overlay the invalid-identifier range with `tok_error` — same visual treatment as the parse-error overlay (4), unified red signal regardless of which detector fires. `ambient_hint` extended to surface `hint.ambient_invalid_ident` when invalid_ident_at_cursor returns Some — wording "no such {kind}: `{found}`" mirrors ADR-0019's engine-error voice for consistency. Catalog + KEYS_AND_PLACEHOLDERS declaration added; validator passes. Render priority: candidates win over invalid-ident (if any schema match exists for the partial prefix, the state is "in-progress completion" not "invalid"). Falls through to the existing parse-error/incomplete/Valid framings otherwise. NewName slots are filtered out at the source — typing into a "user invents this name" position is never invalid (per `IdentSlot::completes_from_schema`). Tests: 744 passing, 0 failing, 1 ignored (738 baseline → +6: 5 invalid_ident_at_cursor cases covering unknown-prefix-fires, prefix-match-doesn't-fire, NewName-immune, no-cursor-token, keyword-slot-immune; plus 1 ambient_hint integration test). Clippy clean. This closes ADR-0022. Stages 1-8e together deliver the ambient-typing-assistance feature: token highlighting, error overlay, hint panel ambient, hint panel multi- candidate display with scroll markers, Tab/Shift-Tab cycling with one-keystroke Esc/Backspace undo, schema-aware identifier completion, and invalid-identifier live feedback. Total stage-8 footprint: 5 commits, ~1600 lines.
2026-05-11 21:01:44 +00:00
parent 7a32c13bd5
commit 8214e4136a
5 changed files with 218 additions and 15 deletions
@@ -143,6 +143,87 @@ fn strip_backticks(s: &str) -> Option<&str> {
    s.strip_prefix('`').and_then(|s| s.strip_suffix('`'))
 }

+/// What the user has typed in an identifier slot whose schema
+/// list contains nothing matching the prefix (ADR-0022 stage 8e
+/// + the user's #5).
+///
+/// The renderer overlays the partial token with `tok_error`;
+/// the hint panel renders an "invalid …" message.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct InvalidIdent {
+    /// Byte range of the typed-but-not-found identifier.
+    pub range: (usize, usize),
+    /// The text the user typed in the slot.
+    pub found: String,
+    /// Which known-set slot this position expected.
+    pub slot: IdentSlot,
+}
+
+/// Detect "the user has typed an identifier here that the
+/// schema doesn't have." Returns `None` for any of:
+/// - cursor at empty / whitespace partial;
+/// - cursor at a position that doesn't expect a known-set
+///   identifier (keyword slot, NewName slot, complete input);
+/// - cursor partial matches at least one schema name.
+#[must_use]
+pub fn invalid_ident_at_cursor(
+    input: &str,
+    cursor: usize,
+    cache: &SchemaCache,
+) -> Option<InvalidIdent> {
+    let cursor = cursor.min(input.len());
+    let bytes = input.as_bytes();
+    let mut start = cursor;
+    while start > 0 {
+        let prev = bytes[start - 1];
+        if prev.is_ascii_alphanumeric() || prev == b'_' {
+            start -= 1;
+        } else {
+            break;
+        }
+    }
+    if start == cursor {
+        // No partial token at the cursor — nothing to flag.
+        return None;
+    }
+    let partial = &input[start..cursor];
+    let leading = &input[..start];
+    let expected = expected_set(leading);
+    if expected.is_empty() {
+        return None;
+    }
+    // Find every known-set slot in the expected list.
+    let slots: Vec<IdentSlot> = expected
+        .iter()
+        .filter_map(|item| IdentSlot::from_expected_label(item))
+        .filter(|slot| slot.completes_from_schema())
+        .collect();
+    if slots.is_empty() {
+        return None;
+    }
+    let lowered = partial.to_lowercase();
+    // If any schema entry across the matching slots matches
+    // the prefix, the partial is not "invalid" — it's an
+    // in-progress lookup.
+    let any_match = slots
+        .iter()
+        .flat_map(|slot| cache.for_slot(*slot))
+        .any(|name| name.to_lowercase().starts_with(&lowered));
+    if any_match {
+        return None;
+    }
+    // Pick the first slot kind for the diagnostic — when
+    // multiple are expected (e.g. `drop relationship …`
+    // expects RelationshipName *or* the `from` keyword;
+    // here only the schema slot survives the filter) we
+    // surface the first.
+    Some(InvalidIdent {
+        range: (start, cursor),
+        found: partial.to_string(),
+        slot: slots[0],
+    })
+}
+
 /// The expected-token set at the end of `leading`. Empty
 /// `leading` (whitespace only) yields every command-entry
 /// keyword — there's no parser failure to drive this from, so
@@ -372,6 +453,65 @@ mod tests {
        assert!(cs.is_empty(), "got {cs:?}");
    }

+    // ---- invalid_ident_at_cursor (stage 8e) ----
+
+    #[test]
+    fn invalid_ident_fires_for_unknown_table_prefix() {
+        let cache = SchemaCache {
+            tables: vec!["Customers".to_string()],
+            ..SchemaCache::default()
+        };
+        // `show data Cust` matches → no invalid.
+        assert!(invalid_ident_at_cursor("show data Cust", 14, &cache).is_none());
+        // `show data Cust` plus a typo: `show data Custp`. No
+        // table starts with "Custp" → invalid.
+        let invalid = invalid_ident_at_cursor("show data Custp", 15, &cache)
+            .expect("should be invalid");
+        assert_eq!(invalid.range, (10, 15));
+        assert_eq!(invalid.found, "Custp");
+        assert_eq!(invalid.slot, IdentSlot::TableName);
+    }
+
+    #[test]
+    fn invalid_ident_does_not_fire_when_partial_matches_some_schema_entry() {
+        let cache = SchemaCache {
+            tables: vec!["Customers".to_string(), "Orders".to_string()],
+            ..SchemaCache::default()
+        };
+        // "C" matches Customers (prefix), so not invalid.
+        assert!(invalid_ident_at_cursor("show data C", 11, &cache).is_none());
+    }
+
+    #[test]
+    fn invalid_ident_does_not_fire_in_new_name_slot() {
+        // `create table Cust` — Cust is a NewName slot. Even
+        // if no schema entry matches, the user invents the
+        // name; not invalid.
+        let cache = SchemaCache {
+            tables: vec!["Existing".to_string()],
+            ..SchemaCache::default()
+        };
+        assert!(invalid_ident_at_cursor("create table Cust", 17, &cache).is_none());
+    }
+
+    #[test]
+    fn invalid_ident_does_not_fire_when_cursor_not_at_partial_token() {
+        let cache = SchemaCache::default();
+        // Cursor at a whitespace position — no partial token.
+        assert!(invalid_ident_at_cursor("show data ", 10, &cache).is_none());
+    }
+
+    #[test]
+    fn invalid_ident_does_not_fire_at_keyword_slot() {
+        // `cra` at the entry-keyword position — no keyword
+        // starts with "cra", but the slot is keyword (not a
+        // known-schema slot), so invalid_ident doesn't fire.
+        // The render path's regular parse-error overlay handles
+        // this case.
+        let cache = SchemaCache::default();
+        assert!(invalid_ident_at_cursor("cra", 3, &cache).is_none());
+    }
+
    #[test]
    fn new_name_slot_offers_no_candidates_even_with_populated_cache() {
        // `create table ` — the table-name slot is NewName.
@@ -128,6 +128,10 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
        &["message", "usage"],
    ),
    ("hint.ambient_expected", &["expected"]),
+    (
+        "hint.ambient_invalid_ident",
+        &["kind", "found"],
+    ),
    // ---- Parse error rendering ----
    ("parse.available_commands", &["commands"]),
    ("parse.caret", &["padding"]),
@@ -258,6 +258,10 @@ hint:
  ambient_complete: "submit with Enter"
  ambient_expected: "expected: {expected}"
  ambient_error_with_usage: "{message} — usage: {usage}"
+  # Invalid identifier in a schema slot (ADR-0022 stage 8e
+  # + the user's #5). Voice mirrors ADR-0019's "no such
+  # {kind}" wording for consistency with engine errors.
+  ambient_invalid_ident: "no such {kind}: `{found}`"

 parse:
  # Wrapper around chumsky's structural error message. The
@@ -53,15 +53,25 @@ impl StyledRun {
 ///
 /// Lexes `input`, assigns each token its `theme.token_color`,
 /// applies the parse-error overlay if the input is in the
-/// definite-error state (ADR-0022 §1, §4), preserves whitespace
+/// definite-error state (ADR-0022 §1, §4), applies the
+/// invalid-identifier overlay if the cursor is in a known-set
+/// slot with no schema match (stage 8e), preserves whitespace
 /// gaps as `theme.fg` runs, then injects the cursor at
 /// `cursor_byte` (clamped to `input.len()`).
 #[must_use]
-pub fn render_input_runs(input: &str, cursor_byte: usize, theme: &Theme) -> Vec<StyledRun> {
+pub fn render_input_runs(
+    input: &str,
+    cursor_byte: usize,
+    theme: &Theme,
+    cache: &crate::completion::SchemaCache,
+) -> Vec<StyledRun> {
    let mut runs = lex_to_runs(input, theme);
    if let InputState::DefiniteErrorAt(pos) = classify_input(input) {
        overlay_error(&mut runs, pos, theme);
    }
+    if let Some(inv) = crate::completion::invalid_ident_at_cursor(input, cursor_byte, cache) {
+        overlay_error(&mut runs, inv.range.0, theme);
+    }
    inject_cursor(&mut runs, input, cursor_byte, theme);
    runs
 }
@@ -164,6 +174,26 @@ pub fn ambient_hint(
            selected,
        });
    }
+    // Invalid identifier: cursor sits in a known-set slot but
+    // the typed prefix matches nothing in the schema. (Stage
+    // 8e / the user's #5.)
+    if let Some(inv) = crate::completion::invalid_ident_at_cursor(input, cursor, cache) {
+        let kind = match inv.slot {
+            crate::dsl::ident_slot::IdentSlot::TableName => "table",
+            crate::dsl::ident_slot::IdentSlot::Column => "column",
+            crate::dsl::ident_slot::IdentSlot::RelationshipName => "relationship",
+            // `NewName` is filtered out by `invalid_ident_at_cursor`
+            // (it only fires for known-set slots), so this arm
+            // is unreachable in practice; render a neutral
+            // fallback rather than panic.
+            crate::dsl::ident_slot::IdentSlot::NewName => "identifier",
+        };
+        return Some(AmbientHint::Prose(crate::t!(
+            "hint.ambient_invalid_ident",
+            kind = kind,
+            found = inv.found,
+        )));
+    }
    // Otherwise fall back to the prose framings from stage 5.
    match parse_command(input) {
        Ok(_) => Some(AmbientHint::Prose(crate::t!("hint.ambient_complete"))),
@@ -335,7 +365,7 @@ mod tests {

    #[test]
    fn empty_input_renders_only_the_end_of_input_cursor() {
-        let runs = render_input_runs("", 0, &dark());
+        let runs = render_input_runs("", 0, &dark(), &empty_cache());
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].byte_range, (0, 0));
        assert!(reversed(&runs[0]));
@@ -344,7 +374,7 @@ mod tests {
    #[test]
    fn keyword_token_takes_keyword_colour() {
        let theme = dark();
-        let runs = render_input_runs("create", 6, &theme);
+        let runs = render_input_runs("create", 6, &theme, &empty_cache());
        // Token + end-of-input cursor.
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].byte_range, (0, 6));
@@ -356,7 +386,7 @@ mod tests {
    #[test]
    fn cursor_inside_token_splits_into_three_runs_keeping_colour() {
        let theme = dark();
-        let runs = render_input_runs("create", 3, &theme);
+        let runs = render_input_runs("create", 3, &theme, &empty_cache());
        assert_eq!(runs.len(), 3);
        assert_eq!(runs[0].byte_range, (0, 3));
        assert_eq!(runs[1].byte_range, (3, 4));
@@ -374,7 +404,7 @@ mod tests {
    fn cursor_on_whitespace_inverts_a_single_space() {
        let theme = dark();
        // "create table" has whitespace at byte 6.
-        let runs = render_input_runs("create table", 6, &theme);
+        let runs = render_input_runs("create table", 6, &theme, &empty_cache());
        // base: keyword, ws(6,7), keyword. After cursor injection
        // at the start of ws: under=(6,7) REVERSED. The
        // before/after slices are empty so we get 3 runs total.
@@ -388,7 +418,7 @@ mod tests {
    #[test]
    fn lex_error_token_renders_in_error_colour() {
        let theme = dark();
-        let runs = render_input_runs("$", 1, &theme);
+        let runs = render_input_runs("$", 1, &theme, &empty_cache());
        // Error token (0,1), then end-of-input cursor (1,1).
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].style.fg, Some(theme.tok_error));
@@ -397,7 +427,7 @@ mod tests {
    #[test]
    fn whitespace_between_tokens_takes_default_fg() {
        let theme = dark();
-        let runs = render_input_runs("create table", 12, &theme);
+        let runs = render_input_runs("create table", 12, &theme, &empty_cache());
        // base: keyword(0,6), ws(6,7), keyword(7,12). Plus
        // end-of-input cursor (12,12) = 4 runs.
        assert_eq!(runs.len(), 4);
@@ -412,7 +442,7 @@ mod tests {
        let theme = dark();
        // 'café' = ['(0)', c(1), a(2), f(3), é(4-5), '(6)] — é is 2 bytes.
        // Cursor at byte 4: inside é. char_end advances to 6.
-        let runs = render_input_runs("'café'", 4, &theme);
+        let runs = render_input_runs("'café'", 4, &theme, &empty_cache());
        let r_under: Vec<_> = runs.iter().filter(|r| reversed(r)).collect();
        assert_eq!(r_under.len(), 1);
        assert_eq!(r_under[0].byte_range, (4, 6));
@@ -420,7 +450,7 @@ mod tests {

    #[test]
    fn end_of_input_cursor_is_an_empty_range() {
-        let runs = render_input_runs("create", 6, &dark());
+        let runs = render_input_runs("create", 6, &dark(), &empty_cache());
        let last = runs.last().expect("non-empty");
        assert_eq!(last.byte_range, (6, 6));
        assert!(reversed(last));
@@ -510,6 +540,26 @@ mod tests {
        );
    }

+    #[test]
+    fn ambient_hint_for_invalid_identifier_says_no_such() {
+        use crate::completion::SchemaCache;
+        // Schema knows "Customers"; user typed "Custp" — no match.
+        let cache = SchemaCache {
+            tables: vec!["Customers".to_string()],
+            ..SchemaCache::default()
+        };
+        match ambient_hint("show data Custp", 15, None, &cache) {
+            Some(AmbientHint::Prose(p)) => {
+                assert!(
+                    p.contains("no such table"),
+                    "expected 'no such table' wording, got {p:?}",
+                );
+                assert!(p.contains("Custp"), "should name the bad ident, got {p:?}");
+            }
+            other => panic!("expected Prose for invalid-ident, got {other:?}"),
+        }
+    }
+
    #[test]
    fn ambient_hint_with_memo_carries_selected_index() {
        use crate::completion::LastCompletion;
@@ -596,7 +646,7 @@ mod tests {
    #[test]
    fn render_input_runs_overlays_error_on_failing_token() {
        let theme = dark();
-        let runs = render_input_runs("frobulate widgets", 17, &theme);
+        let runs = render_input_runs("frobulate widgets", 17, &theme, &empty_cache());
        // First run is `frobulate` at (0,9). Should be tok_error
        // colour (definite error overlay).
        assert_eq!(runs[0].byte_range, (0, 9));
@@ -615,7 +665,7 @@ mod tests {
    #[test]
    fn render_input_runs_does_not_overlay_for_incomplete_input() {
        let theme = dark();
-        let runs = render_input_runs("create", 6, &theme);
+        let runs = render_input_runs("create", 6, &theme, &empty_cache());
        // No error overlay — `create` keeps tok_keyword.
        assert_eq!(runs[0].byte_range, (0, 6));
        assert_eq!(runs[0].style.fg, Some(theme.tok_keyword));
@@ -624,7 +674,7 @@ mod tests {
    #[test]
    fn render_input_runs_does_not_overlay_for_valid_input() {
        let theme = dark();
-        let runs = render_input_runs("create table T with pk", 22, &theme);
+        let runs = render_input_runs("create table T with pk", 22, &theme, &empty_cache());
        // None of the tokens should be tok_error.
        for r in &runs {
            assert_ne!(
@@ -643,7 +693,7 @@ mod tests {
        // identifier(s), string literal, punct (=), flag.
        let theme = dark();
        let input = "update T set Name='hi' --all-rows";
-        let runs = render_input_runs(input, input.len(), &theme);
+        let runs = render_input_runs(input, input.len(), &theme, &empty_cache());
        let fgs: Vec<_> = runs.iter().filter_map(|r| r.style.fg).collect();
        assert!(fgs.contains(&theme.tok_keyword)); // update / set
        assert!(fgs.contains(&theme.tok_identifier)); // T / Name
@@ -616,7 +616,12 @@ fn render_input_panel(app: &App, theme: &Theme, frame: &mut Frame<'_>, area: Rec
    let cursor = app.input_cursor.min(app.input.len());
    let spans = match effective {
        EffectiveMode::Simple => {
-            let runs = crate::input_render::render_input_runs(&app.input, cursor, theme);
+            let runs = crate::input_render::render_input_runs(
+                &app.input,
+                cursor,
+                theme,
+                &app.schema_cache,
+            );
            runs_to_spans(&app.input, &runs)
        }
        EffectiveMode::AdvancedPersistent | EffectiveMode::AdvancedOneShot => {