ADR-0024 Phase F (full) step 1: walker-driven highlighting

Replaces the lex()-driven `base_runs` span builder in
`input_render.rs` with `walker::highlight_runs`. The new
walker-side `dsl::walker::highlight` module returns per-byte
`HighlightClass` assignments for every token shape in the source:

- For commands the walker engages on, `WalkResult::per_byte_class`
  is the authoritative source (keyword / identifier / number /
  string / punct / flag).
- Trailing junk past a partial match — and inputs the walker
  doesn't engage on at all (no registered entry word) — fall
  through to a byte-shape scanner over `lex_helpers` so unknown
  command words, stray punctuation, and unterminated strings
  still highlight sensibly.

`Theme::highlight_class_color` is the walker-side analogue of
`token_color(&TokenKind)`; the renderer reads `walker::highlight_runs`
output and looks up colours through it. `token_color` and the
`lex()` pre-pass remain in place for now — the lexer module is
still consumed by usage rendering and completion until the
remaining Phase F steps land.

`HighlightClass`'s and `WalkResult::per_byte_class`'s
`#[allow(dead_code)]` annotations come off — they're now part of
the production highlight path.

Tests:
- 16 new tests under `dsl::walker::highlight` cover end-to-end
  walks, byte-shape fallbacks (unknown commands, bare flags,
  numbers, punctuation), UTF-8 codepoint advance, and trailing-
  token handling after partial walks.
- Existing `input_render` tests pass unchanged.
- 860 total tests passing (727 lib + 133 integration), 1 ignored.

Clippy clean with `nursery` lints + `-D warnings`.
This commit is contained in:
claude@clouddev1
2026-05-15 08:19:52 +00:00
parent b3d3bdfe5b
commit 7bdd3987e1
6 changed files with 354 additions and 12 deletions
+3 -4
View File
@@ -33,11 +33,10 @@ use crate::dsl::walker::outcome::MatchedPath;
/// Highlight class assigned to a matched terminal. /// Highlight class assigned to a matched terminal.
/// ///
/// Phase A records these on the `WalkResult::per_byte_class` /// Recorded on the `WalkResult::per_byte_class` slice and surfaced
/// slice; the existing input-renderer (chumsky-driven) still /// by `walker::highlight_runs` to the input/echo-line renderers
/// owns the user-visible highlight today. /// (ADR-0024 §architecture).
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(dead_code)]
pub enum HighlightClass { pub enum HighlightClass {
Keyword, Keyword,
Identifier, Identifier,
+319
View File
@@ -0,0 +1,319 @@
//! Walker-driven highlighting (ADR-0024 §migration Phase F).
//!
//! `highlight_runs(source)` returns the per-byte highlight class
//! assignments for every token shape in `source`. It is the
//! single entry point that consumers (input panel, echo lines)
//! should use to colour DSL input — there is no separate lexer
//! pre-pass.
//!
//! Strategy:
//!
//! - Try the walker first. Whatever it consumed end-to-end (entry
//! word + matching nodes) contributes `WalkResult::per_byte_class`.
//! - For any bytes the walker did not cover — input the walker
//! doesn't engage on at all (no registered entry word), trailing
//! junk after a partial match, or content past a structural
//! failure — fall back to a byte-shape scanner that classifies
//! each consumed token by its shape using the same `lex_helpers`
//! primitives the walker uses internally.
//!
//! The two streams are returned in source-byte order; whitespace
//! gaps are not represented (the renderer fills them with the
//! default foreground colour).
use crate::dsl::grammar::HighlightClass;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::lex_helpers::{
consume_bare_path, consume_flag, consume_ident, consume_number_literal,
consume_string_literal, skip_whitespace,
};
use crate::dsl::walker::outcome::{ByteClass, WalkBound};
/// Compute the highlight spans for `source`.
///
/// The walker runs first; whatever spans it recorded in
/// `WalkResult::per_byte_class` are taken as-is. The byte-shape
/// scanner then resumes at the end of the last recorded span (or at
/// byte 0 when nothing was recorded) and appends one span per
/// remaining token, so trailing tokens and unknown-command inputs
/// are still coloured.
#[must_use]
pub fn highlight_runs(source: &str) -> Vec<ByteClass> {
    let mut walk_ctx = WalkContext::new();
    let (walked, _command) = super::walk(source, WalkBound::EndOfInput, &mut walk_ctx);

    // No walk result ⇒ start from an empty span list and let the
    // scanner classify the whole input.
    let mut spans: Vec<ByteClass> = walked
        .map(|outcome| outcome.per_byte_class)
        .unwrap_or_default();

    let resume_at = spans.last().map_or(0, |last| last.end);
    scan_remainder(source, resume_at, &mut spans);
    spans
}
/// Byte-shape scan from `start` to end of source, appending each
/// classified token to `classes`. Whitespace gaps are skipped.
fn scan_remainder(source: &str, start: usize, classes: &mut Vec<ByteClass>) {
let bytes = source.as_bytes();
let mut pos = start;
while pos < bytes.len() {
pos = skip_whitespace(source, pos);
if pos >= bytes.len() {
break;
}
let b = bytes[pos];
// Identifier first — covers keywords-by-shape, since at
// the highlight layer we no longer distinguish keyword from
// identifier without a successful walker match.
if (b.is_ascii_alphabetic() || b == b'_')
&& let Some((s, e)) = consume_ident(source, pos)
{
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Identifier,
});
pos = e;
continue;
}
if b == b'\'' {
// Quoted string. Unterminated → mark the rest as Error
// so the user sees the unclosed run highlighted.
if let Some(((s, e), _)) = consume_string_literal(source, pos) {
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::String,
});
pos = e;
} else {
classes.push(ByteClass {
start: pos,
end: bytes.len(),
class: HighlightClass::Error,
});
pos = bytes.len();
}
continue;
}
if b == b'-' && bytes.get(pos + 1) == Some(&b'-') {
// Flag. `--` without a body is BadFlag → Error.
if let Some((s, e)) = consume_flag(source, pos) {
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Flag,
});
pos = e;
} else {
classes.push(ByteClass {
start: pos,
end: pos + 2,
class: HighlightClass::Error,
});
pos += 2;
}
continue;
}
let looks_like_number = b.is_ascii_digit()
|| (b == b'-'
&& bytes
.get(pos + 1)
.copied()
.is_some_and(|c| c.is_ascii_digit()));
if looks_like_number
&& let Some((s, e)) = consume_number_literal(source, pos)
{
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Number,
});
pos = e;
continue;
}
if matches!(b, b':' | b'(' | b')' | b',' | b'=' | b'.') {
classes.push(ByteClass {
start: pos,
end: pos + 1,
class: HighlightClass::Punct,
});
pos += 1;
continue;
}
// Bare-path tail (e.g., trailing `frobulate widgets` past
// a partial command match): only used when we know the
// remainder isn't structured. Without a grammar context
// here we conservatively treat as Error so the user sees
// the unknown-shape byte highlighted.
//
// For multi-byte UTF-8 (emoji, unknown unicode) advance
// one whole codepoint as Error.
let ch = source[pos..]
.chars()
.next()
.expect("pos < bytes.len() ⇒ at least one char");
let len = ch.len_utf8();
// If the char is alphanumeric (unusual at this fall-through
// — should already have been caught above), classify as
// Identifier-ish. Otherwise Error.
let class = if ch.is_ascii_alphanumeric() || ch == '_' {
HighlightClass::Identifier
} else if ch.is_whitespace() {
// Whitespace is filtered above; this branch is unreachable
// in practice.
pos += len;
continue;
} else {
HighlightClass::Error
};
let _ = consume_bare_path; // silence unused-import lint when not exercised
classes.push(ByteClass {
start: pos,
end: pos + len,
class,
});
pos += len;
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Flatten `highlight_runs` output into `(start, end, class)`
    /// tuples so expectations can be written as plain `vec![…]`
    /// literals.
    fn run(input: &str) -> Vec<(usize, usize, HighlightClass)> {
        highlight_runs(input)
            .into_iter()
            .map(|c| (c.start, c.end, c.class))
            .collect()
    }

    #[test]
    fn empty_input_yields_no_runs() {
        assert!(highlight_runs("").is_empty());
        assert!(highlight_runs(" ").is_empty());
    }

    #[test]
    fn entry_keyword_classified_as_keyword() {
        assert_eq!(run("quit"), vec![(0, 4, HighlightClass::Keyword)]);
    }

    #[test]
    fn keyword_plus_identifier_via_walker() {
        // `show data Customers` walks end-to-end.
        let runs = run("show data Customers");
        assert_eq!(
            runs,
            vec![
                (0, 4, HighlightClass::Keyword),
                (5, 9, HighlightClass::Keyword),
                (10, 19, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn unknown_command_word_classified_by_byte_shape() {
        // Walker doesn't engage; fallback classifies as Identifier.
        assert_eq!(run("frobulate"), vec![(0, 9, HighlightClass::Identifier)]);
    }

    #[test]
    fn unknown_chars_classified_as_error() {
        assert_eq!(run("$"), vec![(0, 1, HighlightClass::Error)]);
    }

    #[test]
    fn unterminated_string_classified_as_error_through_to_eof() {
        assert_eq!(run("'oops"), vec![(0, 5, HighlightClass::Error)]);
    }

    #[test]
    fn string_literal_classified() {
        assert_eq!(run("'hello'"), vec![(0, 7, HighlightClass::String)]);
    }

    #[test]
    fn flag_classified_via_fallback() {
        // Walker doesn't engage for a bare `--all-rows`.
        assert_eq!(run("--all-rows"), vec![(0, 10, HighlightClass::Flag)]);
    }

    #[test]
    fn bare_double_dash_classified_as_error() {
        assert_eq!(run("--"), vec![(0, 2, HighlightClass::Error)]);
    }

    #[test]
    fn number_classified_via_fallback() {
        assert_eq!(run("42"), vec![(0, 2, HighlightClass::Number)]);
        assert_eq!(run("-5"), vec![(0, 2, HighlightClass::Number)]);
        assert_eq!(run("3.14"), vec![(0, 4, HighlightClass::Number)]);
    }

    #[test]
    fn punct_classified_via_fallback() {
        // Bare `:` outside any walker context.
        assert_eq!(run(":"), vec![(0, 1, HighlightClass::Punct)]);
    }

    #[test]
    fn trailing_tokens_after_partial_walk_are_byte_scanned() {
        // `quit nonsense` — walker matches `quit`, then trailing
        // `nonsense` is fallback-classified.
        let runs = run("quit nonsense");
        assert_eq!(
            runs,
            vec![
                (0, 4, HighlightClass::Keyword),
                (5, 13, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn whitespace_gaps_are_not_represented_as_runs() {
        // Two whitespace-separated tokens produce exactly two
        // class spans; the renderer fills the gap with fg.
        let runs = run("show table");
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].2, HighlightClass::Keyword);
        assert_eq!(runs[1].2, HighlightClass::Keyword);
    }

    #[test]
    fn full_command_walks_with_each_class() {
        // `update T set Name='hi' --all-rows` — walker covers it
        // all end-to-end; the per-byte class slice carries each
        // node's contribution.
        let runs = highlight_runs("update T set Name='hi' --all-rows");
        let classes: Vec<HighlightClass> = runs.iter().map(|c| c.class).collect();
        assert!(classes.contains(&HighlightClass::Keyword));
        assert!(classes.contains(&HighlightClass::Identifier));
        assert!(classes.contains(&HighlightClass::String));
        assert!(classes.contains(&HighlightClass::Punct));
        assert!(classes.contains(&HighlightClass::Flag));
    }

    #[test]
    fn utf8_unknown_char_advances_one_codepoint() {
        // ✓ (U+2713) is a 3-byte UTF-8 codepoint; the fallback emits
        // a single 3-byte Error span (not three 1-byte spans). The
        // input must be the literal character — an empty string
        // would yield no runs at all.
        let runs = run("✓");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].2, HighlightClass::Error);
        assert_eq!(runs[0].1 - runs[0].0, "✓".len());
    }

    #[test]
    fn string_with_multi_byte_unicode_classified_as_string() {
        // 'café' is a single string literal; the walker doesn't
        // engage here (no `'café'` entry keyword), so the fallback
        // scans and classifies as String.
        let runs = run("'café'");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].2, HighlightClass::String);
        assert_eq!(runs[0].1, "'café'".len());
    }
}
+2
View File
@@ -14,6 +14,7 @@
pub mod context; pub mod context;
pub mod driver; pub mod driver;
pub mod highlight;
pub mod lex_helpers; pub mod lex_helpers;
pub mod outcome; pub mod outcome;
@@ -27,6 +28,7 @@ use crate::dsl::walker::outcome::{
}; };
pub use context::ColumnInfo; pub use context::ColumnInfo;
pub use highlight::highlight_runs;
/// Public walk entry. `bound` is `EndOfInput` for parse; /// Public walk entry. `bound` is `EndOfInput` for parse;
/// `Position(cursor)` for completion / hint (Phase A: not yet /// `Position(cursor)` for completion / hint (Phase A: not yet
-2
View File
@@ -153,7 +153,6 @@ impl MatchedPath {
/// match. Phase A keeps this for future consumers; not yet used /// match. Phase A keeps this for future consumers; not yet used
/// outside walker-internal tests. /// outside walker-internal tests.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ByteClass { pub struct ByteClass {
pub start: usize, pub start: usize,
pub end: usize, pub end: usize,
@@ -164,6 +163,5 @@ pub struct ByteClass {
pub struct WalkResult { pub struct WalkResult {
pub outcome: WalkOutcome, pub outcome: WalkOutcome,
pub matched_path: MatchedPath, pub matched_path: MatchedPath,
#[allow(dead_code)]
pub per_byte_class: Vec<ByteClass>, pub per_byte_class: Vec<ByteClass>,
} }
+10 -6
View File
@@ -25,6 +25,7 @@
use ratatui::style::{Modifier, Style}; use ratatui::style::{Modifier, Style};
use crate::dsl::lexer::lex; use crate::dsl::lexer::lex;
use crate::dsl::walker;
use crate::dsl::{ParseError, parse_command}; use crate::dsl::{ParseError, parse_command};
use crate::theme::Theme; use crate::theme::Theme;
@@ -300,13 +301,16 @@ pub fn lex_to_runs(input: &str, theme: &Theme) -> Vec<StyledRun> {
} }
fn base_runs(input: &str, theme: &Theme) -> Vec<StyledRun> { fn base_runs(input: &str, theme: &Theme) -> Vec<StyledRun> {
let tokens = lex(input); // Walker-driven highlighting (ADR-0024 §architecture, Phase F).
let mut runs = Vec::with_capacity(tokens.len() * 2); // `walker::highlight_runs` returns per-byte classes for every
// token shape in the source; whitespace gaps are not represented
// and we fill them with the default foreground colour below.
let classes = walker::highlight_runs(input);
let mut runs = Vec::with_capacity(classes.len() * 2);
let mut pos = 0; let mut pos = 0;
for tok in tokens { for class in classes {
let (start, end) = tok.span; let (start, end) = (class.start, class.end);
if pos < start { if pos < start {
// Whitespace gap before this token.
runs.push(StyledRun { runs.push(StyledRun {
byte_range: (pos, start), byte_range: (pos, start),
style: Style::default().fg(theme.fg), style: Style::default().fg(theme.fg),
@@ -314,7 +318,7 @@ fn base_runs(input: &str, theme: &Theme) -> Vec<StyledRun> {
} }
runs.push(StyledRun { runs.push(StyledRun {
byte_range: (start, end), byte_range: (start, end),
style: Style::default().fg(theme.token_color(&tok.kind)), style: Style::default().fg(theme.highlight_class_color(class.class)),
}); });
pos = end; pos = end;
} }
+20
View File
@@ -18,6 +18,7 @@
use ratatui::style::Color; use ratatui::style::Color;
use crate::dsl::grammar::HighlightClass;
use crate::dsl::lexer::TokenKind; use crate::dsl::lexer::TokenKind;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -121,6 +122,25 @@ impl Theme {
TokenKind::Error(_) => self.tok_error, TokenKind::Error(_) => self.tok_error,
} }
} }
/// Map a walker `HighlightClass` to its display colour
/// (ADR-0024 §architecture, Phase F). This is the walker-side
/// equivalent of `token_color` — the renderer consumes
/// `walker::highlight_runs` output, which produces
/// `HighlightClass` per byte range, and looks up colours
/// through this method.
#[must_use]
pub const fn highlight_class_color(&self, class: HighlightClass) -> Color {
match class {
HighlightClass::Keyword => self.tok_keyword,
HighlightClass::Identifier => self.tok_identifier,
HighlightClass::Number => self.tok_number,
HighlightClass::String => self.tok_string,
HighlightClass::Punct => self.tok_punct,
HighlightClass::Flag => self.tok_flag,
HighlightClass::Error => self.tok_error,
}
}
} }
impl Default for Theme { impl Default for Theme {