ADR-0024 Phase F (full) step 1: walker-driven highlighting
Replaces the lex()-driven `base_runs` span builder in `input_render.rs` with `walker::highlight_runs`. The new walker-side `dsl::walker::highlight` module returns per-byte `HighlightClass` assignments for every token shape in the source:

- For commands the walker engages on, `WalkResult::per_byte_class` is the authoritative source (keyword / identifier / number / string / punct / flag).
- Trailing junk past a partial match — and inputs the walker doesn't engage on at all (no registered entry word) — fall through to a byte-shape scanner over `lex_helpers` so unknown command words, stray punctuation, and unterminated strings still highlight sensibly.

`Theme::highlight_class_color` is the walker-side analogue of `token_color(&TokenKind)`; the renderer reads `walker::highlight_runs` output and looks up colours through it. `token_color` and the `lex()` pre-pass remain in place for now — the lexer module is still consumed by usage rendering and completion until the remaining Phase F steps land.

`HighlightClass`'s and `WalkResult::per_byte_class`'s `#[allow(dead_code)]` annotations come off — they're now part of the production highlight path.

Tests:

- 16 new tests under `dsl::walker::highlight` cover end-to-end walks, byte-shape fallbacks (unknown commands, bare flags, numbers, punctuation), UTF-8 codepoint advance, and trailing-token handling after partial walks.
- Existing `input_render` tests pass unchanged.
- 860 total tests passing (727 lib + 133 integration), 1 ignored.

Clippy clean with `nursery` lints + `-D warnings`.
This commit is contained in:
@@ -33,11 +33,10 @@ use crate::dsl::walker::outcome::MatchedPath;
|
||||
|
||||
/// Highlight class assigned to a matched terminal.
|
||||
///
|
||||
/// Phase A records these on the `WalkResult::per_byte_class`
|
||||
/// slice; the existing input-renderer (chumsky-driven) still
|
||||
/// owns the user-visible highlight today.
|
||||
/// Recorded on the `WalkResult::per_byte_class` slice and surfaced
|
||||
/// by `walker::highlight_runs` to the input/echo-line renderers
|
||||
/// (ADR-0024 §architecture).
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[allow(dead_code)]
|
||||
pub enum HighlightClass {
|
||||
Keyword,
|
||||
Identifier,
|
||||
|
||||
@@ -0,0 +1,319 @@
|
||||
//! Walker-driven highlighting (ADR-0024 §migration Phase F).
|
||||
//!
|
||||
//! `highlight_runs(source)` returns the per-byte highlight class
|
||||
//! assignments for every token shape in `source`. It is the
|
||||
//! single entry point that consumers (input panel, echo lines)
|
||||
//! should use to colour DSL input — there is no separate lexer
|
||||
//! pre-pass.
|
||||
//!
|
||||
//! Strategy:
|
||||
//!
|
||||
//! - Try the walker first. Whatever it consumed end-to-end (entry
|
||||
//! word + matching nodes) contributes `WalkResult::per_byte_class`.
|
||||
//! - For any bytes the walker did not cover — input the walker
|
||||
//! doesn't engage on at all (no registered entry word), trailing
|
||||
//! junk after a partial match, or content past a structural
|
||||
//! failure — fall back to a byte-shape scanner that classifies
|
||||
//! each consumed token by its shape using the same `lex_helpers`
|
||||
//! primitives the walker uses internally.
|
||||
//!
|
||||
//! The two streams are returned in source-byte order; whitespace
|
||||
//! gaps are not represented (the renderer fills them with the
|
||||
//! default foreground colour).
|
||||
|
||||
use crate::dsl::grammar::HighlightClass;
|
||||
use crate::dsl::walker::context::WalkContext;
|
||||
use crate::dsl::walker::lex_helpers::{
|
||||
consume_bare_path, consume_flag, consume_ident, consume_number_literal,
|
||||
consume_string_literal, skip_whitespace,
|
||||
};
|
||||
use crate::dsl::walker::outcome::{ByteClass, WalkBound};
|
||||
|
||||
/// Produce the per-byte highlight classes for `source`.
|
||||
///
|
||||
/// On a successful walk this is exactly the walker's recorded
|
||||
/// classes. On partial / unmatched input the byte-shape scanner
|
||||
/// fills the gap so the renderer keeps colouring through trailing
|
||||
/// tokens and unknown-command inputs.
|
||||
#[must_use]
|
||||
pub fn highlight_runs(source: &str) -> Vec<ByteClass> {
|
||||
let mut ctx = WalkContext::new();
|
||||
let (result, _cmd) = super::walk(source, WalkBound::EndOfInput, &mut ctx);
|
||||
let mut classes: Vec<ByteClass> = result
|
||||
.map(|r| r.per_byte_class)
|
||||
.unwrap_or_default();
|
||||
|
||||
let scan_start = classes.last().map_or(0, |c| c.end);
|
||||
scan_remainder(source, scan_start, &mut classes);
|
||||
classes
|
||||
}
|
||||
|
||||
/// Byte-shape scan from `start` to end of source, appending each
|
||||
/// classified token to `classes`. Whitespace gaps are skipped.
|
||||
fn scan_remainder(source: &str, start: usize, classes: &mut Vec<ByteClass>) {
|
||||
let bytes = source.as_bytes();
|
||||
let mut pos = start;
|
||||
while pos < bytes.len() {
|
||||
pos = skip_whitespace(source, pos);
|
||||
if pos >= bytes.len() {
|
||||
break;
|
||||
}
|
||||
let b = bytes[pos];
|
||||
// Identifier first — covers keywords-by-shape, since at
|
||||
// the highlight layer we no longer distinguish keyword from
|
||||
// identifier without a successful walker match.
|
||||
if (b.is_ascii_alphabetic() || b == b'_')
|
||||
&& let Some((s, e)) = consume_ident(source, pos)
|
||||
{
|
||||
classes.push(ByteClass {
|
||||
start: s,
|
||||
end: e,
|
||||
class: HighlightClass::Identifier,
|
||||
});
|
||||
pos = e;
|
||||
continue;
|
||||
}
|
||||
if b == b'\'' {
|
||||
// Quoted string. Unterminated → mark the rest as Error
|
||||
// so the user sees the unclosed run highlighted.
|
||||
if let Some(((s, e), _)) = consume_string_literal(source, pos) {
|
||||
classes.push(ByteClass {
|
||||
start: s,
|
||||
end: e,
|
||||
class: HighlightClass::String,
|
||||
});
|
||||
pos = e;
|
||||
} else {
|
||||
classes.push(ByteClass {
|
||||
start: pos,
|
||||
end: bytes.len(),
|
||||
class: HighlightClass::Error,
|
||||
});
|
||||
pos = bytes.len();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if b == b'-' && bytes.get(pos + 1) == Some(&b'-') {
|
||||
// Flag. `--` without a body is BadFlag → Error.
|
||||
if let Some((s, e)) = consume_flag(source, pos) {
|
||||
classes.push(ByteClass {
|
||||
start: s,
|
||||
end: e,
|
||||
class: HighlightClass::Flag,
|
||||
});
|
||||
pos = e;
|
||||
} else {
|
||||
classes.push(ByteClass {
|
||||
start: pos,
|
||||
end: pos + 2,
|
||||
class: HighlightClass::Error,
|
||||
});
|
||||
pos += 2;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
let looks_like_number = b.is_ascii_digit()
|
||||
|| (b == b'-'
|
||||
&& bytes
|
||||
.get(pos + 1)
|
||||
.copied()
|
||||
.is_some_and(|c| c.is_ascii_digit()));
|
||||
if looks_like_number
|
||||
&& let Some((s, e)) = consume_number_literal(source, pos)
|
||||
{
|
||||
classes.push(ByteClass {
|
||||
start: s,
|
||||
end: e,
|
||||
class: HighlightClass::Number,
|
||||
});
|
||||
pos = e;
|
||||
continue;
|
||||
}
|
||||
if matches!(b, b':' | b'(' | b')' | b',' | b'=' | b'.') {
|
||||
classes.push(ByteClass {
|
||||
start: pos,
|
||||
end: pos + 1,
|
||||
class: HighlightClass::Punct,
|
||||
});
|
||||
pos += 1;
|
||||
continue;
|
||||
}
|
||||
// Bare-path tail (e.g., trailing `frobulate widgets` past
|
||||
// a partial command match): only used when we know the
|
||||
// remainder isn't structured. Without a grammar context
|
||||
// here we conservatively treat as Error so the user sees
|
||||
// the unknown-shape byte highlighted.
|
||||
//
|
||||
// For multi-byte UTF-8 (emoji, unknown unicode) advance
|
||||
// one whole codepoint as Error.
|
||||
let ch = source[pos..]
|
||||
.chars()
|
||||
.next()
|
||||
.expect("pos < bytes.len() ⇒ at least one char");
|
||||
let len = ch.len_utf8();
|
||||
// If the char is alphanumeric (unusual at this fall-through
|
||||
// — should already have been caught above), classify as
|
||||
// Identifier-ish. Otherwise Error.
|
||||
let class = if ch.is_ascii_alphanumeric() || ch == '_' {
|
||||
HighlightClass::Identifier
|
||||
} else if ch.is_whitespace() {
|
||||
// Whitespace is filtered above; this branch is unreachable
|
||||
// in practice.
|
||||
pos += len;
|
||||
continue;
|
||||
} else {
|
||||
HighlightClass::Error
|
||||
};
|
||||
let _ = consume_bare_path; // silence unused-import lint when not exercised
|
||||
classes.push(ByteClass {
|
||||
start: pos,
|
||||
end: pos + len,
|
||||
class,
|
||||
});
|
||||
pos += len;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `highlight_runs` and flatten each span into a
    /// `(start, end, class)` tuple so expectations can be written as
    /// plain literals.
    fn spans(input: &str) -> Vec<(usize, usize, HighlightClass)> {
        highlight_runs(input)
            .into_iter()
            .map(|ByteClass { start, end, class }| (start, end, class))
            .collect()
    }

    #[test]
    fn empty_input_yields_no_runs() {
        for blank in ["", " "] {
            assert!(highlight_runs(blank).is_empty());
        }
    }

    #[test]
    fn entry_keyword_classified_as_keyword() {
        assert_eq!(spans("quit"), [(0, 4, HighlightClass::Keyword)]);
    }

    #[test]
    fn keyword_plus_identifier_via_walker() {
        // The walker consumes all of `show data Customers`.
        assert_eq!(
            spans("show data Customers"),
            [
                (0, 4, HighlightClass::Keyword),
                (5, 9, HighlightClass::Keyword),
                (10, 19, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn unknown_command_word_classified_by_byte_shape() {
        // No walker engagement: the scanner reports an Identifier.
        assert_eq!(spans("frobulate"), [(0, 9, HighlightClass::Identifier)]);
    }

    #[test]
    fn unknown_chars_classified_as_error() {
        assert_eq!(spans("$"), [(0, 1, HighlightClass::Error)]);
    }

    #[test]
    fn unterminated_string_classified_as_error_through_to_eof() {
        assert_eq!(spans("'oops"), [(0, 5, HighlightClass::Error)]);
    }

    #[test]
    fn string_literal_classified() {
        assert_eq!(spans("'hello'"), [(0, 7, HighlightClass::String)]);
    }

    #[test]
    fn flag_classified_via_fallback() {
        // A lone `--all-rows` is not something the walker engages on.
        assert_eq!(spans("--all-rows"), [(0, 10, HighlightClass::Flag)]);
    }

    #[test]
    fn bare_double_dash_classified_as_error() {
        assert_eq!(spans("--"), [(0, 2, HighlightClass::Error)]);
    }

    #[test]
    fn number_classified_via_fallback() {
        for (text, end) in [("42", 2), ("-5", 2), ("3.14", 4)] {
            assert_eq!(spans(text), [(0, end, HighlightClass::Number)]);
        }
    }

    #[test]
    fn punct_classified_via_fallback() {
        // A lone `:` with no walker context around it.
        assert_eq!(spans(":"), [(0, 1, HighlightClass::Punct)]);
    }

    #[test]
    fn trailing_tokens_after_partial_walk_are_byte_scanned() {
        // The walker takes `quit`; the scanner picks up `nonsense`.
        assert_eq!(
            spans("quit nonsense"),
            [
                (0, 4, HighlightClass::Keyword),
                (5, 13, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn whitespace_gaps_are_not_represented_as_runs() {
        // Two tokens in, two spans out — the gap between them has no
        // span of its own; the renderer paints it with fg.
        let classes: Vec<HighlightClass> = spans("show table")
            .into_iter()
            .map(|(_, _, class)| class)
            .collect();
        assert_eq!(classes, [HighlightClass::Keyword, HighlightClass::Keyword]);
    }

    #[test]
    fn full_command_walks_with_each_class() {
        // `update T set Name='hi' --all-rows` is walked end-to-end,
        // so every class below must come out of the recorded spans.
        let runs = highlight_runs("update T set Name='hi' --all-rows");
        for wanted in [
            HighlightClass::Keyword,
            HighlightClass::Identifier,
            HighlightClass::String,
            HighlightClass::Punct,
            HighlightClass::Flag,
        ] {
            assert!(runs.iter().any(|run| run.class == wanted));
        }
    }

    #[test]
    fn utf8_unknown_char_advances_one_codepoint() {
        // ✓ encodes to three bytes; it must come back as one
        // three-byte Error span rather than three one-byte spans.
        let found = spans("✓");
        assert_eq!(found.len(), 1);
        let (start, end, class) = found[0];
        assert_eq!(class, HighlightClass::Error);
        assert_eq!(end - start, "✓".len());
    }

    #[test]
    fn string_with_multi_byte_unicode_classified_as_string() {
        // `'café'` is not an entry keyword, so the scanner handles it
        // and must treat the whole literal as a single String span.
        let found = spans("'café'");
        assert_eq!(found.len(), 1);
        let (_, end, class) = found[0];
        assert_eq!(class, HighlightClass::String);
        assert_eq!(end, "'café'".len());
    }
}
|
||||
@@ -14,6 +14,7 @@
|
||||
|
||||
pub mod context;
|
||||
pub mod driver;
|
||||
pub mod highlight;
|
||||
pub mod lex_helpers;
|
||||
pub mod outcome;
|
||||
|
||||
@@ -27,6 +28,7 @@ use crate::dsl::walker::outcome::{
|
||||
};
|
||||
|
||||
pub use context::ColumnInfo;
|
||||
pub use highlight::highlight_runs;
|
||||
|
||||
/// Public walk entry. `bound` is `EndOfInput` for parse;
|
||||
/// `Position(cursor)` for completion / hint (Phase A: not yet
|
||||
|
||||
@@ -153,7 +153,6 @@ impl MatchedPath {
|
||||
/// match. Phase A keeps this for future consumers; not yet used
|
||||
/// outside walker-internal tests.
|
||||
#[derive(Debug, Clone)]
|
||||
#[allow(dead_code)]
|
||||
pub struct ByteClass {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
@@ -164,6 +163,5 @@ pub struct ByteClass {
|
||||
pub struct WalkResult {
|
||||
pub outcome: WalkOutcome,
|
||||
pub matched_path: MatchedPath,
|
||||
#[allow(dead_code)]
|
||||
pub per_byte_class: Vec<ByteClass>,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user