Files
rdbms-playground/src/dsl/walker/highlight.rs
T
claude@clouddev1 380c4238ef test+docs: 3k Phase-3 verification sweep — e2e DML + filled cross-cut matrix
Sub-phase 3k of ADR-0033. Adds the Tier-3 end-to-end DML suite (tests/sql_dml_e2e.rs) and the cross-cut gap-fill tests, fills the verification matrix (every row a verified file::function), and produces the phase-exit report.

- tests/sql_dml_e2e.rs: INSERT…SELECT cross-table, all-ten-type multi-row INSERT + RETURNING type recovery, UPDATE-with-subquery-in-SET, cascade DELETE, UPSERT round-trip, RETURNING x3, history.log replay, OOS rejections (full §13 table), validity-indicator-from-SQL-DML.
- walker/mod.rs, highlight.rs, completion.rs, input_render.rs: inherited-diagnostic, DML-keyword highlight, INSERT INTO completion, and advanced-mode DML hint-panel cross-cuts.
- Matrix correction (user-confirmed): predicate warnings fire on row-scoped DML slots; INSERT VALUES has no row scope (ADR-0033 §8.4).
- Auto-snapshot row marked N/A (user-confirmed): ADR-0006 unimplemented for both paths; deferred.

/runda round: added an advanced-mode DML hint-panel test (A6 was attributed to simple-mode prose under the §8 advanced heading); extended OOS coverage to the full ADR-0033 §13 table (OOS-5 INDEXED BY / OOS-6 multi-statement) + a trailing-semicolon guard.

1645 passing / 0 failing / 0 skipped / 1 ignored. Clippy clean.
2026-05-23 22:26:04 +00:00

435 lines
15 KiB
Rust

//! Walker-driven highlighting (ADR-0024 §migration Phase F).
//!
//! `highlight_runs(source)` returns the per-byte highlight class
//! assignments for every token shape in `source`. It is the
//! single entry point that consumers (input panel, echo lines)
//! should use to colour DSL input — there is no separate lexer
//! pre-pass.
//!
//! Strategy:
//!
//! - Try the walker first. Whatever it consumed end-to-end (entry
//! word + matching nodes) contributes `WalkResult::per_byte_class`.
//! - For any bytes the walker did not cover — input the walker
//! doesn't engage on at all (no registered entry word), trailing
//! junk after a partial match, or content past a structural
//! failure — fall back to a byte-shape scanner that classifies
//! each consumed token by its shape using the same `lex_helpers`
//! primitives the walker uses internally.
//!
//! The two streams are returned in source-byte order; whitespace
//! gaps are not represented (the renderer fills them with the
//! default foreground colour).
use crate::dsl::grammar::HighlightClass;
use crate::dsl::walker::context::WalkContext;
use crate::dsl::walker::lex_helpers::{
consume_bare_path, consume_flag, consume_ident, consume_number_literal,
consume_string_literal, skip_whitespace,
};
use crate::dsl::walker::outcome::{ByteClass, WalkBound};
/// Compute the highlight class spans for `source` in the default mode.
///
/// This is a convenience wrapper that delegates to
/// [`highlight_runs_in_mode`] with `Mode::Simple`. Advanced-mode UIs
/// should call [`highlight_runs_in_mode`] directly so that SQL
/// keywords past the entry word are matched and highlighted (the
/// simple-mode gate at the dispatcher truncates the walker on
/// advanced-only commands, ADR-0030 §2).
#[must_use]
pub fn highlight_runs(source: &str) -> Vec<ByteClass> {
    let default_mode = crate::mode::Mode::Simple;
    highlight_runs_in_mode(source, default_mode)
}
/// Mode-aware variant of [`highlight_runs`] (ADR-0032 §10.6 follow-up).
///
/// Runs the walker over `source` with the given `mode` and takes the
/// per-byte classes it produced (none if the walker yields no result).
/// Everything after the end of the last walker-provided span is then
/// classified by the byte-shape fallback scanner.
///
/// In `Mode::Advanced` the walker matches every Phase-2 SQL token,
/// producing the keyword classes the renderer needs to colour
/// `select` / `from` / `where` / `union` / `case` / etc.
#[must_use]
pub fn highlight_runs_in_mode(
    source: &str,
    mode: crate::mode::Mode,
) -> Vec<ByteClass> {
    let mut walk_ctx = WalkContext::new();
    walk_ctx.mode = mode;

    // Only the walk result is needed here; the second tuple element
    // is discarded.
    let (walked, _) = super::walk(source, WalkBound::EndOfInput, &mut walk_ctx);
    let walker_spans = walked.map(|outcome| outcome.per_byte_class);
    let mut spans: Vec<ByteClass> = walker_spans.unwrap_or_default();

    // Resume the fallback scan right after the last span the walker
    // produced, or from the very start when it produced none.
    let resume_at = match spans.last() {
        Some(last) => last.end,
        None => 0,
    };
    scan_remainder(source, resume_at, &mut spans);
    spans
}
/// Byte-shape scan from `start` to end of source, appending each
/// classified token to `classes`. Whitespace gaps are skipped.
fn scan_remainder(source: &str, start: usize, classes: &mut Vec<ByteClass>) {
let bytes = source.as_bytes();
let mut pos = start;
while pos < bytes.len() {
pos = skip_whitespace(source, pos);
if pos >= bytes.len() {
break;
}
let b = bytes[pos];
// Identifier first — covers keywords-by-shape, since at
// the highlight layer we no longer distinguish keyword from
// identifier without a successful walker match.
if (b.is_ascii_alphabetic() || b == b'_')
&& let Some((s, e)) = consume_ident(source, pos)
{
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Identifier,
});
pos = e;
continue;
}
if b == b'\'' {
// Quoted string. Unterminated → mark the rest as Error
// so the user sees the unclosed run highlighted.
if let Some(((s, e), _)) = consume_string_literal(source, pos) {
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::String,
});
pos = e;
} else {
classes.push(ByteClass {
start: pos,
end: bytes.len(),
class: HighlightClass::Error,
});
pos = bytes.len();
}
continue;
}
if b == b'-' && bytes.get(pos + 1) == Some(&b'-') {
// Flag. `--` without a body is BadFlag → Error.
if let Some((s, e)) = consume_flag(source, pos) {
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Flag,
});
pos = e;
} else {
classes.push(ByteClass {
start: pos,
end: pos + 2,
class: HighlightClass::Error,
});
pos += 2;
}
continue;
}
let looks_like_number = b.is_ascii_digit()
|| (b == b'-'
&& bytes
.get(pos + 1)
.copied()
.is_some_and(|c| c.is_ascii_digit()));
if looks_like_number
&& let Some((s, e)) = consume_number_literal(source, pos)
{
classes.push(ByteClass {
start: s,
end: e,
class: HighlightClass::Number,
});
pos = e;
continue;
}
if matches!(b, b':' | b'(' | b')' | b',' | b'=' | b'.') {
classes.push(ByteClass {
start: pos,
end: pos + 1,
class: HighlightClass::Punct,
});
pos += 1;
continue;
}
// Bare-path tail (e.g., trailing `frobulate widgets` past
// a partial command match): only used when we know the
// remainder isn't structured. Without a grammar context
// here we conservatively treat as Error so the user sees
// the unknown-shape byte highlighted.
//
// For multi-byte UTF-8 (emoji, unknown unicode) advance
// one whole codepoint as Error.
let ch = source[pos..]
.chars()
.next()
.expect("pos < bytes.len() ⇒ at least one char");
let len = ch.len_utf8();
// If the char is alphanumeric (unusual at this fall-through
// — should already have been caught above), classify as
// Identifier-ish. Otherwise Error.
let class = if ch.is_ascii_alphanumeric() || ch == '_' {
HighlightClass::Identifier
} else if ch.is_whitespace() {
// Whitespace is filtered above; this branch is unreachable
// in practice.
pos += len;
continue;
} else {
HighlightClass::Error
};
let _ = consume_bare_path; // silence unused-import lint when not exercised
classes.push(ByteClass {
start: pos,
end: pos + len,
class,
});
pos += len;
}
}
#[cfg(test)]
mod tests {
    //! Unit tests for the walker-driven highlighter: walker-covered
    //! input, the byte-shape fallback, and Advanced-mode SQL keywords.
    use super::*;

    /// Highlight `input` in the default (Simple) mode and flatten each
    /// span to a `(start, end, class)` tuple for easy comparison.
    fn run(input: &str) -> Vec<(usize, usize, HighlightClass)> {
        highlight_runs(input)
            .into_iter()
            .map(|c| (c.start, c.end, c.class))
            .collect()
    }

    #[test]
    fn empty_input_yields_no_runs() {
        assert!(highlight_runs("").is_empty());
        assert!(highlight_runs(" ").is_empty());
    }

    #[test]
    fn entry_keyword_classified_as_keyword() {
        assert_eq!(run("quit"), vec![(0, 4, HighlightClass::Keyword)]);
    }

    #[test]
    fn keyword_plus_identifier_via_walker() {
        // `show data Customers` walks end-to-end.
        let runs = run("show data Customers");
        assert_eq!(
            runs,
            vec![
                (0, 4, HighlightClass::Keyword),
                (5, 9, HighlightClass::Keyword),
                (10, 19, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn unknown_command_word_classified_by_byte_shape() {
        // Walker doesn't engage; fallback classifies as Identifier.
        assert_eq!(run("frobulate"), vec![(0, 9, HighlightClass::Identifier)]);
    }

    #[test]
    fn unknown_chars_classified_as_error() {
        assert_eq!(run("$"), vec![(0, 1, HighlightClass::Error)]);
    }

    #[test]
    fn unterminated_string_classified_as_error_through_to_eof() {
        assert_eq!(run("'oops"), vec![(0, 5, HighlightClass::Error)]);
    }

    #[test]
    fn string_literal_classified() {
        assert_eq!(run("'hello'"), vec![(0, 7, HighlightClass::String)]);
    }

    #[test]
    fn flag_classified_via_fallback() {
        // Walker doesn't engage for a bare `--all-rows`.
        assert_eq!(
            run("--all-rows"),
            vec![(0, 10, HighlightClass::Flag)],
        );
    }

    #[test]
    fn bare_double_dash_classified_as_error() {
        assert_eq!(run("--"), vec![(0, 2, HighlightClass::Error)]);
    }

    #[test]
    fn number_classified_via_fallback() {
        assert_eq!(run("42"), vec![(0, 2, HighlightClass::Number)]);
        assert_eq!(run("-5"), vec![(0, 2, HighlightClass::Number)]);
        assert_eq!(run("3.14"), vec![(0, 4, HighlightClass::Number)]);
    }

    #[test]
    fn punct_classified_via_fallback() {
        // Bare `:` outside any walker context.
        assert_eq!(run(":"), vec![(0, 1, HighlightClass::Punct)]);
    }

    #[test]
    fn trailing_tokens_after_partial_walk_are_byte_scanned() {
        // `quit nonsense` — walker matches `quit`, then trailing
        // `nonsense` is fallback-classified.
        let runs = run("quit nonsense");
        assert_eq!(
            runs,
            vec![
                (0, 4, HighlightClass::Keyword),
                (5, 13, HighlightClass::Identifier),
            ],
        );
    }

    #[test]
    fn whitespace_gaps_are_not_represented_as_runs() {
        // Two whitespace-separated tokens produce exactly two
        // class spans; the renderer fills the gap with fg.
        let runs = run("show table");
        assert_eq!(runs.len(), 2);
        assert_eq!(runs[0].2, HighlightClass::Keyword);
        assert_eq!(runs[1].2, HighlightClass::Keyword);
    }

    #[test]
    fn full_command_walks_with_each_class() {
        // `update T set Name='hi' --all-rows` — walker covers it
        // all end-to-end; the per-byte class slice carries each
        // node's contribution.
        let runs = highlight_runs("update T set Name='hi' --all-rows");
        let classes: Vec<HighlightClass> = runs.iter().map(|c| c.class).collect();
        assert!(classes.contains(&HighlightClass::Keyword));
        assert!(classes.contains(&HighlightClass::Identifier));
        assert!(classes.contains(&HighlightClass::String));
        assert!(classes.contains(&HighlightClass::Punct));
        assert!(classes.contains(&HighlightClass::Flag));
    }

    #[test]
    fn utf8_unknown_char_advances_one_codepoint() {
        // U+2713 CHECK MARK is a 3-byte UTF-8 codepoint; the fallback
        // emits a single 3-byte Error span (not three 1-byte spans).
        // It is spelled as an escape so the input cannot silently
        // degrade to an empty literal, which would yield zero runs
        // and defeat the test.
        let check_mark = "\u{2713}";
        assert_eq!(check_mark.len(), 3);
        let runs = run(check_mark);
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].2, HighlightClass::Error);
        assert_eq!(runs[0].1 - runs[0].0, check_mark.len());
    }

    #[test]
    fn string_with_multi_byte_unicode_classified_as_string() {
        // 'café' is a single string literal; the walker doesn't
        // engage here (no `'café'` entry keyword), so the fallback
        // scans and classifies as String.
        let runs = run("'café'");
        assert_eq!(runs.len(), 1);
        assert_eq!(runs[0].2, HighlightClass::String);
        assert_eq!(runs[0].1, "'café'".len());
    }

    // ---- ADR-0030 §8 / ADR-0032 — SQL keyword highlighting ----

    /// Same as [`run`], but highlights in `Mode::Advanced`.
    fn run_advanced(input: &str) -> Vec<(usize, usize, HighlightClass)> {
        highlight_runs_in_mode(input, crate::mode::Mode::Advanced)
            .into_iter()
            .map(|c| (c.start, c.end, c.class))
            .collect()
    }

    #[test]
    fn sql_select_keywords_classified() {
        // ADR-0030 §8 — `select` / `from` get keyword class in
        // Advanced mode (Simple mode gates SELECT out at the
        // dispatcher, so only the entry word would highlight).
        let runs = run_advanced("select * from t");
        assert!(
            runs.iter().any(|(s, e, c)| {
                *c == HighlightClass::Keyword && (*s, *e) == (0, 6)
            }),
            "expected `select` keyword span 0..6; got {runs:?}",
        );
        assert!(
            runs.iter().any(|(s, e, c)| {
                *c == HighlightClass::Keyword && (*s, *e) == (9, 13)
            }),
            "expected `from` keyword span 9..13; got {runs:?}",
        );
    }

    #[test]
    fn sql_expression_operators_classified_as_keywords() {
        // ADR-0031 §5: LIKE / BETWEEN / IN / IS / AND / OR / NOT
        // are part of the predicate ladder. Walker matches them
        // as Word nodes; highlight class = Keyword.
        let input = "select * from t where a like 'x' and b between 1 and 5";
        let runs = run_advanced(input);
        let keywords: Vec<&str> = runs
            .iter()
            .filter(|(_, _, c)| *c == HighlightClass::Keyword)
            .map(|(s, e, _)| &input[*s..*e])
            .collect();
        assert!(keywords.contains(&"like"), "no `like`; got {keywords:?}");
        assert!(keywords.contains(&"and"), "no `and`; got {keywords:?}");
        assert!(
            keywords.contains(&"between"),
            "no `between`; got {keywords:?}",
        );
    }

    #[test]
    fn sql_case_expression_keywords_classified() {
        let input = "select case when a = 1 then 'one' else 'other' end from t";
        let runs = run_advanced(input);
        let keywords: Vec<&str> = runs
            .iter()
            .filter(|(_, _, c)| *c == HighlightClass::Keyword)
            .map(|(s, e, _)| &input[*s..*e])
            .collect();
        for kw in ["case", "when", "then", "else", "end"] {
            assert!(
                keywords.contains(&kw),
                "missing `{kw}` keyword; got {keywords:?}",
            );
        }
    }

    #[test]
    fn sql_dml_keywords_classified() {
        // ADR-0030 §8 / ADR-0033 — the DML entry words and clause
        // keywords (INSERT / INTO / VALUES / ON / CONFLICT /
        // RETURNING / UPDATE / SET / DELETE / FROM) all get the
        // Keyword class in Advanced mode. 3k cross-cut: the
        // ambient highlighter covers the DML surface, not just
        // SELECT.
        let keywords_of = |input: &'static str| -> Vec<&'static str> {
            run_advanced(input)
                .into_iter()
                .filter(|(_, _, c)| *c == HighlightClass::Keyword)
                .map(|(s, e, _)| &input[s..e])
                .collect()
        };

        let insert = keywords_of(
            "insert into t (a) values (1) on conflict (a) do update set a = excluded.a returning a",
        );
        for kw in [
            "insert",
            "into",
            "values",
            "on",
            "conflict",
            "do",
            "update",
            "set",
            "returning",
        ] {
            assert!(
                insert.contains(&kw),
                "INSERT/UPSERT: missing `{kw}`; got {insert:?}"
            );
        }

        let update = keywords_of("update t set a = 1 where id = 2 returning a");
        for kw in ["update", "set", "where", "returning"] {
            assert!(
                update.contains(&kw),
                "UPDATE: missing `{kw}`; got {update:?}"
            );
        }

        let delete = keywords_of("delete from t where id = 1 returning *");
        for kw in ["delete", "from", "where", "returning"] {
            assert!(
                delete.contains(&kw),
                "DELETE: missing `{kw}`; got {delete:?}"
            );
        }
    }
}