380c4238ef
Sub-phase 3k of ADR-0033. Adds the Tier-3 end-to-end DML suite (tests/sql_dml_e2e.rs) and the cross-cut gap-fill tests, fills the verification matrix (every row a verified file::function), and produces the phase-exit report. - tests/sql_dml_e2e.rs: INSERT…SELECT cross-table, all-ten-type multi-row INSERT + RETURNING type recovery, UPDATE-with-subquery-in-SET, cascade DELETE, UPSERT round-trip, RETURNING x3, history.log replay, OOS rejections (full §13 table), validity-indicator-from-SQL-DML. - walker/mod.rs, highlight.rs, completion.rs, input_render.rs: inherited-diagnostic, DML-keyword highlight, INSERT INTO completion, and advanced-mode DML hint-panel cross-cuts. - Matrix correction (user-confirmed): predicate warnings fire on row-scoped DML slots; INSERT VALUES has no row scope (ADR-0033 §8.4). - Auto-snapshot row marked N/A (user-confirmed): ADR-0006 unimplemented for both paths; deferred. /runda round: added an advanced-mode DML hint-panel test (A6 was attributed to simple-mode prose under the §8 advanced heading); extended OOS coverage to the full ADR-0033 §13 table (OOS-5 INDEXED BY / OOS-6 multi-statement) + a trailing-semicolon guard. 1645 passing / 0 failing / 0 skipped / 1 ignored. Clippy clean.
435 lines
15 KiB
Rust
435 lines
15 KiB
Rust
//! Walker-driven highlighting (ADR-0024 §migration Phase F).
//!
//! `highlight_runs(source)` returns the per-byte highlight class
//! assignments for every token shape in `source`. It is the
//! single entry point that consumers (input panel, echo lines)
//! should use to colour DSL input — there is no separate lexer
//! pre-pass.
//!
//! Strategy:
//!
//! - Try the walker first. Whatever it consumed end-to-end (entry
//!   word + matching nodes) contributes `WalkResult::per_byte_class`.
//! - For any bytes the walker did not cover — input the walker
//!   doesn't engage on at all (no registered entry word), trailing
//!   junk after a partial match, or content past a structural
//!   failure — fall back to a byte-shape scanner that classifies
//!   each consumed token by its shape using the same `lex_helpers`
//!   primitives the walker uses internally.
//!
//! The two streams are returned in source-byte order; whitespace
//! gaps are not represented (the renderer fills them with the
//! default foreground colour).
|
|
|
|
use crate::dsl::grammar::HighlightClass;
|
|
use crate::dsl::walker::context::WalkContext;
|
|
use crate::dsl::walker::lex_helpers::{
|
|
consume_bare_path, consume_flag, consume_ident, consume_number_literal,
|
|
consume_string_literal, skip_whitespace,
|
|
};
|
|
use crate::dsl::walker::outcome::{ByteClass, WalkBound};
|
|
|
|
/// Produce the per-byte highlight classes for `source`.
|
|
///
|
|
/// Defaults to `Mode::Simple`. Callers in advanced-mode UIs
|
|
/// should use [`highlight_runs_in_mode`] so SQL keywords get
|
|
/// matched and highlighted past the entry word (the simple-mode
|
|
/// gate at the dispatcher truncates the walker on advanced-only
|
|
/// commands, ADR-0030 §2).
|
|
#[must_use]
|
|
pub fn highlight_runs(source: &str) -> Vec<ByteClass> {
|
|
highlight_runs_in_mode(source, crate::mode::Mode::Simple)
|
|
}
|
|
|
|
/// Mode-aware [`highlight_runs`] (ADR-0032 §10.6 follow-up).
|
|
///
|
|
/// In `Mode::Advanced` the walker matches every Phase-2 SQL
|
|
/// token, producing the keyword classes the renderer needs to
|
|
/// colour `select` / `from` / `where` / `union` / `case` / etc.
|
|
#[must_use]
|
|
pub fn highlight_runs_in_mode(
|
|
source: &str,
|
|
mode: crate::mode::Mode,
|
|
) -> Vec<ByteClass> {
|
|
let mut ctx = WalkContext::new();
|
|
ctx.mode = mode;
|
|
let (result, _cmd) = super::walk(source, WalkBound::EndOfInput, &mut ctx);
|
|
let mut classes: Vec<ByteClass> = result
|
|
.map(|r| r.per_byte_class)
|
|
.unwrap_or_default();
|
|
|
|
let scan_start = classes.last().map_or(0, |c| c.end);
|
|
scan_remainder(source, scan_start, &mut classes);
|
|
classes
|
|
}
|
|
|
|
/// Byte-shape scan from `start` to end of source, appending each
|
|
/// classified token to `classes`. Whitespace gaps are skipped.
|
|
fn scan_remainder(source: &str, start: usize, classes: &mut Vec<ByteClass>) {
|
|
let bytes = source.as_bytes();
|
|
let mut pos = start;
|
|
while pos < bytes.len() {
|
|
pos = skip_whitespace(source, pos);
|
|
if pos >= bytes.len() {
|
|
break;
|
|
}
|
|
let b = bytes[pos];
|
|
// Identifier first — covers keywords-by-shape, since at
|
|
// the highlight layer we no longer distinguish keyword from
|
|
// identifier without a successful walker match.
|
|
if (b.is_ascii_alphabetic() || b == b'_')
|
|
&& let Some((s, e)) = consume_ident(source, pos)
|
|
{
|
|
classes.push(ByteClass {
|
|
start: s,
|
|
end: e,
|
|
class: HighlightClass::Identifier,
|
|
});
|
|
pos = e;
|
|
continue;
|
|
}
|
|
if b == b'\'' {
|
|
// Quoted string. Unterminated → mark the rest as Error
|
|
// so the user sees the unclosed run highlighted.
|
|
if let Some(((s, e), _)) = consume_string_literal(source, pos) {
|
|
classes.push(ByteClass {
|
|
start: s,
|
|
end: e,
|
|
class: HighlightClass::String,
|
|
});
|
|
pos = e;
|
|
} else {
|
|
classes.push(ByteClass {
|
|
start: pos,
|
|
end: bytes.len(),
|
|
class: HighlightClass::Error,
|
|
});
|
|
pos = bytes.len();
|
|
}
|
|
continue;
|
|
}
|
|
if b == b'-' && bytes.get(pos + 1) == Some(&b'-') {
|
|
// Flag. `--` without a body is BadFlag → Error.
|
|
if let Some((s, e)) = consume_flag(source, pos) {
|
|
classes.push(ByteClass {
|
|
start: s,
|
|
end: e,
|
|
class: HighlightClass::Flag,
|
|
});
|
|
pos = e;
|
|
} else {
|
|
classes.push(ByteClass {
|
|
start: pos,
|
|
end: pos + 2,
|
|
class: HighlightClass::Error,
|
|
});
|
|
pos += 2;
|
|
}
|
|
continue;
|
|
}
|
|
let looks_like_number = b.is_ascii_digit()
|
|
|| (b == b'-'
|
|
&& bytes
|
|
.get(pos + 1)
|
|
.copied()
|
|
.is_some_and(|c| c.is_ascii_digit()));
|
|
if looks_like_number
|
|
&& let Some((s, e)) = consume_number_literal(source, pos)
|
|
{
|
|
classes.push(ByteClass {
|
|
start: s,
|
|
end: e,
|
|
class: HighlightClass::Number,
|
|
});
|
|
pos = e;
|
|
continue;
|
|
}
|
|
if matches!(b, b':' | b'(' | b')' | b',' | b'=' | b'.') {
|
|
classes.push(ByteClass {
|
|
start: pos,
|
|
end: pos + 1,
|
|
class: HighlightClass::Punct,
|
|
});
|
|
pos += 1;
|
|
continue;
|
|
}
|
|
// Bare-path tail (e.g., trailing `frobulate widgets` past
|
|
// a partial command match): only used when we know the
|
|
// remainder isn't structured. Without a grammar context
|
|
// here we conservatively treat as Error so the user sees
|
|
// the unknown-shape byte highlighted.
|
|
//
|
|
// For multi-byte UTF-8 (emoji, unknown unicode) advance
|
|
// one whole codepoint as Error.
|
|
let ch = source[pos..]
|
|
.chars()
|
|
.next()
|
|
.expect("pos < bytes.len() ⇒ at least one char");
|
|
let len = ch.len_utf8();
|
|
// If the char is alphanumeric (unusual at this fall-through
|
|
// — should already have been caught above), classify as
|
|
// Identifier-ish. Otherwise Error.
|
|
let class = if ch.is_ascii_alphanumeric() || ch == '_' {
|
|
HighlightClass::Identifier
|
|
} else if ch.is_whitespace() {
|
|
// Whitespace is filtered above; this branch is unreachable
|
|
// in practice.
|
|
pos += len;
|
|
continue;
|
|
} else {
|
|
HighlightClass::Error
|
|
};
|
|
let _ = consume_bare_path; // silence unused-import lint when not exercised
|
|
classes.push(ByteClass {
|
|
start: pos,
|
|
end: pos + len,
|
|
class,
|
|
});
|
|
pos += len;
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// One highlight run flattened to `(start, end, class)`.
    type Span = (usize, usize, HighlightClass);

    /// Flatten `ByteClass` values into directly comparable triples.
    fn flatten(runs: Vec<ByteClass>) -> Vec<Span> {
        runs.into_iter()
            .map(|run| (run.start, run.end, run.class))
            .collect()
    }

    /// Highlight `input` through the default entry point.
    fn run(input: &str) -> Vec<Span> {
        flatten(highlight_runs(input))
    }

    /// Highlight `input` in advanced mode.
    fn run_advanced(input: &str) -> Vec<Span> {
        flatten(highlight_runs_in_mode(input, crate::mode::Mode::Advanced))
    }

    /// Source text of every advanced-mode span classed as `Keyword`.
    fn advanced_keywords(input: &str) -> Vec<&str> {
        run_advanced(input)
            .into_iter()
            .filter(|(_, _, class)| *class == HighlightClass::Keyword)
            .map(|(start, end, _)| &input[start..end])
            .collect()
    }

    #[test]
    fn empty_input_yields_no_runs() {
        for blank in ["", " "] {
            assert!(highlight_runs(blank).is_empty());
        }
    }

    #[test]
    fn entry_keyword_classified_as_keyword() {
        assert_eq!(run("quit"), [(0, 4, HighlightClass::Keyword)]);
    }

    #[test]
    fn keyword_plus_identifier_via_walker() {
        // The walker handles `show data Customers` end-to-end.
        let expected = [
            (0, 4, HighlightClass::Keyword),
            (5, 9, HighlightClass::Keyword),
            (10, 19, HighlightClass::Identifier),
        ];
        assert_eq!(run("show data Customers"), expected);
    }

    #[test]
    fn unknown_command_word_classified_by_byte_shape() {
        // No walker engagement; the fallback sees an identifier shape.
        assert_eq!(run("frobulate"), [(0, 9, HighlightClass::Identifier)]);
    }

    #[test]
    fn unknown_chars_classified_as_error() {
        assert_eq!(run("$"), [(0, 1, HighlightClass::Error)]);
    }

    #[test]
    fn unterminated_string_classified_as_error_through_to_eof() {
        assert_eq!(run("'oops"), [(0, 5, HighlightClass::Error)]);
    }

    #[test]
    fn string_literal_classified() {
        assert_eq!(run("'hello'"), [(0, 7, HighlightClass::String)]);
    }

    #[test]
    fn flag_classified_via_fallback() {
        // A bare `--all-rows` does not engage the walker.
        assert_eq!(run("--all-rows"), [(0, 10, HighlightClass::Flag)]);
    }

    #[test]
    fn bare_double_dash_classified_as_error() {
        assert_eq!(run("--"), [(0, 2, HighlightClass::Error)]);
    }

    #[test]
    fn number_classified_via_fallback() {
        for (literal, end) in [("42", 2), ("-5", 2), ("3.14", 4)] {
            assert_eq!(run(literal), [(0, end, HighlightClass::Number)]);
        }
    }

    #[test]
    fn punct_classified_via_fallback() {
        // A lone `:` with no walker context around it.
        assert_eq!(run(":"), [(0, 1, HighlightClass::Punct)]);
    }

    #[test]
    fn trailing_tokens_after_partial_walk_are_byte_scanned() {
        // In `quit nonsense` the walker matches `quit`; the trailing
        // `nonsense` is left to the fallback scanner.
        let expected = [
            (0, 4, HighlightClass::Keyword),
            (5, 13, HighlightClass::Identifier),
        ];
        assert_eq!(run("quit nonsense"), expected);
    }

    #[test]
    fn whitespace_gaps_are_not_represented_as_runs() {
        // Two whitespace-separated tokens give exactly two spans; the
        // gap between them is the renderer's job (default fg).
        let runs = run("show table");
        assert_eq!(runs.len(), 2);
        let (first, second) = (runs[0], runs[1]);
        assert_eq!(first.2, HighlightClass::Keyword);
        assert_eq!(second.2, HighlightClass::Keyword);
    }

    #[test]
    fn full_command_walks_with_each_class() {
        // `update T set Name='hi' --all-rows` is covered end-to-end by
        // the walker, so each node's class shows up in the result.
        let classes: Vec<HighlightClass> = highlight_runs("update T set Name='hi' --all-rows")
            .iter()
            .map(|c| c.class)
            .collect();
        for expected in [
            HighlightClass::Keyword,
            HighlightClass::Identifier,
            HighlightClass::String,
            HighlightClass::Punct,
            HighlightClass::Flag,
        ] {
            assert!(classes.contains(&expected));
        }
    }

    #[test]
    fn utf8_unknown_char_advances_one_codepoint() {
        // ✓ occupies 3 bytes in UTF-8; the fallback must emit one
        // 3-byte Error span rather than three 1-byte spans.
        let runs = run("✓");
        assert_eq!(runs.len(), 1);
        let (start, end, class) = runs[0];
        assert_eq!(class, HighlightClass::Error);
        assert_eq!(end - start, "✓".len());
    }

    #[test]
    fn string_with_multi_byte_unicode_classified_as_string() {
        // 'café' is one string literal. There is no `'café'` entry
        // keyword, so the walker stays out and the fallback scanner
        // classifies the whole literal as String.
        let runs = run("'café'");
        assert_eq!(runs.len(), 1);
        let (_, end, class) = runs[0];
        assert_eq!(class, HighlightClass::String);
        assert_eq!(end, "'café'".len());
    }

    // ---- ADR-0030 §8 / ADR-0032 — SQL keyword highlighting ----

    #[test]
    fn sql_select_keywords_classified() {
        // ADR-0030 §8 — in Advanced mode `select` / `from` carry the
        // keyword class. (Simple mode gates SELECT out at the
        // dispatcher, so only the entry word would highlight.)
        let runs = run_advanced("select * from t");
        assert!(
            runs.contains(&(0, 6, HighlightClass::Keyword)),
            "expected `select` keyword span 0..6; got {runs:?}",
        );
        assert!(
            runs.contains(&(9, 13, HighlightClass::Keyword)),
            "expected `from` keyword span 9..13; got {runs:?}",
        );
    }

    #[test]
    fn sql_expression_operators_classified_as_keywords() {
        // ADR-0031 §5: LIKE / BETWEEN / IN / IS / AND / OR / NOT belong
        // to the predicate ladder. The walker matches them as Word
        // nodes, whose highlight class is Keyword.
        let keywords =
            advanced_keywords("select * from t where a like 'x' and b between 1 and 5");
        assert!(keywords.contains(&"like"), "no `like`; got {keywords:?}");
        assert!(keywords.contains(&"and"), "no `and`; got {keywords:?}");
        assert!(
            keywords.contains(&"between"),
            "no `between`; got {keywords:?}",
        );
    }

    #[test]
    fn sql_case_expression_keywords_classified() {
        let keywords =
            advanced_keywords("select case when a = 1 then 'one' else 'other' end from t");
        for kw in ["case", "when", "then", "else", "end"] {
            assert!(
                keywords.contains(&kw),
                "missing `{kw}` keyword; got {keywords:?}",
            );
        }
    }

    #[test]
    fn sql_dml_keywords_classified() {
        // ADR-0030 §8 / ADR-0033 — DML entry words and clause keywords
        // (INSERT / INTO / VALUES / ON / CONFLICT / RETURNING /
        // UPDATE / SET / DELETE / FROM) all receive the Keyword class
        // in Advanced mode. 3k cross-cut: the ambient highlighter
        // covers the DML surface, not just SELECT.
        let insert = advanced_keywords(
            "insert into t (a) values (1) on conflict (a) do update set a = excluded.a returning a",
        );
        for kw in ["insert", "into", "values", "on", "conflict", "do", "update", "set", "returning"] {
            assert!(insert.contains(&kw), "INSERT/UPSERT: missing `{kw}`; got {insert:?}");
        }

        let update = advanced_keywords("update t set a = 1 where id = 2 returning a");
        for kw in ["update", "set", "where", "returning"] {
            assert!(update.contains(&kw), "UPDATE: missing `{kw}`; got {update:?}");
        }

        let delete = advanced_keywords("delete from t where id = 1 returning *");
        for kw in ["delete", "from", "where", "returning"] {
            assert!(delete.contains(&kw), "DELETE: missing `{kw}`; got {delete:?}");
        }
    }
}
|