ADR-0020 implementation: lexer + parser refactor over &[Token]

New `dsl::keyword` module: macro-driven Keyword and Punct
enums (single source of truth — enum, lex-side mapping,
catalog-key derivation generated from one declaration).

New `dsl::lexer` module: tokenizer producing a span-tagged
Vec<Token>. Always succeeds; lex-shape errors (unterminated
string, unrecognised character, malformed flag) are embedded
as TokenKind::Error tokens so I4 can highlight invalid input
uniformly.

Parser refactored from `Parser<'a, &'a str, ...>` to
`Parser<'a, &'a [Token], ...>`. All 50+ existing parser
unit tests ported and passing; aggregation across `choice`
now works as designed (e.g. `add` → "expected `1` or
`column`", `drop` → "expected `column`, `relationship`, or
`table`", `frobulate Customers` lists all ten command-entry
keywords). Custom `try_map` content errors (unknown type,
mutually-exclusive flags, "with pk needs at least one
column", "specified twice") preserved.

`replay` bare-path UX kept via the source-slice special
case from ADR-0020 §6 (~10 lines, documented inline).

Tests: 650 passing, 0 failing, 1 ignored (610 baseline + 40
new lexer/keyword tests). Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-10 09:22:13 +00:00
parent 857ee753f2
commit fdaf7e3e0e
4 changed files with 1353 additions and 527 deletions
+287
View File
@@ -0,0 +1,287 @@
//! Keyword and punctuation tables for the DSL lexer (ADR-0020 §2a).
//!
//! `define_keywords!` and `define_punct!` are the single source
//! of truth from which the enums, the lex-side string→variant
//! mappings, and the `parse.token.*` catalog-key derivations
//! all come. Adding a new keyword is one line in the
//! `define_keywords!` invocation plus one line in
//! `src/friendly/strings/en-US.yaml` under
//! `parse.token.keyword.<lit>` (the catalog validator catches a
//! missing entry at test time per ADR-0021 §7). Adding a new
//! punctuation kind is symmetric.
// Generates the `Keyword` enum plus everything derived from it (the
// `ALL` table, the word→variant lookup, the canonical literal, the
// catalog key and the `Display` impl) from one list of
// `Variant => "literal"` pairs, so the pieces cannot drift apart.
macro_rules! define_keywords {
    ( $( $variant:ident => $literal:literal ),+ $(,)? ) => {
        /// Reserved word of the DSL, one variant per declared literal.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub enum Keyword {
            $( $variant ),+
        }

        impl Keyword {
            /// Every variant paired with its canonical lowercase
            /// literal. Iteration order is the macro
            /// declaration order.
            pub const ALL: &'static [(Keyword, &'static str)] = &[
                $( (Keyword::$variant, $literal) ),+
            ];

            /// Lex-side mapping. Case-insensitive (ASCII) per
            /// ADR-0009. Returns `None` for any input that isn't a
            /// reserved word — the lexer then keeps the input as
            /// `TokenKind::Identifier`.
            #[must_use]
            pub fn from_word(s: &str) -> Option<Self> {
                Self::ALL
                    .iter()
                    .find(|(_, lit)| s.eq_ignore_ascii_case(lit))
                    .map(|(kw, _)| *kw)
            }

            /// Canonical lowercase literal for this variant.
            ///
            /// Implemented as an exhaustive `match` generated from
            /// the same declaration as the enum, so coverage is
            /// checked by the compiler and no runtime lookup (or
            /// panic path) is needed.
            #[must_use]
            pub fn as_str(self) -> &'static str {
                match self {
                    $( Keyword::$variant => $literal ),+
                }
            }

            /// Catalog key under `parse.token.keyword.*`
            /// (ADR-0021 §4): the fixed prefix followed by
            /// [`Keyword::as_str`].
            #[must_use]
            pub fn catalog_token_key(self) -> String {
                format!("parse.token.keyword.{}", self.as_str())
            }
        }

        /// Displays the canonical lowercase literal.
        impl std::fmt::Display for Keyword {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                f.write_str(self.as_str())
            }
        }
    };
}
// The closed set of reserved words. Each `Variant => "literal"` line
// becomes one `Keyword` variant and one `Keyword::ALL` entry, in this
// order; the literal is the canonical lowercase spelling matched
// case-insensitively by `Keyword::from_word`.
define_keywords! {
// Commands (entry keywords).
Create => "create",
Drop => "drop",
Add => "add",
Rename => "rename",
Change => "change",
Show => "show",
Insert => "insert",
Update => "update",
Delete => "delete",
Replay => "replay",
// Object words.
Table => "table",
Column => "column",
Data => "data",
Relationship => "relationship",
Pk => "pk",
// Connectives.
With => "with",
From => "from",
To => "to",
Into => "into",
As => "as",
In => "in",
On => "on",
Set => "set",
Where => "where",
Values => "values",
// Value literals.
Null => "null",
True => "true",
False => "false",
// Referential-action vocabulary (ADR-0013). `set` and `null`
// re-use the connective and value-literal keywords above —
// `set null` is the parser's job to recognise as a sequence,
// not the lexer's.
Cascade => "cascade",
Restrict => "restrict",
Action => "action",
No => "no",
}
// Generates the `Punct` enum plus everything derived from it (the
// `ALL` table, the char→variant lookup, the variant→char mapping, the
// catalog key and the `Display` impl) from one list of
// `Variant => ('c', "name")` entries.
macro_rules! define_punct {
    ( $( $variant:ident => ($literal:literal, $name:literal) ),+ $(,)? ) => {
        /// One-character punctuation of the DSL, one variant per
        /// declared character.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub enum Punct {
            $( $variant ),+
        }

        impl Punct {
            /// Every variant paired with its character and
            /// snake-case name suffix, in declaration order.
            pub const ALL: &'static [(Punct, char, &'static str)] = &[
                $( (Punct::$variant, $literal, $name) ),+
            ];

            /// Lex-side mapping. `None` for any character that
            /// isn't punctuation — the lexer then either
            /// classifies it as part of another token or
            /// emits an `Error(LexError::UnknownChar)`.
            #[must_use]
            pub fn from_char(c: char) -> Option<Self> {
                Self::ALL
                    .iter()
                    .find(|(_, lit, _)| *lit == c)
                    .map(|(p, _, _)| *p)
            }

            /// The character this variant stands for.
            ///
            /// Exhaustive `match` generated from the declaration:
            /// compiler-checked coverage, no table scan, no panic
            /// path.
            #[must_use]
            pub fn as_char(self) -> char {
                match self {
                    $( Punct::$variant => $literal ),+
                }
            }

            /// Catalog key under `parse.token.punct.*`
            /// (ADR-0021 §4): the fixed prefix followed by the
            /// variant's declared snake-case name.
            #[must_use]
            pub fn catalog_token_key(self) -> String {
                let suffix: &'static str = match self {
                    $( Punct::$variant => $name ),+
                };
                format!("parse.token.punct.{suffix}")
            }
        }

        /// Displays the punctuation character itself.
        impl std::fmt::Display for Punct {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                use std::fmt::Write;
                f.write_char(self.as_char())
            }
        }
    };
}
// The closed set of punctuation characters. Each entry is
// `Variant => ('char', "name")`: the char is what `Punct::from_char`
// matches, the name is the suffix used by `Punct::catalog_token_key`.
define_punct! {
Colon => (':', "colon"),
OpenParen => ('(', "open_paren"),
CloseParen => (')', "close_paren"),
Comma => (',', "comma"),
Equals => ('=', "equals"),
Dot => ('.', "dot"),
}
// Unit tests for the generated `Keyword` and `Punct` tables.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// Every `ALL` entry maps literal → variant and variant → literal.
#[test]
fn keyword_from_word_round_trips_every_variant() {
for &(kw, lit) in Keyword::ALL {
assert_eq!(Keyword::from_word(lit), Some(kw));
assert_eq!(kw.as_str(), lit);
}
}
// Upper, title and mixed case all resolve to the same variant.
#[test]
fn keyword_from_word_is_case_insensitive() {
assert_eq!(Keyword::from_word("CREATE"), Some(Keyword::Create));
assert_eq!(Keyword::from_word("Create"), Some(Keyword::Create));
assert_eq!(Keyword::from_word("cReAtE"), Some(Keyword::Create));
}
// Words outside the declared set are not keywords.
#[test]
fn keyword_from_word_returns_none_for_non_keyword() {
assert_eq!(Keyword::from_word("Customers"), None);
assert_eq!(Keyword::from_word("frobulate"), None);
// Type-name candidates explicitly stay non-keyword
// (ADR-0020 §2): they remain identifiers that the
// parser validates via `Type::from_str`.
assert_eq!(Keyword::from_word("text"), None);
assert_eq!(Keyword::from_word("int"), None);
assert_eq!(Keyword::from_word("varchar"), None);
}
// Two variants sharing a literal would make `from_word` ambiguous;
// sort + dedup must not shrink the list.
#[test]
fn keyword_literals_are_unique() {
let mut lits: Vec<&str> = Keyword::ALL.iter().map(|(_, lit)| *lit).collect();
lits.sort_unstable();
let count_before = lits.len();
lits.dedup();
assert_eq!(lits.len(), count_before, "keyword literals must be unique");
}
// The catalog key is the fixed prefix plus the canonical literal.
#[test]
fn keyword_catalog_token_key_format() {
assert_eq!(
Keyword::Create.catalog_token_key(),
"parse.token.keyword.create"
);
assert_eq!(
Keyword::Pk.catalog_token_key(),
"parse.token.keyword.pk"
);
}
// `Display` prints the canonical lowercase literal.
#[test]
fn keyword_display_uses_canonical_lowercase() {
assert_eq!(format!("{}", Keyword::Create), "create");
assert_eq!(format!("{}", Keyword::Relationship), "relationship");
}
// Every `Punct::ALL` entry maps char → variant and variant → char.
#[test]
fn punct_round_trips_every_variant() {
for &(p, c, _) in Punct::ALL {
assert_eq!(Punct::from_char(c), Some(p));
assert_eq!(p.as_char(), c);
}
}
// Letters, whitespace, `-` and `'` are not in the punctuation set.
#[test]
fn punct_from_char_returns_none_for_non_punct() {
assert_eq!(Punct::from_char('a'), None);
assert_eq!(Punct::from_char(' '), None);
assert_eq!(Punct::from_char('-'), None);
assert_eq!(Punct::from_char('\''), None);
}
// Two variants sharing a character would make `from_char` ambiguous.
#[test]
fn punct_chars_are_unique() {
let mut chars: Vec<char> = Punct::ALL.iter().map(|(_, c, _)| *c).collect();
chars.sort_unstable();
let count_before = chars.len();
chars.dedup();
assert_eq!(chars.len(), count_before, "punct chars must be unique");
}
// The catalog key is the fixed prefix plus the declared name suffix.
#[test]
fn punct_catalog_token_key_format() {
assert_eq!(
Punct::Colon.catalog_token_key(),
"parse.token.punct.colon"
);
assert_eq!(
Punct::OpenParen.catalog_token_key(),
"parse.token.punct.open_paren"
);
}
#[test]
fn every_command_entry_keyword_is_declared() {
// Sanity: the ten command entry keywords from
// ADR-0009/0014/0006 must all be reachable. If a future
// ADR adds a command, this list grows alongside it.
for cmd in [
"create", "drop", "add", "rename", "change", "show",
"insert", "update", "delete", "replay",
] {
assert!(
Keyword::from_word(cmd).is_some(),
"command entry keyword `{cmd}` must be declared",
);
}
}
}
+598
View File
@@ -0,0 +1,598 @@
//! DSL lexer (ADR-0020).
//!
//! Pure tokenizer: takes the source `&str` and produces a
//! `Vec<Token>` with byte-offset spans. Lex-shape errors
//! (unterminated string, unrecognised character, malformed
//! `--` flag) surface as `TokenKind::Error(_)` tokens — not a
//! `Result` variant. The parser sees `Error` tokens and raises
//! a structural error at that point; I4 (syntax highlighting,
//! future) walks the same token stream and renders Error tokens
//! with an error glyph. ADR-0020 §2 explains the rationale for
//! the in-stream error model.
use crate::dsl::keyword::{Keyword, Punct};
/// Byte-offset range `(start, end)` into the lexed source: `start` is
/// the token's first byte and `end` is one past its last byte.
pub type Span = (usize, usize);
/// One lexed token: what was recognised plus where it sits in the
/// source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
/// Classification of the token, carrying any payload (keyword,
/// text, punctuation or lex error).
pub kind: TokenKind,
/// Byte offsets `(start, end)` of the token within the input given
/// to `lex`.
pub span: Span,
}
/// The kinds of token the lexer can emit, including in-stream errors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
/// Reserved word recognised against the closed `Keyword`
/// set. Case-insensitive at lex time per ADR-0009.
Keyword(Keyword),
/// A word that starts with an ASCII letter or `_`, continues
/// with ASCII letters, digits or `_`, and did not match a
/// keyword. Case is preserved per ADR-0009.
Identifier(String),
/// Numeric literal, raw text: ASCII digits with an optional
/// `.digits` fractional part. The parser is responsible for
/// any further validation (e.g. `Value::Number` storage). A
/// leading `-` is included when present and immediately
/// adjacent to a digit (no whitespace).
Number(String),
/// Single-quoted string literal, with the `''` escape
/// processed (so `'don''t'` produces `"don't"`). The span
/// covers the surrounding quotes; the payload does not.
StringLiteral(String),
/// One-character punctuation per the closed `Punct` set.
Punct(Punct),
/// `--name` flag. The payload is the part after `--`; the span
/// includes the dashes.
Flag(String),
/// Lex-time shape error. The parser surfaces this with a
/// catalog-driven message (ADR-0021 §4
/// `parse.token.error.*`).
Error(LexError),
}
/// Shape errors the lexer can detect; carried in-stream inside
/// `TokenKind::Error` rather than returned as a `Result`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
/// `'` opened a string literal that ran to end of input
/// without a closing `'`. Span covers the opening quote
/// through end-of-input.
UnterminatedString,
/// Character not recognised at this position. Span covers
/// the single character (UTF-8 width respected).
UnknownChar(char),
/// `--` not followed by a flag name. Emitted whenever the
/// character right after `--` cannot start a flag name — for
/// example `--` at end of input or `--` followed by
/// whitespace. Kept as a distinct kind so the renderer can
/// produce a sharper hint than "unknown character".
BadFlag,
}
/// Tokenize an input string.
///
/// Always succeeds in producing a `Vec<Token>` — lex-shape
/// errors are embedded as `TokenKind::Error` tokens. Whitespace
/// between tokens is silently skipped (ADR-0009: liberal
/// whitespace).
#[must_use]
pub fn lex(input: &str) -> Vec<Token> {
let mut tokens = Vec::new();
let bytes = input.as_bytes();
let mut pos = 0;
while pos < bytes.len() {
let b = bytes[pos];
if b.is_ascii_whitespace() {
pos += 1;
continue;
}
if b.is_ascii_alphabetic() || b == b'_' {
let (tok, next) = lex_identifier(input, pos);
tokens.push(tok);
pos = next;
continue;
}
if b.is_ascii_digit() {
let (tok, next) = lex_number(input, pos, false);
tokens.push(tok);
pos = next;
continue;
}
if b == b'-' {
// `--name` flag, `-<digit>` negative-number literal,
// or a bare `-` (UnknownChar — no Minus variant in
// the current grammar).
let next_b = bytes.get(pos + 1).copied();
if next_b == Some(b'-') {
let (tok, next) = lex_flag(input, pos);
tokens.push(tok);
pos = next;
continue;
}
if next_b.is_some_and(|c| c.is_ascii_digit()) {
let (tok, next) = lex_number(input, pos, true);
tokens.push(tok);
pos = next;
continue;
}
tokens.push(Token {
kind: TokenKind::Error(LexError::UnknownChar('-')),
span: (pos, pos + 1),
});
pos += 1;
continue;
}
if b == b'\'' {
let (tok, next) = lex_string(input, pos);
tokens.push(tok);
pos = next;
continue;
}
if let Some(p) = Punct::from_char(b as char) {
tokens.push(Token {
kind: TokenKind::Punct(p),
span: (pos, pos + 1),
});
pos += 1;
continue;
}
// Anything else: read one whole char (UTF-8 safe) and
// emit an UnknownChar error token covering its bytes.
let ch = input[pos..]
.chars()
.next()
.expect("pos < bytes.len() ⇒ at least one char");
let len = ch.len_utf8();
tokens.push(Token {
kind: TokenKind::Error(LexError::UnknownChar(ch)),
span: (pos, pos + len),
});
pos += len;
}
tokens
}
/// Lex the word beginning at byte `start` and classify it as a
/// keyword or an identifier.
///
/// The caller must already have checked that the byte at `start` is
/// an ASCII letter or `_`. Returns the token and the offset of the
/// first byte after the word.
fn lex_identifier(input: &str, start: usize) -> (Token, usize) {
    // Only the tail needs scanning; the first byte was vetted upstream.
    let tail = &input.as_bytes()[start + 1..];
    let tail_len = tail
        .iter()
        .position(|b| !(b.is_ascii_alphanumeric() || *b == b'_'))
        .unwrap_or(tail.len());
    let end = start + 1 + tail_len;

    let word = &input[start..end];
    let kind = match Keyword::from_word(word) {
        Some(kw) => TokenKind::Keyword(kw),
        // Not reserved: keep the text exactly as written.
        None => TokenKind::Identifier(word.to_string()),
    };

    let token = Token {
        kind,
        span: (start, end),
    };
    (token, end)
}
/// Lex a numeric literal beginning at byte `start`.
///
/// `leading_minus` tells the function that the byte at `start` is a
/// `-` belonging to the number (the caller has checked a digit
/// follows). The token text is the raw source slice, sign included.
/// Returns the token and the offset of the first byte after it.
fn lex_number(input: &str, start: usize, leading_minus: bool) -> (Token, usize) {
    let bytes = input.as_bytes();
    // Offset of the first non-digit byte at or after `at`.
    let digits_end = |mut at: usize| {
        while bytes.get(at).is_some_and(u8::is_ascii_digit) {
            at += 1;
        }
        at
    };

    let integer_start = if leading_minus { start + 1 } else { start };
    let mut end = digits_end(integer_start);

    // A fractional part is taken only when `.` is directly followed
    // by at least one digit. Otherwise the `.` is left in the input
    // and lexes on its own as `Punct(Dot)` (so `1.` is a number then
    // a dot).
    let has_fraction =
        bytes.get(end) == Some(&b'.') && bytes.get(end + 1).is_some_and(u8::is_ascii_digit);
    if has_fraction {
        end = digits_end(end + 1);
    }

    let token = Token {
        kind: TokenKind::Number(input[start..end].to_string()),
        span: (start, end),
    };
    (token, end)
}
/// Lex a single-quoted string literal whose opening quote is at byte
/// `start`.
///
/// Inside the literal `''` stands for one `'`. On success the token
/// is a `StringLiteral` holding the unescaped text, with a span that
/// includes both quotes. If input ends before a closing quote, the
/// token is `Error(UnterminatedString)` spanning from the opening
/// quote to end of input. Returns the token and the offset at which
/// lexing resumes.
fn lex_string(input: &str, start: usize) -> (Token, usize) {
    let bytes = input.as_bytes();
    debug_assert_eq!(bytes[start], b'\'');

    let mut payload = String::new();
    let mut cursor = start + 1;
    loop {
        match (bytes.get(cursor), bytes.get(cursor + 1)) {
            // Ran off the end without a closing quote.
            (None, _) => break,
            // Doubled quote: one literal `'` in the payload.
            (Some(b'\''), Some(b'\'')) => {
                payload.push('\'');
                cursor += 2;
            }
            // Lone quote closes the literal.
            (Some(b'\''), _) => {
                let end = cursor + 1;
                let token = Token {
                    kind: TokenKind::StringLiteral(payload),
                    span: (start, end),
                };
                return (token, end);
            }
            // Ordinary content: copy one whole char, whatever its
            // UTF-8 width.
            (Some(_), _) => {
                let ch = input[cursor..]
                    .chars()
                    .next()
                    .expect("i < bytes.len() ⇒ at least one char");
                payload.push(ch);
                cursor += ch.len_utf8();
            }
        }
    }

    let token = Token {
        kind: TokenKind::Error(LexError::UnterminatedString),
        span: (start, bytes.len()),
    };
    (token, bytes.len())
}
fn lex_flag(input: &str, start: usize) -> (Token, usize) {
let bytes = input.as_bytes();
debug_assert!(bytes[start..].starts_with(b"--"));
let mut end = start + 2;
while end < bytes.len() {
let b = bytes[end];
if b.is_ascii_alphanumeric() || b == b'-' || b == b'_' {
end += 1;
} else {
break;
}
}
if end == start + 2 {
return (
Token {
kind: TokenKind::Error(LexError::BadFlag),
span: (start, end),
},
end,
);
}
(
Token {
kind: TokenKind::Flag(input[start + 2..end].to_string()),
span: (start, end),
},
end,
)
}
// Unit tests for the lexer: one behaviour per test, checked through
// the public `lex` entry point.
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Lex `input` and keep only the token kinds (spans dropped).
    fn kinds(input: &str) -> Vec<TokenKind> {
        lex(input).into_iter().map(|t| t.kind).collect()
    }

    #[test]
    fn empty_input_produces_no_tokens() {
        assert_eq!(lex(""), Vec::<Token>::new());
    }

    #[test]
    fn whitespace_only_produces_no_tokens() {
        assert_eq!(lex(" "), Vec::<Token>::new());
        assert_eq!(lex("\t\n \r"), Vec::<Token>::new());
    }

    #[test]
    fn single_keyword_lexes_to_keyword_variant() {
        assert_eq!(
            kinds("create"),
            vec![TokenKind::Keyword(Keyword::Create)],
        );
    }

    #[test]
    fn keyword_match_is_case_insensitive() {
        assert_eq!(
            kinds("CREATE"),
            vec![TokenKind::Keyword(Keyword::Create)],
        );
        assert_eq!(
            kinds("CrEaTe"),
            vec![TokenKind::Keyword(Keyword::Create)],
        );
    }

    #[test]
    fn non_keyword_word_lexes_to_identifier_preserving_case() {
        assert_eq!(
            kinds("Customers"),
            vec![TokenKind::Identifier("Customers".to_string())],
        );
        assert_eq!(
            kinds("customer_v2"),
            vec![TokenKind::Identifier("customer_v2".to_string())],
        );
        // Type names stay as identifiers (ADR-0020 §2).
        assert_eq!(
            kinds("text"),
            vec![TokenKind::Identifier("text".to_string())],
        );
        assert_eq!(
            kinds("varchar"),
            vec![TokenKind::Identifier("varchar".to_string())],
        );
    }

    #[test]
    fn identifier_starts_with_letter_or_underscore_only() {
        // A bare digit lexes as a number, not the start of an
        // identifier. The parser then rejects it where an
        // identifier was expected — this behaviour matches the
        // pre-lexer parser.
        assert_eq!(
            kinds("1Customers"),
            vec![
                TokenKind::Number("1".to_string()),
                TokenKind::Identifier("Customers".to_string()),
            ],
        );
    }

    #[test]
    fn positive_integer_lexes_as_number() {
        assert_eq!(kinds("42"), vec![TokenKind::Number("42".to_string())]);
    }

    #[test]
    fn negative_integer_lexes_with_sign_attached() {
        assert_eq!(kinds("-5"), vec![TokenKind::Number("-5".to_string())]);
    }

    #[test]
    fn fractional_number_lexes_as_one_token() {
        assert_eq!(
            kinds("3.14"),
            vec![TokenKind::Number("3.14".to_string())],
        );
        assert_eq!(
            kinds("-3.14"),
            vec![TokenKind::Number("-3.14".to_string())],
        );
    }

    #[test]
    fn trailing_dot_without_digits_does_not_attach() {
        // `1.` lexes as Number("1") then Punct(Dot). The parser
        // can decide what (if anything) that combination means.
        assert_eq!(
            kinds("1."),
            vec![
                TokenKind::Number("1".to_string()),
                TokenKind::Punct(Punct::Dot),
            ],
        );
    }

    #[test]
    fn dot_inside_qualified_name_lexes_as_punct() {
        // `Customers.id` is identifier, dot, identifier — the
        // parser composes these for `<Table>.<Col>` references.
        assert_eq!(
            kinds("Customers.id"),
            vec![
                TokenKind::Identifier("Customers".to_string()),
                TokenKind::Punct(Punct::Dot),
                TokenKind::Identifier("id".to_string()),
            ],
        );
    }

    #[test]
    fn bare_minus_lexes_as_unknown_char() {
        assert_eq!(
            kinds("-"),
            vec![TokenKind::Error(LexError::UnknownChar('-'))],
        );
    }

    #[test]
    fn string_literal_lexes_with_escape_processed() {
        assert_eq!(
            kinds("'hello'"),
            vec![TokenKind::StringLiteral("hello".to_string())],
        );
        assert_eq!(
            kinds("'don''t'"),
            vec![TokenKind::StringLiteral("don't".to_string())],
        );
    }

    #[test]
    fn empty_string_literal_lexes_to_empty_payload() {
        assert_eq!(
            kinds("''"),
            vec![TokenKind::StringLiteral(String::new())],
        );
    }

    #[test]
    fn string_literal_preserves_internal_whitespace() {
        assert_eq!(
            kinds("'a b\tc'"),
            vec![TokenKind::StringLiteral("a b\tc".to_string())],
        );
    }

    #[test]
    fn unterminated_string_emits_error_token() {
        assert_eq!(
            kinds("'oops"),
            vec![TokenKind::Error(LexError::UnterminatedString)],
        );
    }

    #[test]
    fn string_literal_with_multi_byte_unicode_is_safe() {
        let toks = lex("'café'");
        assert_eq!(toks.len(), 1);
        assert_eq!(
            toks[0].kind,
            TokenKind::StringLiteral("café".to_string()),
        );
        // Span covers all bytes including the multi-byte é.
        assert_eq!(toks[0].span, (0, "'café'".len()));
    }

    #[test]
    fn each_punct_lexes_to_its_variant() {
        for &(p, c, _) in Punct::ALL {
            assert_eq!(
                kinds(&c.to_string()),
                vec![TokenKind::Punct(p)],
                "lexing `{c}`",
            );
        }
    }

    #[test]
    fn flag_lexes_with_payload_minus_dashes() {
        assert_eq!(
            kinds("--all-rows"),
            vec![TokenKind::Flag("all-rows".to_string())],
        );
        assert_eq!(
            kinds("--create-fk"),
            vec![TokenKind::Flag("create-fk".to_string())],
        );
        assert_eq!(
            kinds("--force-conversion"),
            vec![TokenKind::Flag("force-conversion".to_string())],
        );
    }

    #[test]
    fn bare_double_dash_emits_bad_flag_error() {
        assert_eq!(kinds("--"), vec![TokenKind::Error(LexError::BadFlag)]);
    }

    #[test]
    fn unknown_character_emits_error_token() {
        assert_eq!(
            kinds("$"),
            vec![TokenKind::Error(LexError::UnknownChar('$'))],
        );
    }

    #[test]
    fn unknown_character_with_multi_byte_does_not_panic() {
        // A multi-byte (3 bytes in UTF-8) symbol as an unknown
        // char — the span must respect its UTF-8 width. The input
        // must be the same character the assertion below expects.
        let toks = lex("✓");
        assert_eq!(toks.len(), 1);
        assert!(matches!(
            toks[0].kind,
            TokenKind::Error(LexError::UnknownChar('✓'))
        ));
        assert_eq!(toks[0].span, (0, "✓".len()));
    }

    #[test]
    fn whitespace_separates_otherwise_adjacent_tokens() {
        assert_eq!(
            kinds("create table"),
            vec![
                TokenKind::Keyword(Keyword::Create),
                TokenKind::Keyword(Keyword::Table),
            ],
        );
    }

    #[test]
    fn create_table_full_command_lexes_to_expected_sequence() {
        assert_eq!(
            kinds("create table Customers with pk id:int"),
            vec![
                TokenKind::Keyword(Keyword::Create),
                TokenKind::Keyword(Keyword::Table),
                TokenKind::Identifier("Customers".to_string()),
                TokenKind::Keyword(Keyword::With),
                TokenKind::Keyword(Keyword::Pk),
                TokenKind::Identifier("id".to_string()),
                TokenKind::Punct(Punct::Colon),
                TokenKind::Identifier("int".to_string()),
            ],
        );
    }

    #[test]
    fn one_to_n_cardinality_lexes_as_number_colon_identifier() {
        assert_eq!(
            kinds("1:n"),
            vec![
                TokenKind::Number("1".to_string()),
                TokenKind::Punct(Punct::Colon),
                TokenKind::Identifier("n".to_string()),
            ],
        );
    }

    #[test]
    fn insert_with_value_list_lexes_correctly() {
        assert_eq!(
            kinds("insert into T values (1, 'hi', null)"),
            vec![
                TokenKind::Keyword(Keyword::Insert),
                TokenKind::Keyword(Keyword::Into),
                TokenKind::Identifier("T".to_string()),
                TokenKind::Keyword(Keyword::Values),
                TokenKind::Punct(Punct::OpenParen),
                TokenKind::Number("1".to_string()),
                TokenKind::Punct(Punct::Comma),
                TokenKind::StringLiteral("hi".to_string()),
                TokenKind::Punct(Punct::Comma),
                TokenKind::Keyword(Keyword::Null),
                TokenKind::Punct(Punct::CloseParen),
            ],
        );
    }

    #[test]
    fn spans_are_byte_exact_for_simple_input() {
        let toks = lex("create table");
        assert_eq!(toks.len(), 2);
        assert_eq!(toks[0].span, (0, "create".len()));
        assert_eq!(toks[1].span, ("create ".len(), "create table".len()));
    }

    #[test]
    fn trailing_whitespace_is_stripped() {
        assert_eq!(
            kinds("create "),
            vec![TokenKind::Keyword(Keyword::Create)],
        );
    }

    #[test]
    fn error_tokens_appear_in_stream_alongside_valid_tokens() {
        // The lexer keeps producing tokens after an error; the
        // parser will reject the Error token at whatever point
        // it tries to consume it.
        assert_eq!(
            kinds("create $ table"),
            vec![
                TokenKind::Keyword(Keyword::Create),
                TokenKind::Error(LexError::UnknownChar('$')),
                TokenKind::Keyword(Keyword::Table),
            ],
        );
    }
}
+2
View File
@@ -11,6 +11,8 @@
pub mod action;
pub mod command;
pub mod keyword;
pub mod lexer;
pub mod parser;
pub mod shortid;
pub mod types;
+465 -526
View File
File diff suppressed because it is too large Load Diff