From 266b4c2ef4ad441164b72e1b6f166aaf9b8f283a Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 15 May 2026 08:33:59 +0000 Subject: [PATCH] ADR-0024 Phase F (full) step 3: delete legacy parser modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the last consumers of `dsl::lexer`, `dsl::keyword`, and `dsl::ident_slot`, then deletes the modules. - `Theme::token_color(&TokenKind)` deleted along with its test; `Theme::highlight_class_color(HighlightClass)` is the sole highlight-colour mapper (the walker's `per_byte_class` feeds it directly). - `IdentSource` (`dsl::grammar`) absorbs the schema-list / expected-label / round-trip semantics that previously lived on `IdentSlot`. Adds `completes_from_schema`, `expected_label`, and `from_expected_label` methods. The walker's `Expectation::Ident { source }` and the schema-lookup request on the database worker now share one enum. - `SchemaCache::for_slot(IdentSlot)` → `for_source(IdentSource)`. - `Database::list_names_for` and the `Request::ListNamesFor` worker variant take `IdentSource`. Internal tables and column / relationship lookups dispatch on the same enum. - `InvalidIdent.slot: IdentSlot` → `InvalidIdent.source: IdentSource`. The `invalid_ident_at_cursor` rendering branch in `input_render.rs::ambient_hint` updates accordingly. - Completion's keyword filter (`Keyword::from_word`) becomes "backticked items whose payload is all ASCII alphabetic" — punct and digit literals still surface through their own candidate sources (composite-literal, flag, schema-ident); the alphabetic filter excludes them from the keyword bucket. - `friendly::keys::tests::keyword_and_punct_have_complete_token_vocabulary` is dropped. It cross-checked `Keyword::ALL` / `Punct::ALL` against catalog entries; both enums are gone. 
The `parse.token.keyword.*` / `parse.token.punct.*` catalog entries themselves survive for one more commit (catalog cleanup, ADR-0024 §cleanup-pass); the `keys_validate_against_catalog` test still pins them. - Modules deleted: `src/dsl/lexer.rs`, `src/dsl/keyword.rs`, `src/dsl/ident_slot.rs`. Tests: 806 passing, 0 failing, 1 ignored. The drop from 852 reflects the removed module-internal tests (~32 lexer, 7 keyword, 4 ident_slot, 1 theme token_color, 1 friendly keys keyword/punct), and is the expected outcome. Clippy clean with `nursery` lints + `-D warnings`. --- src/completion.rs | 61 +++-- src/db.rs | 61 ++--- src/dsl/grammar/mod.rs | 70 ++++- src/dsl/ident_slot.rs | 140 ---------- src/dsl/keyword.rs | 311 --------------------- src/dsl/lexer.rs | 598 ----------------------------------------- src/dsl/mod.rs | 3 - src/friendly/keys.rs | 43 +-- src/input_render.rs | 19 +- src/runtime.rs | 11 +- src/theme.rs | 72 +---- 11 files changed, 153 insertions(+), 1236 deletions(-) delete mode 100644 src/dsl/ident_slot.rs delete mode 100644 src/dsl/keyword.rs delete mode 100644 src/dsl/lexer.rs diff --git a/src/completion.rs b/src/completion.rs index 7ea63cf..d38b376 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -14,8 +14,7 @@ //! The cycling memo (`LastCompletion` on `App`) lives in //! `app.rs`; this module owns the candidate computation. -use crate::dsl::ident_slot::IdentSlot; -use crate::dsl::keyword::Keyword; +use crate::dsl::grammar::IdentSource; use crate::dsl::types::Type; use crate::dsl::{ParseError, parse_command}; @@ -53,15 +52,15 @@ pub struct SchemaCache { impl SchemaCache { /// Lookup the candidate list for an identifier slot. - /// `NewName` always returns `&[]` — the user invents - /// these names. + /// Sources that don't read from the schema (`NewName`, + /// `Types`, `Free`) return `&[]`. 
#[must_use] - pub fn for_slot(&self, slot: IdentSlot) -> &[String] { - match slot { - IdentSlot::NewName => &[], - IdentSlot::TableName => &self.tables, - IdentSlot::Column => &self.columns, - IdentSlot::RelationshipName => &self.relationships, + pub fn for_source(&self, source: IdentSource) -> &[String] { + match source { + IdentSource::Tables => &self.tables, + IdentSource::Columns => &self.columns, + IdentSource::Relationships => &self.relationships, + IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], } } } @@ -110,7 +109,7 @@ pub struct Completion { /// bare keywords (excluding punctuation and descriptive /// labels per ADR-0022 §10). /// - **Schema identifiers**: when the parser's expected-set -/// includes an `IdentSlot::expected_label()`, the matching +/// includes an `IdentSource::expected_label()`, the matching /// schema list from `cache` is added (skipping the `NewName` /// slot — the user invents those). /// @@ -172,7 +171,13 @@ pub fn candidates_at_cursor( let mut keywords: Vec = expected .iter() .filter_map(|item| strip_backticks(item)) - .filter_map(|name| Keyword::from_word(name).map(|_| name.to_string())) + // Backticked items are walker `Expectation::Word`s or + // `Expectation::Literal`s. Keywords are the + // alphabetic-only ones; punct (`,`, `=`) and digit + // literals (`1`) live in the same expected-set but + // surface through other candidate sources. + .filter(|name| !name.is_empty() && name.chars().all(|c| c.is_ascii_alphabetic())) + .map(str::to_string) .filter(|name| matches_prefix(name)) .collect(); let mut seen_kw = std::collections::HashSet::new(); @@ -245,8 +250,8 @@ pub fn candidates_at_cursor( // matching known-set slot. `NewName` slots return `&[]`. 
let mut identifiers: Vec = expected .iter() - .filter_map(|item| IdentSlot::from_expected_label(item)) - .flat_map(|slot| cache.for_slot(slot).iter().cloned()) + .filter_map(|item| IdentSource::from_expected_label(item)) + .flat_map(|source| cache.for_source(source).iter().cloned()) .filter(|name| matches_prefix(name)) .collect(); identifiers.sort(); @@ -365,7 +370,7 @@ pub struct InvalidIdent { /// The text the user typed in the slot. pub found: String, /// Which known-set slot this position expected. - pub slot: IdentSlot, + pub source: IdentSource, } /// "User is typing a name" cursor state (round-3 follow-up). @@ -408,8 +413,8 @@ pub fn typing_name_at_cursor(input: &str, cursor: usize) -> Option { let expected = expected_set(leading); let is_new_name_slot = expected .iter() - .filter_map(|item| IdentSlot::from_expected_label(item)) - .any(|slot| slot == IdentSlot::NewName); + .filter_map(|item| IdentSource::from_expected_label(item)) + .any(|source| source == IdentSource::NewName); if !is_new_name_slot { return None; } @@ -485,34 +490,34 @@ pub fn invalid_ident_at_cursor( return None; } // Find every known-set slot in the expected list. - let slots: Vec = expected + let sources: Vec = expected .iter() - .filter_map(|item| IdentSlot::from_expected_label(item)) - .filter(|slot| slot.completes_from_schema()) + .filter_map(|item| IdentSource::from_expected_label(item)) + .filter(|s| s.completes_from_schema()) .collect(); - if slots.is_empty() { + if sources.is_empty() { return None; } let lowered = partial.to_lowercase(); // If any schema entry across the matching slots matches // the prefix, the partial is not "invalid" — it's an // in-progress lookup. 
- let any_match = slots + let any_match = sources .iter() - .flat_map(|slot| cache.for_slot(*slot)) + .flat_map(|s| cache.for_source(*s)) .any(|name| name.to_lowercase().starts_with(&lowered)); if any_match { return None; } - // Pick the first slot kind for the diagnostic — when + // Pick the first source kind for the diagnostic — when // multiple are expected (e.g. `drop relationship …` - // expects RelationshipName *or* the `from` keyword; - // here only the schema slot survives the filter) we + // expects Relationships *or* the `from` keyword; + // here only the schema source survives the filter) we // surface the first. Some(InvalidIdent { range: (start, cursor), found: partial.to_string(), - slot: slots[0], + source: sources[0], }) } @@ -1123,7 +1128,7 @@ mod tests { .expect("should be invalid"); assert_eq!(invalid.range, (10, 15)); assert_eq!(invalid.found, "Custp"); - assert_eq!(invalid.slot, IdentSlot::TableName); + assert_eq!(invalid.source, IdentSource::Tables); } #[test] diff --git a/src/db.rs b/src/db.rs index 35c66fb..584a938 100644 --- a/src/db.rs +++ b/src/db.rs @@ -507,7 +507,7 @@ enum Request { /// duplicates. The reply is small even for projects with /// hundreds of tables/columns. ListNamesFor { - slot: crate::dsl::ident_slot::IdentSlot, + source: crate::dsl::grammar::IdentSource, reply: oneshot::Sender, DbError>>, }, } @@ -854,30 +854,31 @@ impl Database { recv.await.map_err(|_| DbError::WorkerGone)? } - /// List schema entity names for an identifier slot - /// (ADR-0022 §9). + /// List schema entity names for an identifier source + /// (ADR-0022 §9, ADR-0024 §architecture). 
/// /// Returns alphabetised, deduplicated names suitable for /// the completion menu: - /// - `IdentSlot::TableName` → user tables (filters + /// - `IdentSource::Tables` → user tables (filters /// `__rdbms_*` internal tables); - /// - `IdentSlot::Column` → distinct column names across - /// all user tables (v1 simplification — no + /// - `IdentSource::Columns` → distinct column names + /// across all user tables (v1 simplification — no /// table-context binding); - /// - `IdentSlot::RelationshipName` → relationship - /// names from the metadata table; - /// - `IdentSlot::NewName` → returns `Ok(vec![])` - /// immediately without a worker round-trip (the user - /// invents these names). + /// - `IdentSource::Relationships` → relationship names + /// from the metadata table; + /// - `IdentSource::NewName`, `Types`, `Free` → returns + /// `Ok(vec![])` immediately without a worker round-trip + /// (the user invents these names, or the source is + /// synthetic). pub async fn list_names_for( &self, - slot: crate::dsl::ident_slot::IdentSlot, + source: crate::dsl::grammar::IdentSource, ) -> Result, DbError> { - if !slot.completes_from_schema() { + if !source.completes_from_schema() { return Ok(Vec::new()); } let (reply, recv) = oneshot::channel(); - self.send(Request::ListNamesFor { slot, reply }).await?; + self.send(Request::ListNamesFor { source, reply }).await?; recv.await.map_err(|_| DbError::WorkerGone)? } @@ -1202,25 +1203,24 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req let result = do_find_rows_matching(conn, &table, &column, &value, limit); let _ = reply.send(result); } - Request::ListNamesFor { slot, reply } => { - let result = do_list_names_for(conn, slot); + Request::ListNamesFor { source, reply } => { + let result = do_list_names_for(conn, source); let _ = reply.send(result); } } } /// Schema-name lookup for the completion engine -/// (ADR-0022 §9). 
`NewName` never reaches here — the public -/// `list_names_for` short-circuits. +/// (ADR-0022 §9). Non-schema sources (`NewName`, `Types`, `Free`) +/// never reach here — the public `list_names_for` short-circuits. fn do_list_names_for( conn: &Connection, - slot: crate::dsl::ident_slot::IdentSlot, + source: crate::dsl::grammar::IdentSource, ) -> Result, DbError> { - use crate::dsl::ident_slot::IdentSlot; - match slot { - IdentSlot::NewName => Ok(Vec::new()), - IdentSlot::TableName => do_list_tables(conn), - IdentSlot::Column => { + use crate::dsl::grammar::IdentSource; + match source { + IdentSource::Tables => do_list_tables(conn), + IdentSource::Columns => { // Distinct column names across all user tables. // v1 simplification: no table-context binding // (ADR-0022 stage 6 note). @@ -1240,7 +1240,7 @@ fn do_list_names_for( } Ok(out) } - IdentSlot::RelationshipName => { + IdentSource::Relationships => { let mut stmt = conn .prepare(&format!( "SELECT name FROM {REL_TABLE} ORDER BY name;" @@ -1255,6 +1255,7 @@ fn do_list_names_for( } Ok(out) } + IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()), } } @@ -7136,7 +7137,7 @@ mod tests { // touching the worker. 
let db = db(); let names = db - .list_names_for(crate::dsl::ident_slot::IdentSlot::NewName) + .list_names_for(crate::dsl::grammar::IdentSource::NewName) .await .unwrap(); assert!(names.is_empty()); @@ -7148,7 +7149,7 @@ mod tests { make_id_table(&db, "Customers").await; make_id_table(&db, "Orders").await; let names = db - .list_names_for(crate::dsl::ident_slot::IdentSlot::TableName) + .list_names_for(crate::dsl::grammar::IdentSource::Tables) .await .unwrap(); assert_eq!(names, vec!["Customers".to_string(), "Orders".to_string()]); @@ -7161,7 +7162,7 @@ mod tests { let db = db(); make_id_table(&db, "Customers").await; let names = db - .list_names_for(crate::dsl::ident_slot::IdentSlot::TableName) + .list_names_for(crate::dsl::grammar::IdentSource::Tables) .await .unwrap(); assert_eq!(names, vec!["Customers".to_string()]); @@ -7195,7 +7196,7 @@ mod tests { .await .unwrap(); let names = db - .list_names_for(crate::dsl::ident_slot::IdentSlot::Column) + .list_names_for(crate::dsl::grammar::IdentSource::Columns) .await .unwrap(); // `id` appears once despite being in both tables (DISTINCT). @@ -7238,7 +7239,7 @@ mod tests { .await .unwrap(); let names = db - .list_names_for(crate::dsl::ident_slot::IdentSlot::RelationshipName) + .list_names_for(crate::dsl::grammar::IdentSource::Relationships) .await .unwrap(); assert_eq!(names, vec!["cust_orders".to_string()]); diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index 2f7af82..effbe0d 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -49,32 +49,76 @@ pub enum HighlightClass { /// Where an `Ident` slot's candidates come from at completion time. /// -/// Phase A only exercises `NewName` (the `import … as ` -/// slot) and `Free` (the catch-all branch in `mode`/`messages` -/// that funnels unknown values into a friendly validator). The -/// schema-aware variants land in Phase B-D. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Drives both the walker's `Expectation::Ident { source }` (which +/// the parse-error bridge maps to a human label) and the +/// `SchemaCache` lookup the completion engine uses for Tab +/// candidates. The `Free` and `NewName` variants do not query the +/// schema — `NewName` is for slots where the user invents the +/// identifier, `Free` is the catch-all branch in `mode`/`messages` +/// that funnels unknown values into a friendly validator. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum IdentSource { /// User invents this name. No schema lookup; no completion /// candidates beyond the identifier shape itself. NewName, - /// Existing table name. Phase B+. - #[allow(dead_code)] + /// Existing table name. Tables, - /// Existing column in the current table. Phase B+. - #[allow(dead_code)] + /// Existing column in the current table. Columns, - /// Existing relationship name. Phase B+. - #[allow(dead_code)] + /// Existing relationship name. Relationships, - /// Closed set from `Type::all()`. Phase B+. - #[allow(dead_code)] + /// Closed set from `Type::all()` — surfaced by the walker's + /// content validator on column-type slots; not user-listable + /// from the schema. Types, /// Any identifier shape; used by synthetic catch-all branches /// (e.g., the unknown-value branch of `mode `). Free, } +impl IdentSource { + /// Whether this source can be completed from the schema + /// cache (i.e. the candidate list comes from existing + /// entities rather than user invention or a closed set). + #[must_use] + pub const fn completes_from_schema(self) -> bool { + matches!(self, Self::Tables | Self::Columns | Self::Relationships) + } + + /// Human-facing label used in parse-error wording + /// ("expected table name") and in the completion engine's + /// round-trip from a textual `expected` entry back to a + /// source kind. `Free` and `Types` collapse to "identifier" + /// and "type" respectively. 
+ #[must_use] + pub const fn expected_label(self) -> &'static str { + match self { + Self::NewName | Self::Free => "identifier", + Self::Tables => "table name", + Self::Columns => "column name", + Self::Relationships => "relationship name", + Self::Types => "type", + } + } + + /// Inverse of `expected_label`. Used by the completion engine + /// to recover the source kind from the `ParseError::Invalid:: + /// expected` strings the walker bridge produces. `"identifier"` + /// maps to `NewName` (the only writeable label that uses that + /// wording in production grammars today). + #[must_use] + pub fn from_expected_label(label: &str) -> Option { + match label { + "identifier" => Some(Self::NewName), + "table name" => Some(Self::Tables), + "column name" => Some(Self::Columns), + "relationship name" => Some(Self::Relationships), + "type" => Some(Self::Types), + _ => None, + } + } +} + /// Hint-panel mode for an expected node. /// /// Phase A defaults to `Default`; the `ProseOnly` variant diff --git a/src/dsl/ident_slot.rs b/src/dsl/ident_slot.rs deleted file mode 100644 index e3dddc2..0000000 --- a/src/dsl/ident_slot.rs +++ /dev/null @@ -1,140 +0,0 @@ -//! Identifier-slot taxonomy for ambient typing assistance -//! (ADR-0022 §8). -//! -//! Each `ident()` call in the DSL parser plays a particular -//! semantic role: a new name the user is inventing, the name -//! of an existing table, the name of an existing column, the -//! name of an existing relationship. The completion engine -//! (ADR-0022 §9) reads the slot type to know what candidates -//! to offer. -//! -//! Rather than carry slot data through chumsky's `extra` -//! payload (which would require a non-trivial type -//! refactor), we annotate each call site with a tag via the -//! `ident_ctx(slot)` wrapper in `parser.rs`. The wrapper -//! currently treats the slot as documentation only — it does -//! not propagate to the chumsky machinery — but the -//! call-site annotation forces every parser author to -//! 
consider the slot at the moment of writing the combinator, -//! and a unit test asserts no bare `ident_inner()` calls -//! escape into the command parsers (only `ident_ctx`-wrapped -//! sites). -//! -//! v1 scope (deliberately simple): -//! -//! - `NewName`: the user invents this identifier (new table -//! name, new column name, new relationship alias). No -//! completion candidates. -//! - `TableName`: an existing table. Completion candidates -//! come from the schema's table list. -//! - `Column`: an existing column. v1 does not bind the -//! column to a specific table; the completion engine in -//! stage 8 may union all columns or refine further. The -//! `TableRef` wrinkle (ADR-0022 §8 pseudocode) is deferred -//! until that stage demonstrates a need. -//! - `RelationshipName`: an existing relationship. Schema -//! queries for completion will hit `read_relationships`. - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum IdentSlot { - /// User invents this name. No completion candidates. - NewName, - /// An existing table. Completion candidates: schema - /// table list. - TableName, - /// An existing column. v1 does not bind to a specific - /// table — see module docs. - Column, - /// An existing relationship. - RelationshipName, -} - -impl IdentSlot { - /// Whether the completion engine should produce - /// candidates for this slot at all. `false` for - /// `NewName` (the user invents the name). - #[must_use] - pub const fn completes_from_schema(self) -> bool { - match self { - Self::NewName => false, - Self::TableName | Self::Column | Self::RelationshipName => true, - } - } - - /// Human-readable label for the parser's expected-set - /// machinery (ADR-0022 §8 + stage 8c). Carried through - /// chumsky labels by `ident_ctx(slot)` so error messages - /// say "expected table name" instead of the generic - /// "expected identifier", and so the completion engine - /// can recover the slot from the parser's expected set - /// via `from_expected_label`. 
- #[must_use] - pub const fn expected_label(self) -> &'static str { - match self { - Self::NewName => "identifier", - Self::TableName => "table name", - Self::Column => "column name", - Self::RelationshipName => "relationship name", - } - } - - /// Round-trip from the human label back to the slot kind. - /// `None` for any string that isn't one of the four - /// `expected_label()` outputs. - #[must_use] - pub fn from_expected_label(label: &str) -> Option { - match label { - "identifier" => Some(Self::NewName), - "table name" => Some(Self::TableName), - "column name" => Some(Self::Column), - "relationship name" => Some(Self::RelationshipName), - _ => None, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn new_name_does_not_complete_from_schema() { - assert!(!IdentSlot::NewName.completes_from_schema()); - } - - #[test] - fn schema_kinds_complete_from_schema() { - for slot in [ - IdentSlot::TableName, - IdentSlot::Column, - IdentSlot::RelationshipName, - ] { - assert!( - slot.completes_from_schema(), - "{slot:?} should complete from schema", - ); - } - } - - #[test] - fn expected_label_round_trips_for_every_variant() { - for slot in [ - IdentSlot::NewName, - IdentSlot::TableName, - IdentSlot::Column, - IdentSlot::RelationshipName, - ] { - assert_eq!( - IdentSlot::from_expected_label(slot.expected_label()), - Some(slot), - "round-trip failed for {slot:?}", - ); - } - } - - #[test] - fn unknown_expected_label_returns_none() { - assert_eq!(IdentSlot::from_expected_label("blob"), None); - assert_eq!(IdentSlot::from_expected_label("`create`"), None); - } -} diff --git a/src/dsl/keyword.rs b/src/dsl/keyword.rs deleted file mode 100644 index 535af20..0000000 --- a/src/dsl/keyword.rs +++ /dev/null @@ -1,311 +0,0 @@ -//! Keyword and punctuation tables for the DSL lexer (ADR-0020 §2a). -//! -//! `define_keywords!` and `define_punct!` are the single source -//! of truth from which the enums, the lex-side string→variant -//! 
mappings, and the `parse.token.*` catalog-key derivations -//! all come. Adding a new keyword is one line in the -//! `define_keywords!` invocation plus one line in -//! `src/friendly/strings/en-US.yaml` under -//! `parse.token.keyword.` (the catalog validator catches a -//! missing entry at test time per ADR-0021 §7). Adding a new -//! punctuation kind is symmetric. - -macro_rules! define_keywords { - ( $( $variant:ident => $literal:literal ),+ $(,)? ) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub enum Keyword { - $( $variant ),+ - } - - impl Keyword { - /// Every variant paired with its canonical lowercase - /// literal. Iteration order is the macro - /// declaration order. - pub const ALL: &'static [(Keyword, &'static str)] = &[ - $( (Keyword::$variant, $literal) ),+ - ]; - - /// Lex-side mapping. Case-insensitive per ADR-0009. - /// `None` for any input that isn't a reserved word — - /// the lexer then keeps the input as - /// `TokenKind::Identifier`. - #[must_use] - pub fn from_word(s: &str) -> Option { - Self::ALL - .iter() - .find(|(_, lit)| s.eq_ignore_ascii_case(lit)) - .map(|(kw, _)| *kw) - } - - /// Canonical lowercase literal for this variant. - #[must_use] - pub fn as_str(self) -> &'static str { - Self::ALL - .iter() - .find(|(kw, _)| *kw == self) - .map(|(_, lit)| *lit) - .expect("ALL covers every variant by construction") - } - - /// Catalog key under `parse.token.keyword.*` - /// (ADR-0021 §4). The renderer looks this up to get - /// the user-facing wording for the keyword. - #[must_use] - pub fn catalog_token_key(self) -> String { - format!("parse.token.keyword.{}", self.as_str()) - } - } - - impl std::fmt::Display for Keyword { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(self.as_str()) - } - } - }; -} - -define_keywords! { - // Commands (entry keywords). 
- Create => "create", - Drop => "drop", - Add => "add", - Rename => "rename", - Change => "change", - Show => "show", - Insert => "insert", - Update => "update", - Delete => "delete", - Replay => "replay", - // Object words. - Table => "table", - Column => "column", - Data => "data", - Relationship => "relationship", - Pk => "pk", - // Connectives. - With => "with", - From => "from", - To => "to", - Into => "into", - As => "as", - In => "in", - On => "on", - Set => "set", - Where => "where", - Values => "values", - // Value literals. - Null => "null", - True => "true", - False => "false", - // Referential-action vocabulary (ADR-0013). `set` and `null` - // re-use the connective and value-literal keywords above — - // `set null` is the parser's job to recognise as a sequence, - // not the lexer's. - Cascade => "cascade", - Restrict => "restrict", - Action => "action", - No => "no", - // App-lifecycle commands (folded into the DSL parser so they - // surface in Tab completion and the parse-error usage - // templates). The dispatch handlers in app.rs branch on the - // parsed `Command::App(...)` variant before mode-specific - // routing so these work in both simple and advanced modes - // (per ADR-0003). - Quit => "quit", - Help => "help", - Rebuild => "rebuild", - Save => "save", - New => "new", - Load => "load", - Export => "export", - Import => "import", - Mode => "mode", - Messages => "messages", - // Value vocabulary for `mode ` and `messages `. - // Free as identifier-shapes outside their slots (no command - // uses `simple` / `advanced` / `short` / `verbose` as an - // entity name today). - Simple => "simple", - Advanced => "advanced", - Short => "short", - Verbose => "verbose", -} - -macro_rules! define_punct { - ( $( $variant:ident => ($literal:literal, $name:literal) ),+ $(,)? 
) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - pub enum Punct { - $( $variant ),+ - } - - impl Punct { - /// Every variant paired with its character and - /// snake-case name suffix. - pub const ALL: &'static [(Punct, char, &'static str)] = &[ - $( (Punct::$variant, $literal, $name) ),+ - ]; - - /// Lex-side mapping. `None` for any character that - /// isn't punctuation — the lexer then either - /// classifies it as part of another token or - /// emits an `Error(LexError::UnknownChar)`. - #[must_use] - pub fn from_char(c: char) -> Option { - Self::ALL - .iter() - .find(|(_, lit, _)| *lit == c) - .map(|(p, _, _)| *p) - } - - #[must_use] - pub fn as_char(self) -> char { - Self::ALL - .iter() - .find(|(p, _, _)| *p == self) - .map(|(_, c, _)| *c) - .expect("ALL covers every variant by construction") - } - - /// Catalog key under `parse.token.punct.*` - /// (ADR-0021 §4). - #[must_use] - pub fn catalog_token_key(self) -> String { - let suffix = Self::ALL - .iter() - .find(|(p, _, _)| *p == self) - .map(|(_, _, n)| *n) - .expect("ALL covers every variant by construction"); - format!("parse.token.punct.{suffix}") - } - } - - impl std::fmt::Display for Punct { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use std::fmt::Write; - f.write_char(self.as_char()) - } - } - }; -} - -define_punct! 
{ - Colon => (':', "colon"), - OpenParen => ('(', "open_paren"), - CloseParen => (')', "close_paren"), - Comma => (',', "comma"), - Equals => ('=', "equals"), - Dot => ('.', "dot"), -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - #[test] - fn keyword_from_word_round_trips_every_variant() { - for &(kw, lit) in Keyword::ALL { - assert_eq!(Keyword::from_word(lit), Some(kw)); - assert_eq!(kw.as_str(), lit); - } - } - - #[test] - fn keyword_from_word_is_case_insensitive() { - assert_eq!(Keyword::from_word("CREATE"), Some(Keyword::Create)); - assert_eq!(Keyword::from_word("Create"), Some(Keyword::Create)); - assert_eq!(Keyword::from_word("cReAtE"), Some(Keyword::Create)); - } - - #[test] - fn keyword_from_word_returns_none_for_non_keyword() { - assert_eq!(Keyword::from_word("Customers"), None); - assert_eq!(Keyword::from_word("frobulate"), None); - // Type-name candidates explicitly stay non-keyword - // (ADR-0020 §2): they remain identifiers that the - // parser validates via `Type::from_str`. 
- assert_eq!(Keyword::from_word("text"), None); - assert_eq!(Keyword::from_word("int"), None); - assert_eq!(Keyword::from_word("varchar"), None); - } - - #[test] - fn keyword_literals_are_unique() { - let mut lits: Vec<&str> = Keyword::ALL.iter().map(|(_, lit)| *lit).collect(); - lits.sort_unstable(); - let count_before = lits.len(); - lits.dedup(); - assert_eq!(lits.len(), count_before, "keyword literals must be unique"); - } - - #[test] - fn keyword_catalog_token_key_format() { - assert_eq!( - Keyword::Create.catalog_token_key(), - "parse.token.keyword.create" - ); - assert_eq!( - Keyword::Pk.catalog_token_key(), - "parse.token.keyword.pk" - ); - } - - #[test] - fn keyword_display_uses_canonical_lowercase() { - assert_eq!(format!("{}", Keyword::Create), "create"); - assert_eq!(format!("{}", Keyword::Relationship), "relationship"); - } - - #[test] - fn punct_round_trips_every_variant() { - for &(p, c, _) in Punct::ALL { - assert_eq!(Punct::from_char(c), Some(p)); - assert_eq!(p.as_char(), c); - } - } - - #[test] - fn punct_from_char_returns_none_for_non_punct() { - assert_eq!(Punct::from_char('a'), None); - assert_eq!(Punct::from_char(' '), None); - assert_eq!(Punct::from_char('-'), None); - assert_eq!(Punct::from_char('\''), None); - } - - #[test] - fn punct_chars_are_unique() { - let mut chars: Vec = Punct::ALL.iter().map(|(_, c, _)| *c).collect(); - chars.sort_unstable(); - let count_before = chars.len(); - chars.dedup(); - assert_eq!(chars.len(), count_before, "punct chars must be unique"); - } - - #[test] - fn punct_catalog_token_key_format() { - assert_eq!( - Punct::Colon.catalog_token_key(), - "parse.token.punct.colon" - ); - assert_eq!( - Punct::OpenParen.catalog_token_key(), - "parse.token.punct.open_paren" - ); - } - - #[test] - fn every_command_entry_keyword_is_declared() { - // Sanity: the ten command entry keywords from - // ADR-0009/0014/0006 must all be reachable. If a future - // ADR adds a command, this list grows alongside it. 
- for cmd in [ - "create", "drop", "add", "rename", "change", "show", - "insert", "update", "delete", "replay", - ] { - assert!( - Keyword::from_word(cmd).is_some(), - "command entry keyword `{cmd}` must be declared", - ); - } - } -} diff --git a/src/dsl/lexer.rs b/src/dsl/lexer.rs deleted file mode 100644 index 6c90670..0000000 --- a/src/dsl/lexer.rs +++ /dev/null @@ -1,598 +0,0 @@ -//! DSL lexer (ADR-0020). -//! -//! Pure tokenizer: takes the source `&str` and produces a -//! `Vec` with byte-offset spans. Lex-shape errors -//! (unterminated string, unrecognised character, malformed -//! `--` flag) surface as `TokenKind::Error(_)` tokens — not a -//! `Result` variant. The parser sees `Error` tokens and raises -//! a structural error at that point; I4 (syntax highlighting, -//! future) walks the same token stream and renders Error tokens -//! with an error glyph. ADR-0020 §2 explains the rationale for -//! the in-stream error model. - -use crate::dsl::keyword::{Keyword, Punct}; - -pub type Span = (usize, usize); - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Token { - pub kind: TokenKind, - pub span: Span, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum TokenKind { - /// Reserved word recognised against the closed `Keyword` - /// set. Case-insensitive at lex time per ADR-0009. - Keyword(Keyword), - /// Anything alphabetic-or-underscore-then-alphanumeric that - /// did not match a keyword. Case is preserved per ADR-0009. - Identifier(String), - /// Numeric literal, raw text. The parser is responsible for - /// any further validation (e.g. `Value::Number` storage). A - /// leading `-` is included when present and immediately - /// adjacent to a digit (no whitespace). - Number(String), - /// Single-quoted string literal, with the `''` escape - /// processed (so `'don''t'` produces `"don't"`). The span - /// covers the surrounding quotes; the payload does not. - StringLiteral(String), - /// One-character punctuation per the closed `Punct` set. 
- Punct(Punct), - /// `--name` flag. The payload is the part after `--`. - Flag(String), - /// Lex-time shape error. The parser surfaces this with a - /// catalog-driven message (ADR-0021 §4 - /// `parse.token.error.*`). - Error(LexError), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum LexError { - /// `'` opened a string literal that ran to end of input - /// without a closing `'`. Span covers the opening quote - /// through end-of-input. - UnterminatedString, - /// Character not recognised at this position. Span covers - /// the single character (UTF-8 width respected). - UnknownChar(char), - /// `--` not followed by an identifier-shaped tail. Today - /// only reachable with literal trailing `--`; reserved as - /// a distinct kind so the renderer can produce a sharper - /// hint than "unknown character". - BadFlag, -} - -/// Tokenize an input string. -/// -/// Always succeeds in producing a `Vec` — lex-shape -/// errors are embedded as `TokenKind::Error` tokens. Whitespace -/// between tokens is silently skipped (ADR-0009: liberal -/// whitespace). -#[must_use] -pub fn lex(input: &str) -> Vec { - let mut tokens = Vec::new(); - let bytes = input.as_bytes(); - let mut pos = 0; - while pos < bytes.len() { - let b = bytes[pos]; - if b.is_ascii_whitespace() { - pos += 1; - continue; - } - if b.is_ascii_alphabetic() || b == b'_' { - let (tok, next) = lex_identifier(input, pos); - tokens.push(tok); - pos = next; - continue; - } - if b.is_ascii_digit() { - let (tok, next) = lex_number(input, pos, false); - tokens.push(tok); - pos = next; - continue; - } - if b == b'-' { - // `--name` flag, `-` negative-number literal, - // or a bare `-` (UnknownChar — no Minus variant in - // the current grammar). 
- let next_b = bytes.get(pos + 1).copied(); - if next_b == Some(b'-') { - let (tok, next) = lex_flag(input, pos); - tokens.push(tok); - pos = next; - continue; - } - if next_b.is_some_and(|c| c.is_ascii_digit()) { - let (tok, next) = lex_number(input, pos, true); - tokens.push(tok); - pos = next; - continue; - } - tokens.push(Token { - kind: TokenKind::Error(LexError::UnknownChar('-')), - span: (pos, pos + 1), - }); - pos += 1; - continue; - } - if b == b'\'' { - let (tok, next) = lex_string(input, pos); - tokens.push(tok); - pos = next; - continue; - } - if let Some(p) = Punct::from_char(b as char) { - tokens.push(Token { - kind: TokenKind::Punct(p), - span: (pos, pos + 1), - }); - pos += 1; - continue; - } - // Anything else: read one whole char (UTF-8 safe) and - // emit an UnknownChar error token covering its bytes. - let ch = input[pos..] - .chars() - .next() - .expect("pos < bytes.len() ⇒ at least one char"); - let len = ch.len_utf8(); - tokens.push(Token { - kind: TokenKind::Error(LexError::UnknownChar(ch)), - span: (pos, pos + len), - }); - pos += len; - } - tokens -} - -fn lex_identifier(input: &str, start: usize) -> (Token, usize) { - let bytes = input.as_bytes(); - let mut end = start + 1; // first byte already validated by caller - while end < bytes.len() { - let b = bytes[end]; - if b.is_ascii_alphanumeric() || b == b'_' { - end += 1; - } else { - break; - } - } - let word = &input[start..end]; - let kind = Keyword::from_word(word).map_or_else( - || TokenKind::Identifier(word.to_string()), - TokenKind::Keyword, - ); - ( - Token { - kind, - span: (start, end), - }, - end, - ) -} - -fn lex_number(input: &str, start: usize, leading_minus: bool) -> (Token, usize) { - let bytes = input.as_bytes(); - let mut end = start; - if leading_minus { - end += 1; // consume the leading '-' - } - while end < bytes.len() && bytes[end].is_ascii_digit() { - end += 1; - } - // Optional fractional part: `.` followed by ≥1 digit. 
A - // trailing `.` with no digits behind it is left alone (it - // lexes as a separate Punct(Dot) — useful for `Customers.id` - // when an identifier is misread as a number, though that - // path is not currently reachable). - if end < bytes.len() && bytes[end] == b'.' { - let after_dot = end + 1; - if after_dot < bytes.len() && bytes[after_dot].is_ascii_digit() { - end = after_dot; - while end < bytes.len() && bytes[end].is_ascii_digit() { - end += 1; - } - } - } - ( - Token { - kind: TokenKind::Number(input[start..end].to_string()), - span: (start, end), - }, - end, - ) -} - -fn lex_string(input: &str, start: usize) -> (Token, usize) { - let bytes = input.as_bytes(); - debug_assert_eq!(bytes[start], b'\''); - let mut content = String::new(); - let mut i = start + 1; - while i < bytes.len() { - if bytes[i] == b'\'' { - // `''` escape: append one literal `'` and continue. - if bytes.get(i + 1) == Some(&b'\'') { - content.push('\''); - i += 2; - continue; - } - // Closing quote. - return ( - Token { - kind: TokenKind::StringLiteral(content), - span: (start, i + 1), - }, - i + 1, - ); - } - let ch = input[i..] 
- .chars() - .next() - .expect("i < bytes.len() ⇒ at least one char"); - content.push(ch); - i += ch.len_utf8(); - } - ( - Token { - kind: TokenKind::Error(LexError::UnterminatedString), - span: (start, bytes.len()), - }, - bytes.len(), - ) -} - -fn lex_flag(input: &str, start: usize) -> (Token, usize) { - let bytes = input.as_bytes(); - debug_assert!(bytes[start..].starts_with(b"--")); - let mut end = start + 2; - while end < bytes.len() { - let b = bytes[end]; - if b.is_ascii_alphanumeric() || b == b'-' || b == b'_' { - end += 1; - } else { - break; - } - } - if end == start + 2 { - return ( - Token { - kind: TokenKind::Error(LexError::BadFlag), - span: (start, end), - }, - end, - ); - } - ( - Token { - kind: TokenKind::Flag(input[start + 2..end].to_string()), - span: (start, end), - }, - end, - ) -} - -#[cfg(test)] -mod tests { - use super::*; - use pretty_assertions::assert_eq; - - fn kinds(input: &str) -> Vec { - lex(input).into_iter().map(|t| t.kind).collect() - } - - #[test] - fn empty_input_produces_no_tokens() { - assert_eq!(lex(""), Vec::::new()); - } - - #[test] - fn whitespace_only_produces_no_tokens() { - assert_eq!(lex(" "), Vec::::new()); - assert_eq!(lex("\t\n \r"), Vec::::new()); - } - - #[test] - fn single_keyword_lexes_to_keyword_variant() { - assert_eq!( - kinds("create"), - vec![TokenKind::Keyword(Keyword::Create)], - ); - } - - #[test] - fn keyword_match_is_case_insensitive() { - assert_eq!( - kinds("CREATE"), - vec![TokenKind::Keyword(Keyword::Create)], - ); - assert_eq!( - kinds("CrEaTe"), - vec![TokenKind::Keyword(Keyword::Create)], - ); - } - - #[test] - fn non_keyword_word_lexes_to_identifier_preserving_case() { - assert_eq!( - kinds("Customers"), - vec![TokenKind::Identifier("Customers".to_string())], - ); - assert_eq!( - kinds("customer_v2"), - vec![TokenKind::Identifier("customer_v2".to_string())], - ); - // Type names stay as identifiers (ADR-0020 §2). 
- assert_eq!( - kinds("text"), - vec![TokenKind::Identifier("text".to_string())], - ); - assert_eq!( - kinds("varchar"), - vec![TokenKind::Identifier("varchar".to_string())], - ); - } - - #[test] - fn identifier_starts_with_letter_or_underscore_only() { - // A bare digit lexes as a number, not the start of an - // identifier. The parser then rejects it where an - // identifier was expected — this behaviour matches the - // pre-lexer parser. - assert_eq!( - kinds("1Customers"), - vec![ - TokenKind::Number("1".to_string()), - TokenKind::Identifier("Customers".to_string()), - ], - ); - } - - #[test] - fn positive_integer_lexes_as_number() { - assert_eq!(kinds("42"), vec![TokenKind::Number("42".to_string())]); - } - - #[test] - fn negative_integer_lexes_with_sign_attached() { - assert_eq!(kinds("-5"), vec![TokenKind::Number("-5".to_string())]); - } - - #[test] - fn fractional_number_lexes_as_one_token() { - assert_eq!( - kinds("3.14"), - vec![TokenKind::Number("3.14".to_string())], - ); - assert_eq!( - kinds("-3.14"), - vec![TokenKind::Number("-3.14".to_string())], - ); - } - - #[test] - fn trailing_dot_without_digits_does_not_attach() { - // `1.` lexes as Number("1") then Punct(Dot). The parser - // can decide what (if anything) that combination means. - assert_eq!( - kinds("1."), - vec![ - TokenKind::Number("1".to_string()), - TokenKind::Punct(Punct::Dot), - ], - ); - } - - #[test] - fn dot_inside_qualified_name_lexes_as_punct() { - // `Customers.id` is identifier, dot, identifier — the - // parser composes these for `.` references. 
- assert_eq!( - kinds("Customers.id"), - vec![ - TokenKind::Identifier("Customers".to_string()), - TokenKind::Punct(Punct::Dot), - TokenKind::Identifier("id".to_string()), - ], - ); - } - - #[test] - fn bare_minus_lexes_as_unknown_char() { - assert_eq!( - kinds("-"), - vec![TokenKind::Error(LexError::UnknownChar('-'))], - ); - } - - #[test] - fn string_literal_lexes_with_escape_processed() { - assert_eq!( - kinds("'hello'"), - vec![TokenKind::StringLiteral("hello".to_string())], - ); - assert_eq!( - kinds("'don''t'"), - vec![TokenKind::StringLiteral("don't".to_string())], - ); - } - - #[test] - fn empty_string_literal_lexes_to_empty_payload() { - assert_eq!( - kinds("''"), - vec![TokenKind::StringLiteral(String::new())], - ); - } - - #[test] - fn string_literal_preserves_internal_whitespace() { - assert_eq!( - kinds("'a b\tc'"), - vec![TokenKind::StringLiteral("a b\tc".to_string())], - ); - } - - #[test] - fn unterminated_string_emits_error_token() { - assert_eq!( - kinds("'oops"), - vec![TokenKind::Error(LexError::UnterminatedString)], - ); - } - - #[test] - fn string_literal_with_multi_byte_unicode_is_safe() { - let toks = lex("'café'"); - assert_eq!(toks.len(), 1); - assert_eq!( - toks[0].kind, - TokenKind::StringLiteral("café".to_string()), - ); - // Span covers all bytes including the multi-byte é. 
- assert_eq!(toks[0].span, (0, "'café'".len())); - } - - #[test] - fn each_punct_lexes_to_its_variant() { - for &(p, c, _) in Punct::ALL { - assert_eq!( - kinds(&c.to_string()), - vec![TokenKind::Punct(p)], - "lexing `{c}`", - ); - } - } - - #[test] - fn flag_lexes_with_payload_minus_dashes() { - assert_eq!( - kinds("--all-rows"), - vec![TokenKind::Flag("all-rows".to_string())], - ); - assert_eq!( - kinds("--create-fk"), - vec![TokenKind::Flag("create-fk".to_string())], - ); - assert_eq!( - kinds("--force-conversion"), - vec![TokenKind::Flag("force-conversion".to_string())], - ); - } - - #[test] - fn bare_double_dash_emits_bad_flag_error() { - assert_eq!(kinds("--"), vec![TokenKind::Error(LexError::BadFlag)]); - } - - #[test] - fn unknown_character_emits_error_token() { - assert_eq!( - kinds("$"), - vec![TokenKind::Error(LexError::UnknownChar('$'))], - ); - } - - #[test] - fn unknown_character_with_multi_byte_does_not_panic() { - // Unicode emoji as an unknown char — span must respect - // UTF-8 width. 
- let toks = lex("✓"); - assert_eq!(toks.len(), 1); - assert!(matches!( - toks[0].kind, - TokenKind::Error(LexError::UnknownChar('✓')) - )); - assert_eq!(toks[0].span, (0, "✓".len())); - } - - #[test] - fn whitespace_separates_otherwise_adjacent_tokens() { - assert_eq!( - kinds("create table"), - vec![ - TokenKind::Keyword(Keyword::Create), - TokenKind::Keyword(Keyword::Table), - ], - ); - } - - #[test] - fn create_table_full_command_lexes_to_expected_sequence() { - assert_eq!( - kinds("create table Customers with pk id:int"), - vec![ - TokenKind::Keyword(Keyword::Create), - TokenKind::Keyword(Keyword::Table), - TokenKind::Identifier("Customers".to_string()), - TokenKind::Keyword(Keyword::With), - TokenKind::Keyword(Keyword::Pk), - TokenKind::Identifier("id".to_string()), - TokenKind::Punct(Punct::Colon), - TokenKind::Identifier("int".to_string()), - ], - ); - } - - #[test] - fn one_to_n_cardinality_lexes_as_number_colon_identifier() { - assert_eq!( - kinds("1:n"), - vec![ - TokenKind::Number("1".to_string()), - TokenKind::Punct(Punct::Colon), - TokenKind::Identifier("n".to_string()), - ], - ); - } - - #[test] - fn insert_with_value_list_lexes_correctly() { - assert_eq!( - kinds("insert into T values (1, 'hi', null)"), - vec![ - TokenKind::Keyword(Keyword::Insert), - TokenKind::Keyword(Keyword::Into), - TokenKind::Identifier("T".to_string()), - TokenKind::Keyword(Keyword::Values), - TokenKind::Punct(Punct::OpenParen), - TokenKind::Number("1".to_string()), - TokenKind::Punct(Punct::Comma), - TokenKind::StringLiteral("hi".to_string()), - TokenKind::Punct(Punct::Comma), - TokenKind::Keyword(Keyword::Null), - TokenKind::Punct(Punct::CloseParen), - ], - ); - } - - #[test] - fn spans_are_byte_exact_for_simple_input() { - let toks = lex("create table"); - assert_eq!(toks.len(), 2); - assert_eq!(toks[0].span, (0, "create".len())); - assert_eq!(toks[1].span, ("create ".len(), "create table".len())); - } - - #[test] - fn trailing_whitespace_is_stripped() { - assert_eq!( - 
kinds("create "), - vec![TokenKind::Keyword(Keyword::Create)], - ); - } - - #[test] - fn error_tokens_appear_in_stream_alongside_valid_tokens() { - // The lexer keeps producing tokens after an error; the - // parser will reject the Error token at whatever point - // it tries to consume it. - assert_eq!( - kinds("create $ table"), - vec![ - TokenKind::Keyword(Keyword::Create), - TokenKind::Error(LexError::UnknownChar('$')), - TokenKind::Keyword(Keyword::Table), - ], - ); - } -} diff --git a/src/dsl/mod.rs b/src/dsl/mod.rs index 3b3d93f..e16e62e 100644 --- a/src/dsl/mod.rs +++ b/src/dsl/mod.rs @@ -12,9 +12,6 @@ pub mod action; pub mod command; pub mod grammar; -pub mod ident_slot; -pub mod keyword; -pub mod lexer; pub mod parser; pub mod shortid; pub mod types; diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index bc5b989..7fb3cb9 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -459,42 +459,19 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ #[cfg(test)] mod tests { use super::KEYS_AND_PLACEHOLDERS; - use crate::dsl::keyword::{Keyword, Punct}; use crate::friendly::format::catalog; use std::collections::HashSet; - /// Every `Keyword` variant must have a - /// `parse.token.keyword.` entry; every `Punct` - /// variant must have a `parse.token.punct.` entry. - /// Catches the case where a keyword or punct is added to - /// the macro but not to the catalog (ADR-0021 §7). 
- #[test] - fn keyword_and_punct_have_complete_token_vocabulary() { - let declared: HashSet<&str> = - KEYS_AND_PLACEHOLDERS.iter().map(|(k, _)| *k).collect(); - let mut missing: Vec = Vec::new(); - for &(kw, _) in Keyword::ALL { - let key = kw.catalog_token_key(); - if !declared.contains(key.as_str()) { - missing.push(format!( - "Keyword::{kw:?} ⇒ catalog key `{key}` not declared in keys.rs" - )); - } - } - for &(p, _, _) in Punct::ALL { - let key = p.catalog_token_key(); - if !declared.contains(key.as_str()) { - missing.push(format!( - "Punct::{p:?} ⇒ catalog key `{key}` not declared in keys.rs" - )); - } - } - assert!( - missing.is_empty(), - "token vocabulary incomplete:\n {}", - missing.join("\n "), - ); - } + // The pre-Phase-F `keyword_and_punct_have_complete_token_vocabulary` + // test cross-checked the `Keyword` / `Punct` enums against + // `parse.token.keyword.*` / `parse.token.punct.*` catalog + // keys. With those enums deleted (ADR-0024 §migration Phase F) + // and the walker rendering keyword wording via + // `format!("`{word}`")`, the catalog entries survive only as + // historic vocabulary; the `keys_validate_against_catalog` + // test below still asserts every key in `KEYS_AND_PLACEHOLDERS` + // resolves and vice versa, which keeps the catalog itself + // honest. The dead entries collapse in ADR-0024 §cleanup-pass. /// Walks `KEYS_AND_PLACEHOLDERS` and verifies every entry /// matches the catalog. ADR-0019 §8.6. diff --git a/src/input_render.rs b/src/input_render.rs index 9caa105..bad6fe5 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -210,15 +210,16 @@ pub fn ambient_hint( // the typed prefix matches nothing in the schema. (Stage // 8e / the user's #5.) 
if let Some(inv) = crate::completion::invalid_ident_at_cursor(input, cursor, cache) { - let kind = match inv.slot { - crate::dsl::ident_slot::IdentSlot::TableName => "table", - crate::dsl::ident_slot::IdentSlot::Column => "column", - crate::dsl::ident_slot::IdentSlot::RelationshipName => "relationship", - // `NewName` is filtered out by `invalid_ident_at_cursor` - // (it only fires for known-set slots), so this arm - // is unreachable in practice; render a neutral - // fallback rather than panic. - crate::dsl::ident_slot::IdentSlot::NewName => "identifier", + let kind = match inv.source { + crate::dsl::grammar::IdentSource::Tables => "table", + crate::dsl::grammar::IdentSource::Columns => "column", + crate::dsl::grammar::IdentSource::Relationships => "relationship", + // `NewName`, `Types`, `Free` are filtered out by + // `invalid_ident_at_cursor` (it only fires for + // known-set sources via `completes_from_schema`), so + // this catch-all arm is unreachable in practice — render a + // neutral fallback rather than panic.
+ _ => "identifier", }; return Some(AmbientHint::Prose(crate::t!( "hint.ambient_invalid_ident", diff --git a/src/runtime.rs b/src/runtime.rs index 93dddac..46dd30d 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -839,18 +839,15 @@ async fn refresh_schema_cache( event_tx: &mpsc::Sender, ) { use crate::completion::SchemaCache; - use crate::dsl::ident_slot::IdentSlot; + use crate::dsl::grammar::IdentSource; let mut cache = SchemaCache::default(); - if let Ok(tables) = database.list_names_for(IdentSlot::TableName).await { + if let Ok(tables) = database.list_names_for(IdentSource::Tables).await { cache.tables = tables; } - if let Ok(columns) = database.list_names_for(IdentSlot::Column).await { + if let Ok(columns) = database.list_names_for(IdentSource::Columns).await { cache.columns = columns; } - if let Ok(rels) = database - .list_names_for(IdentSlot::RelationshipName) - .await - { + if let Ok(rels) = database.list_names_for(IdentSource::Relationships).await { cache.relationships = rels; } let _ = event_tx.send(AppEvent::SchemaCacheRefreshed(cache)).await; diff --git a/src/theme.rs b/src/theme.rs index b4a456a..32fed72 100644 --- a/src/theme.rs +++ b/src/theme.rs @@ -19,7 +19,6 @@ use ratatui::style::Color; use crate::dsl::grammar::HighlightClass; -use crate::dsl::lexer::TokenKind; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Background { @@ -106,23 +105,6 @@ impl Theme { } } - /// Map a `TokenKind` to its display colour for ambient - /// highlighting (ADR-0022 §3). Lex-error tokens always render - /// in `tok_error`, regardless of the parse-time error overlay - /// applied separately by the renderer. 
- #[must_use] - pub const fn token_color(&self, kind: &TokenKind) -> Color { - match kind { - TokenKind::Keyword(_) => self.tok_keyword, - TokenKind::Identifier(_) => self.tok_identifier, - TokenKind::Number(_) => self.tok_number, - TokenKind::StringLiteral(_) => self.tok_string, - TokenKind::Punct(_) => self.tok_punct, - TokenKind::Flag(_) => self.tok_flag, - TokenKind::Error(_) => self.tok_error, - } - } - /// Map a walker `HighlightClass` to its display colour /// (ADR-0024 §architecture, Phase F). This is the walker-side /// equivalent of `token_color` — the renderer consumes @@ -152,8 +134,6 @@ impl Default for Theme { #[cfg(test)] mod tests { use super::*; - use crate::dsl::keyword::{Keyword, Punct}; - use crate::dsl::lexer::LexError; #[test] fn dark_theme_token_colours_differ_from_background() { @@ -190,50 +170,14 @@ mod tests { } #[test] - fn token_color_maps_each_kind_to_the_expected_field() { + fn highlight_class_color_maps_each_variant() { let t = Theme::dark(); - assert_eq!( - t.token_color(&TokenKind::Keyword(Keyword::Create)), - t.tok_keyword, - ); - assert_eq!( - t.token_color(&TokenKind::Identifier("Customers".to_string())), - t.tok_identifier, - ); - assert_eq!( - t.token_color(&TokenKind::Number("42".to_string())), - t.tok_number, - ); - assert_eq!( - t.token_color(&TokenKind::StringLiteral("hi".to_string())), - t.tok_string, - ); - assert_eq!( - t.token_color(&TokenKind::Punct(Punct::Colon)), - t.tok_punct, - ); - assert_eq!( - t.token_color(&TokenKind::Flag("all-rows".to_string())), - t.tok_flag, - ); - assert_eq!( - t.token_color(&TokenKind::Error(LexError::UnknownChar('$'))), - t.tok_error, - ); - } - - #[test] - fn lex_error_tokens_render_in_tok_error_regardless_of_kind() { - let t = Theme::dark(); - for err in [ - LexError::UnknownChar('$'), - LexError::UnterminatedString, - LexError::BadFlag, - ] { - assert_eq!( - t.token_color(&TokenKind::Error(err)), - t.tok_error, - ); - } + assert_eq!(t.highlight_class_color(HighlightClass::Keyword), 
t.tok_keyword); + assert_eq!(t.highlight_class_color(HighlightClass::Identifier), t.tok_identifier); + assert_eq!(t.highlight_class_color(HighlightClass::Number), t.tok_number); + assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string); + assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct); + assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag); + assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error); } }