ADR-0024 Phase F (full) step 3: delete legacy parser modules
Removes the last consumers of `dsl::lexer`, `dsl::keyword`, and
`dsl::ident_slot`, then deletes the modules.
- `Theme::token_color(&TokenKind)` deleted along with its test;
`Theme::highlight_class_color(HighlightClass)` is the sole
highlight-colour mapper (the walker's `per_byte_class` feeds
it directly).
- `IdentSource` (`dsl::grammar`) absorbs the schema-list /
expected-label / round-trip semantics that previously lived
on `IdentSlot`. Adds `completes_from_schema`, `expected_label`,
and `from_expected_label` methods. The walker's
`Expectation::Ident { source }` and the schema-lookup request
on the database worker now share one enum.
- `SchemaCache::for_slot(IdentSlot)` → `for_source(IdentSource)`.
- `Database::list_names_for` and the `Request::ListNamesFor`
worker variant now take `IdentSource`. The worker's internal
table / column / relationship lookups dispatch on the same enum.
- `InvalidIdent.slot: IdentSlot` → `InvalidIdent.source: IdentSource`.
The `invalid_ident_at_cursor` rendering branch in
`input_render.rs::ambient_hint` updates accordingly.
- Completion's keyword filter (previously `Keyword::from_word`)
becomes "backticked items whose payload is non-empty and all
ASCII alphabetic". Punctuation and digit literals still surface
through their own candidate sources (composite-literal, flag,
schema-ident); the alphabetic filter only keeps them out of the
keyword bucket.
- `friendly::keys::tests::keyword_and_punct_have_complete_token_vocabulary`
is dropped. It cross-checked `Keyword::ALL` / `Punct::ALL`
against catalog entries; both enums are gone. The
`parse.token.keyword.*` / `parse.token.punct.*` catalog
entries themselves survive for one more commit (catalog
cleanup, ADR-0024 §cleanup-pass); the
`keys_validate_against_catalog` test still pins them.
- Modules deleted: `src/dsl/lexer.rs`, `src/dsl/keyword.rs`,
`src/dsl/ident_slot.rs`.
Tests: 806 passing, 0 failing, 1 ignored. The drop from 852
reflects the removed module-internal tests (~32 lexer, 7
keyword, 4 ident_slot, 1 theme token_color, 1 friendly keys
keyword/punct), and is the expected outcome.
Clippy clean with `nursery` lints + `-D warnings`.
This commit is contained in:
+33
-28
@@ -14,8 +14,7 @@
|
|||||||
//! The cycling memo (`LastCompletion` on `App`) lives in
|
//! The cycling memo (`LastCompletion` on `App`) lives in
|
||||||
//! `app.rs`; this module owns the candidate computation.
|
//! `app.rs`; this module owns the candidate computation.
|
||||||
|
|
||||||
use crate::dsl::ident_slot::IdentSlot;
|
use crate::dsl::grammar::IdentSource;
|
||||||
use crate::dsl::keyword::Keyword;
|
|
||||||
use crate::dsl::types::Type;
|
use crate::dsl::types::Type;
|
||||||
use crate::dsl::{ParseError, parse_command};
|
use crate::dsl::{ParseError, parse_command};
|
||||||
|
|
||||||
@@ -53,15 +52,15 @@ pub struct SchemaCache {
|
|||||||
|
|
||||||
impl SchemaCache {
|
impl SchemaCache {
|
||||||
/// Lookup the candidate list for an identifier slot.
|
/// Lookup the candidate list for an identifier slot.
|
||||||
/// `NewName` always returns `&[]` — the user invents
|
/// Sources that don't read from the schema (`NewName`,
|
||||||
/// these names.
|
/// `Types`, `Free`) return `&[]`.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn for_slot(&self, slot: IdentSlot) -> &[String] {
|
pub fn for_source(&self, source: IdentSource) -> &[String] {
|
||||||
match slot {
|
match source {
|
||||||
IdentSlot::NewName => &[],
|
IdentSource::Tables => &self.tables,
|
||||||
IdentSlot::TableName => &self.tables,
|
IdentSource::Columns => &self.columns,
|
||||||
IdentSlot::Column => &self.columns,
|
IdentSource::Relationships => &self.relationships,
|
||||||
IdentSlot::RelationshipName => &self.relationships,
|
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -110,7 +109,7 @@ pub struct Completion {
|
|||||||
/// bare keywords (excluding punctuation and descriptive
|
/// bare keywords (excluding punctuation and descriptive
|
||||||
/// labels per ADR-0022 §10).
|
/// labels per ADR-0022 §10).
|
||||||
/// - **Schema identifiers**: when the parser's expected-set
|
/// - **Schema identifiers**: when the parser's expected-set
|
||||||
/// includes an `IdentSlot::expected_label()`, the matching
|
/// includes an `IdentSource::expected_label()`, the matching
|
||||||
/// schema list from `cache` is added (skipping the `NewName`
|
/// schema list from `cache` is added (skipping the `NewName`
|
||||||
/// slot — the user invents those).
|
/// slot — the user invents those).
|
||||||
///
|
///
|
||||||
@@ -172,7 +171,13 @@ pub fn candidates_at_cursor(
|
|||||||
let mut keywords: Vec<String> = expected
|
let mut keywords: Vec<String> = expected
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|item| strip_backticks(item))
|
.filter_map(|item| strip_backticks(item))
|
||||||
.filter_map(|name| Keyword::from_word(name).map(|_| name.to_string()))
|
// Backticked items are walker `Expectation::Word`s or
|
||||||
|
// `Expectation::Literal`s. Keywords are the
|
||||||
|
// alphabetic-only ones; punct (`,`, `=`) and digit
|
||||||
|
// literals (`1`) live in the same expected-set but
|
||||||
|
// surface through other candidate sources.
|
||||||
|
.filter(|name| !name.is_empty() && name.chars().all(|c| c.is_ascii_alphabetic()))
|
||||||
|
.map(str::to_string)
|
||||||
.filter(|name| matches_prefix(name))
|
.filter(|name| matches_prefix(name))
|
||||||
.collect();
|
.collect();
|
||||||
let mut seen_kw = std::collections::HashSet::new();
|
let mut seen_kw = std::collections::HashSet::new();
|
||||||
@@ -245,8 +250,8 @@ pub fn candidates_at_cursor(
|
|||||||
// matching known-set slot. `NewName` slots return `&[]`.
|
// matching known-set slot. `NewName` slots return `&[]`.
|
||||||
let mut identifiers: Vec<String> = expected
|
let mut identifiers: Vec<String> = expected
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|item| IdentSlot::from_expected_label(item))
|
.filter_map(|item| IdentSource::from_expected_label(item))
|
||||||
.flat_map(|slot| cache.for_slot(slot).iter().cloned())
|
.flat_map(|source| cache.for_source(source).iter().cloned())
|
||||||
.filter(|name| matches_prefix(name))
|
.filter(|name| matches_prefix(name))
|
||||||
.collect();
|
.collect();
|
||||||
identifiers.sort();
|
identifiers.sort();
|
||||||
@@ -365,7 +370,7 @@ pub struct InvalidIdent {
|
|||||||
/// The text the user typed in the slot.
|
/// The text the user typed in the slot.
|
||||||
pub found: String,
|
pub found: String,
|
||||||
/// Which known-set slot this position expected.
|
/// Which known-set slot this position expected.
|
||||||
pub slot: IdentSlot,
|
pub source: IdentSource,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// "User is typing a name" cursor state (round-3 follow-up).
|
/// "User is typing a name" cursor state (round-3 follow-up).
|
||||||
@@ -408,8 +413,8 @@ pub fn typing_name_at_cursor(input: &str, cursor: usize) -> Option<TypingName> {
|
|||||||
let expected = expected_set(leading);
|
let expected = expected_set(leading);
|
||||||
let is_new_name_slot = expected
|
let is_new_name_slot = expected
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|item| IdentSlot::from_expected_label(item))
|
.filter_map(|item| IdentSource::from_expected_label(item))
|
||||||
.any(|slot| slot == IdentSlot::NewName);
|
.any(|source| source == IdentSource::NewName);
|
||||||
if !is_new_name_slot {
|
if !is_new_name_slot {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@@ -485,34 +490,34 @@ pub fn invalid_ident_at_cursor(
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
// Find every known-set slot in the expected list.
|
// Find every known-set slot in the expected list.
|
||||||
let slots: Vec<IdentSlot> = expected
|
let sources: Vec<IdentSource> = expected
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|item| IdentSlot::from_expected_label(item))
|
.filter_map(|item| IdentSource::from_expected_label(item))
|
||||||
.filter(|slot| slot.completes_from_schema())
|
.filter(|s| s.completes_from_schema())
|
||||||
.collect();
|
.collect();
|
||||||
if slots.is_empty() {
|
if sources.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let lowered = partial.to_lowercase();
|
let lowered = partial.to_lowercase();
|
||||||
// If any schema entry across the matching slots matches
|
// If any schema entry across the matching slots matches
|
||||||
// the prefix, the partial is not "invalid" — it's an
|
// the prefix, the partial is not "invalid" — it's an
|
||||||
// in-progress lookup.
|
// in-progress lookup.
|
||||||
let any_match = slots
|
let any_match = sources
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|slot| cache.for_slot(*slot))
|
.flat_map(|s| cache.for_source(*s))
|
||||||
.any(|name| name.to_lowercase().starts_with(&lowered));
|
.any(|name| name.to_lowercase().starts_with(&lowered));
|
||||||
if any_match {
|
if any_match {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
// Pick the first slot kind for the diagnostic — when
|
// Pick the first source kind for the diagnostic — when
|
||||||
// multiple are expected (e.g. `drop relationship …`
|
// multiple are expected (e.g. `drop relationship …`
|
||||||
// expects RelationshipName *or* the `from` keyword;
|
// expects Relationships *or* the `from` keyword;
|
||||||
// here only the schema slot survives the filter) we
|
// here only the schema source survives the filter) we
|
||||||
// surface the first.
|
// surface the first.
|
||||||
Some(InvalidIdent {
|
Some(InvalidIdent {
|
||||||
range: (start, cursor),
|
range: (start, cursor),
|
||||||
found: partial.to_string(),
|
found: partial.to_string(),
|
||||||
slot: slots[0],
|
source: sources[0],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1123,7 +1128,7 @@ mod tests {
|
|||||||
.expect("should be invalid");
|
.expect("should be invalid");
|
||||||
assert_eq!(invalid.range, (10, 15));
|
assert_eq!(invalid.range, (10, 15));
|
||||||
assert_eq!(invalid.found, "Custp");
|
assert_eq!(invalid.found, "Custp");
|
||||||
assert_eq!(invalid.slot, IdentSlot::TableName);
|
assert_eq!(invalid.source, IdentSource::Tables);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -507,7 +507,7 @@ enum Request {
|
|||||||
/// duplicates. The reply is small even for projects with
|
/// duplicates. The reply is small even for projects with
|
||||||
/// hundreds of tables/columns.
|
/// hundreds of tables/columns.
|
||||||
ListNamesFor {
|
ListNamesFor {
|
||||||
slot: crate::dsl::ident_slot::IdentSlot,
|
source: crate::dsl::grammar::IdentSource,
|
||||||
reply: oneshot::Sender<Result<Vec<String>, DbError>>,
|
reply: oneshot::Sender<Result<Vec<String>, DbError>>,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -854,30 +854,31 @@ impl Database {
|
|||||||
recv.await.map_err(|_| DbError::WorkerGone)?
|
recv.await.map_err(|_| DbError::WorkerGone)?
|
||||||
}
|
}
|
||||||
|
|
||||||
/// List schema entity names for an identifier slot
|
/// List schema entity names for an identifier source
|
||||||
/// (ADR-0022 §9).
|
/// (ADR-0022 §9, ADR-0024 §architecture).
|
||||||
///
|
///
|
||||||
/// Returns alphabetised, deduplicated names suitable for
|
/// Returns alphabetised, deduplicated names suitable for
|
||||||
/// the completion menu:
|
/// the completion menu:
|
||||||
/// - `IdentSlot::TableName` → user tables (filters
|
/// - `IdentSource::Tables` → user tables (filters
|
||||||
/// `__rdbms_*` internal tables);
|
/// `__rdbms_*` internal tables);
|
||||||
/// - `IdentSlot::Column` → distinct column names across
|
/// - `IdentSource::Columns` → distinct column names
|
||||||
/// all user tables (v1 simplification — no
|
/// across all user tables (v1 simplification — no
|
||||||
/// table-context binding);
|
/// table-context binding);
|
||||||
/// - `IdentSlot::RelationshipName` → relationship
|
/// - `IdentSource::Relationships` → relationship names
|
||||||
/// names from the metadata table;
|
/// from the metadata table;
|
||||||
/// - `IdentSlot::NewName` → returns `Ok(vec![])`
|
/// - `IdentSource::NewName`, `Types`, `Free` → returns
|
||||||
/// immediately without a worker round-trip (the user
|
/// `Ok(vec![])` immediately without a worker round-trip
|
||||||
/// invents these names).
|
/// (the user invents these names, or the source is
|
||||||
|
/// synthetic).
|
||||||
pub async fn list_names_for(
|
pub async fn list_names_for(
|
||||||
&self,
|
&self,
|
||||||
slot: crate::dsl::ident_slot::IdentSlot,
|
source: crate::dsl::grammar::IdentSource,
|
||||||
) -> Result<Vec<String>, DbError> {
|
) -> Result<Vec<String>, DbError> {
|
||||||
if !slot.completes_from_schema() {
|
if !source.completes_from_schema() {
|
||||||
return Ok(Vec::new());
|
return Ok(Vec::new());
|
||||||
}
|
}
|
||||||
let (reply, recv) = oneshot::channel();
|
let (reply, recv) = oneshot::channel();
|
||||||
self.send(Request::ListNamesFor { slot, reply }).await?;
|
self.send(Request::ListNamesFor { source, reply }).await?;
|
||||||
recv.await.map_err(|_| DbError::WorkerGone)?
|
recv.await.map_err(|_| DbError::WorkerGone)?
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1202,25 +1203,24 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req
|
|||||||
let result = do_find_rows_matching(conn, &table, &column, &value, limit);
|
let result = do_find_rows_matching(conn, &table, &column, &value, limit);
|
||||||
let _ = reply.send(result);
|
let _ = reply.send(result);
|
||||||
}
|
}
|
||||||
Request::ListNamesFor { slot, reply } => {
|
Request::ListNamesFor { source, reply } => {
|
||||||
let result = do_list_names_for(conn, slot);
|
let result = do_list_names_for(conn, source);
|
||||||
let _ = reply.send(result);
|
let _ = reply.send(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Schema-name lookup for the completion engine
|
/// Schema-name lookup for the completion engine
|
||||||
/// (ADR-0022 §9). `NewName` never reaches here — the public
|
/// (ADR-0022 §9). Non-schema sources (`NewName`, `Types`, `Free`)
|
||||||
/// `list_names_for` short-circuits.
|
/// never reach here — the public `list_names_for` short-circuits.
|
||||||
fn do_list_names_for(
|
fn do_list_names_for(
|
||||||
conn: &Connection,
|
conn: &Connection,
|
||||||
slot: crate::dsl::ident_slot::IdentSlot,
|
source: crate::dsl::grammar::IdentSource,
|
||||||
) -> Result<Vec<String>, DbError> {
|
) -> Result<Vec<String>, DbError> {
|
||||||
use crate::dsl::ident_slot::IdentSlot;
|
use crate::dsl::grammar::IdentSource;
|
||||||
match slot {
|
match source {
|
||||||
IdentSlot::NewName => Ok(Vec::new()),
|
IdentSource::Tables => do_list_tables(conn),
|
||||||
IdentSlot::TableName => do_list_tables(conn),
|
IdentSource::Columns => {
|
||||||
IdentSlot::Column => {
|
|
||||||
// Distinct column names across all user tables.
|
// Distinct column names across all user tables.
|
||||||
// v1 simplification: no table-context binding
|
// v1 simplification: no table-context binding
|
||||||
// (ADR-0022 stage 6 note).
|
// (ADR-0022 stage 6 note).
|
||||||
@@ -1240,7 +1240,7 @@ fn do_list_names_for(
|
|||||||
}
|
}
|
||||||
Ok(out)
|
Ok(out)
|
||||||
}
|
}
|
||||||
IdentSlot::RelationshipName => {
|
IdentSource::Relationships => {
|
||||||
let mut stmt = conn
|
let mut stmt = conn
|
||||||
.prepare(&format!(
|
.prepare(&format!(
|
||||||
"SELECT name FROM {REL_TABLE} ORDER BY name;"
|
"SELECT name FROM {REL_TABLE} ORDER BY name;"
|
||||||
@@ -1255,6 +1255,7 @@ fn do_list_names_for(
|
|||||||
}
|
}
|
||||||
Ok(out)
|
Ok(out)
|
||||||
}
|
}
|
||||||
|
IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -7136,7 +7137,7 @@ mod tests {
|
|||||||
// touching the worker.
|
// touching the worker.
|
||||||
let db = db();
|
let db = db();
|
||||||
let names = db
|
let names = db
|
||||||
.list_names_for(crate::dsl::ident_slot::IdentSlot::NewName)
|
.list_names_for(crate::dsl::grammar::IdentSource::NewName)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(names.is_empty());
|
assert!(names.is_empty());
|
||||||
@@ -7148,7 +7149,7 @@ mod tests {
|
|||||||
make_id_table(&db, "Customers").await;
|
make_id_table(&db, "Customers").await;
|
||||||
make_id_table(&db, "Orders").await;
|
make_id_table(&db, "Orders").await;
|
||||||
let names = db
|
let names = db
|
||||||
.list_names_for(crate::dsl::ident_slot::IdentSlot::TableName)
|
.list_names_for(crate::dsl::grammar::IdentSource::Tables)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(names, vec!["Customers".to_string(), "Orders".to_string()]);
|
assert_eq!(names, vec!["Customers".to_string(), "Orders".to_string()]);
|
||||||
@@ -7161,7 +7162,7 @@ mod tests {
|
|||||||
let db = db();
|
let db = db();
|
||||||
make_id_table(&db, "Customers").await;
|
make_id_table(&db, "Customers").await;
|
||||||
let names = db
|
let names = db
|
||||||
.list_names_for(crate::dsl::ident_slot::IdentSlot::TableName)
|
.list_names_for(crate::dsl::grammar::IdentSource::Tables)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(names, vec!["Customers".to_string()]);
|
assert_eq!(names, vec!["Customers".to_string()]);
|
||||||
@@ -7195,7 +7196,7 @@ mod tests {
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let names = db
|
let names = db
|
||||||
.list_names_for(crate::dsl::ident_slot::IdentSlot::Column)
|
.list_names_for(crate::dsl::grammar::IdentSource::Columns)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// `id` appears once despite being in both tables (DISTINCT).
|
// `id` appears once despite being in both tables (DISTINCT).
|
||||||
@@ -7238,7 +7239,7 @@ mod tests {
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let names = db
|
let names = db
|
||||||
.list_names_for(crate::dsl::ident_slot::IdentSlot::RelationshipName)
|
.list_names_for(crate::dsl::grammar::IdentSource::Relationships)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(names, vec!["cust_orders".to_string()]);
|
assert_eq!(names, vec!["cust_orders".to_string()]);
|
||||||
|
|||||||
+57
-13
@@ -49,32 +49,76 @@ pub enum HighlightClass {
|
|||||||
|
|
||||||
/// Where an `Ident` slot's candidates come from at completion time.
|
/// Where an `Ident` slot's candidates come from at completion time.
|
||||||
///
|
///
|
||||||
/// Phase A only exercises `NewName` (the `import … as <target>`
|
/// Drives both the walker's `Expectation::Ident { source }` (which
|
||||||
/// slot) and `Free` (the catch-all branch in `mode`/`messages`
|
/// the parse-error bridge maps to a human label) and the
|
||||||
/// that funnels unknown values into a friendly validator). The
|
/// `SchemaCache` lookup the completion engine uses for Tab
|
||||||
/// schema-aware variants land in Phase B-D.
|
/// candidates. The `Free` and `NewName` variants do not query the
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
/// schema — `NewName` is for slots where the user invents the
|
||||||
|
/// identifier, `Free` is the catch-all branch in `mode`/`messages`
|
||||||
|
/// that funnels unknown values into a friendly validator.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub enum IdentSource {
|
pub enum IdentSource {
|
||||||
/// User invents this name. No schema lookup; no completion
|
/// User invents this name. No schema lookup; no completion
|
||||||
/// candidates beyond the identifier shape itself.
|
/// candidates beyond the identifier shape itself.
|
||||||
NewName,
|
NewName,
|
||||||
/// Existing table name. Phase B+.
|
/// Existing table name.
|
||||||
#[allow(dead_code)]
|
|
||||||
Tables,
|
Tables,
|
||||||
/// Existing column in the current table. Phase B+.
|
/// Existing column in the current table.
|
||||||
#[allow(dead_code)]
|
|
||||||
Columns,
|
Columns,
|
||||||
/// Existing relationship name. Phase B+.
|
/// Existing relationship name.
|
||||||
#[allow(dead_code)]
|
|
||||||
Relationships,
|
Relationships,
|
||||||
/// Closed set from `Type::all()`. Phase B+.
|
/// Closed set from `Type::all()` — surfaced by the walker's
|
||||||
#[allow(dead_code)]
|
/// content validator on column-type slots; not user-listable
|
||||||
|
/// from the schema.
|
||||||
Types,
|
Types,
|
||||||
/// Any identifier shape; used by synthetic catch-all branches
|
/// Any identifier shape; used by synthetic catch-all branches
|
||||||
/// (e.g., the unknown-value branch of `mode <value>`).
|
/// (e.g., the unknown-value branch of `mode <value>`).
|
||||||
Free,
|
Free,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl IdentSource {
|
||||||
|
/// Whether this source can be completed from the schema
|
||||||
|
/// cache (i.e. the candidate list comes from existing
|
||||||
|
/// entities rather than user invention or a closed set).
|
||||||
|
#[must_use]
|
||||||
|
pub const fn completes_from_schema(self) -> bool {
|
||||||
|
matches!(self, Self::Tables | Self::Columns | Self::Relationships)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Human-facing label used in parse-error wording
|
||||||
|
/// ("expected table name") and in the completion engine's
|
||||||
|
/// round-trip from a textual `expected` entry back to a
|
||||||
|
/// source kind. `Free` and `Types` collapse to "identifier"
|
||||||
|
/// and "type" respectively.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn expected_label(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::NewName | Self::Free => "identifier",
|
||||||
|
Self::Tables => "table name",
|
||||||
|
Self::Columns => "column name",
|
||||||
|
Self::Relationships => "relationship name",
|
||||||
|
Self::Types => "type",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inverse of `expected_label`. Used by the completion engine
|
||||||
|
/// to recover the source kind from the `ParseError::Invalid::
|
||||||
|
/// expected` strings the walker bridge produces. `"identifier"`
|
||||||
|
/// maps to `NewName` (the only writeable label that uses that
|
||||||
|
/// wording in production grammars today).
|
||||||
|
#[must_use]
|
||||||
|
pub fn from_expected_label(label: &str) -> Option<Self> {
|
||||||
|
match label {
|
||||||
|
"identifier" => Some(Self::NewName),
|
||||||
|
"table name" => Some(Self::Tables),
|
||||||
|
"column name" => Some(Self::Columns),
|
||||||
|
"relationship name" => Some(Self::Relationships),
|
||||||
|
"type" => Some(Self::Types),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Hint-panel mode for an expected node.
|
/// Hint-panel mode for an expected node.
|
||||||
///
|
///
|
||||||
/// Phase A defaults to `Default`; the `ProseOnly` variant
|
/// Phase A defaults to `Default`; the `ProseOnly` variant
|
||||||
|
|||||||
@@ -1,140 +0,0 @@
|
|||||||
//! Identifier-slot taxonomy for ambient typing assistance
|
|
||||||
//! (ADR-0022 §8).
|
|
||||||
//!
|
|
||||||
//! Each `ident()` call in the DSL parser plays a particular
|
|
||||||
//! semantic role: a new name the user is inventing, the name
|
|
||||||
//! of an existing table, the name of an existing column, the
|
|
||||||
//! name of an existing relationship. The completion engine
|
|
||||||
//! (ADR-0022 §9) reads the slot type to know what candidates
|
|
||||||
//! to offer.
|
|
||||||
//!
|
|
||||||
//! Rather than carry slot data through chumsky's `extra`
|
|
||||||
//! payload (which would require a non-trivial type
|
|
||||||
//! refactor), we annotate each call site with a tag via the
|
|
||||||
//! `ident_ctx(slot)` wrapper in `parser.rs`. The wrapper
|
|
||||||
//! currently treats the slot as documentation only — it does
|
|
||||||
//! not propagate to the chumsky machinery — but the
|
|
||||||
//! call-site annotation forces every parser author to
|
|
||||||
//! consider the slot at the moment of writing the combinator,
|
|
||||||
//! and a unit test asserts no bare `ident_inner()` calls
|
|
||||||
//! escape into the command parsers (only `ident_ctx`-wrapped
|
|
||||||
//! sites).
|
|
||||||
//!
|
|
||||||
//! v1 scope (deliberately simple):
|
|
||||||
//!
|
|
||||||
//! - `NewName`: the user invents this identifier (new table
|
|
||||||
//! name, new column name, new relationship alias). No
|
|
||||||
//! completion candidates.
|
|
||||||
//! - `TableName`: an existing table. Completion candidates
|
|
||||||
//! come from the schema's table list.
|
|
||||||
//! - `Column`: an existing column. v1 does not bind the
|
|
||||||
//! column to a specific table; the completion engine in
|
|
||||||
//! stage 8 may union all columns or refine further. The
|
|
||||||
//! `TableRef` wrinkle (ADR-0022 §8 pseudocode) is deferred
|
|
||||||
//! until that stage demonstrates a need.
|
|
||||||
//! - `RelationshipName`: an existing relationship. Schema
|
|
||||||
//! queries for completion will hit `read_relationships`.
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
|
||||||
pub enum IdentSlot {
|
|
||||||
/// User invents this name. No completion candidates.
|
|
||||||
NewName,
|
|
||||||
/// An existing table. Completion candidates: schema
|
|
||||||
/// table list.
|
|
||||||
TableName,
|
|
||||||
/// An existing column. v1 does not bind to a specific
|
|
||||||
/// table — see module docs.
|
|
||||||
Column,
|
|
||||||
/// An existing relationship.
|
|
||||||
RelationshipName,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl IdentSlot {
|
|
||||||
/// Whether the completion engine should produce
|
|
||||||
/// candidates for this slot at all. `false` for
|
|
||||||
/// `NewName` (the user invents the name).
|
|
||||||
#[must_use]
|
|
||||||
pub const fn completes_from_schema(self) -> bool {
|
|
||||||
match self {
|
|
||||||
Self::NewName => false,
|
|
||||||
Self::TableName | Self::Column | Self::RelationshipName => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Human-readable label for the parser's expected-set
|
|
||||||
/// machinery (ADR-0022 §8 + stage 8c). Carried through
|
|
||||||
/// chumsky labels by `ident_ctx(slot)` so error messages
|
|
||||||
/// say "expected table name" instead of the generic
|
|
||||||
/// "expected identifier", and so the completion engine
|
|
||||||
/// can recover the slot from the parser's expected set
|
|
||||||
/// via `from_expected_label`.
|
|
||||||
#[must_use]
|
|
||||||
pub const fn expected_label(self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
Self::NewName => "identifier",
|
|
||||||
Self::TableName => "table name",
|
|
||||||
Self::Column => "column name",
|
|
||||||
Self::RelationshipName => "relationship name",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Round-trip from the human label back to the slot kind.
|
|
||||||
/// `None` for any string that isn't one of the four
|
|
||||||
/// `expected_label()` outputs.
|
|
||||||
#[must_use]
|
|
||||||
pub fn from_expected_label(label: &str) -> Option<Self> {
|
|
||||||
match label {
|
|
||||||
"identifier" => Some(Self::NewName),
|
|
||||||
"table name" => Some(Self::TableName),
|
|
||||||
"column name" => Some(Self::Column),
|
|
||||||
"relationship name" => Some(Self::RelationshipName),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn new_name_does_not_complete_from_schema() {
|
|
||||||
assert!(!IdentSlot::NewName.completes_from_schema());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn schema_kinds_complete_from_schema() {
|
|
||||||
for slot in [
|
|
||||||
IdentSlot::TableName,
|
|
||||||
IdentSlot::Column,
|
|
||||||
IdentSlot::RelationshipName,
|
|
||||||
] {
|
|
||||||
assert!(
|
|
||||||
slot.completes_from_schema(),
|
|
||||||
"{slot:?} should complete from schema",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn expected_label_round_trips_for_every_variant() {
|
|
||||||
for slot in [
|
|
||||||
IdentSlot::NewName,
|
|
||||||
IdentSlot::TableName,
|
|
||||||
IdentSlot::Column,
|
|
||||||
IdentSlot::RelationshipName,
|
|
||||||
] {
|
|
||||||
assert_eq!(
|
|
||||||
IdentSlot::from_expected_label(slot.expected_label()),
|
|
||||||
Some(slot),
|
|
||||||
"round-trip failed for {slot:?}",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn unknown_expected_label_returns_none() {
|
|
||||||
assert_eq!(IdentSlot::from_expected_label("blob"), None);
|
|
||||||
assert_eq!(IdentSlot::from_expected_label("`create`"), None);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,311 +0,0 @@
|
|||||||
//! Keyword and punctuation tables for the DSL lexer (ADR-0020 §2a).
|
|
||||||
//!
|
|
||||||
//! `define_keywords!` and `define_punct!` are the single source
|
|
||||||
//! of truth from which the enums, the lex-side string→variant
|
|
||||||
//! mappings, and the `parse.token.*` catalog-key derivations
|
|
||||||
//! all come. Adding a new keyword is one line in the
|
|
||||||
//! `define_keywords!` invocation plus one line in
|
|
||||||
//! `src/friendly/strings/en-US.yaml` under
|
|
||||||
//! `parse.token.keyword.<lit>` (the catalog validator catches a
|
|
||||||
//! missing entry at test time per ADR-0021 §7). Adding a new
|
|
||||||
//! punctuation kind is symmetric.
|
|
||||||
|
|
||||||
macro_rules! define_keywords {
|
|
||||||
( $( $variant:ident => $literal:literal ),+ $(,)? ) => {
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
|
||||||
pub enum Keyword {
|
|
||||||
$( $variant ),+
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Keyword {
|
|
||||||
/// Every variant paired with its canonical lowercase
|
|
||||||
/// literal. Iteration order is the macro
|
|
||||||
/// declaration order.
|
|
||||||
pub const ALL: &'static [(Keyword, &'static str)] = &[
|
|
||||||
$( (Keyword::$variant, $literal) ),+
|
|
||||||
];
|
|
||||||
|
|
||||||
/// Lex-side mapping. Case-insensitive per ADR-0009.
|
|
||||||
/// `None` for any input that isn't a reserved word —
|
|
||||||
/// the lexer then keeps the input as
|
|
||||||
/// `TokenKind::Identifier`.
|
|
||||||
#[must_use]
|
|
||||||
pub fn from_word(s: &str) -> Option<Self> {
|
|
||||||
Self::ALL
|
|
||||||
.iter()
|
|
||||||
.find(|(_, lit)| s.eq_ignore_ascii_case(lit))
|
|
||||||
.map(|(kw, _)| *kw)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Canonical lowercase literal for this variant.
|
|
||||||
#[must_use]
|
|
||||||
pub fn as_str(self) -> &'static str {
|
|
||||||
Self::ALL
|
|
||||||
.iter()
|
|
||||||
.find(|(kw, _)| *kw == self)
|
|
||||||
.map(|(_, lit)| *lit)
|
|
||||||
.expect("ALL covers every variant by construction")
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Catalog key under `parse.token.keyword.*`
|
|
||||||
/// (ADR-0021 §4). The renderer looks this up to get
|
|
||||||
/// the user-facing wording for the keyword.
|
|
||||||
#[must_use]
|
|
||||||
pub fn catalog_token_key(self) -> String {
|
|
||||||
format!("parse.token.keyword.{}", self.as_str())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl std::fmt::Display for Keyword {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
f.write_str(self.as_str())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
define_keywords! {
    // Entry keywords: the word that opens a command.
    Create => "create",
    Drop => "drop",
    Add => "add",
    Rename => "rename",
    Change => "change",
    Show => "show",
    Insert => "insert",
    Update => "update",
    Delete => "delete",
    Replay => "replay",

    // Words naming the object a command acts on.
    Table => "table",
    Column => "column",
    Data => "data",
    Relationship => "relationship",
    Pk => "pk",

    // Connective words.
    With => "with",
    From => "from",
    To => "to",
    Into => "into",
    As => "as",
    In => "in",
    On => "on",
    Set => "set",
    Where => "where",
    Values => "values",

    // Literal values.
    Null => "null",
    True => "true",
    False => "false",

    // Referential-action words (ADR-0013). There is no dedicated
    // `set null` keyword: it is spelled with the `Set` connective and
    // the `Null` literal declared above, and recognising that pair as
    // one action is left to the parser rather than the lexer.
    Cascade => "cascade",
    Restrict => "restrict",
    Action => "action",
    No => "no",

    // App-lifecycle commands. They are part of the DSL so that Tab
    // completion and the parse-error usage templates can offer them.
    // Per ADR-0003 they work in both simple and advanced modes: the
    // dispatch handlers in app.rs branch on the parsed
    // `Command::App(...)` variant before any mode-specific routing.
    Quit => "quit",
    Help => "help",
    Rebuild => "rebuild",
    Save => "save",
    New => "new",
    Load => "load",
    Export => "export",
    Import => "import",
    Mode => "mode",
    Messages => "messages",

    // Values accepted by `mode <value>` and `messages <value>`.
    // No command currently uses `simple` / `advanced` / `short` /
    // `verbose` as an entity name, so reserving them costs nothing
    // outside those two slots today.
    Simple => "simple",
    Advanced => "advanced",
    Short => "short",
    Verbose => "verbose",
}
|
|
||||||
|
|
||||||
/// Generates the closed `Punct` enum from a list of
/// `Variant => (char, "catalog_suffix")` entries, together with its
/// lookup table, conversions, catalog-key helper and `Display` impl.
macro_rules! define_punct {
    ( $( $variant:ident => ($literal:literal, $name:literal) ),+ $(,)? ) => {
        /// One-character punctuation recognised by the DSL.
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        pub enum Punct {
            $( $variant ),+
        }

        impl Punct {
            /// Table of every variant with its character and the
            /// snake-case suffix used in its catalog key, in
            /// declaration order.
            pub const ALL: &'static [(Punct, char, &'static str)] = &[
                $( (Punct::$variant, $literal, $name) ),+
            ];

            /// Maps a character to its punctuation variant.
            ///
            /// Returns `None` when `c` is not in the table; the lexer
            /// then treats the character as part of another token or
            /// reports `Error(LexError::UnknownChar)`.
            #[must_use]
            pub fn from_char(c: char) -> Option<Self> {
                for &(punct, literal, _) in Self::ALL {
                    if literal == c {
                        return Some(punct);
                    }
                }
                None
            }

            /// The character this variant stands for.
            #[must_use]
            pub fn as_char(self) -> char {
                // Exhaustive by construction: the arms are generated
                // from the same entry list as the enum itself.
                match self {
                    $( Punct::$variant => $literal ),+
                }
            }

            /// Message-catalog key for this variant, of the form
            /// `parse.token.punct.<suffix>` (ADR-0021 §4).
            #[must_use]
            pub fn catalog_token_key(self) -> String {
                let suffix: &str = match self {
                    $( Punct::$variant => $name ),+
                };
                format!("parse.token.punct.{suffix}")
            }
        }

        /// Displays the variant as its bare punctuation character.
        impl std::fmt::Display for Punct {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Write::write_char(f, self.as_char())
            }
        }
    };
}

define_punct! {
    Colon => (':', "colon"),
    OpenParen => ('(', "open_paren"),
    CloseParen => (')', "close_paren"),
    Comma => (',', "comma"),
    Equals => ('=', "equals"),
    Dot => ('.', "dot"),
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Each `(variant, literal)` row of `Keyword::ALL` converts in
    /// both directions.
    #[test]
    fn keyword_from_word_round_trips_every_variant() {
        for (kw, lit) in Keyword::ALL.iter().copied() {
            assert_eq!(Keyword::from_word(lit), Some(kw));
            assert_eq!(kw.as_str(), lit);
        }
    }

    /// Letter case of the input word does not affect the lookup.
    #[test]
    fn keyword_from_word_is_case_insensitive() {
        for spelling in ["CREATE", "Create", "cReAtE"] {
            assert_eq!(Keyword::from_word(spelling), Some(Keyword::Create));
        }
    }

    /// Words outside the keyword set yield `None`.
    #[test]
    fn keyword_from_word_returns_none_for_non_keyword() {
        for word in ["Customers", "frobulate"] {
            assert_eq!(Keyword::from_word(word), None);
        }
        // Type names are deliberately not keywords (ADR-0020 §2):
        // they stay identifiers and the parser validates them via
        // `Type::from_str`.
        for type_name in ["text", "int", "varchar"] {
            assert_eq!(Keyword::from_word(type_name), None);
        }
    }

    /// No two keyword variants share a literal.
    #[test]
    fn keyword_literals_are_unique() {
        let distinct: std::collections::HashSet<&str> =
            Keyword::ALL.iter().map(|&(_, lit)| lit).collect();
        assert_eq!(
            distinct.len(),
            Keyword::ALL.len(),
            "keyword literals must be unique"
        );
    }

    /// Catalog keys are the literal prefixed with `parse.token.keyword.`.
    #[test]
    fn keyword_catalog_token_key_format() {
        let expectations = [
            (Keyword::Create, "parse.token.keyword.create"),
            (Keyword::Pk, "parse.token.keyword.pk"),
        ];
        for (kw, key) in expectations {
            assert_eq!(kw.catalog_token_key(), key);
        }
    }

    /// `Display` prints the canonical lowercase literal.
    #[test]
    fn keyword_display_uses_canonical_lowercase() {
        assert_eq!(Keyword::Create.to_string(), "create");
        assert_eq!(Keyword::Relationship.to_string(), "relationship");
    }

    /// Each `Punct::ALL` row converts in both directions.
    #[test]
    fn punct_round_trips_every_variant() {
        for (p, c, _) in Punct::ALL.iter().copied() {
            assert_eq!(Punct::from_char(c), Some(p));
            assert_eq!(p.as_char(), c);
        }
    }

    /// Characters outside the punctuation set yield `None`.
    #[test]
    fn punct_from_char_returns_none_for_non_punct() {
        for c in ['a', ' ', '-', '\''] {
            assert_eq!(Punct::from_char(c), None);
        }
    }

    /// No two punctuation variants share a character.
    #[test]
    fn punct_chars_are_unique() {
        let distinct: std::collections::HashSet<char> =
            Punct::ALL.iter().map(|&(_, c, _)| c).collect();
        assert_eq!(
            distinct.len(),
            Punct::ALL.len(),
            "punct chars must be unique"
        );
    }

    /// Catalog keys are the name suffix prefixed with `parse.token.punct.`.
    #[test]
    fn punct_catalog_token_key_format() {
        let expectations = [
            (Punct::Colon, "parse.token.punct.colon"),
            (Punct::OpenParen, "parse.token.punct.open_paren"),
        ];
        for (p, key) in expectations {
            assert_eq!(p.catalog_token_key(), key);
        }
    }

    /// Sanity check that the ten command entry keywords from
    /// ADR-0009/0014/0006 are all declared. When a later ADR adds a
    /// command, extend this list with it.
    #[test]
    fn every_command_entry_keyword_is_declared() {
        const ENTRY_KEYWORDS: [&str; 10] = [
            "create", "drop", "add", "rename", "change", "show", "insert", "update", "delete",
            "replay",
        ];
        for cmd in ENTRY_KEYWORDS {
            assert!(
                Keyword::from_word(cmd).is_some(),
                "command entry keyword `{cmd}` must be declared",
            );
        }
    }
}
|
|
||||||
@@ -1,598 +0,0 @@
|
|||||||
//! DSL lexer (ADR-0020).
|
|
||||||
//!
|
|
||||||
//! Pure tokenizer: takes the source `&str` and produces a
|
|
||||||
//! `Vec<Token>` with byte-offset spans. Lex-shape errors
|
|
||||||
//! (unterminated string, unrecognised character, malformed
|
|
||||||
//! `--` flag) surface as `TokenKind::Error(_)` tokens — not a
|
|
||||||
//! `Result` variant. The parser sees `Error` tokens and raises
|
|
||||||
//! a structural error at that point; I4 (syntax highlighting,
|
|
||||||
//! future) walks the same token stream and renders Error tokens
|
|
||||||
//! with an error glyph. ADR-0020 §2 explains the rationale for
|
|
||||||
//! the in-stream error model.
|
|
||||||
|
|
||||||
use crate::dsl::keyword::{Keyword, Punct};
|
|
||||||
|
|
||||||
pub type Span = (usize, usize);
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct Token {
|
|
||||||
pub kind: TokenKind,
|
|
||||||
pub span: Span,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub enum TokenKind {
|
|
||||||
/// Reserved word recognised against the closed `Keyword`
|
|
||||||
/// set. Case-insensitive at lex time per ADR-0009.
|
|
||||||
Keyword(Keyword),
|
|
||||||
/// Anything alphabetic-or-underscore-then-alphanumeric that
|
|
||||||
/// did not match a keyword. Case is preserved per ADR-0009.
|
|
||||||
Identifier(String),
|
|
||||||
/// Numeric literal, raw text. The parser is responsible for
|
|
||||||
/// any further validation (e.g. `Value::Number` storage). A
|
|
||||||
/// leading `-` is included when present and immediately
|
|
||||||
/// adjacent to a digit (no whitespace).
|
|
||||||
Number(String),
|
|
||||||
/// Single-quoted string literal, with the `''` escape
|
|
||||||
/// processed (so `'don''t'` produces `"don't"`). The span
|
|
||||||
/// covers the surrounding quotes; the payload does not.
|
|
||||||
StringLiteral(String),
|
|
||||||
/// One-character punctuation per the closed `Punct` set.
|
|
||||||
Punct(Punct),
|
|
||||||
/// `--name` flag. The payload is the part after `--`.
|
|
||||||
Flag(String),
|
|
||||||
/// Lex-time shape error. The parser surfaces this with a
|
|
||||||
/// catalog-driven message (ADR-0021 §4
|
|
||||||
/// `parse.token.error.*`).
|
|
||||||
Error(LexError),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The shape errors the lexer can embed in the token stream.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
    /// A string literal was opened with `'` and the input ended
    /// before a closing `'` was found. The token's span runs from the
    /// opening quote to the end of the input.
    UnterminatedString,
    /// A character that no token can start with at this position.
    /// The token's span covers that one character, using its full
    /// UTF-8 width.
    UnknownChar(char),
    /// A `--` with no identifier-shaped tail after it. Currently this
    /// only arises from a literal trailing `--`; it is kept separate
    /// from `UnknownChar` so the renderer can give a sharper hint than
    /// "unknown character".
    BadFlag,
}
|
|
||||||
|
|
||||||
/// Tokenize an input string.
|
|
||||||
///
|
|
||||||
/// Always succeeds in producing a `Vec<Token>` — lex-shape
|
|
||||||
/// errors are embedded as `TokenKind::Error` tokens. Whitespace
|
|
||||||
/// between tokens is silently skipped (ADR-0009: liberal
|
|
||||||
/// whitespace).
|
|
||||||
#[must_use]
|
|
||||||
pub fn lex(input: &str) -> Vec<Token> {
|
|
||||||
let mut tokens = Vec::new();
|
|
||||||
let bytes = input.as_bytes();
|
|
||||||
let mut pos = 0;
|
|
||||||
while pos < bytes.len() {
|
|
||||||
let b = bytes[pos];
|
|
||||||
if b.is_ascii_whitespace() {
|
|
||||||
pos += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if b.is_ascii_alphabetic() || b == b'_' {
|
|
||||||
let (tok, next) = lex_identifier(input, pos);
|
|
||||||
tokens.push(tok);
|
|
||||||
pos = next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if b.is_ascii_digit() {
|
|
||||||
let (tok, next) = lex_number(input, pos, false);
|
|
||||||
tokens.push(tok);
|
|
||||||
pos = next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if b == b'-' {
|
|
||||||
// `--name` flag, `-<digit>` negative-number literal,
|
|
||||||
// or a bare `-` (UnknownChar — no Minus variant in
|
|
||||||
// the current grammar).
|
|
||||||
let next_b = bytes.get(pos + 1).copied();
|
|
||||||
if next_b == Some(b'-') {
|
|
||||||
let (tok, next) = lex_flag(input, pos);
|
|
||||||
tokens.push(tok);
|
|
||||||
pos = next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if next_b.is_some_and(|c| c.is_ascii_digit()) {
|
|
||||||
let (tok, next) = lex_number(input, pos, true);
|
|
||||||
tokens.push(tok);
|
|
||||||
pos = next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
tokens.push(Token {
|
|
||||||
kind: TokenKind::Error(LexError::UnknownChar('-')),
|
|
||||||
span: (pos, pos + 1),
|
|
||||||
});
|
|
||||||
pos += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if b == b'\'' {
|
|
||||||
let (tok, next) = lex_string(input, pos);
|
|
||||||
tokens.push(tok);
|
|
||||||
pos = next;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if let Some(p) = Punct::from_char(b as char) {
|
|
||||||
tokens.push(Token {
|
|
||||||
kind: TokenKind::Punct(p),
|
|
||||||
span: (pos, pos + 1),
|
|
||||||
});
|
|
||||||
pos += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Anything else: read one whole char (UTF-8 safe) and
|
|
||||||
// emit an UnknownChar error token covering its bytes.
|
|
||||||
let ch = input[pos..]
|
|
||||||
.chars()
|
|
||||||
.next()
|
|
||||||
.expect("pos < bytes.len() ⇒ at least one char");
|
|
||||||
let len = ch.len_utf8();
|
|
||||||
tokens.push(Token {
|
|
||||||
kind: TokenKind::Error(LexError::UnknownChar(ch)),
|
|
||||||
span: (pos, pos + len),
|
|
||||||
});
|
|
||||||
pos += len;
|
|
||||||
}
|
|
||||||
tokens
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lex_identifier(input: &str, start: usize) -> (Token, usize) {
|
|
||||||
let bytes = input.as_bytes();
|
|
||||||
let mut end = start + 1; // first byte already validated by caller
|
|
||||||
while end < bytes.len() {
|
|
||||||
let b = bytes[end];
|
|
||||||
if b.is_ascii_alphanumeric() || b == b'_' {
|
|
||||||
end += 1;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let word = &input[start..end];
|
|
||||||
let kind = Keyword::from_word(word).map_or_else(
|
|
||||||
|| TokenKind::Identifier(word.to_string()),
|
|
||||||
TokenKind::Keyword,
|
|
||||||
);
|
|
||||||
(
|
|
||||||
Token {
|
|
||||||
kind,
|
|
||||||
span: (start, end),
|
|
||||||
},
|
|
||||||
end,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lex_number(input: &str, start: usize, leading_minus: bool) -> (Token, usize) {
|
|
||||||
let bytes = input.as_bytes();
|
|
||||||
let mut end = start;
|
|
||||||
if leading_minus {
|
|
||||||
end += 1; // consume the leading '-'
|
|
||||||
}
|
|
||||||
while end < bytes.len() && bytes[end].is_ascii_digit() {
|
|
||||||
end += 1;
|
|
||||||
}
|
|
||||||
// Optional fractional part: `.` followed by ≥1 digit. A
|
|
||||||
// trailing `.` with no digits behind it is left alone (it
|
|
||||||
// lexes as a separate Punct(Dot) — useful for `Customers.id`
|
|
||||||
// when an identifier is misread as a number, though that
|
|
||||||
// path is not currently reachable).
|
|
||||||
if end < bytes.len() && bytes[end] == b'.' {
|
|
||||||
let after_dot = end + 1;
|
|
||||||
if after_dot < bytes.len() && bytes[after_dot].is_ascii_digit() {
|
|
||||||
end = after_dot;
|
|
||||||
while end < bytes.len() && bytes[end].is_ascii_digit() {
|
|
||||||
end += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(
|
|
||||||
Token {
|
|
||||||
kind: TokenKind::Number(input[start..end].to_string()),
|
|
||||||
span: (start, end),
|
|
||||||
},
|
|
||||||
end,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lex_string(input: &str, start: usize) -> (Token, usize) {
|
|
||||||
let bytes = input.as_bytes();
|
|
||||||
debug_assert_eq!(bytes[start], b'\'');
|
|
||||||
let mut content = String::new();
|
|
||||||
let mut i = start + 1;
|
|
||||||
while i < bytes.len() {
|
|
||||||
if bytes[i] == b'\'' {
|
|
||||||
// `''` escape: append one literal `'` and continue.
|
|
||||||
if bytes.get(i + 1) == Some(&b'\'') {
|
|
||||||
content.push('\'');
|
|
||||||
i += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Closing quote.
|
|
||||||
return (
|
|
||||||
Token {
|
|
||||||
kind: TokenKind::StringLiteral(content),
|
|
||||||
span: (start, i + 1),
|
|
||||||
},
|
|
||||||
i + 1,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let ch = input[i..]
|
|
||||||
.chars()
|
|
||||||
.next()
|
|
||||||
.expect("i < bytes.len() ⇒ at least one char");
|
|
||||||
content.push(ch);
|
|
||||||
i += ch.len_utf8();
|
|
||||||
}
|
|
||||||
(
|
|
||||||
Token {
|
|
||||||
kind: TokenKind::Error(LexError::UnterminatedString),
|
|
||||||
span: (start, bytes.len()),
|
|
||||||
},
|
|
||||||
bytes.len(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lex_flag(input: &str, start: usize) -> (Token, usize) {
|
|
||||||
let bytes = input.as_bytes();
|
|
||||||
debug_assert!(bytes[start..].starts_with(b"--"));
|
|
||||||
let mut end = start + 2;
|
|
||||||
while end < bytes.len() {
|
|
||||||
let b = bytes[end];
|
|
||||||
if b.is_ascii_alphanumeric() || b == b'-' || b == b'_' {
|
|
||||||
end += 1;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if end == start + 2 {
|
|
||||||
return (
|
|
||||||
Token {
|
|
||||||
kind: TokenKind::Error(LexError::BadFlag),
|
|
||||||
span: (start, end),
|
|
||||||
},
|
|
||||||
end,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
(
|
|
||||||
Token {
|
|
||||||
kind: TokenKind::Flag(input[start + 2..end].to_string()),
|
|
||||||
span: (start, end),
|
|
||||||
},
|
|
||||||
end,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Lexes `src` and keeps only the token kinds, dropping the spans.
    fn lexed_kinds(src: &str) -> Vec<TokenKind> {
        let mut out = Vec::new();
        for token in lex(src) {
            out.push(token.kind);
        }
        out
    }

    // Shorthand constructors for the expected token kinds.
    fn kw(keyword: Keyword) -> TokenKind {
        TokenKind::Keyword(keyword)
    }

    fn ident(text: &str) -> TokenKind {
        TokenKind::Identifier(text.to_string())
    }

    fn number(text: &str) -> TokenKind {
        TokenKind::Number(text.to_string())
    }

    fn string(text: &str) -> TokenKind {
        TokenKind::StringLiteral(text.to_string())
    }

    fn flag(name: &str) -> TokenKind {
        TokenKind::Flag(name.to_string())
    }

    fn punct(p: Punct) -> TokenKind {
        TokenKind::Punct(p)
    }

    fn unknown(ch: char) -> TokenKind {
        TokenKind::Error(LexError::UnknownChar(ch))
    }

    #[test]
    fn empty_input_produces_no_tokens() {
        let expected: Vec<Token> = Vec::new();
        assert_eq!(lex(""), expected);
    }

    #[test]
    fn whitespace_only_produces_no_tokens() {
        for blank in [" ", "\t\n \r"] {
            assert_eq!(lex(blank), Vec::<Token>::new());
        }
    }

    #[test]
    fn single_keyword_lexes_to_keyword_variant() {
        assert_eq!(lexed_kinds("create"), vec![kw(Keyword::Create)]);
    }

    #[test]
    fn keyword_match_is_case_insensitive() {
        for spelling in ["CREATE", "CrEaTe"] {
            assert_eq!(lexed_kinds(spelling), vec![kw(Keyword::Create)]);
        }
    }

    #[test]
    fn non_keyword_word_lexes_to_identifier_preserving_case() {
        for word in ["Customers", "customer_v2"] {
            assert_eq!(lexed_kinds(word), vec![ident(word)]);
        }
        // Type names are not keywords; they stay identifiers
        // (ADR-0020 §2).
        for type_name in ["text", "varchar"] {
            assert_eq!(lexed_kinds(type_name), vec![ident(type_name)]);
        }
    }

    #[test]
    fn identifier_starts_with_letter_or_underscore_only() {
        // A leading digit starts a number, never an identifier. The
        // parser rejects it wherever an identifier was expected,
        // matching the behaviour of the pre-lexer parser.
        assert_eq!(
            lexed_kinds("1Customers"),
            vec![number("1"), ident("Customers")],
        );
    }

    #[test]
    fn positive_integer_lexes_as_number() {
        assert_eq!(lexed_kinds("42"), vec![number("42")]);
    }

    #[test]
    fn negative_integer_lexes_with_sign_attached() {
        assert_eq!(lexed_kinds("-5"), vec![number("-5")]);
    }

    #[test]
    fn fractional_number_lexes_as_one_token() {
        for literal in ["3.14", "-3.14"] {
            assert_eq!(lexed_kinds(literal), vec![number(literal)]);
        }
    }

    #[test]
    fn trailing_dot_without_digits_does_not_attach() {
        // `1.` is Number("1") followed by Punct(Dot); what that pair
        // means, if anything, is the parser's decision.
        assert_eq!(lexed_kinds("1."), vec![number("1"), punct(Punct::Dot)]);
    }

    #[test]
    fn dot_inside_qualified_name_lexes_as_punct() {
        // `Customers.id` is identifier, dot, identifier; the parser
        // assembles these into a `<Table>.<Col>` reference.
        assert_eq!(
            lexed_kinds("Customers.id"),
            vec![ident("Customers"), punct(Punct::Dot), ident("id")],
        );
    }

    #[test]
    fn bare_minus_lexes_as_unknown_char() {
        assert_eq!(lexed_kinds("-"), vec![unknown('-')]);
    }

    #[test]
    fn string_literal_lexes_with_escape_processed() {
        assert_eq!(lexed_kinds("'hello'"), vec![string("hello")]);
        assert_eq!(lexed_kinds("'don''t'"), vec![string("don't")]);
    }

    #[test]
    fn empty_string_literal_lexes_to_empty_payload() {
        assert_eq!(lexed_kinds("''"), vec![string("")]);
    }

    #[test]
    fn string_literal_preserves_internal_whitespace() {
        assert_eq!(lexed_kinds("'a b\tc'"), vec![string("a b\tc")]);
    }

    #[test]
    fn unterminated_string_emits_error_token() {
        assert_eq!(
            lexed_kinds("'oops"),
            vec![TokenKind::Error(LexError::UnterminatedString)],
        );
    }

    #[test]
    fn string_literal_with_multi_byte_unicode_is_safe() {
        let src = "'café'";
        let toks = lex(src);
        assert_eq!(toks.len(), 1);
        let only = &toks[0];
        assert_eq!(only.kind, string("café"));
        // The span counts bytes, so it includes both bytes of `é`.
        assert_eq!(only.span, (0, src.len()));
    }

    #[test]
    fn each_punct_lexes_to_its_variant() {
        for (expected, c, _) in Punct::ALL.iter().copied() {
            let src = c.to_string();
            assert_eq!(lexed_kinds(&src), vec![punct(expected)], "lexing `{c}`");
        }
    }

    #[test]
    fn flag_lexes_with_payload_minus_dashes() {
        let cases = [
            ("--all-rows", "all-rows"),
            ("--create-fk", "create-fk"),
            ("--force-conversion", "force-conversion"),
        ];
        for (src, name) in cases {
            assert_eq!(lexed_kinds(src), vec![flag(name)]);
        }
    }

    #[test]
    fn bare_double_dash_emits_bad_flag_error() {
        assert_eq!(
            lexed_kinds("--"),
            vec![TokenKind::Error(LexError::BadFlag)],
        );
    }

    #[test]
    fn unknown_character_emits_error_token() {
        assert_eq!(lexed_kinds("$"), vec![unknown('$')]);
    }

    #[test]
    fn unknown_character_with_multi_byte_does_not_panic() {
        // A multi-byte character the lexer does not recognise: the
        // span has to cover its full UTF-8 width.
        let src = "✓";
        let toks = lex(src);
        assert_eq!(toks.len(), 1);
        let only = &toks[0];
        assert!(matches!(
            only.kind,
            TokenKind::Error(LexError::UnknownChar('✓'))
        ));
        assert_eq!(only.span, (0, src.len()));
    }

    #[test]
    fn whitespace_separates_otherwise_adjacent_tokens() {
        assert_eq!(
            lexed_kinds("create table"),
            vec![kw(Keyword::Create), kw(Keyword::Table)],
        );
    }

    #[test]
    fn create_table_full_command_lexes_to_expected_sequence() {
        let expected = vec![
            kw(Keyword::Create),
            kw(Keyword::Table),
            ident("Customers"),
            kw(Keyword::With),
            kw(Keyword::Pk),
            ident("id"),
            punct(Punct::Colon),
            ident("int"),
        ];
        assert_eq!(lexed_kinds("create table Customers with pk id:int"), expected);
    }

    #[test]
    fn one_to_n_cardinality_lexes_as_number_colon_identifier() {
        assert_eq!(
            lexed_kinds("1:n"),
            vec![number("1"), punct(Punct::Colon), ident("n")],
        );
    }

    #[test]
    fn insert_with_value_list_lexes_correctly() {
        let expected = vec![
            kw(Keyword::Insert),
            kw(Keyword::Into),
            ident("T"),
            kw(Keyword::Values),
            punct(Punct::OpenParen),
            number("1"),
            punct(Punct::Comma),
            string("hi"),
            punct(Punct::Comma),
            kw(Keyword::Null),
            punct(Punct::CloseParen),
        ];
        assert_eq!(lexed_kinds("insert into T values (1, 'hi', null)"), expected);
    }

    #[test]
    fn spans_are_byte_exact_for_simple_input() {
        let toks = lex("create table");
        assert_eq!(toks.len(), 2);
        let first_end = "create".len();
        let second_start = "create ".len();
        let second_end = "create table".len();
        assert_eq!(toks[0].span, (0, first_end));
        assert_eq!(toks[1].span, (second_start, second_end));
    }

    #[test]
    fn trailing_whitespace_is_stripped() {
        assert_eq!(lexed_kinds("create "), vec![kw(Keyword::Create)]);
    }

    #[test]
    fn error_tokens_appear_in_stream_alongside_valid_tokens() {
        // Lexing continues past an error; the parser rejects the
        // Error token at whatever point it tries to consume it.
        assert_eq!(
            lexed_kinds("create $ table"),
            vec![kw(Keyword::Create), unknown('$'), kw(Keyword::Table)],
        );
    }
}
|
|
||||||
@@ -12,9 +12,6 @@
|
|||||||
pub mod action;
|
pub mod action;
|
||||||
pub mod command;
|
pub mod command;
|
||||||
pub mod grammar;
|
pub mod grammar;
|
||||||
pub mod ident_slot;
|
|
||||||
pub mod keyword;
|
|
||||||
pub mod lexer;
|
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod shortid;
|
pub mod shortid;
|
||||||
pub mod types;
|
pub mod types;
|
||||||
|
|||||||
+10
-33
@@ -459,42 +459,19 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::KEYS_AND_PLACEHOLDERS;
|
use super::KEYS_AND_PLACEHOLDERS;
|
||||||
use crate::dsl::keyword::{Keyword, Punct};
|
|
||||||
use crate::friendly::format::catalog;
|
use crate::friendly::format::catalog;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
/// Every `Keyword` variant must have a
|
// The pre-Phase-F `keyword_and_punct_have_complete_token_vocabulary`
|
||||||
/// `parse.token.keyword.<name>` entry; every `Punct`
|
// test cross-checked the `Keyword` / `Punct` enums against
|
||||||
/// variant must have a `parse.token.punct.<name>` entry.
|
// `parse.token.keyword.*` / `parse.token.punct.*` catalog
|
||||||
/// Catches the case where a keyword or punct is added to
|
// keys. With those enums deleted (ADR-0024 §migration Phase F)
|
||||||
/// the macro but not to the catalog (ADR-0021 §7).
|
// and the walker rendering keyword wording via
|
||||||
#[test]
|
// `format!("`{word}`")`, the catalog entries survive only as
|
||||||
fn keyword_and_punct_have_complete_token_vocabulary() {
|
// historic vocabulary; the `keys_validate_against_catalog`
|
||||||
let declared: HashSet<&str> =
|
// test below still asserts every key in `KEYS_AND_PLACEHOLDERS`
|
||||||
KEYS_AND_PLACEHOLDERS.iter().map(|(k, _)| *k).collect();
|
// resolves and vice versa, which keeps the catalog itself
|
||||||
let mut missing: Vec<String> = Vec::new();
|
// honest. The dead entries collapse in ADR-0024 §cleanup-pass.
|
||||||
for &(kw, _) in Keyword::ALL {
|
|
||||||
let key = kw.catalog_token_key();
|
|
||||||
if !declared.contains(key.as_str()) {
|
|
||||||
missing.push(format!(
|
|
||||||
"Keyword::{kw:?} ⇒ catalog key `{key}` not declared in keys.rs"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for &(p, _, _) in Punct::ALL {
|
|
||||||
let key = p.catalog_token_key();
|
|
||||||
if !declared.contains(key.as_str()) {
|
|
||||||
missing.push(format!(
|
|
||||||
"Punct::{p:?} ⇒ catalog key `{key}` not declared in keys.rs"
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
assert!(
|
|
||||||
missing.is_empty(),
|
|
||||||
"token vocabulary incomplete:\n {}",
|
|
||||||
missing.join("\n "),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Walks `KEYS_AND_PLACEHOLDERS` and verifies every entry
|
/// Walks `KEYS_AND_PLACEHOLDERS` and verifies every entry
|
||||||
/// matches the catalog. ADR-0019 §8.6.
|
/// matches the catalog. ADR-0019 §8.6.
|
||||||
|
|||||||
+10
-9
@@ -210,15 +210,16 @@ pub fn ambient_hint(
|
|||||||
// the typed prefix matches nothing in the schema. (Stage
|
// the typed prefix matches nothing in the schema. (Stage
|
||||||
// 8e / the user's #5.)
|
// 8e / the user's #5.)
|
||||||
if let Some(inv) = crate::completion::invalid_ident_at_cursor(input, cursor, cache) {
|
if let Some(inv) = crate::completion::invalid_ident_at_cursor(input, cursor, cache) {
|
||||||
let kind = match inv.slot {
|
let kind = match inv.source {
|
||||||
crate::dsl::ident_slot::IdentSlot::TableName => "table",
|
crate::dsl::grammar::IdentSource::Tables => "table",
|
||||||
crate::dsl::ident_slot::IdentSlot::Column => "column",
|
crate::dsl::grammar::IdentSource::Columns => "column",
|
||||||
crate::dsl::ident_slot::IdentSlot::RelationshipName => "relationship",
|
crate::dsl::grammar::IdentSource::Relationships => "relationship",
|
||||||
// `NewName` is filtered out by `invalid_ident_at_cursor`
|
// `NewName`, `Types`, `Free` are filtered out by
|
||||||
// (it only fires for known-set slots), so this arm
|
// `invalid_ident_at_cursor` (it only fires for
|
||||||
// is unreachable in practice; render a neutral
|
// known-set sources via `completes_from_schema`), so
|
||||||
// fallback rather than panic.
|
// these arms are unreachable in practice — render a
|
||||||
crate::dsl::ident_slot::IdentSlot::NewName => "identifier",
|
// neutral fallback rather than panic.
|
||||||
|
_ => "identifier",
|
||||||
};
|
};
|
||||||
return Some(AmbientHint::Prose(crate::t!(
|
return Some(AmbientHint::Prose(crate::t!(
|
||||||
"hint.ambient_invalid_ident",
|
"hint.ambient_invalid_ident",
|
||||||
|
|||||||
+4
-7
@@ -839,18 +839,15 @@ async fn refresh_schema_cache(
|
|||||||
event_tx: &mpsc::Sender<AppEvent>,
|
event_tx: &mpsc::Sender<AppEvent>,
|
||||||
) {
|
) {
|
||||||
use crate::completion::SchemaCache;
|
use crate::completion::SchemaCache;
|
||||||
use crate::dsl::ident_slot::IdentSlot;
|
use crate::dsl::grammar::IdentSource;
|
||||||
let mut cache = SchemaCache::default();
|
let mut cache = SchemaCache::default();
|
||||||
if let Ok(tables) = database.list_names_for(IdentSlot::TableName).await {
|
if let Ok(tables) = database.list_names_for(IdentSource::Tables).await {
|
||||||
cache.tables = tables;
|
cache.tables = tables;
|
||||||
}
|
}
|
||||||
if let Ok(columns) = database.list_names_for(IdentSlot::Column).await {
|
if let Ok(columns) = database.list_names_for(IdentSource::Columns).await {
|
||||||
cache.columns = columns;
|
cache.columns = columns;
|
||||||
}
|
}
|
||||||
if let Ok(rels) = database
|
if let Ok(rels) = database.list_names_for(IdentSource::Relationships).await {
|
||||||
.list_names_for(IdentSlot::RelationshipName)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
cache.relationships = rels;
|
cache.relationships = rels;
|
||||||
}
|
}
|
||||||
let _ = event_tx.send(AppEvent::SchemaCacheRefreshed(cache)).await;
|
let _ = event_tx.send(AppEvent::SchemaCacheRefreshed(cache)).await;
|
||||||
|
|||||||
+8
-64
@@ -19,7 +19,6 @@
|
|||||||
use ratatui::style::Color;
|
use ratatui::style::Color;
|
||||||
|
|
||||||
use crate::dsl::grammar::HighlightClass;
|
use crate::dsl::grammar::HighlightClass;
|
||||||
use crate::dsl::lexer::TokenKind;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum Background {
|
pub enum Background {
|
||||||
@@ -106,23 +105,6 @@ impl Theme {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Map a `TokenKind` to its display colour for ambient
|
|
||||||
/// highlighting (ADR-0022 §3). Lex-error tokens always render
|
|
||||||
/// in `tok_error`, regardless of the parse-time error overlay
|
|
||||||
/// applied separately by the renderer.
|
|
||||||
#[must_use]
|
|
||||||
pub const fn token_color(&self, kind: &TokenKind) -> Color {
|
|
||||||
match kind {
|
|
||||||
TokenKind::Keyword(_) => self.tok_keyword,
|
|
||||||
TokenKind::Identifier(_) => self.tok_identifier,
|
|
||||||
TokenKind::Number(_) => self.tok_number,
|
|
||||||
TokenKind::StringLiteral(_) => self.tok_string,
|
|
||||||
TokenKind::Punct(_) => self.tok_punct,
|
|
||||||
TokenKind::Flag(_) => self.tok_flag,
|
|
||||||
TokenKind::Error(_) => self.tok_error,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Map a walker `HighlightClass` to its display colour
|
/// Map a walker `HighlightClass` to its display colour
|
||||||
/// (ADR-0024 §architecture, Phase F). This is the walker-side
|
/// (ADR-0024 §architecture, Phase F). This is the walker-side
|
||||||
/// equivalent of `token_color` — the renderer consumes
|
/// equivalent of `token_color` — the renderer consumes
|
||||||
@@ -152,8 +134,6 @@ impl Default for Theme {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::dsl::keyword::{Keyword, Punct};
|
|
||||||
use crate::dsl::lexer::LexError;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn dark_theme_token_colours_differ_from_background() {
|
fn dark_theme_token_colours_differ_from_background() {
|
||||||
@@ -190,50 +170,14 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn token_color_maps_each_kind_to_the_expected_field() {
|
fn highlight_class_color_maps_each_variant() {
|
||||||
let t = Theme::dark();
|
let t = Theme::dark();
|
||||||
assert_eq!(
|
assert_eq!(t.highlight_class_color(HighlightClass::Keyword), t.tok_keyword);
|
||||||
t.token_color(&TokenKind::Keyword(Keyword::Create)),
|
assert_eq!(t.highlight_class_color(HighlightClass::Identifier), t.tok_identifier);
|
||||||
t.tok_keyword,
|
assert_eq!(t.highlight_class_color(HighlightClass::Number), t.tok_number);
|
||||||
);
|
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
|
||||||
assert_eq!(
|
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
|
||||||
t.token_color(&TokenKind::Identifier("Customers".to_string())),
|
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
|
||||||
t.tok_identifier,
|
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::Number("42".to_string())),
|
|
||||||
t.tok_number,
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::StringLiteral("hi".to_string())),
|
|
||||||
t.tok_string,
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::Punct(Punct::Colon)),
|
|
||||||
t.tok_punct,
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::Flag("all-rows".to_string())),
|
|
||||||
t.tok_flag,
|
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::Error(LexError::UnknownChar('$'))),
|
|
||||||
t.tok_error,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn lex_error_tokens_render_in_tok_error_regardless_of_kind() {
|
|
||||||
let t = Theme::dark();
|
|
||||||
for err in [
|
|
||||||
LexError::UnknownChar('$'),
|
|
||||||
LexError::UnterminatedString,
|
|
||||||
LexError::BadFlag,
|
|
||||||
] {
|
|
||||||
assert_eq!(
|
|
||||||
t.token_color(&TokenKind::Error(err)),
|
|
||||||
t.tok_error,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user