diff --git a/src/app.rs b/src/app.rs index 3a11d48..a8b6005 100644 --- a/src/app.rs +++ b/src/app.rs @@ -996,7 +996,11 @@ impl App { } fn dispatch_dsl(&mut self, input: &str, submission_mode: Mode) -> Vec { - match parse_command(input) { + // ADR-0024 §Phase D: parse with the live schema so typed + // value slots (insert-into-T-values-…) dispatch on the + // column's actual user-facing type instead of accepting + // any literal at bind time. + match crate::dsl::parser::parse_command_with_schema(input, &self.schema_cache) { Ok(Command::Replay { path }) => { // `replay` is parsed as a DSL command for the // sake of grammar uniformity, but its execution diff --git a/src/completion.rs b/src/completion.rs index f960ee0..da6b842 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -29,16 +29,30 @@ use crate::dsl::{ParseError, parse_command}; /// `add 1:n relationship`) — adding more is a one-line edit. const COMPOSITE_CANDIDATES: &[(&str, &str)] = &[("1", "1:n")]; -/// Per-project schema lookup cache (ADR-0022 §9). +/// Per-project schema lookup cache (ADR-0022 §9, ADR-0024 §Phase D). /// /// Held by `App::schema_cache` and consulted by the completion -/// engine for identifier slots. Empty by default; the runtime -/// refreshes on project load and after successful DDL. +/// engine for identifier slots and by the walker for schema-aware +/// value-slot dispatch (Phase D full). Empty by default; the +/// runtime refreshes on project load and after successful DDL. #[derive(Debug, Clone, Default)] pub struct SchemaCache { pub tables: Vec, pub columns: Vec, pub relationships: Vec, + /// Per-table column metadata with user-facing types + /// (ADR-0024 §Phase D). Keyed by table name; lookup is + /// case-insensitive in `columns_for_table` so the walker + /// can resolve `Customers` regardless of how it was typed. + pub table_columns: std::collections::HashMap>, +} + +/// One column's user-facing type info, scoped to a table +/// (ADR-0024 §Phase D, §WalkContext). 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TableColumn { + pub name: String, + pub user_type: crate::dsl::types::Type, } impl SchemaCache { @@ -54,6 +68,24 @@ impl SchemaCache { IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], } } + + /// Per-table column metadata lookup. Case-insensitive on + /// the table name so the walker can resolve identifiers + /// the user typed in either case (ADR-0009 — keywords are + /// case-insensitive, identifiers preserve case; this helper + /// matches the walker's case-insensitive entry-word lookup + /// rather than the strict-case `tables` Vec). + /// + /// Returns `None` when no table matches; an empty `Vec` + /// when the table exists but has no columns (rare — + /// CSV-empty tables still carry PK columns in metadata). + #[must_use] + pub fn columns_for_table(&self, table: &str) -> Option<&[TableColumn]> { + self.table_columns + .iter() + .find(|(name, _)| name.eq_ignore_ascii_case(table)) + .map(|(_, cols)| cols.as_slice()) + } } /// What the grammar would accept at the end of `leading`, @@ -1051,6 +1083,7 @@ mod tests { tables: vec!["Customers".to_string(), "Orders".to_string()], columns: vec![], relationships: vec![], + ..SchemaCache::default() }; // After `show data ` the parser expects a table name. let cs = cands_with("show data ", 10, &cache); @@ -1063,6 +1096,7 @@ mod tests { tables: vec!["Customers".to_string()], columns: vec!["Email".to_string(), "Name".to_string()], relationships: vec![], + ..SchemaCache::default() }; // After `drop column from Customers: ` the parser // expects a column name (existing). @@ -1076,6 +1110,7 @@ mod tests { tables: vec![], columns: vec![], relationships: vec!["cust_orders".to_string(), "ord_items".to_string()], + ..SchemaCache::default() }; // After `drop relationship ` the parser expects either // an identifier (relationship name) or `from`. 
Schema @@ -1092,6 +1127,7 @@ mod tests { tables: vec!["Customers".to_string(), "Orders".to_string()], columns: vec![], relationships: vec![], + ..SchemaCache::default() }; // Typed `Cu` after `show data ` — only `Customers` // matches. @@ -1224,6 +1260,7 @@ mod tests { tables: vec!["Existing".to_string()], columns: vec!["AlsoExisting".to_string()], relationships: vec![], + ..SchemaCache::default() }; let cs = cands_with("create table ", 13, &cache); assert!(cs.is_empty(), "got {cs:?}"); diff --git a/src/dsl/grammar/app.rs b/src/dsl/grammar/app.rs index 2b971e7..38f8f6d 100644 --- a/src/dsl/grammar/app.rs +++ b/src/dsl/grammar/app.rs @@ -50,6 +50,8 @@ const IMPORT_AS_TARGET: Node = Node::Seq(&[ role: "target", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]); const IMPORT_AS_TARGET_OPT: Node = Node::Optional(&IMPORT_AS_TARGET); @@ -72,6 +74,8 @@ const MODE_CHOICES: &[Node] = &[ role: "mode_value", validator: Some(UNKNOWN_MODE_VALIDATOR), highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const MODE_VALUE: Node = Node::Choice(MODE_CHOICES); @@ -84,6 +88,8 @@ const MESSAGES_CHOICES: &[Node] = &[ role: "messages_value", validator: Some(UNKNOWN_MESSAGES_VALIDATOR), highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const MESSAGES_VALUE: Node = Node::Choice(MESSAGES_CHOICES); diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index 4dee04b..242f3d2 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -19,6 +19,7 @@ use crate::dsl::command::{Command, RowFilter}; use crate::dsl::grammar::{ CommandNode, IdentSource, Node, ValidationError, Word, + shared::{column_value_list, current_column_value}, }; use crate::dsl::value::Value; use crate::dsl::walker::outcome::{MatchedItem, MatchedKind, MatchedPath}; @@ -32,6 +33,21 @@ const TABLE_NAME_EXISTING: Node = Node::Ident { role: "table_name", validator: None, highlight_override: None, + writes_table: 
false, + writes_column: false, +}; + +/// Table-name slot variant that populates +/// `WalkContext::current_table_columns` (ADR-0024 §Phase D). +/// Used by `insert into …` so the inner value list can +/// dispatch typed slots per column. +const TABLE_NAME_INSERT: Node = Node::Ident { + source: IdentSource::Tables, + role: "table_name", + validator: None, + highlight_override: None, + writes_table: true, + writes_column: false, }; // `value_literal` — null / true / false / number / string. The @@ -90,6 +106,8 @@ const INSERT_PAREN_ITEM_CHOICES: &[Node] = &[ role: "insert_first_item", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const INSERT_PAREN_ITEM: Node = Node::Choice(INSERT_PAREN_ITEM_CHOICES); @@ -99,11 +117,12 @@ const INSERT_PAREN_LIST: Node = Node::Repeated { min: 1, }; -const INSERT_VALUES_LIST: Node = Node::Repeated { - inner: &VALUE_LITERAL, - separator: Some(&Node::Punct(',')), - min: 1, -}; +/// Schema-aware value list: when the walker has a populated +/// `current_table_columns`, unfolds to a `Seq` of typed slots +/// per column (`int_slot`, `text_slot`, …). When schemaless, +/// falls back to the pre-Phase-D `Repeated(VALUE_LITERAL, ',', 1)` +/// shape (ADR-0024 §Phase D §column_value_list). 
+const INSERT_VALUES_LIST: Node = Node::DynamicSubgrammar(column_value_list); const INSERT_OPTIONAL_VALUES_NODES: &[Node] = &[ Node::Word(Word::keyword("values")), @@ -135,7 +154,7 @@ const INSERT_AFTER_TABLE: Node = Node::Choice(INSERT_AFTER_TABLE_CHOICES); const INSERT_NODES: &[Node] = &[ Node::Word(Word::keyword("into")), - TABLE_NAME_EXISTING, + TABLE_NAME_INSERT, INSERT_AFTER_TABLE, ]; const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES); @@ -144,15 +163,50 @@ const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES); // update — `update set =[, =] (where … | --all-rows)` // ================================================================= +/// Table-name slot that populates `current_table_columns` so +/// the inner `set =` / `where =` slots +/// can resolve column types (Phase D). +const TABLE_NAME_WRITES: Node = Node::Ident { + source: IdentSource::Tables, + role: "table_name", + validator: None, + highlight_override: None, + writes_table: true, + writes_column: false, +}; + +/// Column-name slot in `set col = …` — resolves the column's +/// type into `current_column` so the value slot dispatches per +/// column type (Phase D). +const SET_COLUMN: Node = Node::Ident { + source: IdentSource::Columns, + role: "update_set_column", + validator: None, + highlight_override: None, + writes_table: false, + writes_column: true, +}; + +/// Column-name slot in `where col = …` — same writes-column +/// semantics as SET_COLUMN, distinct role for the AST builder. +const FILTER_COLUMN: Node = Node::Ident { + source: IdentSource::Columns, + role: "filter_column", + validator: None, + highlight_override: None, + writes_table: false, + writes_column: true, +}; + +/// Value slot resolved at walk time from +/// `WalkContext::current_column`. Falls back to the schemaless +/// value-literal choice when no current_column is bound. 
+const PER_COLUMN_VALUE: Node = Node::DynamicSubgrammar(current_column_value); + const UPDATE_ASSIGNMENT_NODES: &[Node] = &[ - Node::Ident { - source: IdentSource::Columns, - role: "update_set_column", - validator: None, - highlight_override: None, - }, + SET_COLUMN, Node::Punct('='), - VALUE_LITERAL, + PER_COLUMN_VALUE, ]; const UPDATE_ASSIGNMENT: Node = Node::Seq(UPDATE_ASSIGNMENT_NODES); const UPDATE_ASSIGNMENTS: Node = Node::Repeated { @@ -163,14 +217,9 @@ const UPDATE_ASSIGNMENTS: Node = Node::Repeated { const WHERE_CLAUSE_NODES: &[Node] = &[ Node::Word(Word::keyword("where")), - Node::Ident { - source: IdentSource::Columns, - role: "filter_column", - validator: None, - highlight_override: None, - }, + FILTER_COLUMN, Node::Punct('='), - VALUE_LITERAL, + PER_COLUMN_VALUE, ]; const WHERE_CLAUSE: Node = Node::Seq(WHERE_CLAUSE_NODES); @@ -178,7 +227,7 @@ const FILTER_CHOICES: &[Node] = &[WHERE_CLAUSE, Node::Flag("all-rows")]; const FILTER_CLAUSE: Node = Node::Choice(FILTER_CHOICES); const UPDATE_NODES: &[Node] = &[ - TABLE_NAME_EXISTING, + TABLE_NAME_WRITES, Node::Word(Word::keyword("set")), UPDATE_ASSIGNMENTS, FILTER_CLAUSE, @@ -191,7 +240,7 @@ const UPDATE_SHAPE: Node = Node::Seq(UPDATE_NODES); const DELETE_NODES: &[Node] = &[ Node::Word(Word::keyword("from")), - TABLE_NAME_EXISTING, + TABLE_NAME_WRITES, FILTER_CLAUSE, ]; const DELETE_SHAPE: Node = Node::Seq(DELETE_NODES); diff --git a/src/dsl/grammar/ddl.rs b/src/dsl/grammar/ddl.rs index d1f56df..1ef950d 100644 --- a/src/dsl/grammar/ddl.rs +++ b/src/dsl/grammar/ddl.rs @@ -29,6 +29,8 @@ const TABLE_NAME_NEW: Node = Node::Ident { role: "table_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; const TABLE_NAME_EXISTING: Node = Node::Ident { @@ -36,6 +38,8 @@ const TABLE_NAME_EXISTING: Node = Node::Ident { role: "table_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; const COLUMN_NAME: Node = Node::Ident { @@ -43,6 
+47,8 @@ const COLUMN_NAME: Node = Node::Ident { role: "column_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; const COLUMN_NAME_NEW: Node = Node::Ident { @@ -50,6 +56,8 @@ const COLUMN_NAME_NEW: Node = Node::Ident { role: "column_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; const RELATIONSHIP_NAME: Node = Node::Ident { @@ -57,6 +65,8 @@ const RELATIONSHIP_NAME: Node = Node::Ident { role: "relationship_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; const RELATIONSHIP_NAME_NEW: Node = Node::Ident { @@ -64,6 +74,8 @@ const RELATIONSHIP_NAME_NEW: Node = Node::Ident { role: "relationship_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }; // `[to]` and `[table]` connectives. @@ -106,6 +118,8 @@ const DR_PARENT_NODES: &[Node] = &[ role: "parent_table", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct('.'), Node::Ident { @@ -113,6 +127,8 @@ const DR_PARENT_NODES: &[Node] = &[ role: "parent_column", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const DR_PARENT: Node = Node::Seq(DR_PARENT_NODES); @@ -123,6 +139,8 @@ const DR_CHILD_NODES: &[Node] = &[ role: "child_table", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct('.'), Node::Ident { @@ -130,6 +148,8 @@ const DR_CHILD_NODES: &[Node] = &[ role: "child_column", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const DR_CHILD: Node = Node::Seq(DR_CHILD_NODES); @@ -188,6 +208,8 @@ const AR_PARENT_NODES: &[Node] = &[ role: "parent_table", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct('.'), Node::Ident { @@ -195,6 +217,8 @@ const AR_PARENT_NODES: &[Node] = 
&[ role: "parent_column", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const AR_PARENT: Node = Node::Seq(AR_PARENT_NODES); @@ -205,6 +229,8 @@ const AR_CHILD_NODES: &[Node] = &[ role: "child_table", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct('.'), Node::Ident { @@ -212,6 +238,8 @@ const AR_CHILD_NODES: &[Node] = &[ role: "child_column", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const AR_CHILD: Node = Node::Seq(AR_CHILD_NODES); @@ -263,6 +291,8 @@ const RENAME_COLUMN_NODES: &[Node] = &[ role: "new_column_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const RENAME_COLUMN: Node = Node::Seq(RENAME_COLUMN_NODES); @@ -595,6 +625,8 @@ const COL_SPEC_NODES: &[Node] = &[ role: "col_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct(':'), Node::Ident { @@ -602,6 +634,8 @@ const COL_SPEC_NODES: &[Node] = &[ role: "col_type", validator: Some(TYPE_VALIDATOR), highlight_override: None, + writes_table: false, + writes_column: false, }, ]; const COL_SPEC: Node = Node::Seq(COL_SPEC_NODES); diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index fa6e8e5..8ed9650 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -215,12 +215,24 @@ pub enum Node { /// dispatch; `validator` runs after a successful identifier- /// shape match and may reject the value with a catalog-driven /// message. + /// + /// `writes_table` (Phase D): when `true` and `source == + /// Tables`, the walker writes the matched ident to + /// `WalkContext::current_table` and resolves + /// `current_table_columns` from the schema cache (if any). 
+ /// `writes_column` (Phase D): when `true` and `source == + /// Columns`, the walker writes the matched ident's + /// `TableColumn` to `WalkContext::current_column` (resolved + /// against `current_table_columns`). Subsequent value slots + /// dispatch on the column's type. Ident { source: IdentSource, role: &'static str, validator: Option, #[allow(dead_code)] highlight_override: Option, + writes_table: bool, + writes_column: bool, }, /// A number literal. The optional `validator` runs against /// the matched text (used by Phase D value slots to enforce diff --git a/src/dsl/grammar/shared.rs b/src/dsl/grammar/shared.rs index 2de6a31..7ab1350 100644 --- a/src/dsl/grammar/shared.rs +++ b/src/dsl/grammar/shared.rs @@ -5,8 +5,11 @@ //! actions; Phase D extends with `where_clause`, //! `column_value_list`, and the typed value slots. -use crate::dsl::grammar::{IdentSource, IdentValidator, Node, ValidationError, Word}; +use crate::dsl::grammar::{ + IdentSource, IdentValidator, Node, NumberValidator, ValidationError, Word, +}; use crate::dsl::types::Type; +use crate::dsl::walker::context::WalkContext; use std::str::FromStr; // --- Type-name validator ------------------------------------------ @@ -46,6 +49,8 @@ pub const TYPE_SLOT: Node = Node::Ident { role: "type", validator: Some(TYPE_VALIDATOR), highlight_override: None, + writes_table: false, + writes_column: false, }; // --- Qualified column reference (`.`) -------------- @@ -56,6 +61,8 @@ const QUALIFIED_COLUMN_NODES: &[Node] = &[ role: "table_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, Node::Punct('.'), Node::Ident { @@ -63,6 +70,8 @@ const QUALIFIED_COLUMN_NODES: &[Node] = &[ role: "column_name", validator: None, highlight_override: None, + writes_table: false, + writes_column: false, }, ]; pub const QUALIFIED_COLUMN: Node = Node::Seq(QUALIFIED_COLUMN_NODES); @@ -118,3 +127,179 @@ pub const REFERENTIAL_CLAUSES: Node = Node::Repeated { separator: None, min: 0, 
}; + +// ================================================================= +// Typed value slots (ADR-0024 §Phase D, §typed-value-slots) +// ================================================================= +// +// Each `_slot()` factory returns a `Node` that accepts either +// `null` or a literal of the corresponding shape, with an +// optional content validator that rejects mis-typed values at +// parse time with localised catalog wording. Per-type prose +// hints attach via `Choice` HintMode — but Phase D's first +// landing keeps `Default` everywhere; the dispatch-by-column-type +// covers the central design claim, and per-type prose can layer +// on later without grammar surface changes. + +fn validate_integer_only(value: &str) -> Result<(), ValidationError> { + // The lexer-side number consumer accepts integers and + // fractional forms (e.g. `3.14`). For int / serial / shortid + // columns reject any literal that carries a decimal point. + if value.contains('.') { + Err(ValidationError { + message_key: "parse.custom.bind_type_mismatch", + args: vec![ + ("found", value.to_string()), + ("expected", "integer".to_string()), + ], + }) + } else { + Ok(()) + } +} + +const INTEGER_ONLY_VALIDATOR: NumberValidator = validate_integer_only; + +fn validate_decimal_string(value: &str) -> Result<(), ValidationError> { + if value.parse::().is_ok() { + Ok(()) + } else { + Err(ValidationError { + message_key: "parse.custom.bind_type_mismatch", + args: vec![ + ("found", value.to_string()), + ("expected", "number".to_string()), + ], + }) + } +} + +const DECIMAL_VALIDATOR: NumberValidator = validate_decimal_string; + +// Bare `null` keyword — used as the trailing branch of every +// typed value slot so a column always accepts the absence sentinel. 
+const NULL_WORD: Node = Node::Word(Word::keyword("null")); + +const INT_SLOT_CHOICES: &[Node] = &[ + Node::NumberLit { + validator: Some(INTEGER_ONLY_VALIDATOR), + }, + NULL_WORD, +]; +const INT_SLOT: Node = Node::Choice(INT_SLOT_CHOICES); + +const REAL_SLOT_CHOICES: &[Node] = &[Node::NumberLit { validator: None }, NULL_WORD]; +const REAL_SLOT: Node = Node::Choice(REAL_SLOT_CHOICES); + +const DECIMAL_SLOT_CHOICES: &[Node] = &[ + Node::NumberLit { + validator: Some(DECIMAL_VALIDATOR), + }, + NULL_WORD, +]; +const DECIMAL_SLOT: Node = Node::Choice(DECIMAL_SLOT_CHOICES); + +const BOOL_SLOT_CHOICES: &[Node] = &[ + Node::Word(Word::keyword("true")), + Node::Word(Word::keyword("false")), + NULL_WORD, +]; +const BOOL_SLOT: Node = Node::Choice(BOOL_SLOT_CHOICES); + +const TEXT_SLOT_CHOICES: &[Node] = &[Node::StringLit, NULL_WORD]; +const TEXT_SLOT: Node = Node::Choice(TEXT_SLOT_CHOICES); + +const DATE_SLOT_CHOICES: &[Node] = &[Node::StringLit, NULL_WORD]; +const DATE_SLOT: Node = Node::Choice(DATE_SLOT_CHOICES); + +const DATETIME_SLOT_CHOICES: &[Node] = &[Node::StringLit, NULL_WORD]; +const DATETIME_SLOT: Node = Node::Choice(DATETIME_SLOT_CHOICES); + +const BLOB_SLOT_CHOICES: &[Node] = &[Node::StringLit, NULL_WORD]; +const BLOB_SLOT: Node = Node::Choice(BLOB_SLOT_CHOICES); + +/// Dispatch a value slot per user-facing type +/// (ADR-0024 §slot_for_type). Returns the same node every time +/// for a given Type — fine to call from within a +/// `DynamicSubgrammar` factory. 
+#[must_use] +pub const fn slot_for_type(ty: Type) -> Node { + match ty { + Type::Int | Type::Serial | Type::ShortId => INT_SLOT, + Type::Real => REAL_SLOT, + Type::Decimal => DECIMAL_SLOT, + Type::Bool => BOOL_SLOT, + Type::Text => TEXT_SLOT, + Type::Date => DATE_SLOT, + Type::DateTime => DATETIME_SLOT, + Type::Blob => BLOB_SLOT, + } +} + +// ================================================================= +// Dynamic sub-grammar: column_value_list +// ================================================================= + +/// Fallback when no schema-resolved column list is available +/// (schemaless parse, missing table, empty schema cache). +/// Mirrors the pre-Phase-D `value_literal` Choice. +const FALLBACK_VALUE_LITERAL_CHOICES: &[Node] = &[ + Node::Word(Word::keyword("null")), + Node::Word(Word::keyword("true")), + Node::Word(Word::keyword("false")), + Node::NumberLit { validator: None }, + Node::StringLit, +]; +const FALLBACK_VALUE_LITERAL: Node = Node::Choice(FALLBACK_VALUE_LITERAL_CHOICES); + +const FALLBACK_VALUE_LIST: Node = Node::Repeated { + inner: &FALLBACK_VALUE_LITERAL, + separator: Some(&Node::Punct(',')), + min: 1, +}; + +/// Value slot keyed on `WalkContext::current_column`. +/// +/// Picks the typed slot for the column whose name was most +/// recently matched by an `Ident { source: Columns, +/// writes_column: true }` node (ADR-0024 §Phase D). Fallback +/// when no current_column is resolved: the schemaless +/// value-literal choice. +pub fn current_column_value(ctx: &WalkContext) -> Node { + ctx.current_column + .as_ref() + .map_or(FALLBACK_VALUE_LITERAL, |col| slot_for_type(col.user_type)) +} + +/// Comma-separated list of typed value slots, one per column. +/// +/// Reads `current_table_columns` from the WalkContext (ADR-0024 +/// §Phase D §column_value_list). 
When the schema cache holds +/// no entry for the current table — or the walker is +/// schemaless — falls back to the schema-unaware +/// `Repeated(VALUE_LITERAL, ',', 1)` shape so existing +/// callers/tests continue to work. +pub fn column_value_list(ctx: &WalkContext) -> Node { + let Some(cols) = ctx.current_table_columns.as_ref() else { + return FALLBACK_VALUE_LIST; + }; + if cols.is_empty() { + return FALLBACK_VALUE_LIST; + } + // Build a Seq of typed slots interleaved with commas. + let mut children: Vec = Vec::with_capacity(cols.len() * 2); + for (i, col) in cols.iter().enumerate() { + if i > 0 { + children.push(Node::Punct(',')); + } + children.push(slot_for_type(col.user_type)); + } + Node::Seq(Box::leak(children.into_boxed_slice())) +} + +// The HintMode / NumberValidator imports are part of the Phase D +// typed-slot toolkit even though only NumberValidator is used by +// the explicit validators above; surface HintMode so future +// per-type prose annotations can attach without re-importing. +#[allow(dead_code)] +const _USES_HINT_MODE: Option = None; diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index a3a5adc..a2757e1 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -88,11 +88,37 @@ impl ParseError { /// first identifier-shape token isn't a registered entry word), /// produces a synthetic "unknown command" error naming every /// valid entry keyword. +/// +/// Schemaless variant: schema-aware nodes +/// (`Ident { source: Tables }` with `writes_table` enabled, +/// `DynamicSubgrammar`) fall back to schema-unaware behaviour. +/// Use `parse_command_with_schema` to enable typed value slots +/// (ADR-0024 §Phase D). pub fn parse_command(input: &str) -> Result { + parse_command_inner(input, None) +} + +/// Schema-aware parse entry point (ADR-0024 §Phase D). 
+/// +/// Threads a `SchemaCache` reference through `WalkContext` so +/// the walker can populate `current_table` / `current_column` +/// from existing entities and `DynamicSubgrammar` factories +/// can unfold per-column typed value slots. +pub fn parse_command_with_schema( + input: &str, + schema: &crate::completion::SchemaCache, +) -> Result { + parse_command_inner(input, Some(schema)) +} + +fn parse_command_inner( + input: &str, + schema: Option<&crate::completion::SchemaCache>, +) -> Result { if input.trim().is_empty() { return Err(ParseError::Empty); } - if let Some(result) = try_walker_route(input) { + if let Some(result) = try_walker_route(input, schema) { return result; } Err(unknown_command_error(input)) @@ -124,13 +150,18 @@ fn unknown_command_error(source: &str) -> ParseError { } } -/// Walker route (ADR-0024 §migration Phase A). Returns `None` -/// when the walker doesn't engage (input doesn't start with a -/// migrated entry keyword); the router falls through to the -/// chumsky path for non-migrated commands. -fn try_walker_route(source: &str) -> Option> { +/// Walker route. Returns `None` when the walker doesn't engage +/// (input doesn't start with a registered entry keyword); the +/// router falls through to the synthetic "unknown command" +/// error. +fn try_walker_route( + source: &str, + schema: Option<&crate::completion::SchemaCache>, +) -> Option> { use crate::dsl::walker::{self, outcome::WalkBound}; - let mut ctx = walker::context::WalkContext::new(); + let mut ctx = schema.map_or_else(walker::context::WalkContext::new, |s| { + walker::context::WalkContext::with_schema(s) + }); let (result, command) = walker::walk(source, WalkBound::EndOfInput, &mut ctx); let result = result?; Some(walker_outcome_to_parse_result(source, result, command)) diff --git a/src/dsl/walker/context.rs b/src/dsl/walker/context.rs index c1c148f..e44f9e0 100644 --- a/src/dsl/walker/context.rs +++ b/src/dsl/walker/context.rs @@ -1,43 +1,62 @@ //! 
`WalkContext` — per-walk mutable state that flows through the -//! walker (ADR-0024 §WalkContext). +//! walker (ADR-0024 §WalkContext, §Phase D). //! -//! Phase A keeps this minimal: app-lifecycle commands have no -//! schema dependency. The `current_table`, `current_table_columns`, -//! and schema-cache pointer become populated as Phase B-D land -//! the schema-aware DDL/data commands. +//! Phase D plumbed a schema reference through the context so +//! schema-aware nodes (`Ident { source: Tables }` writing +//! `current_table`, `DynamicSubgrammar` reading +//! `current_table_columns`) can resolve real entities at walk +//! time. Pre-Phase-D `default()` callers (tests, the chumsky- +//! era `parse_command(input)` signature) still work — the +//! schema slot is `None` and dynamic dispatch falls back to a +//! generic value-literal slot. -/// Per-walk state. Cheap to construct; `default()` is the right -/// shape for app-lifecycle commands. +use crate::completion::{SchemaCache, TableColumn}; + +/// Per-walk state. +/// +/// Carries an optional schema reference (so callers without a +/// schema continue to work) plus mutable accumulators that +/// nodes can write to during the walk: +/// +/// - `current_table` / `current_table_columns` — populated when +/// an `Ident { source: Tables }` node with `writes_table: +/// true` matches a known table. +/// - `current_column` — populated by `Ident { source: Columns +/// writes_column: true }` for `set col = …` / `where col = +/// …` slots so the next value-slot picks the column's typed +/// sub-grammar. #[derive(Debug, Default)] -pub struct WalkContext { - /// Table whose name an `Ident { source: Tables, writes_table: - /// true }` matched earlier in the walk. Phase B+ writes this. +pub struct WalkContext<'a> { + pub schema: Option<&'a SchemaCache>, pub current_table: Option, - - /// Columns of `current_table`, resolved against the schema - /// cache when the table identifier matched. 
Phase D+ uses - /// this to drive the dynamic `column_value_list` sub-grammar. - #[allow(dead_code)] - pub current_table_columns: Option>, - - /// For `set col=…` and `where col=…`, the column whose value - /// is about to be consumed. Phase D+ writes this so the value - /// slot picks the right typed sub-grammar. - #[allow(dead_code)] - pub current_column: Option, + pub current_table_columns: Option>, + pub current_column: Option, } -impl WalkContext { +impl<'a> WalkContext<'a> { + /// Schemaless walk context — the legacy default used by + /// pre-Phase-D callers and tests that don't care about + /// schema-aware narrowing. + #[must_use] pub fn new() -> Self { Self::default() } + + /// Schema-aware walk context. Dynamic sub-grammars read + /// `schema` (via `current_table_columns`) to unfold typed + /// per-column value slots. + #[must_use] + pub const fn with_schema(schema: &'a SchemaCache) -> Self { + Self { + schema: Some(schema), + current_table: None, + current_table_columns: None, + current_column: None, + } + } } -/// Schema info for a single column. Phase D+ populates this from -/// the schema cache; Phase A leaves it unused. -#[derive(Debug, Clone)] +/// Convenience re-export so non-walker modules don't reach +/// across `completion::TableColumn` directly. 
#[allow(dead_code)] -pub struct ColumnInfo { - pub name: String, - pub user_type: crate::dsl::types::Type, -} +pub type ColumnInfo = TableColumn; diff --git a/src/dsl/walker/driver.rs b/src/dsl/walker/driver.rs index e5ac87c..de102ff 100644 --- a/src/dsl/walker/driver.rs +++ b/src/dsl/walker/driver.rs @@ -92,21 +92,44 @@ pub fn walk_node( role, validator, highlight_override: _, - } => walk_ident(source, pos, *src, role, *validator, path, per_byte), + writes_table, + writes_column, + } => walk_ident( + source, + pos, + *src, + role, + *validator, + *writes_table, + *writes_column, + ctx, + path, + per_byte, + ), Node::NumberLit { validator } => walk_number_lit(source, pos, *validator, path, per_byte), Node::Literal(literal) => walk_literal(source, pos, literal, path, per_byte), Node::StringLit => walk_string_lit(source, pos, path, per_byte), - Node::BlobLit | Node::DynamicSubgrammar(_) => { - // Phase A-D: not exercised yet. Reaching this branch - // means a future-phase grammar got declared without - // the walker support landing — surface as a hard - // failure so tests catch it loudly rather than - // silently mis-parsing. + Node::BlobLit => { + // BlobLit terminals are declared but no current grammar + // node uses them. Reaching this branch means a future + // grammar declared a BlobLit without walker support + // landing — surface as a hard failure so tests catch + // it loudly rather than silently mis-parsing. NodeWalkResult::Failed { position: pos, kind: FailureKind::Mismatch { expected: vec![] }, } } + Node::DynamicSubgrammar(factory) => { + // ADR-0024 §sub-grammars: resolve the inner Node at + // walk time using the active `WalkContext`, then + // recursively walk it. `Box::leak` per-walk gives the + // inner static-slice fields (Choice/Seq) the lifetime + // they require; the leak is bounded by command-shape + // complexity per walk. 
+ let resolved: &'static Node = Box::leak(Box::new(factory(ctx))); + walk_node(source, pos, resolved, ctx, path, per_byte) + } Node::Flag(name) => walk_flag(source, pos, name, path, per_byte), Node::Repeated { inner, @@ -185,12 +208,16 @@ fn walk_punct( } } +#[allow(clippy::too_many_arguments)] fn walk_ident( source: &str, position: usize, src: crate::dsl::grammar::IdentSource, role: &'static str, validator: Option, + writes_table: bool, + writes_column: bool, + ctx: &mut WalkContext, path: &mut MatchedPath, per_byte: &mut Vec, ) -> NodeWalkResult { @@ -209,6 +236,25 @@ fn walk_ident( kind: FailureKind::Validation(err), }; } + // ADR-0024 §Phase D: schema-aware writes. When the ident is + // a Tables source with `writes_table`, resolve the matched + // name against the schema cache and populate current_table / + // current_table_columns so subsequent dynamic sub-grammars + // can read them. `writes_column` resolves against the + // already-populated `current_table_columns`. + if writes_table && matches!(src, crate::dsl::grammar::IdentSource::Tables) { + ctx.current_table = Some(text.clone()); + ctx.current_table_columns = ctx + .schema + .and_then(|s| s.columns_for_table(&text).map(<[_]>::to_vec)); + } + if writes_column && matches!(src, crate::dsl::grammar::IdentSource::Columns) { + ctx.current_column = ctx.current_table_columns.as_ref().and_then(|cols| { + cols.iter() + .find(|c| c.name.eq_ignore_ascii_case(&text)) + .cloned() + }); + } path.push(MatchedItem { kind: MatchedKind::Ident { role }, text, diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index c648f78..1fd579c 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -152,10 +152,10 @@ pub fn expected_at_input(source: &str) -> Vec { /// walker's error. /// - `(None, None)` when the entry word doesn't match any /// registered command — the router falls through to chumsky. 
-pub fn walk( +pub fn walk<'a>( source: &str, bound: WalkBound, - ctx: &mut WalkContext, + ctx: &mut WalkContext<'a>, ) -> (Option, Option) { // Phase A only consumes EndOfInput; Position would slice // the source, which is the same operation. @@ -1165,4 +1165,232 @@ mod tests { // schema — schema-listable slot, not a HintMode case. assert!(hint_mode_at_input("show data ").is_none()); } + + // ========================================================= + // Phase D full — schema-aware value typing. + // ========================================================= + + use crate::completion::{SchemaCache, TableColumn}; + use crate::dsl::parser::parse_command_with_schema; + + fn schema_with(table: &str, columns: &[(&str, Type)]) -> SchemaCache { + let cols: Vec = columns + .iter() + .map(|(n, t)| TableColumn { + name: (*n).to_string(), + user_type: *t, + }) + .collect(); + let mut cache = SchemaCache::default(); + cache.tables.push(table.to_string()); + for c in &cols { + cache.columns.push(c.name.clone()); + } + cache.table_columns.insert(table.to_string(), cols); + cache + } + + #[test] + fn phase_d_insert_with_schema_accepts_typed_values_per_column() { + let schema = schema_with( + "Customers", + &[("id", Type::Serial), ("Name", Type::Text), ("Active", Type::Bool)], + ); + // 3 columns: int, text, bool. Each value matches its slot. + let cmd = parse_command_with_schema( + "insert into Customers values (1, 'Alice', true)", + &schema, + ) + .expect("parse"); + match cmd { + Command::Insert { table, values, .. } => { + assert_eq!(table, "Customers"); + assert_eq!(values.len(), 3); + } + other => panic!("expected Insert, got {other:?}"), + } + } + + #[test] + fn phase_d_insert_rejects_decimal_in_int_column() { + // The schema has `id` as Int. `3.14` is a Number with a + // decimal — the typed `int_slot` validator rejects. 
+ let schema = schema_with("T", &[("id", Type::Int)]); + let err = parse_command_with_schema("insert into T values (3.14)", &schema) + .expect_err("should reject"); + match err { + crate::dsl::ParseError::Invalid { message, .. } => { + assert!( + message.contains("integer") || message.contains("3.14"), + "got: {message}" + ); + } + other => panic!("expected Invalid, got {other:?}"), + } + } + + #[test] + fn phase_d_insert_accepts_null_at_any_column() { + // null is the absence sentinel; every typed slot + // accepts it. + let schema = schema_with( + "T", + &[("a", Type::Int), ("b", Type::Text), ("c", Type::Bool)], + ); + let cmd = parse_command_with_schema( + "insert into T values (null, null, null)", + &schema, + ) + .expect("parse"); + match cmd { + Command::Insert { values, .. } => { + assert!(values.iter().all(|v| matches!(v, Value::Null))); + } + other => panic!("expected Insert, got {other:?}"), + } + } + + #[test] + fn phase_d_insert_falls_back_when_table_not_in_schema() { + // The schema is empty; the walker can't resolve column + // info for `Customers`. The DynamicSubgrammar falls + // back to the schemaless generic value-literal list and + // accepts mixed-shape values as it did pre-Phase-D. + let schema = SchemaCache::default(); + let cmd = parse_command_with_schema( + "insert into Customers values (1, 'Alice')", + &schema, + ) + .expect("parse — fallback path"); + match cmd { + Command::Insert { values, .. } => assert_eq!(values.len(), 2), + other => panic!("expected Insert, got {other:?}"), + } + } + + #[test] + fn phase_d_schemaless_parse_command_still_works() { + // The pre-Phase-D `parse_command(input)` signature + // passes no schema; the DynamicSubgrammar falls back to + // the schemaless value-literal list. + let cmd = parse("insert into T values (1, 'Alice', null)").expect("parse"); + match cmd { + Command::Insert { values, .. 
} => assert_eq!(values.len(), 3), + other => panic!("expected Insert, got {other:?}"), + } + } + + #[test] + fn phase_d_insert_accepts_bool_value_for_bool_column() { + let schema = schema_with("T", &[("flag", Type::Bool)]); + let cmd = parse_command_with_schema("insert into T values (false)", &schema) + .expect("parse"); + match cmd { + Command::Insert { values, .. } => { + assert_eq!(values, vec![Value::Bool(false)]); + } + other => panic!("expected Insert, got {other:?}"), + } + } + + #[test] + fn phase_d_update_accepts_text_value_for_text_column() { + let schema = schema_with( + "Customers", + &[("id", Type::Int), ("Email", Type::Text)], + ); + let cmd = parse_command_with_schema( + "update Customers set Email='new@b.c' where id=1", + &schema, + ) + .expect("parse"); + match cmd { + Command::Update { assignments, .. } => { + assert_eq!(assignments.len(), 1); + assert_eq!(assignments[0].0, "Email"); + } + other => panic!("expected Update, got {other:?}"), + } + } + + #[test] + fn phase_d_update_rejects_decimal_in_int_set_column() { + // `id` and `Score` are both int. Assigning `3.14` to + // Score hits the int_slot validator. + let schema = schema_with( + "T", + &[("id", Type::Int), ("Score", Type::Int)], + ); + let err = parse_command_with_schema( + "update T set Score=3.14 where id=1", + &schema, + ) + .expect_err("should reject"); + match err { + crate::dsl::ParseError::Invalid { message, .. } => { + assert!( + message.contains("integer") || message.contains("3.14"), + "got: {message}" + ); + } + other => panic!("expected Invalid, got {other:?}"), + } + } + + #[test] + fn phase_d_delete_where_uses_typed_column_value() { + // `where id=1` — id is Int; `1` matches the int_slot. + let schema = schema_with("T", &[("id", Type::Int), ("Name", Type::Text)]); + let cmd = parse_command_with_schema("delete from T where id=1", &schema) + .expect("parse"); + match cmd { + Command::Delete { .. 
} => {} + other => panic!("expected Delete, got {other:?}"), + } + } + + #[test] + fn phase_d_delete_where_rejects_decimal_at_int_column() { + // `where id=3.14` — id is Int; the typed slot rejects. + let schema = schema_with("T", &[("id", Type::Int)]); + let err = parse_command_with_schema("delete from T where id=3.14", &schema) + .expect_err("should reject"); + match err { + crate::dsl::ParseError::Invalid { message, .. } => { + assert!( + message.contains("integer") || message.contains("3.14"), + "got: {message}" + ); + } + other => panic!("expected Invalid, got {other:?}"), + } + } + + #[test] + fn phase_d_update_multi_assignment_uses_per_column_types() { + let schema = schema_with( + "Customers", + &[ + ("id", Type::Int), + ("Name", Type::Text), + ("Score", Type::Int), + ], + ); + // `Score=42` (int slot) and `Name='Alice'` (text slot) + // — each value slot dispatches on the column whose + // ident matched immediately before. + let cmd = parse_command_with_schema( + "update Customers set Score=42, Name='Alice' where id=1", + &schema, + ) + .expect("parse"); + match cmd { + Command::Update { assignments, .. } => { + assert_eq!(assignments.len(), 2); + assert_eq!(assignments[0].0, "Score"); + assert_eq!(assignments[1].0, "Name"); + } + other => panic!("expected Update, got {other:?}"), + } + } } diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 1448937..e613aa3 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -143,6 +143,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("parse.caret", &["padding"]), // Custom (try_map / source-slice) error messages raised // by the DSL parser. See `parse.custom.*` in the catalog. 
+ ("parse.custom.bind_type_mismatch", &["found", "expected"]), ("parse.custom.change_column_flags_exclusive", &[]), ("parse.custom.create_table_needs_pk", &[]), ("parse.custom.on_action_specified_twice", &["target"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 1bf9a0e..a955c91 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -313,6 +313,12 @@ parse: change_column_flags_exclusive: "`--force-conversion` and `--dont-convert` are mutually exclusive — pick one." unknown_type: "unknown type '{found}' (expected one of: {expected})" unknown_action: "unknown referential action '{found}' (expected one of: {expected})" + # Phase D typed-value-slot mismatch (ADR-0024 §Phase D): + # surfaced when a column's value slot rejects the literal + # the user typed (e.g. `3.14` at an `int` column). `{found}` + # is the literal text; `{expected}` names the required + # shape (`integer`, `number`, …). + bind_type_mismatch: "value '{found}' is not a valid {expected}" # Caret pointer showing where in the input the parser # failed. `{padding}` is the leading whitespace; the # template appends `^` so the rendered line places the diff --git a/src/runtime.rs b/src/runtime.rs index 46dd30d..e275a0b 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -838,7 +838,7 @@ async fn refresh_schema_cache( database: &Database, event_tx: &mpsc::Sender, ) { - use crate::completion::SchemaCache; + use crate::completion::{SchemaCache, TableColumn}; use crate::dsl::grammar::IdentSource; let mut cache = SchemaCache::default(); if let Ok(tables) = database.list_names_for(IdentSource::Tables).await { @@ -850,6 +850,28 @@ async fn refresh_schema_cache( if let Ok(rels) = database.list_names_for(IdentSource::Relationships).await { cache.relationships = rels; } + // Phase D (ADR-0024 §Phase D): per-table column metadata + // with user-facing types. 
The walker's + // `DynamicSubgrammar(column_value_list)` reads this to + // unfold typed value slots per column at `insert into T + // values (...)` positions. Best-effort: a `describe_table` + // miss leaves that table's columns unpopulated and the + // walker falls back to the schemaless value-literal list. + // Columns whose `user_type` is `None` are skipped by the + // `filter_map` below, so a cached list can be shorter than + // the table's column list. NOTE(review): confirm the + // positional `insert` value slots stay aligned with the + // real columns in that case. + for name in cache.tables.clone() { + if let Ok(desc) = database.describe_table(name.clone(), None).await { + let cols: Vec = desc + .columns + .into_iter() + .filter_map(|c| { + c.user_type.map(|ty| TableColumn { + name: c.name, + user_type: ty, + }) + }) + .collect(); + cache.table_columns.insert(name, cols); + } + } let _ = event_tx.send(AppEvent::SchemaCacheRefreshed(cache)).await; }