From 80310929d79c24f59edf80a5355ec831a5cd195e Mon Sep 17 00:00:00 2001
From: "claude@clouddev1"
Date: Mon, 25 May 2026 08:11:39 +0000
Subject: [PATCH] =?UTF-8?q?feat:=20ADR-0035=204a=20=E2=80=94=20SQL=20CREAT?=
 =?UTF-8?q?E=20TABLE=20grammar=20shape?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The post-CREATE shape (src/dsl/grammar/sql_create_table.rs):
  TABLE [IF NOT EXISTS] <name> ( <col-def>, … ) [;]
  - col-def: <name> <type> [NOT NULL] [UNIQUE] [PRIMARY KEY]
  - type: ten keywords + standard-SQL aliases (via from_sql_name) + the
    two-word `double precision` branch + discarded (len[,len]) arg
  - table-level PRIMARY KEY (cols) — single and compound
  - __rdbms_* target rejected at walk time

DEFAULT/CHECK/table-level UNIQUE shapes are deliberately absent (the
4a.2 constraint slice); FK is absent (4b). 13 accept/reject tests mirror
sql_insert's walk_node harness. Shape only — the CommandNode + builder +
worker wiring follow.
---
 src/dsl/grammar/mod.rs              |   1 +
 src/dsl/grammar/sql_create_table.rs | 376 ++++++++++++++++++++++++++++
 2 files changed, 377 insertions(+)
 create mode 100644 src/dsl/grammar/sql_create_table.rs

diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs
index 1e5638f..c744dbf 100644
--- a/src/dsl/grammar/mod.rs
+++ b/src/dsl/grammar/mod.rs
@@ -28,6 +28,7 @@ pub mod ddl;
 pub mod expr;
 pub mod shared;
 pub mod sql_expr;
+pub mod sql_create_table;
 pub mod sql_delete;
 pub mod sql_insert;
 pub mod sql_select;
diff --git a/src/dsl/grammar/sql_create_table.rs b/src/dsl/grammar/sql_create_table.rs
new file mode 100644
index 0000000..9820e66
--- /dev/null
+++ b/src/dsl/grammar/sql_create_table.rs
@@ -0,0 +1,376 @@
+//! SQL `CREATE TABLE` grammar (ADR-0035 §4, sub-phase 4a).
+//!
+//! Grammar-as-text in the unified tree (ADR-0030 §4), but — unlike
+//! the DML `Sql*` commands which execute verbatim — `CREATE TABLE`
+//! executes **structurally** (ADR-0035 §1): the builder extracts the
+//! columns / types / primary key and the worker drives the existing
+//! `do_create_table` machinery, so an advanced-mode-created table is a
+//! first-class playground object (metadata, `STRICT`, the ten-type
+//! vocabulary). This file holds only the **shape**; the `CommandNode`
+//! and `build_sql_create_table` live in `ddl.rs` (mirroring how the
+//! DML shapes here pair with `data.rs` builders).
+//!
+//! Scope (4a): columns + types (the §3 alias map, incl. the two-word
+//! `double precision` and discarded length args) + the clean-reuse
+//! column constraints `NOT NULL` / `UNIQUE` / column-level
+//! `PRIMARY KEY` + single/compound table-level `PRIMARY KEY (…)` +
+//! `IF NOT EXISTS`. **No** foreign keys (4b), **no** `DEFAULT` /
+//! `CHECK` / table-level `UNIQUE` (the 4a.2 constraint slice) — those
+//! shapes are absent here, so typing them is an ordinary parse error
+//! until their slice lands.
+//!
+//! The entry-word dispatch consumes the leading `CREATE` keyword
+//! before this shape walks, so it starts at `TABLE` (mirroring
+//! `sql_insert::SQL_INSERT_SHAPE`, which starts at `INTO`).
+
+use crate::dsl::grammar::sql_select::reject_internal_table;
+use crate::dsl::grammar::{IdentSource, Node, ValidationError, Word};
+use crate::dsl::types::Type;
+
+static COMMA: Node = Node::Punct(',');
+
+// --- Type-name slot (advanced-mode aliases, ADR-0035 §3) ----------
+
+/// Reject any type name the SQL alias resolver doesn't recognise.
+/// Distinct from `shared::validate_type_name` (the simple-mode
+/// validator, which accepts only the ten keywords); this one also
+/// admits the standard-SQL aliases via [`Type::from_sql_name`]. The
+/// `{expected}` list still names the ten playground keywords — the
+/// vocabulary we teach — not the aliases.
+fn validate_sql_type_name(value: &str) -> Result<(), ValidationError> {
+    if Type::from_sql_name(value).is_some() {
+        Ok(())
+    } else {
+        let expected = Type::all()
+            .iter()
+            .map(|t| t.keyword())
+            .collect::<Vec<_>>()
+            .join(", ");
+        Err(ValidationError {
+            message_key: "parse.custom.unknown_type",
+            args: vec![("found", value.to_string()), ("expected", expected)],
+        })
+    }
+}
+
+/// The single-word type name. `double precision` is handled by a
+/// separate keyword-pair branch in [`SQL_TYPE`] (ADR-0035 §6.3,
+/// implementer call), so the validator only ever sees one word.
+const SQL_TYPE_NAME: Node = Node::Ident {
+    source: IdentSource::Types,
+    role: "col_type",
+    validator: Some(validate_sql_type_name),
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+// Optional `( n [, n] )` length / precision argument — matched and
+// **discarded** (the playground's types are unparameterised,
+// ADR-0035 §3). `varchar(255)`, `numeric(10, 2)`.
+static LENGTH_SECOND_NODES: &[Node] = &[Node::Punct(','), Node::NumberLit { validator: None }];
+static LENGTH_NODES: &[Node] = &[
+    Node::Punct('('),
+    Node::NumberLit { validator: None },
+    Node::Optional(&Node::Seq(LENGTH_SECOND_NODES)),
+    Node::Punct(')'),
+];
+const LENGTH_OPT: Node = Node::Optional(&Node::Seq(LENGTH_NODES));
+
+// `double precision` — the lone two-word alias. A dedicated branch so
+// the per-word `Ident` validator never has to make sense of `double`
+// on its own (ADR-0035 §6.3). The builder maps the pair to
+// `Type::Real`.
+static DOUBLE_PRECISION_NODES: &[Node] = &[
+    Node::Word(Word::keyword("double")),
+    Node::Word(Word::keyword("precision")),
+];
+static TYPE_WITH_LENGTH_NODES: &[Node] = &[SQL_TYPE_NAME, LENGTH_OPT];
+static SQL_TYPE_CHOICES: &[Node] = &[
+    Node::Seq(DOUBLE_PRECISION_NODES),
+    Node::Seq(TYPE_WITH_LENGTH_NODES),
+];
+/// `double precision | <type> [ '(' n [, n] ')' ]`.
+const SQL_TYPE: Node = Node::Choice(SQL_TYPE_CHOICES);
+
+// --- Column-level constraints (4a clean-reuse set only) -----------
+
+static NOT_NULL_NODES: &[Node] = &[
+    Node::Word(Word::keyword("not")),
+    Node::Word(Word::keyword("null")),
+];
+static PRIMARY_KEY_NODES: &[Node] = &[
+    Node::Word(Word::keyword("primary")),
+    Node::Word(Word::keyword("key")),
+];
+// `NOT NULL` | `UNIQUE` | `PRIMARY KEY`. `DEFAULT` / `CHECK` are
+// deliberately absent (4a.2): typing them is an ordinary parse error
+// until the constraint slice lands.
+static COL_CONSTRAINT_CHOICES: &[Node] = &[
+    Node::Seq(NOT_NULL_NODES),
+    Node::Word(Word::keyword("unique")),
+    Node::Seq(PRIMARY_KEY_NODES),
+];
+const COL_CONSTRAINT: Node = Node::Choice(COL_CONSTRAINT_CHOICES);
+/// Zero-or-more column constraints after the type (`min: 0`).
+const COL_CONSTRAINT_SUFFIX: Node = Node::Repeated {
+    inner: &COL_CONSTRAINT,
+    separator: None,
+    min: 0,
+};
+
+// --- Column definition: `<name> <type> [constraints…]` ------------
+
+const COL_NAME: Node = Node::Ident {
+    source: IdentSource::NewName,
+    role: "col_name",
+    validator: None,
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+static COLUMN_DEF_NODES: &[Node] = &[COL_NAME, SQL_TYPE, COL_CONSTRAINT_SUFFIX];
+const COLUMN_DEF: Node = Node::Seq(COLUMN_DEF_NODES);
+
+// --- Table-level `PRIMARY KEY ( col, … )` (single + compound) -----
+
+// A column reference inside the table-level PK list. The columns are
+// defined in this same statement (not in the schema yet), so
+// `NewName` (no schema completion); the builder checks each name
+// against the defined columns.
+const PK_COLUMN_REF: Node = Node::Ident {
+    source: IdentSource::NewName,
+    role: "pk_column",
+    validator: None,
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+static TABLE_PK_NODES: &[Node] = &[
+    Node::Word(Word::keyword("primary")),
+    Node::Word(Word::keyword("key")),
+    Node::Punct('('),
+    Node::Repeated {
+        inner: &PK_COLUMN_REF,
+        separator: Some(&COMMA),
+        min: 1,
+    },
+    Node::Punct(')'),
+];
+const TABLE_PK: Node = Node::Seq(TABLE_PK_NODES);
+
+// One element of the column list: a table-level `PRIMARY KEY (…)` or a
+// column definition. `TABLE_PK` is tried first — it starts with the
+// keyword `primary`, which disambiguates it from a column name. (A
+// column literally named `primary` is therefore unavailable, the same
+// trade real SQL makes with its reserved words.)
+static ELEMENT_CHOICES: &[Node] = &[TABLE_PK, COLUMN_DEF];
+const ELEMENT: Node = Node::Choice(ELEMENT_CHOICES);
+
+static COLUMN_LIST_NODES: &[Node] = &[
+    Node::Punct('('),
+    Node::Repeated {
+        inner: &ELEMENT,
+        separator: Some(&COMMA),
+        min: 1,
+    },
+    Node::Punct(')'),
+];
+const COLUMN_LIST: Node = Node::Seq(COLUMN_LIST_NODES);
+
+// --- `IF NOT EXISTS` (ADR-0035 §4, no-op-with-note at execution) ---
+
+static IF_NOT_EXISTS_NODES: &[Node] = &[
+    Node::Word(Word::keyword("if")),
+    Node::Word(Word::keyword("not")),
+    Node::Word(Word::keyword("exists")),
+];
+const IF_NOT_EXISTS_OPT: Node = Node::Optional(&Node::Seq(IF_NOT_EXISTS_NODES));
+
+// --- The full post-`CREATE` shape ---------------------------------
+
+/// The table name. `NewName` (the user invents it); `__rdbms_*`
+/// rejected (ADR-0030 §6) so the walker's `[ERR]` indicator flags an
+/// internal-table target before submit, mirroring the DML shapes.
+const TABLE_NAME: Node = Node::Ident {
+    source: IdentSource::NewName,
+    role: "table_name",
+    validator: Some(reject_internal_table),
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+    writes_table_alias: false,
+    writes_cte_name: false,
+    writes_projection_alias: false,
+};
+
+static SQL_CREATE_TABLE_TAIL_NODES: &[Node] = &[
+    Node::Word(Word::keyword("table")),
+    IF_NOT_EXISTS_OPT,
+    TABLE_NAME,
+    COLUMN_LIST,
+    Node::Optional(&Node::Punct(';')),
+];
+
+/// The post-`CREATE` portion of a SQL `CREATE TABLE` statement.
+///
+/// `TABLE [IF NOT EXISTS] <name> ( <element> (',' <element>)* ) [';']`,
+/// where an element is a column definition or a table-level
+/// `PRIMARY KEY (…)` (ADR-0035 §4). The entry-word dispatch consumes
+/// the leading `CREATE` before this shape walks, so a `CommandNode`
+/// references it via `Subgrammar` (the `ddl::SQL_CREATE_TABLE` node).
+pub static SQL_CREATE_TABLE_SHAPE: Node = Node::Seq(SQL_CREATE_TABLE_TAIL_NODES);
+
+// =================================================================
+// Tests — grammar accept/reject for the post-`CREATE` tail.
+// =================================================================
+
+#[cfg(test)]
+mod tests {
+    use super::SQL_CREATE_TABLE_SHAPE;
+    use crate::dsl::walker::context::WalkContext;
+    use crate::dsl::walker::driver::{NodeWalkResult, walk_node};
+    use crate::dsl::walker::outcome::MatchedPath;
+
+    /// Walk `input` against the CREATE TABLE tail. `true` only when the
+    /// walk matches *and* consumes all of `input` (trailing whitespace
+    /// allowed). Schemaless context: the shape is structural, so the
+    /// table/column idents match by shape and `reject_internal_table`
+    /// still fires on `__rdbms_*`.
+    fn walks(input: &str) -> bool {
+        let mut ctx = WalkContext::new();
+        let mut path = MatchedPath::new();
+        let mut per_byte = Vec::new();
+        match walk_node(input, 0, &SQL_CREATE_TABLE_SHAPE, &mut ctx, &mut path, &mut per_byte) {
+            NodeWalkResult::Matched { end, .. } => input[end..].trim().is_empty(),
+            _ => false,
+        }
+    }
+
+    fn good(input: &str) {
+        assert!(walks(input), "{input:?} should be a valid CREATE TABLE tail");
+    }
+
+    fn bad(input: &str) {
+        assert!(!walks(input), "{input:?} should NOT walk as a complete CREATE TABLE tail");
+    }
+
+    #[test]
+    fn minimal_single_column() {
+        good("table t (id int)");
+        good("table t (id int);");
+        good("table widgets (sku text)");
+    }
+
+    #[test]
+    fn multiple_columns() {
+        good("table t (id int, name text)");
+        good("table orders (id int, total real, note text)");
+    }
+
+    #[test]
+    fn column_level_constraints() {
+        good("table t (id int primary key)");
+        good("table t (id int primary key, name text not null)");
+        good("table t (id serial primary key, email text unique)");
+        good("table t (a int not null unique, b text)");
+    }
+
+    #[test]
+    fn integer_primary_key_parses() {
+        // INTEGER PRIMARY KEY is accepted (it maps to plain int at the
+        // builder, ADR-0035 §3 — verified there, not here).
+        good("table t (id integer primary key)");
+    }
+
+    #[test]
+    fn table_level_primary_key_single_and_compound() {
+        good("table t (id int, primary key (id))");
+        good("table t (a int, b int, primary key (a, b))");
+        good("table t (a int, b int, c text, primary key (a, b, c))");
+    }
+
+    #[test]
+    fn standard_sql_type_aliases() {
+        good("table t (a integer, b varchar, c boolean, d timestamp)");
+        good("table t (e bigint, f smallint, g char, h numeric)");
+        good("table t (i binary, j varbinary, k float)");
+    }
+
+    #[test]
+    fn double_precision_two_word_type() {
+        good("table t (x double precision)");
+        good("table t (id int, x double precision, y real)");
+    }
+
+    #[test]
+    fn length_precision_args_accepted_and_ignored() {
+        good("table t (name varchar(255))");
+        good("table t (price numeric(10, 2))");
+        good("table t (code char(8), amount decimal(12, 4))");
+    }
+
+    #[test]
+    fn if_not_exists_admitted() {
+        good("table if not exists t (id int)");
+        good("table if not exists widgets (sku text, qty int);");
+    }
+
+    #[test]
+    fn internal_target_table_rejected() {
+        bad("table __rdbms_playground_columns (id int)");
+        bad("table if not exists __rdbms_playground_relationships (id int)");
+    }
+
+    #[test]
+    fn unknown_type_rejected() {
+        bad("table t (id money)");
+        bad("table t (id json)");
+        // Bare `double` (no `precision`) is not a supported type.
+        bad("table t (x double)");
+    }
+
+    #[test]
+    fn structurally_incomplete_or_wrong_rejected() {
+        // Empty column list — at least one element required.
+        bad("table t ()");
+        // No column list at all.
+        bad("table t");
+        // Missing table name.
+        bad("table (id int)");
+        // Column with no type.
+        bad("table t (id)");
+        // Trailing comma with no following element.
+        bad("table t (id int,)");
+        // Missing TABLE keyword (entry dispatch would have eaten it).
+        bad("t (id int)");
+        // Unclosed column list.
+        bad("table t (id int");
+    }
+
+    #[test]
+    fn deferred_constraints_are_not_accepted_in_4a() {
+        // DEFAULT / CHECK / table-level UNIQUE belong to the 4a.2
+        // constraint slice; their shapes are absent here, so they do
+        // not walk (the builder turns this into a friendly
+        // "not yet supported" — tested there).
+        bad("table t (id int default 0)");
+        bad("table t (id int check (id > 0))");
+        bad("table t (a int, b int, unique (a, b))");
+    }
+}