feat: ADR-0035 4a — SQL CREATE TABLE grammar shape

The post-CREATE shape (src/dsl/grammar/sql_create_table.rs):
TABLE [IF NOT EXISTS] <name> ( <col-def | table-PK> , … ) [;]
- col-def: <name> <type> [NOT NULL] [UNIQUE] [PRIMARY KEY]
- type: ten keywords + standard-SQL aliases (via from_sql_name) +
  the two-word `double precision` branch + discarded (len[,len]) arg
- table-level PRIMARY KEY (cols) — single and compound
- __rdbms_* target rejected at walk time

DEFAULT/CHECK/table-level UNIQUE shapes are deliberately absent (the
4a.2 constraint slice); FK is absent (4b). 13 accept/reject tests
mirror sql_insert's walk_node harness. Shape only — the CommandNode +
builder + worker wiring follow.
This commit is contained in:
claude@clouddev1
2026-05-25 08:11:39 +00:00
parent 58386d77e9
commit 80310929d7
2 changed files with 377 additions and 0 deletions
+1
View File
@@ -28,6 +28,7 @@ pub mod ddl;
pub mod expr;
pub mod shared;
pub mod sql_expr;
pub mod sql_create_table;
pub mod sql_delete;
pub mod sql_insert;
pub mod sql_select;
+376
View File
@@ -0,0 +1,376 @@
//! SQL `CREATE TABLE` grammar (ADR-0035 §4, sub-phase 4a).
//!
//! Grammar-as-text in the unified tree (ADR-0030 §4), but — unlike
//! the DML `Sql*` commands which execute verbatim — `CREATE TABLE`
//! executes **structurally** (ADR-0035 §1): the builder extracts the
//! columns / types / primary key and the worker drives the existing
//! `do_create_table` machinery, so an advanced-mode-created table is a
//! first-class playground object (metadata, `STRICT`, the ten-type
//! vocabulary). This file holds only the **shape**; the `CommandNode`
//! and `build_sql_create_table` live in `ddl.rs` (mirroring how the
//! DML shapes here pair with `data.rs` builders).
//!
//! Scope (4a): columns + types (the §3 alias map, incl. the two-word
//! `double precision` and discarded length args) + the clean-reuse
//! column constraints `NOT NULL` / `UNIQUE` / column-level
//! `PRIMARY KEY` + single/compound table-level `PRIMARY KEY (…)` +
//! `IF NOT EXISTS`. **No** foreign keys (4b), **no** `DEFAULT` /
//! `CHECK` / table-level `UNIQUE` (the 4a.2 constraint slice) — those
//! shapes are absent here, so typing them is an ordinary parse error
//! until their slice lands.
//!
//! The entry-word dispatch consumes the leading `CREATE` keyword
//! before this shape walks, so it starts at `TABLE` (mirroring
//! `sql_insert::SQL_INSERT_SHAPE`, which starts at `INTO`).
use crate::dsl::grammar::sql_select::reject_internal_table;
use crate::dsl::grammar::{IdentSource, Node, ValidationError, Word};
use crate::dsl::types::Type;
/// The `,` punctuation node. Referenced as the `separator` of both
/// comma-separated repeats in this shape: the column-element list and
/// the table-level `PRIMARY KEY (…)` column list.
static COMMA: Node = Node::Punct(',');
// --- Type-name slot (advanced-mode aliases, ADR-0035 §3) ----------
/// Validator for the advanced-mode type-name slot (ADR-0035 §3).
///
/// Accepts `value` whenever [`Type::from_sql_name`] resolves it, which
/// covers the standard-SQL aliases in addition to the ten playground
/// keywords. `shared::validate_type_name` is the simple-mode
/// counterpart and admits the ten keywords only.
///
/// # Errors
///
/// Returns a `parse.custom.unknown_type` [`ValidationError`] whose
/// `found` argument is the rejected word and whose `expected` argument
/// is the comma-separated keyword of every [`Type::all`] entry — the
/// vocabulary we teach, deliberately not the aliases.
fn validate_sql_type_name(value: &str) -> Result<(), ValidationError> {
    // Happy path first: anything the alias resolver knows is valid.
    if Type::from_sql_name(value).is_some() {
        return Ok(());
    }

    let keywords: Vec<_> = Type::all().iter().map(|ty| ty.keyword()).collect();
    Err(ValidationError {
        message_key: "parse.custom.unknown_type",
        args: vec![
            ("found", value.to_string()),
            ("expected", keywords.join(", ")),
        ],
    })
}
/// The single-word type name. `double precision` is handled by a
/// separate keyword-pair branch in [`SQL_TYPE`] (ADR-0035 §6.3,
/// implementer call), so the validator only ever sees one word.
///
/// An identifier slot drawing on `IdentSource::Types`, tagged with the
/// role `"col_type"`, with [`validate_sql_type_name`] attached as its
/// validator.
const SQL_TYPE_NAME: Node = Node::Ident {
source: IdentSource::Types,
role: "col_type",
// The advanced-mode validator (aliases allowed), not the simple-mode
// `shared::validate_type_name`.
validator: Some(validate_sql_type_name),
highlight_override: None,
// NOTE(review): every `writes_*` flag is off for this slot; their
// exact effect lives in the walker and is not visible from this file.
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
// Optional `( n [, n] )` length / precision argument — matched and
// **discarded** (the playground's types are unparameterised,
// ADR-0035 §3). `varchar(255)`, `numeric(10, 2)`.
//
// `, n` — the second number, e.g. the `2` in `numeric(10, 2)`. Neither
// number literal has a validator attached.
static LENGTH_SECOND_NODES: &[Node] = &[Node::Punct(','), Node::NumberLit { validator: None }];
// `( n [, n] )` — the first number is mandatory once the parenthesis
// is opened; only the `, n` part is optional.
static LENGTH_NODES: &[Node] = &[
Node::Punct('('),
Node::NumberLit { validator: None },
Node::Optional(&Node::Seq(LENGTH_SECOND_NODES)),
Node::Punct(')'),
];
// The whole parenthesised argument is optional, so a bare type name
// (`varchar`) and a parameterised one (`varchar(255)`) both fit.
const LENGTH_OPT: Node = Node::Optional(&Node::Seq(LENGTH_NODES));
// `double precision` — the lone two-word alias. A dedicated branch so
// the per-word `Ident` validator never has to make sense of `double`
// on its own (ADR-0035 §6.3). The builder maps the pair to
// `Type::Real`.
static DOUBLE_PRECISION_NODES: &[Node] = &[
Node::Word(Word::keyword("double")),
Node::Word(Word::keyword("precision")),
];
// A single-word type name followed by the optional, discarded
// `( n [, n] )` argument.
static TYPE_WITH_LENGTH_NODES: &[Node] = &[SQL_TYPE_NAME, LENGTH_OPT];
// The two alternatives of a type, with the two-word branch listed
// ahead of the single-word one. Only the single-word branch carries
// `LENGTH_OPT`, so `double precision(10)` is not part of this shape.
static SQL_TYPE_CHOICES: &[Node] = &[
Node::Seq(DOUBLE_PRECISION_NODES),
Node::Seq(TYPE_WITH_LENGTH_NODES),
];
/// `double precision | <type-keyword-or-alias> [ '(' n [, n] ')' ]`.
const SQL_TYPE: Node = Node::Choice(SQL_TYPE_CHOICES);
// --- Column-level constraints (4a clean-reuse set only) -----------
// The keyword pair `not null`.
static NOT_NULL_NODES: &[Node] = &[
Node::Word(Word::keyword("not")),
Node::Word(Word::keyword("null")),
];
// The keyword pair `primary key` in column-constraint position. No
// parenthesised column list follows it here; the table-level form
// with a column list is defined separately in `TABLE_PK_NODES`.
static PRIMARY_KEY_NODES: &[Node] = &[
Node::Word(Word::keyword("primary")),
Node::Word(Word::keyword("key")),
];
// `NOT NULL` | `UNIQUE` | `PRIMARY KEY`. `DEFAULT` / `CHECK` are
// deliberately absent (4a.2): typing them is an ordinary parse error
// until the constraint slice lands.
static COL_CONSTRAINT_CHOICES: &[Node] = &[
Node::Seq(NOT_NULL_NODES),
Node::Word(Word::keyword("unique")),
Node::Seq(PRIMARY_KEY_NODES),
];
// Exactly one column constraint.
const COL_CONSTRAINT: Node = Node::Choice(COL_CONSTRAINT_CHOICES);
/// Zero-or-more column constraints after the type (`min: 0`).
///
/// No separator: constraints simply follow one another, as in
/// `int not null unique`.
///
/// NOTE(review): the shape itself places no limit on order or on
/// repeating the same constraint (`not null not null` fits the
/// repeat); whether a later stage rejects duplicates is not visible
/// from this file.
const COL_CONSTRAINT_SUFFIX: Node = Node::Repeated {
inner: &COL_CONSTRAINT,
separator: None,
min: 0,
};
// --- Column definition: `<name> <type> [constraints…]` ------------
// The column's name: an identifier slot with `IdentSource::NewName`,
// tagged with the role `"col_name"`. No validator is attached and
// every `writes_*` flag is off.
const COL_NAME: Node = Node::Ident {
source: IdentSource::NewName,
role: "col_name",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
// Name, then type, then the zero-or-more constraint suffix. The type
// is mandatory — a bare `(id)` does not fit (see the
// `structurally_incomplete_or_wrong_rejected` test).
static COLUMN_DEF_NODES: &[Node] = &[COL_NAME, SQL_TYPE, COL_CONSTRAINT_SUFFIX];
// One complete column definition.
const COLUMN_DEF: Node = Node::Seq(COLUMN_DEF_NODES);
// --- Table-level `PRIMARY KEY ( col, … )` (single + compound) -----
// A column reference inside the table-level PK list. The columns are
// defined in this same statement (not in the schema yet), so
// `NewName` (no schema completion); the builder checks each name
// against the defined columns.
const PK_COLUMN_REF: Node = Node::Ident {
source: IdentSource::NewName,
role: "pk_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
// `primary key ( <col> (',' <col>)* )`. The two keywords are spelled
// out inline here rather than reusing `PRIMARY_KEY_NODES`.
static TABLE_PK_NODES: &[Node] = &[
Node::Word(Word::keyword("primary")),
Node::Word(Word::keyword("key")),
Node::Punct('('),
// At least one column (`min: 1`), comma-separated: one name gives a
// single-column key, several give a compound key.
Node::Repeated {
inner: &PK_COLUMN_REF,
separator: Some(&COMMA),
min: 1,
},
Node::Punct(')'),
];
// The complete table-level primary-key element.
const TABLE_PK: Node = Node::Seq(TABLE_PK_NODES);
// One element of the column list: a table-level `PRIMARY KEY (…)` or a
// column definition. `TABLE_PK` is tried first — it starts with the
// keyword `primary`, which disambiguates it from a column name. (A
// column literally named `primary` is therefore unavailable, the same
// trade real SQL makes with its reserved words.)
static ELEMENT_CHOICES: &[Node] = &[TABLE_PK, COLUMN_DEF];
const ELEMENT: Node = Node::Choice(ELEMENT_CHOICES);
// `( <element> (',' <element>)* )` — the parenthesised, comma-separated
// element list. `min: 1` makes an empty list `()` a non-match.
//
// NOTE(review): the shape does not constrain the mix of elements — a
// list holding several `PRIMARY KEY (…)` entries, or only a
// `PRIMARY KEY (…)` and no column definition, still fits the repeat.
// Whether a later stage rejects those is not visible from this file.
static COLUMN_LIST_NODES: &[Node] = &[
Node::Punct('('),
Node::Repeated {
inner: &ELEMENT,
separator: Some(&COMMA),
min: 1,
},
Node::Punct(')'),
];
const COLUMN_LIST: Node = Node::Seq(COLUMN_LIST_NODES);
// --- `IF NOT EXISTS` (ADR-0035 §4, no-op-with-note at execution) ---
// The three keywords `if not exists`, in that order.
static IF_NOT_EXISTS_NODES: &[Node] = &[
Node::Word(Word::keyword("if")),
Node::Word(Word::keyword("not")),
Node::Word(Word::keyword("exists")),
];
// The clause is optional as a whole: either all three keywords appear
// or none do.
const IF_NOT_EXISTS_OPT: Node = Node::Optional(&Node::Seq(IF_NOT_EXISTS_NODES));
// --- The full post-`CREATE` shape ---------------------------------
/// The table name. `NewName` (the user invents it); `__rdbms_*`
/// rejected (ADR-0030 §6) so the walker's `[ERR]` indicator flags an
/// internal-table target before submit, mirroring the DML shapes.
const TABLE_NAME: Node = Node::Ident {
source: IdentSource::NewName,
role: "table_name",
// Shared with the DML shapes: imported from `sql_select`.
validator: Some(reject_internal_table),
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
// The tail in order: `table`, optional `if not exists`, the table
// name, the parenthesised element list, and an optional trailing `;`.
static SQL_CREATE_TABLE_TAIL_NODES: &[Node] = &[
Node::Word(Word::keyword("table")),
IF_NOT_EXISTS_OPT,
TABLE_NAME,
COLUMN_LIST,
Node::Optional(&Node::Punct(';')),
];
/// The post-`CREATE` portion of a SQL `CREATE TABLE` statement.
///
/// `TABLE [IF NOT EXISTS] <name> ( <element> (',' <element>)* ) [';']`,
/// where an element is a column definition or a table-level
/// `PRIMARY KEY (…)` (ADR-0035 §4). The entry-word dispatch consumes
/// the leading `CREATE` before this shape walks, so a `CommandNode`
/// references it via `Subgrammar` (the `ddl::SQL_CREATE_TABLE` node).
///
/// This is the only `pub` item in the file; every other node above is
/// a private building block of this sequence.
pub static SQL_CREATE_TABLE_SHAPE: Node = Node::Seq(SQL_CREATE_TABLE_TAIL_NODES);
// =================================================================
// Tests — grammar accept/reject for the post-`CREATE` tail.
// =================================================================
#[cfg(test)]
mod tests {
    use super::SQL_CREATE_TABLE_SHAPE;
    use crate::dsl::walker::context::WalkContext;
    use crate::dsl::walker::driver::{NodeWalkResult, walk_node};
    use crate::dsl::walker::outcome::MatchedPath;

    /// Reports whether `input` is a *complete* CREATE TABLE tail: the
    /// walk must match and leave nothing but whitespace unconsumed.
    ///
    /// The walk runs against a fresh, schemaless context. The shape is
    /// structural, so table/column identifiers match by shape alone,
    /// while `reject_internal_table` still fires on `__rdbms_*`.
    fn walks(input: &str) -> bool {
        let mut context = WalkContext::new();
        let mut matched = MatchedPath::new();
        let mut byte_info = Vec::new();
        let outcome = walk_node(
            input,
            0,
            &SQL_CREATE_TABLE_SHAPE,
            &mut context,
            &mut matched,
            &mut byte_info,
        );
        if let NodeWalkResult::Matched { end, .. } = outcome {
            // A match that stops short of the end is not a full statement.
            input[end..].trim().is_empty()
        } else {
            false
        }
    }

    /// Asserts that `input` walks as a complete tail.
    fn good(input: &str) {
        let accepted = walks(input);
        assert!(accepted, "{input:?} should be a valid CREATE TABLE tail");
    }

    /// Asserts that `input` does not walk as a complete tail.
    fn bad(input: &str) {
        let accepted = walks(input);
        assert!(!accepted, "{input:?} should NOT walk as a complete CREATE TABLE tail");
    }

    #[test]
    fn minimal_single_column() {
        for input in [
            "table t (id int)",
            "table t (id int);",
            "table widgets (sku text)",
        ] {
            good(input);
        }
    }

    #[test]
    fn multiple_columns() {
        for input in [
            "table t (id int, name text)",
            "table orders (id int, total real, note text)",
        ] {
            good(input);
        }
    }

    #[test]
    fn column_level_constraints() {
        for input in [
            "table t (id int primary key)",
            "table t (id int primary key, name text not null)",
            "table t (id serial primary key, email text unique)",
            "table t (a int not null unique, b text)",
        ] {
            good(input);
        }
    }

    #[test]
    fn integer_primary_key_parses() {
        // The grammar accepts INTEGER PRIMARY KEY; its mapping to plain
        // int happens in the builder (ADR-0035 §3) and is verified
        // there, not here.
        good("table t (id integer primary key)");
    }

    #[test]
    fn table_level_primary_key_single_and_compound() {
        for input in [
            "table t (id int, primary key (id))",
            "table t (a int, b int, primary key (a, b))",
            "table t (a int, b int, c text, primary key (a, b, c))",
        ] {
            good(input);
        }
    }

    #[test]
    fn standard_sql_type_aliases() {
        for input in [
            "table t (a integer, b varchar, c boolean, d timestamp)",
            "table t (e bigint, f smallint, g char, h numeric)",
            "table t (i binary, j varbinary, k float)",
        ] {
            good(input);
        }
    }

    #[test]
    fn double_precision_two_word_type() {
        for input in [
            "table t (x double precision)",
            "table t (id int, x double precision, y real)",
        ] {
            good(input);
        }
    }

    #[test]
    fn length_precision_args_accepted_and_ignored() {
        for input in [
            "table t (name varchar(255))",
            "table t (price numeric(10, 2))",
            "table t (code char(8), amount decimal(12, 4))",
        ] {
            good(input);
        }
    }

    #[test]
    fn if_not_exists_admitted() {
        for input in [
            "table if not exists t (id int)",
            "table if not exists widgets (sku text, qty int);",
        ] {
            good(input);
        }
    }

    #[test]
    fn internal_target_table_rejected() {
        for input in [
            "table __rdbms_playground_columns (id int)",
            "table if not exists __rdbms_playground_relationships (id int)",
        ] {
            bad(input);
        }
    }

    #[test]
    fn unknown_type_rejected() {
        for input in [
            "table t (id money)",
            "table t (id json)",
            // Bare `double` (no `precision`) is not a supported type.
            "table t (x double)",
        ] {
            bad(input);
        }
    }

    #[test]
    fn structurally_incomplete_or_wrong_rejected() {
        for input in [
            // Empty column list — at least one element is required.
            "table t ()",
            // No column list at all.
            "table t",
            // Missing table name.
            "table (id int)",
            // Column with no type.
            "table t (id)",
            // Trailing comma with no following element.
            "table t (id int,)",
            // No `table` keyword: the entry dispatch consumes only the
            // leading `CREATE`, so the tail must still open with `TABLE`.
            "t (id int)",
            // Unclosed column list.
            "table t (id int",
        ] {
            bad(input);
        }
    }

    #[test]
    fn deferred_constraints_are_not_accepted_in_4a() {
        // DEFAULT / CHECK / table-level UNIQUE belong to the 4a.2
        // constraint slice. Their shapes are absent from this grammar,
        // so none of these walk. (The friendly "not yet supported"
        // message is the builder's job and is tested there.)
        for input in [
            "table t (id int default 0)",
            "table t (id int check (id > 0))",
            "table t (a int, b int, unique (a, b))",
        ] {
            bad(input);
        }
    }
}