Files
rdbms-playground/src/persistence/yaml.rs
T
claude@clouddev1 942222bfc9 constraints: CHECK — check (<expr>) at create table & add column (ADR-0029)
The fourth constraint. `check ( <expr> )` reuses the ADR-0026
WHERE-expression grammar via `Subgrammar`, so a check is
written in the same language as a `where` filter.

- Grammar: a `CHECK_CONSTRAINT` arm joins the shared
  constraint-suffix Choice; `consume_check_expr` extracts the
  parenthesised expression (paren-depth aware) into
  `ColumnSpec.check` / `Command::AddColumn.check`.
- Storage: the parsed `Expr` is compiled once to inline SQL
  (`compile_check_sql` — `compile_expr` + ADR-0028's
  param-inliner) and stored in that form everywhere — a new
  `check_expr` column in `__rdbms_playground_columns`,
  `project.yaml`'s `ColumnSchema.check`, and the column DDL
  emitted by `do_create_table` / `schema_to_ddl`.
- `add column … check` routes through the rebuild primitive
  (SQLite's `ALTER … ADD COLUMN` cannot carry it); a CHECK on
  a serial/shortid column is create-table-only and refused at
  add-column with a friendly message.
- `describe` surfaces the CHECK. ADR-0029 §7/§8 updated to the
  SQL-form decision — double-quoted identifiers, consistent
  with ADR-0028's `explain` display SQL.

1201 tests pass (+8); clippy clean.
2026-05-19 16:42:18 +00:00

647 lines
21 KiB
Rust

//! `project.yaml` writer (hand-rolled, ADR-0015 §3) and
//! reader (`serde_yml`, ADR-0015 §7).
//!
//! The schema YAML uses a small, fixed set of structures —
//! tables, columns, relationships — and the values it carries
//! are all known-safe (identifiers from the DSL, types from
//! the fixed `Type` enum, action names from `ReferentialAction`).
//! Hand-rolling the writer avoids pulling a YAML serializer
//! dep just for the write path; the read path uses
//! `serde_yml` because we need to handle whatever the user
//! (or a future migrator, or a hand-edit) puts in there.
//
// `pub(crate)` items in this private submodule are
// re-exported from `persistence::mod.rs`; that path is what
// the db worker uses. Clippy's `redundant_pub_crate` lint
// flags this pattern, but it's load-bearing here.
#![allow(clippy::redundant_pub_crate)]
use std::fmt::Write as _;
use serde::Deserialize;
use crate::dsl::action::ReferentialAction;
use crate::dsl::types::Type;
use super::{ColumnSchema, IndexSchema, RelationshipSchema, SchemaSnapshot, TableSchema};
/// Render a `SchemaSnapshot` as the text of a `project.yaml` file.
///
/// The output opens with a fixed `version: 1` header and the
/// project's `created_at` stamp, followed by the `tables`,
/// `relationships` and `indexes` sections, in that order. A section
/// with no entries is written as an inline empty list (`[]`).
#[must_use]
pub(super) fn serialize_schema(schema: &SchemaSnapshot) -> String {
    let mut yaml = String::from("version: 1\nproject:\n");
    // Formatting into a `String` cannot fail, so the result is discarded.
    let _ = writeln!(yaml, " created_at: {}", quote_if_needed(&schema.created_at));

    if schema.tables.is_empty() {
        yaml.push_str("tables: []\n");
    } else {
        yaml.push_str("tables:\n");
        schema
            .tables
            .iter()
            .for_each(|entry| write_table(&mut yaml, entry));
    }

    if schema.relationships.is_empty() {
        yaml.push_str("relationships: []\n");
    } else {
        yaml.push_str("relationships:\n");
        schema
            .relationships
            .iter()
            .for_each(|entry| write_relationship(&mut yaml, entry));
    }

    if schema.indexes.is_empty() {
        yaml.push_str("indexes: []\n");
    } else {
        yaml.push_str("indexes:\n");
        schema
            .indexes
            .iter()
            .for_each(|entry| write_index(&mut yaml, entry));
    }

    yaml
}
/// Append one `indexes:` entry to `out`: the index name, the table
/// it belongs to, and its columns as an inline `[a, b]` list.
fn write_index(out: &mut String, index: &IndexSchema) {
    // Quote each column as needed, then join into the flow-list body.
    let column_list = index
        .columns
        .iter()
        .map(|column| quote_if_needed(column))
        .collect::<Vec<_>>()
        .join(", ");

    let _ = writeln!(out, " - name: {}", quote_if_needed(&index.name));
    let _ = writeln!(out, " table: {}", quote_if_needed(&index.table));
    let _ = writeln!(out, " columns: [{column_list}]");
}
/// Append one `tables:` entry to `out`: the table name, its primary
/// key as an inline list (order preserved), and one line per column.
fn write_table(out: &mut String, table: &TableSchema) {
    let key_list = table
        .primary_key
        .iter()
        .map(|key| quote_if_needed(key))
        .collect::<Vec<_>>()
        .join(", ");

    let _ = writeln!(out, " - name: {}", quote_if_needed(&table.name));
    let _ = writeln!(out, " primary_key: [{key_list}]");
    out.push_str(" columns:\n");
    table
        .columns
        .iter()
        .for_each(|column| write_column(out, column));
}
/// Always render `s` as a double-quoted YAML string.
///
/// Used for a column's `default` SQL literal and its `check` SQL,
/// which must round-trip as strings even when they look numeric
/// (ADR-0029).
///
/// `"` and `\` are backslash-escaped. Line feeds, carriage returns,
/// tabs and every other control character are written as escape
/// sequences too: a raw carriage return (or NEL / LS / PS, which
/// some YAML readers treat as line breaks) inside a double-quoted
/// scalar is folded on read instead of being preserved, and most
/// other control characters are not allowed in a YAML stream at all.
fn yaml_string(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if c.is_control() || matches!(c, '\u{2028}' | '\u{2029}') => {
                // Every code point reaching this arm is below U+10000,
                // so the four-digit `\uXXXX` escape is always enough.
                // Writing into a `String` cannot fail.
                let _ = write!(out, "\\u{:04X}", u32::from(c));
            }
            _ => out.push(c),
        }
    }
    out.push('"');
    out
}
/// Append one column to `out` as a single-line inline mapping:
/// `{ name: …, type: … }` plus any constraint fields that are set.
fn write_column(out: &mut String, col: &ColumnSchema) {
    let _ = write!(
        out,
        " - {{ name: {}, type: {}",
        quote_if_needed(&col.name),
        col.user_type.keyword(),
    );

    // ADR-0018 / ADR-0029 constraint fields are written only when
    // present. An unconstrained column therefore stays a compact
    // two-field entry, and older readers remain forward-compatible.
    if col.unique {
        out.push_str(", unique: true");
    }
    if col.not_null {
        out.push_str(", not_null: true");
    }
    if let Some(default_sql) = col.default.as_deref() {
        let _ = write!(out, ", default: {}", yaml_string(default_sql));
    }
    if let Some(check_sql) = col.check.as_deref() {
        let _ = write!(out, ", check: {}", yaml_string(check_sql));
    }

    out.push_str(" }\n");
}
/// Append one `relationships:` entry to `out`: its name, the parent
/// and child endpoints as inline `{ table, column }` mappings, and
/// the two referential actions.
fn write_relationship(out: &mut String, rel: &RelationshipSchema) {
    let parent_endpoint = format!(
        "{{ table: {}, column: {} }}",
        quote_if_needed(&rel.parent_table),
        quote_if_needed(&rel.parent_column),
    );
    let child_endpoint = format!(
        "{{ table: {}, column: {} }}",
        quote_if_needed(&rel.child_table),
        quote_if_needed(&rel.child_column),
    );
    let delete_action = action_keyword(rel.on_delete);
    let update_action = action_keyword(rel.on_update);

    let _ = writeln!(out, " - name: {}", quote_if_needed(&rel.name));
    let _ = writeln!(out, " parent: {parent_endpoint}");
    let _ = writeln!(out, " child: {child_endpoint}");
    let _ = writeln!(out, " on_delete: {delete_action}");
    let _ = writeln!(out, " on_update: {update_action}");
}
const fn action_keyword(action: ReferentialAction) -> &'static str {
match action {
ReferentialAction::NoAction => "no_action",
ReferentialAction::Restrict => "restrict",
ReferentialAction::SetNull => "set_null",
ReferentialAction::Cascade => "cascade",
}
}
/// Quote a string for safe inclusion as a YAML scalar.
///
/// We're conservative: anything not made of safe characters
/// (alphanumerics, `_`, `-`, `:` for ISO timestamps, `.`)
/// gets double-quoted. Common identifiers from the DSL (which
/// restricts to alnum + `_`) pass through unquoted, which keeps
/// the YAML pleasantly readable.
///
/// Inside the quotes, `"` and `\` are backslash-escaped, and line
/// breaks, tabs and all other control characters are written as
/// escape sequences. A raw carriage return (or NEL / LS / PS) in a
/// double-quoted scalar would be folded on read rather than
/// preserved, and most other control characters are not legal in a
/// YAML stream.
fn quote_if_needed(s: &str) -> String {
    if !needs_quoting(s) {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if c.is_control() || matches!(c, '\u{2028}' | '\u{2029}') => {
                // Every code point reaching this arm is below U+10000,
                // so the four-digit `\uXXXX` escape is always enough.
                // Writing into a `String` cannot fail.
                let _ = write!(out, "\\u{:04X}", u32::from(c));
            }
            _ => out.push(c),
        }
    }
    out.push('"');
    out
}

/// Decide whether `s` must be double-quoted to survive as a plain
/// YAML scalar.
///
/// Quoting is required when the string:
/// - is empty;
/// - starts with `-` (sequence-entry / document-marker indicator);
/// - ends with `:` — a trailing colon is read as a mapping
///   indicator at end of line and before `,` / `]` / `}` in the
///   flow collections this writer emits;
/// - spells a YAML boolean or null keyword in any letter case
///   (`true`, `True`, `NULL`, `Yes`, …);
/// - contains any character outside the safe set.
///
/// `~` needs no keyword entry: it is outside the safe set and is
/// therefore always quoted by the last rule.
fn needs_quoting(s: &str) -> bool {
    const KEYWORDS: [&str; 7] = ["true", "false", "null", "yes", "no", "on", "off"];

    s.is_empty()
        || s.starts_with('-')
        || s.ends_with(':')
        || KEYWORDS.iter().any(|kw| s.eq_ignore_ascii_case(kw))
        || s.chars().any(|c| !is_safe_yaml_char(c))
}

/// Characters allowed in an unquoted scalar: ASCII alphanumerics
/// plus `_`, `-`, `.` and `:`.
const fn is_safe_yaml_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':')
}
/// Parse a `project.yaml` body into a `SchemaSnapshot`.
///
/// The private `Raw*` wire types mirror the format that
/// `serialize_schema` emits. Anything outside that shape produces
/// a structured error — callers (the rebuild path) translate those
/// into a fatal banner per ADR-0015 §8.
///
/// # Errors
///
/// - `YamlError::Syntax` when `serde_yml` cannot deserialize `body`.
/// - `YamlError::UnsupportedVersion` when `version` is not `1`.
/// - `YamlError::UnknownType` when a column's `type` does not parse
///   as a `Type`.
/// - `YamlError::UnknownAction` when `on_delete` / `on_update` is not
///   a known action keyword (`on_delete` is checked first).
pub(crate) fn parse_schema(body: &str) -> Result<SchemaSnapshot, YamlError> {
    let raw: RawProject =
        serde_yml::from_str(body).map_err(|e| YamlError::Syntax(e.to_string()))?;
    if raw.version != 1 {
        return Err(YamlError::UnsupportedVersion(raw.version));
    }

    // Tables first; the first bad column type aborts the whole parse.
    let tables = raw
        .tables
        .into_iter()
        .map(|t| -> Result<TableSchema, YamlError> {
            let table_name = t.name;
            let columns = t
                .columns
                .into_iter()
                .map(|c| -> Result<ColumnSchema, YamlError> {
                    let user_type = match c.user_type.parse::<Type>() {
                        Ok(parsed) => parsed,
                        Err(_) => {
                            return Err(YamlError::UnknownType {
                                table: table_name.clone(),
                                column: c.name,
                                raw: c.user_type,
                            });
                        }
                    };
                    Ok(ColumnSchema {
                        name: c.name,
                        user_type,
                        unique: c.unique,
                        not_null: c.not_null,
                        default: c.default,
                        check: c.check,
                    })
                })
                .collect::<Result<Vec<_>, _>>()?;
            Ok(TableSchema {
                name: table_name,
                primary_key: t.primary_key,
                columns,
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Then relationships, validating both action keywords.
    let relationships = raw
        .relationships
        .into_iter()
        .map(|r| -> Result<RelationshipSchema, YamlError> {
            let on_delete = match parse_action(&r.on_delete) {
                Some(action) => action,
                None => return Err(YamlError::UnknownAction(r.on_delete.clone())),
            };
            let on_update = match parse_action(&r.on_update) {
                Some(action) => action,
                None => return Err(YamlError::UnknownAction(r.on_update.clone())),
            };
            Ok(RelationshipSchema {
                name: r.name,
                parent_table: r.parent.table,
                parent_column: r.parent.column,
                child_table: r.child.table,
                child_column: r.child.column,
                on_delete,
                on_update,
            })
        })
        .collect::<Result<Vec<_>, _>>()?;

    // Indexes need no validation: copy the fields across.
    let mut indexes: Vec<IndexSchema> = Vec::with_capacity(raw.indexes.len());
    for i in raw.indexes {
        indexes.push(IndexSchema {
            name: i.name,
            table: i.table,
            columns: i.columns,
        });
    }

    Ok(SchemaSnapshot {
        created_at: raw.project.created_at,
        tables,
        relationships,
        indexes,
    })
}
/// Reasons `parse_schema` can reject a `project.yaml` body.
#[derive(Debug)]
pub(crate) enum YamlError {
/// `serde_yml` could not deserialize the body; carries the
/// deserializer's error rendered as text.
Syntax(String),
/// The file's `version` field is something other than `1`;
/// carries the value that was found.
UnsupportedVersion(u32),
/// A column's `type` string did not parse as a `Type`.
UnknownType {
// Name of the table that owns the offending column.
table: String,
// Name of the offending column.
column: String,
// The unrecognised type string exactly as read from the file.
raw: String,
},
/// An `on_delete` / `on_update` value is not a known action
/// keyword; carries the string exactly as read from the file.
UnknownAction(String),
}
impl std::fmt::Display for YamlError {
    /// Write the message for this error, looked up through the
    /// crate's `t!` macro under the `persistence.yaml.*` keys.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Resolve the message text first so it is emitted in one place.
        let message = match self {
            Self::Syntax(msg) => crate::t!(
                "persistence.yaml.syntax",
                detail = msg,
            ),
            Self::UnsupportedVersion(v) => crate::t!(
                "persistence.yaml.unsupported_version",
                version = v,
            ),
            Self::UnknownType { table, column, raw } => crate::t!(
                "persistence.yaml.unknown_type",
                table = table,
                column = column,
                raw = raw,
            ),
            Self::UnknownAction(raw) => crate::t!(
                "persistence.yaml.unknown_action",
                raw = raw,
            ),
        };
        f.write_str(&message)
    }
}
// Marker impl so `YamlError` can be used wherever a standard error
// is expected; it overrides none of the trait's methods.
impl std::error::Error for YamlError {}
/// Map a `project.yaml` action keyword back to its
/// `ReferentialAction`. Matching is exact and case-sensitive; any
/// other string yields `None`.
fn parse_action(s: &str) -> Option<ReferentialAction> {
    let action = match s {
        "cascade" => ReferentialAction::Cascade,
        "set_null" => ReferentialAction::SetNull,
        "restrict" => ReferentialAction::Restrict,
        "no_action" => ReferentialAction::NoAction,
        _ => return None,
    };
    Some(action)
}
/// Wire form of a whole `project.yaml` document, as deserialized by
/// `parse_schema` before validation.
#[derive(Deserialize)]
struct RawProject {
// Format version; `parse_schema` accepts only `1`.
version: u32,
// The `project:` mapping (currently just `created_at`).
project: RawProjectMeta,
// A missing `tables:` field deserializes to an empty list.
#[serde(default)]
tables: Vec<RawTable>,
// A missing `relationships:` field deserializes to an empty list.
#[serde(default)]
relationships: Vec<RawRelationship>,
/// Optional: project files written before ADR-0025 carry no
/// `indexes:` field and default to an empty list.
#[serde(default)]
indexes: Vec<RawIndex>,
}
/// Wire form of the `project:` mapping.
#[derive(Deserialize)]
struct RawProjectMeta {
// Creation stamp, kept as the string read from the file;
// `parse_schema` copies it into `SchemaSnapshot::created_at`.
created_at: String,
}
/// Wire form of one entry under `tables:`. All three fields are
/// required.
#[derive(Deserialize)]
struct RawTable {
// Table name.
name: String,
// Primary-key column names, in the order written in the file.
primary_key: Vec<String>,
// The table's columns, in file order.
columns: Vec<RawColumn>,
}
/// Wire form of one column entry. Only `name` and `type` are
/// required; the constraint fields fall back to their defaults when
/// absent.
#[derive(Deserialize)]
struct RawColumn {
// Column name.
name: String,
// Type keyword as written in the file (YAML key `type`);
// `parse_schema` parses it into a `Type`.
#[serde(rename = "type")]
user_type: String,
/// Optional flag introduced in ADR-0018 for single-column
/// UNIQUE constraints. Older project files without this
/// field default to `false`.
#[serde(default)]
unique: bool,
/// `NOT NULL` flag (ADR-0029); absent in older files.
#[serde(default)]
not_null: bool,
/// `DEFAULT` SQL literal (ADR-0029); absent in older files.
#[serde(default)]
default: Option<String>,
/// `CHECK` SQL (ADR-0029); absent in older files.
#[serde(default)]
check: Option<String>,
}
/// Wire form of one entry under `relationships:`. All fields are
/// required.
#[derive(Deserialize)]
struct RawRelationship {
// Relationship name.
name: String,
// Referenced (parent) endpoint.
parent: RawEndpoint,
// Referencing (child) endpoint.
child: RawEndpoint,
// Action keywords as written in the file; `parse_schema` converts
// them with `parse_action`.
on_delete: String,
on_update: String,
}
/// Wire form of a relationship endpoint: the inline
/// `{ table: …, column: … }` mapping.
#[derive(Deserialize)]
struct RawEndpoint {
// Table name of this endpoint.
table: String,
// Column name of this endpoint.
column: String,
}
/// Wire form of one entry under `indexes:`. `parse_schema` copies
/// the fields into an `IndexSchema` without further validation.
#[derive(Deserialize)]
struct RawIndex {
// Index name.
name: String,
// Name of the indexed table.
table: String,
// Indexed column names, in file order.
columns: Vec<String>,
}
// Unit tests for the `project.yaml` writer (`serialize_schema`,
// `quote_if_needed`) and reader (`parse_schema`).
#[cfg(test)]
mod tests {
use super::*;
use crate::dsl::types::Type;
// Shared fixture: two tables (`Customers`, `Orders`), one
// relationship between them and one index, with no column
// constraints set.
fn snapshot() -> SchemaSnapshot {
SchemaSnapshot {
created_at: "2026-05-07T14:30:12Z".to_string(),
tables: vec![
TableSchema {
name: "Customers".to_string(),
primary_key: vec!["id".to_string()],
columns: vec![
ColumnSchema { name: "id".to_string(), user_type: Type::Serial, unique: false, not_null: false, default: None, check: None },
ColumnSchema { name: "Name".to_string(), user_type: Type::Text, unique: false, not_null: false, default: None, check: None },
],
},
TableSchema {
name: "Orders".to_string(),
primary_key: vec!["id".to_string()],
columns: vec![
ColumnSchema { name: "id".to_string(), user_type: Type::Serial, unique: false, not_null: false, default: None, check: None },
ColumnSchema { name: "CustId".to_string(), user_type: Type::Int, unique: false, not_null: false, default: None, check: None },
],
},
],
relationships: vec![RelationshipSchema {
name: "Customers_id_to_Orders_CustId".to_string(),
parent_table: "Customers".to_string(),
parent_column: "id".to_string(),
child_table: "Orders".to_string(),
child_column: "CustId".to_string(),
on_delete: ReferentialAction::Cascade,
on_update: ReferentialAction::NoAction,
}],
indexes: vec![IndexSchema {
name: "Orders_CustId_idx".to_string(),
table: "Orders".to_string(),
columns: vec!["CustId".to_string()],
}],
}
}
// The writer emits every section of the fixture in the expected
// textual form.
#[test]
fn writes_expected_yaml_for_full_schema() {
let body = serialize_schema(&snapshot());
// Spot-check structural lines rather than asserting on
// the whole blob — easier to read in failure output.
assert!(body.contains("version: 1"));
assert!(body.contains("created_at: 2026-05-07T14:30:12Z"));
assert!(body.contains("- name: Customers"));
assert!(body.contains("primary_key: [id]"));
assert!(body.contains("{ name: id, type: serial }"));
assert!(body.contains("{ name: Name, type: text }"));
assert!(body.contains("- name: Customers_id_to_Orders_CustId"));
assert!(body.contains("parent: { table: Customers, column: id }"));
assert!(body.contains("child: { table: Orders, column: CustId }"));
assert!(body.contains("on_delete: cascade"));
assert!(body.contains("on_update: no_action"));
assert!(body.contains("- name: Orders_CustId_idx"));
assert!(body.contains("table: Orders"));
assert!(body.contains("columns: [CustId]"));
}
// Sections with no entries are written as inline `[]`.
#[test]
fn empty_lists_use_inline_brackets() {
let body = serialize_schema(&SchemaSnapshot {
created_at: "2026-05-07T14:30:12Z".to_string(),
tables: vec![],
relationships: vec![],
indexes: vec![],
});
assert!(body.contains("tables: []"));
assert!(body.contains("relationships: []"));
assert!(body.contains("indexes: []"));
}
// Identifiers that spell a YAML keyword are double-quoted so they
// read back as strings.
#[test]
fn quotes_yaml_keywords_used_as_identifiers() {
let body = serialize_schema(&SchemaSnapshot {
created_at: "2026-05-07T14:30:12Z".to_string(),
tables: vec![TableSchema {
name: "true".to_string(), // reserved keyword
primary_key: vec!["id".to_string()],
columns: vec![ColumnSchema {
name: "yes".to_string(),
user_type: Type::Bool,
unique: false,
not_null: false,
default: None,
check: None,
}],
}],
relationships: vec![],
indexes: vec![],
});
assert!(body.contains("- name: \"true\""));
assert!(body.contains("{ name: \"yes\", type: bool }"));
}
// Strings containing a space or a double quote are quoted, with the
// embedded quote backslash-escaped.
#[test]
fn quotes_strings_with_unsafe_characters() {
assert_eq!(quote_if_needed("My Project"), "\"My Project\"");
assert_eq!(quote_if_needed("with\"quote"), "\"with\\\"quote\"");
}
// serialize → parse reproduces the fixture exactly.
#[test]
fn write_then_read_round_trips() {
let original = snapshot();
let body = serialize_schema(&original);
let parsed = parse_schema(&body).expect("parse schema");
assert_eq!(parsed, original);
}
#[test]
fn column_constraints_round_trip_through_yaml() {
// NOT NULL / UNIQUE / DEFAULT / CHECK survive a serialize →
// parse cycle (ADR-0029 §7). The `stock` column also covers
// a numeric-looking default (`0`) and a CHECK expression
// containing double quotes.
let snap = SchemaSnapshot {
created_at: "2026-05-19T00:00:00Z".to_string(),
tables: vec![TableSchema {
name: "Books".to_string(),
primary_key: vec!["isbn".to_string()],
columns: vec![
ColumnSchema {
name: "isbn".to_string(),
user_type: Type::Text,
unique: false,
not_null: false,
default: None,
check: None,
},
ColumnSchema {
name: "title".to_string(),
user_type: Type::Text,
unique: true,
not_null: true,
default: Some("'untitled'".to_string()),
check: None,
},
ColumnSchema {
name: "stock".to_string(),
user_type: Type::Int,
unique: false,
not_null: false,
default: Some("0".to_string()),
check: Some("\"stock\" >= 0".to_string()),
},
],
}],
relationships: vec![],
indexes: vec![],
};
let body = serialize_schema(&snap);
let parsed = parse_schema(&body).expect("parse schema");
assert_eq!(parsed, snap, "constraints survive the yaml round-trip");
}
// A hand-written body with empty sections and no `indexes:` field
// parses successfully.
#[test]
fn parses_minimal_yaml_with_no_tables() {
let body = "\
version: 1
project:
 created_at: 2026-05-07T14:30:12Z
tables: []
relationships: []
";
let parsed = parse_schema(body).expect("parse minimal");
assert_eq!(parsed.tables.len(), 0);
assert_eq!(parsed.relationships.len(), 0);
// A project file with no `indexes:` field (written
// before ADR-0025) parses with an empty index list.
assert_eq!(parsed.indexes.len(), 0);
assert_eq!(parsed.created_at, "2026-05-07T14:30:12Z");
}
// Any `version` other than 1 is rejected, and the error carries the
// version that was found.
#[test]
fn rejects_unknown_version() {
let body = "version: 9\nproject:\n created_at: x\ntables: []\nrelationships: []\n";
match parse_schema(body) {
Err(YamlError::UnsupportedVersion(9)) => {}
other => panic!("expected UnsupportedVersion(9), got {other:?}"),
}
}
// An unrecognised column type is reported with the raw type string.
#[test]
fn rejects_unknown_column_type() {
let body = "\
version: 1
project:
 created_at: x
tables:
 - name: T
 primary_key: [id]
 columns:
 - { name: id, type: bogus }
relationships: []
";
match parse_schema(body) {
Err(YamlError::UnknownType { raw, .. }) => assert_eq!(raw, "bogus"),
other => panic!("expected UnknownType, got {other:?}"),
}
}
// An unrecognised referential action is reported with the raw
// keyword.
#[test]
fn rejects_unknown_action() {
let body = "\
version: 1
project:
 created_at: x
tables: []
relationships:
 - name: R
 parent: { table: A, column: id }
 child: { table: B, column: aid }
 on_delete: blow_up
 on_update: no_action
";
match parse_schema(body) {
Err(YamlError::UnknownAction(s)) => assert_eq!(s, "blow_up"),
other => panic!("expected UnknownAction, got {other:?}"),
}
}
// A compound primary key is written in declaration order.
#[test]
fn preserves_compound_primary_key_order() {
let body = serialize_schema(&SchemaSnapshot {
created_at: "2026-05-07T14:30:12Z".to_string(),
tables: vec![TableSchema {
name: "Items".to_string(),
primary_key: vec!["a".to_string(), "b".to_string()],
columns: vec![
ColumnSchema { name: "a".to_string(), user_type: Type::Int, unique: false, not_null: false, default: None, check: None },
ColumnSchema { name: "b".to_string(), user_type: Type::Int, unique: false, not_null: false, default: None, check: None },
],
}],
relationships: vec![],
indexes: vec![],
});
assert!(body.contains("primary_key: [a, b]"));
}
}