Iteration 3: existence-only load + rebuild from text on missing .db

When the runtime opens a project whose playground.db is missing,
it now rebuilds the database from project.yaml + data/<table>.csv
per ADR-0015 §7. The rebuild path:

1. Parses project.yaml (serde_yml). Unknown versions / types /
   actions surface as PersistenceFatal.
2. Recreates each user table with FK constraints inline
   (PRAGMA foreign_keys=OFF), then populates the column-type,
   relationship, and project metadata tables.
3. Loads each table's CSV via a hand-rolled reader that
   preserves the NULL-vs-empty distinction (the csv crate
   doesn't expose whether a field was quoted; ours does).
4. Runs PRAGMA foreign_key_check before commit; any violation
   aborts.
5. Restores foreign_keys=ON regardless of success.

Row-level failures get DbError::RebuildRowFailed with row
number, file, table, and a friendly per-type detail. They land
in the runtime as a fatal stderr message ("unable to load row N
from `data/T.csv` into table `T`: ...") before the alternate
screen is entered.

created_at from project.yaml overwrites the configure-time
placeholder so timestamps round-trip stably.

Tests: 307 passing (267 lib + 9 + 5 new + 9 + 17), 0 failing,
0 skipped. Clippy clean with nursery lints.
This commit is contained in:
claude@clouddev1
2026-05-07 22:11:45 +00:00
parent 5410075398
commit f0fc063756
8 changed files with 1244 additions and 5 deletions
+212 -3
View File
@@ -1,16 +1,27 @@
//! `project.yaml` writer (hand-rolled, ADR-0015 §3) and
//! reader (`serde_yml`, ADR-0015 §7).
//!
//! The schema YAML uses a small, fixed set of structures —
//! tables, columns, relationships — and the values it carries
//! are all known-safe (identifiers from the DSL, types from
//! the fixed `Type` enum, action names from `ReferentialAction`).
//! Hand-rolling the writer avoids pulling a YAML serializer
//! dep just for the write path; the read path uses
//! `serde_yml` because we need to handle whatever the user
//! (or a future migrator, or a hand-edit) puts in there.
//
// `pub(crate)` items in this private submodule are
// re-exported from `persistence::mod.rs`; that path is what
// the db worker uses. Clippy's `redundant_pub_crate` lint
// flags this pattern, but it's load-bearing here.
#![allow(clippy::redundant_pub_crate)]
use std::fmt::Write as _;
use serde::Deserialize;
use crate::dsl::action::ReferentialAction;
use crate::dsl::types::Type;
use super::{ColumnSchema, RelationshipSchema, SchemaSnapshot, TableSchema};
@@ -144,6 +155,133 @@ const fn is_safe_yaml_char(c: char) -> bool {
c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':')
}
/// Parse a `project.yaml` body into a `SchemaSnapshot`.
///
/// The wire types below mirror the format `serialize_schema`
/// emits. Anything outside that shape produces a structured
/// error — callers (the rebuild path) translate those into a
/// fatal banner per ADR-0015 §8.
pub(crate) fn parse_schema(body: &str) -> Result<SchemaSnapshot, YamlError> {
let raw: RawProject =
serde_yml::from_str(body).map_err(|e| YamlError::Syntax(e.to_string()))?;
if raw.version != 1 {
return Err(YamlError::UnsupportedVersion(raw.version));
}
let mut tables: Vec<TableSchema> = Vec::with_capacity(raw.tables.len());
for t in raw.tables {
let mut columns: Vec<ColumnSchema> = Vec::with_capacity(t.columns.len());
for c in t.columns {
let user_type = c.user_type.parse::<Type>().map_err(|_| {
YamlError::UnknownType {
table: t.name.clone(),
column: c.name.clone(),
raw: c.user_type.clone(),
}
})?;
columns.push(ColumnSchema {
name: c.name,
user_type,
});
}
tables.push(TableSchema {
name: t.name,
primary_key: t.primary_key,
columns,
});
}
let mut relationships: Vec<RelationshipSchema> = Vec::with_capacity(raw.relationships.len());
for r in raw.relationships {
let on_delete = parse_action(&r.on_delete)
.ok_or_else(|| YamlError::UnknownAction(r.on_delete.clone()))?;
let on_update = parse_action(&r.on_update)
.ok_or_else(|| YamlError::UnknownAction(r.on_update.clone()))?;
relationships.push(RelationshipSchema {
name: r.name,
parent_table: r.parent.table,
parent_column: r.parent.column,
child_table: r.child.table,
child_column: r.child.column,
on_delete,
on_update,
});
}
Ok(SchemaSnapshot {
created_at: raw.project.created_at,
tables,
relationships,
})
}
/// Reasons `parse_schema` can reject a `project.yaml` body.
///
/// The `#[error]` strings are the `Display` text generated by
/// `thiserror`.
#[derive(Debug, thiserror::Error)]
pub(crate) enum YamlError {
/// The YAML parser rejected the body; the payload is the
/// parser's own message rendered to a string.
#[error("project.yaml syntax error: {0}")]
Syntax(String),
/// The `version` field held a value other than `1`; the payload
/// is the value that was found.
#[error("unsupported project.yaml version: {0} (expected 1)")]
UnsupportedVersion(u32),
/// A column's `type` string did not parse as a `Type`.
#[error("unknown user-facing column type `{raw}` for `{table}.{column}`")]
UnknownType {
/// Name of the table that owns the column.
table: String,
/// Name of the offending column.
column: String,
/// The unrecognised type string exactly as it appeared.
raw: String,
},
/// A relationship's `on_delete` or `on_update` value was not a
/// known action name; the payload is the offending string.
#[error("unknown referential action `{0}`")]
UnknownAction(String),
}
/// Map the on-disk spelling of a referential action to its enum value.
///
/// Matching is exact and case-sensitive; any other string yields
/// `None`.
fn parse_action(s: &str) -> Option<ReferentialAction> {
    let action = match s {
        "cascade" => ReferentialAction::Cascade,
        "set_null" => ReferentialAction::SetNull,
        "restrict" => ReferentialAction::Restrict,
        "no_action" => ReferentialAction::NoAction,
        // Anything else is not an action name this reader knows.
        _ => return None,
    };
    Some(action)
}
/// Wire form of the top level of `project.yaml`, decoded by
/// `parse_schema` before conversion to a `SchemaSnapshot`.
///
/// `version` and `project` carry no `#[serde(default)]`, so they
/// must be present; `tables` and `relationships` fall back to
/// empty lists when their key is missing.
///
/// NOTE(review): there is no `deny_unknown_fields` here, so
/// serde's default of ignoring unrecognised keys presumably
/// applies — confirm that is intended.
#[derive(Deserialize)]
struct RawProject {
/// Format version; `parse_schema` accepts only `1`.
version: u32,
/// Project-level metadata block.
project: RawProjectMeta,
/// Table definitions; empty when the key is absent.
#[serde(default)]
tables: Vec<RawTable>,
/// Relationship definitions; empty when the key is absent.
#[serde(default)]
relationships: Vec<RawRelationship>,
}
/// Wire form of the `project:` mapping in `project.yaml`.
#[derive(Deserialize)]
struct RawProjectMeta {
/// Creation timestamp, kept as the raw string; `parse_schema`
/// copies it into `SchemaSnapshot::created_at` unchanged.
created_at: String,
}
/// Wire form of one entry in the `tables:` list.
#[derive(Deserialize)]
struct RawTable {
/// Table name.
name: String,
/// Primary-key column names, in the order they appear in the file.
primary_key: Vec<String>,
/// Column definitions for this table.
columns: Vec<RawColumn>,
}
/// Wire form of one entry in a table's `columns:` list.
#[derive(Deserialize)]
struct RawColumn {
/// Column name.
name: String,
/// Type name as written in the file under the key `type`
/// (renamed because `type` is a Rust keyword); `parse_schema`
/// parses it into a `Type`.
#[serde(rename = "type")]
user_type: String,
}
/// Wire form of one entry in the `relationships:` list.
#[derive(Deserialize)]
struct RawRelationship {
/// Relationship name.
name: String,
/// Parent-side table/column pair.
parent: RawEndpoint,
/// Child-side table/column pair.
child: RawEndpoint,
/// Action name as written in the file; `parse_schema` converts
/// it with `parse_action`.
on_delete: String,
/// Action name as written in the file; `parse_schema` converts
/// it with `parse_action`.
on_update: String,
}
/// Wire form of one side of a relationship (used for both the
/// `parent` and the `child` key).
#[derive(Deserialize)]
struct RawEndpoint {
/// Table name for this side of the relationship.
table: String,
/// Column name within `table`.
column: String,
}
#[cfg(test)]
mod tests {
use super::*;
@@ -235,6 +373,77 @@ mod tests {
assert_eq!(quote_if_needed("with\"quote"), "\"with\\\"quote\"");
}
// Serializing a snapshot and parsing the result must give back
// a value equal to the one we started with.
#[test]
fn write_then_read_round_trips() {
    let expected = snapshot();
    let round_tripped = parse_schema(&serialize_schema(&expected)).expect("parse schema");
    assert_eq!(round_tripped, expected);
}
// A document with empty `tables` and `relationships` lists
// parses successfully, and `created_at` comes back as the exact
// string written in the file.
#[test]
fn parses_minimal_yaml_with_no_tables() {
// Smallest accepted document: version 1, project metadata, no
// tables, no relationships.
let body = "\
version: 1
project:
created_at: 2026-05-07T14:30:12Z
tables: []
relationships: []
";
let parsed = parse_schema(body).expect("parse minimal");
assert_eq!(parsed.tables.len(), 0);
assert_eq!(parsed.relationships.len(), 0);
// The unquoted timestamp must survive as a plain string.
assert_eq!(parsed.created_at, "2026-05-07T14:30:12Z");
}
// Any `version` other than 1 must be rejected, and the error
// must carry the version that was found.
#[test]
fn rejects_unknown_version() {
    let body = "version: 9\nproject:\n created_at: x\ntables: []\nrelationships: []\n";
    let outcome = parse_schema(body);
    assert!(
        matches!(outcome, Err(YamlError::UnsupportedVersion(9))),
        "expected UnsupportedVersion(9), got {outcome:?}"
    );
}
// A column whose `type` is not a known `Type` must produce
// `YamlError::UnknownType` carrying the raw type string.
#[test]
fn rejects_unknown_column_type() {
// One table with a single column whose type is `bogus`.
let body = "\
version: 1
project:
created_at: x
tables:
- name: T
primary_key: [id]
columns:
- { name: id, type: bogus }
relationships: []
";
match parse_schema(body) {
// Only the raw type string is checked; table/column are ignored here.
Err(YamlError::UnknownType { raw, .. }) => assert_eq!(raw, "bogus"),
other => panic!("expected UnknownType, got {other:?}"),
}
}
// A relationship whose `on_delete` is not a known action name
// must produce `YamlError::UnknownAction` carrying that string.
#[test]
fn rejects_unknown_action() {
// `on_delete` is invalid while `on_update` is valid, so the
// reported string must be the `on_delete` value.
let body = "\
version: 1
project:
created_at: x
tables: []
relationships:
- name: R
parent: { table: A, column: id }
child: { table: B, column: aid }
on_delete: blow_up
on_update: no_action
";
match parse_schema(body) {
Err(YamlError::UnknownAction(s)) => assert_eq!(s, "blow_up"),
other => panic!("expected UnknownAction, got {other:?}"),
}
}
#[test]
fn preserves_compound_primary_key_order() {
let body = serialize_schema(&SchemaSnapshot {