Iteration 3: existence-only load + rebuild from text on missing .db

When the runtime opens a project whose playground.db is missing,
it now rebuilds the database from project.yaml + data/<table>.csv
per ADR-0015 §7. The rebuild path:

1. Parses project.yaml (serde_yml). Unknown versions / types /
   actions surface as PersistenceFatal.
2. Recreates each user table with FK constraints inline
   (PRAGMA foreign_keys=OFF), then populates the column-type,
   relationship, and project metadata tables.
3. Loads each table's CSV via a hand-rolled reader that
   preserves the NULL-vs-empty distinction (the csv crate
   doesn't expose whether a field was quoted; ours does).
4. Runs PRAGMA foreign_key_check before commit; any violation
   aborts.
5. Restores foreign_keys=ON regardless of success.

Row-level failures get DbError::RebuildRowFailed with row
number, file, table, and a friendly per-type detail. They land
in the runtime as a fatal stderr message ("unable to load row N
from `data/T.csv` into table `T`: ...") before the alternate
screen is entered.

created_at from project.yaml overwrites the configure-time
placeholder so timestamps round-trip stably.

Tests: 307 passing (267 lib + 9 + 5 new + 9 + 17), 0 failing,
0 skipped. Clippy clean with nursery lints.
This commit is contained in:
claude@clouddev1
2026-05-07 22:11:45 +00:00
parent 5410075398
commit f0fc063756
8 changed files with 1244 additions and 5 deletions
Generated
+27
View File
@@ -796,6 +796,16 @@ dependencies = [
"vcpkg", "vcpkg",
] ]
[[package]]
name = "libyml"
version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980"
dependencies = [
"anyhow",
"version_check",
]
[[package]] [[package]]
name = "line-clipping" name = "line-clipping"
version = "0.3.7" version = "0.3.7"
@@ -1360,6 +1370,8 @@ dependencies = [
"rand 0.10.1", "rand 0.10.1",
"ratatui", "ratatui",
"rusqlite", "rusqlite",
"serde",
"serde_yml",
"sysinfo", "sysinfo",
"tempfile", "tempfile",
"thiserror 2.0.18", "thiserror 2.0.18",
@@ -1531,6 +1543,21 @@ dependencies = [
"zmij", "zmij",
] ]
[[package]]
name = "serde_yml"
version = "0.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd"
dependencies = [
"indexmap",
"itoa",
"libyml",
"memchr",
"ryu",
"serde",
"version_check",
]
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.10.9" version = "0.10.9"
+2
View File
@@ -20,6 +20,8 @@ gethostname = "1.1.0"
rand = "0.10.1" rand = "0.10.1"
ratatui = "0.30.0" ratatui = "0.30.0"
rusqlite = { version = "0.39.0", features = ["bundled"] } rusqlite = { version = "0.39.0", features = ["bundled"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_yml = "0.0.12"
sysinfo = { version = "0.39.0", default-features = false, features = ["system"] } sysinfo = { version = "0.39.0", default-features = false, features = ["system"] }
thiserror = "2.0.18" thiserror = "2.0.18"
tokio = { version = "1.52.2", features = ["full"] } tokio = { version = "1.52.2", features = ["full"] }
+313 -2
View File
@@ -39,8 +39,9 @@ use crate::dsl::types::Type;
use crate::dsl::value::{Bound, Value, ValueError}; use crate::dsl::value::{Bound, Value, ValueError};
use crate::persistence::{ use crate::persistence::{
CellValue, ColumnSchema, Persistence, PersistenceError, RelationshipSchema, SchemaSnapshot, CellValue, ColumnSchema, Persistence, PersistenceError, RelationshipSchema, SchemaSnapshot,
TableSchema, TableSnapshot, TableSchema, TableSnapshot, decode_cell, parse_csv, parse_schema,
}; };
use crate::project::{DATA_DIR, PROJECT_YAML};
/// Inbox capacity. The worker is fast enough that this rarely /// Inbox capacity. The worker is fast enough that this rarely
/// matters; `64` is a generous head-room for bursts. /// matters; `64` is a generous head-room for bursts.
@@ -117,6 +118,16 @@ pub enum DbError {
path: std::path::PathBuf, path: std::path::PathBuf,
message: String, message: String,
}, },
#[error(
"unable to load row {row_number} from `{}` into table `{table}`: {detail}",
csv_path.display()
)]
RebuildRowFailed {
table: String,
csv_path: std::path::PathBuf,
row_number: usize,
detail: String,
},
#[error("database worker is no longer available")] #[error("database worker is no longer available")]
WorkerGone, WorkerGone,
#[error("io error: {0}")] #[error("io error: {0}")]
@@ -213,7 +224,7 @@ impl DbError {
/// surfaces these as fatal banners. /// surfaces these as fatal banners.
#[must_use] #[must_use]
pub const fn is_fatal(&self) -> bool { pub const fn is_fatal(&self) -> bool {
matches!(self, Self::PersistenceFatal { .. }) matches!(self, Self::PersistenceFatal { .. } | Self::RebuildRowFailed { .. })
} }
} }
@@ -309,6 +320,14 @@ enum Request {
source: Option<String>, source: Option<String>,
reply: oneshot::Sender<Result<DataResult, DbError>>, reply: oneshot::Sender<Result<DataResult, DbError>>,
}, },
/// Rebuild the database from `project.yaml` + `data/`
/// (ADR-0015 §7). Used by the runtime when the `.db` file
/// is missing on project open. Iteration 4's `rebuild`
/// app-level command will reuse the same request.
RebuildFromText {
project_path: std::path::PathBuf,
reply: oneshot::Sender<Result<(), DbError>>,
},
} }
impl Database { impl Database {
@@ -523,6 +542,23 @@ impl Database {
recv.await.map_err(|_| DbError::WorkerGone)? recv.await.map_err(|_| DbError::WorkerGone)?
} }
/// Rebuild the database from `project.yaml` + `data/`
/// (ADR-0015 §7). Called by the runtime on a missing `.db`
/// at startup; Iteration 4 will also expose this via the
/// `rebuild` app-level command.
pub async fn rebuild_from_text(
&self,
project_path: std::path::PathBuf,
) -> Result<(), DbError> {
let (reply, recv) = oneshot::channel();
self.send(Request::RebuildFromText {
project_path,
reply,
})
.await?;
recv.await.map_err(|_| DbError::WorkerGone)?
}
pub async fn query_data( pub async fn query_data(
&self, &self,
table: String, table: String,
@@ -785,6 +821,9 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req
&table, &table,
)); ));
} }
Request::RebuildFromText { project_path, reply } => {
let _ = reply.send(do_rebuild_from_text(conn, &project_path));
}
} }
} }
@@ -2393,6 +2432,278 @@ fn read_relationships_inbound(
Ok(out) Ok(out)
} }
/// Rebuild the database from `project.yaml` + `data/<table>.csv`
/// (ADR-0015 §7).
///
/// The on-disk text is the authoritative source: this function
/// recreates schema, metadata, and rows so the resulting `.db`
/// reflects them exactly. Persistence callbacks are NOT invoked;
/// we're loading, not changing user-visible state.
///
/// FK enforcement is disabled for the load and re-enabled at
/// the end (regardless of success). A `foreign_key_check`
/// before commit verifies the loaded data is consistent — any
/// violation aborts with a fatal error.
fn do_rebuild_from_text(conn: &Connection, project_path: &Path) -> Result<(), DbError> {
let yaml_path = project_path.join(PROJECT_YAML);
let data_dir = project_path.join(DATA_DIR);
let yaml_body =
std::fs::read_to_string(&yaml_path).map_err(|e| DbError::PersistenceFatal {
operation: "read",
path: yaml_path.clone(),
message: e.to_string(),
})?;
let snapshot = parse_schema(&yaml_body).map_err(|e| DbError::PersistenceFatal {
operation: "parse",
path: yaml_path.clone(),
message: e.to_string(),
})?;
conn.execute_batch("PRAGMA foreign_keys = OFF;")
.map_err(DbError::from_rusqlite)?;
let result = (|| -> Result<(), DbError> {
let tx = conn
.unchecked_transaction()
.map_err(DbError::from_rusqlite)?;
// 1. Recreate user tables with FK constraints inline.
for table in &snapshot.tables {
let read_schema = build_read_schema(table, &snapshot.relationships);
let ddl = schema_to_ddl(&table.name, &read_schema);
tx.execute_batch(&ddl).map_err(DbError::from_rusqlite)?;
}
// 2. Column-type metadata.
{
let mut stmt = tx
.prepare(&format!(
"INSERT INTO {META_TABLE} (table_name, column_name, user_type) \
VALUES (?1, ?2, ?3);"
))
.map_err(DbError::from_rusqlite)?;
for table in &snapshot.tables {
for col in &table.columns {
stmt.execute([
table.name.as_str(),
col.name.as_str(),
col.user_type.keyword(),
])
.map_err(DbError::from_rusqlite)?;
}
}
}
// 3. Relationship metadata.
{
let mut stmt = tx
.prepare(&format!(
"INSERT INTO {REL_TABLE} \
(name, parent_table, parent_column, child_table, child_column, \
on_delete, on_update) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);"
))
.map_err(DbError::from_rusqlite)?;
for rel in &snapshot.relationships {
stmt.execute([
rel.name.as_str(),
rel.parent_table.as_str(),
rel.parent_column.as_str(),
rel.child_table.as_str(),
rel.child_column.as_str(),
rel.on_delete.keyword(),
rel.on_update.keyword(),
])
.map_err(DbError::from_rusqlite)?;
}
}
// 4. Project metadata: overwrite the configure-time
// `created_at` with the YAML's authoritative value.
tx.execute(
&format!(
"INSERT INTO {META_PROJECT_TABLE} (key, value) VALUES ('created_at', ?1) \
ON CONFLICT(key) DO UPDATE SET value = excluded.value;"
),
[snapshot.created_at.as_str()],
)
.map_err(DbError::from_rusqlite)?;
// 5. Load each table's rows (if a CSV is present).
for table in &snapshot.tables {
let csv_path = data_dir.join(format!("{}.csv", table.name));
if !csv_path.exists() {
continue;
}
load_table_csv(&tx, table, &csv_path)?;
}
// 6. Verify FK consistency before committing.
{
let mut check = tx
.prepare("PRAGMA foreign_key_check;")
.map_err(DbError::from_rusqlite)?;
let mut rows = check.query([]).map_err(DbError::from_rusqlite)?;
if rows.next().map_err(DbError::from_rusqlite)?.is_some() {
return Err(DbError::PersistenceFatal {
operation: "rebuild",
path: yaml_path.clone(),
message: "rebuilt data violates foreign-key constraints".to_string(),
});
}
}
tx.commit().map_err(DbError::from_rusqlite)?;
Ok(())
})();
let pragma_result = conn
.execute_batch("PRAGMA foreign_keys = ON;")
.map_err(DbError::from_rusqlite);
result.and(pragma_result)
}
/// Build a `ReadSchema` for `table` that includes any
/// relationships from the snapshot in which `table` is the
/// child. The output drives `schema_to_ddl` so the resulting
/// CREATE TABLE has the FKs inline.
fn build_read_schema(table: &TableSchema, relationships: &[RelationshipSchema]) -> ReadSchema {
let columns: Vec<ReadColumn> = table
.columns
.iter()
.map(|c| ReadColumn {
name: c.name.clone(),
sqlite_type: c.user_type.sqlite_strict_type().to_string(),
notnull: false,
primary_key: table.primary_key.contains(&c.name),
user_type: Some(c.user_type),
})
.collect();
let foreign_keys: Vec<ReadForeignKey> = relationships
.iter()
.filter(|r| r.child_table == table.name)
.map(|r| ReadForeignKey {
parent_table: r.parent_table.clone(),
parent_column: r.parent_column.clone(),
child_column: r.child_column.clone(),
on_delete: r.on_delete,
on_update: r.on_update,
})
.collect();
ReadSchema {
columns,
primary_key: table.primary_key.clone(),
foreign_keys,
}
}
/// Read `csv_path` and INSERT each row into `table.name`.
/// Failures are wrapped in `DbError::RebuildRowFailed` with
/// row number and table name per ADR-0015 §7.
fn load_table_csv(
tx: &rusqlite::Transaction<'_>,
table: &TableSchema,
csv_path: &Path,
) -> Result<(), DbError> {
let body = std::fs::read_to_string(csv_path).map_err(|e| DbError::PersistenceFatal {
operation: "read",
path: csv_path.to_path_buf(),
message: e.to_string(),
})?;
let parsed = parse_csv(&body).map_err(|e| DbError::PersistenceFatal {
operation: "parse",
path: csv_path.to_path_buf(),
message: e.to_string(),
})?;
if parsed.rows.is_empty() {
return Ok(());
}
// Header sanity check: column names must match the YAML
// schema's column order. A mismatch is a hand-edit hazard;
// surfacing it as a fatal error is better than silently
// mis-aligning columns.
let expected: Vec<&str> = table.columns.iter().map(|c| c.name.as_str()).collect();
let header_strs: Vec<&str> = parsed.header.iter().map(String::as_str).collect();
if header_strs != expected {
return Err(DbError::PersistenceFatal {
operation: "validate",
path: csv_path.to_path_buf(),
message: format!(
"CSV header {:?} does not match table columns {:?}",
parsed.header, expected,
),
});
}
let cols_csv = table
.columns
.iter()
.map(|c| quote_ident(&c.name))
.collect::<Vec<_>>()
.join(", ");
let placeholders = (1..=table.columns.len())
.map(|i| format!("?{i}"))
.collect::<Vec<_>>()
.join(", ");
let sql = format!(
"INSERT INTO {ident} ({cols_csv}) VALUES ({placeholders});",
ident = quote_ident(&table.name),
);
let mut stmt = tx.prepare(&sql).map_err(DbError::from_rusqlite)?;
for (idx, raw_row) in parsed.rows.iter().enumerate() {
// Row number reported as a 1-based file line: header
// is line 1, so the first data row is line 2.
let row_number = idx + 2;
if raw_row.len() != table.columns.len() {
return Err(DbError::RebuildRowFailed {
table: table.name.clone(),
csv_path: csv_path.to_path_buf(),
row_number,
detail: format!(
"row has {} field(s) but table has {} column(s)",
raw_row.len(),
table.columns.len(),
),
});
}
let mut params: Vec<rusqlite::types::Value> = Vec::with_capacity(raw_row.len());
for (col, raw_cell) in table.columns.iter().zip(raw_row.iter()) {
let cell = decode_cell(col.user_type, raw_cell).map_err(|detail| {
DbError::RebuildRowFailed {
table: table.name.clone(),
csv_path: csv_path.to_path_buf(),
row_number,
detail: format!("column `{}`: {detail}", col.name),
}
})?;
params.push(cell_value_to_sqlite(&cell));
}
stmt.execute(rusqlite::params_from_iter(params.iter()))
.map_err(|e| DbError::RebuildRowFailed {
table: table.name.clone(),
csv_path: csv_path.to_path_buf(),
row_number,
detail: e.to_string(),
})?;
}
Ok(())
}
fn cell_value_to_sqlite(cell: &CellValue) -> rusqlite::types::Value {
use rusqlite::types::Value;
match cell {
CellValue::Null => Value::Null,
CellValue::Integer(n) => Value::Integer(*n),
CellValue::Real(f) => Value::Real(*f),
CellValue::Text(s) => Value::Text(s.clone()),
CellValue::Blob(b) => Value::Blob(b.clone()),
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
+271
View File
@@ -16,6 +16,12 @@
//! quote, newline). We handle the empty-string-vs-null //! quote, newline). We handle the empty-string-vs-null
//! distinction manually by always quoting non-null empty //! distinction manually by always quoting non-null empty
//! Text and never quoting Null. //! Text and never quoting Null.
//
// `pub(crate)` items below are re-exported from
// `persistence::mod.rs`; the db worker reaches them via that
// path. Clippy's `redundant_pub_crate` lint flags this
// pattern, but it's load-bearing here.
#![allow(clippy::redundant_pub_crate)]
use std::io::Write as _; use std::io::Write as _;
@@ -172,6 +178,182 @@ fn format_real(f: f64) -> String {
} }
} }
/// Parsed CSV records: header row + zero or more data rows.
///
/// Each cell records whether it was syntactically quoted in
/// the source — that's the bit we need to distinguish NULL
/// (empty unquoted) from `""` (empty quoted). The `csv`
/// crate doesn't expose this, which is why we hand-roll the
/// reader to pair with the hand-rolled writer above.
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParsedCsv {
pub header: Vec<String>,
pub rows: Vec<Vec<RawCell>>,
}
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) struct RawCell {
pub content: String,
pub was_quoted: bool,
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum CsvError {
#[error("CSV is empty")]
Empty,
#[error("invalid UTF-8 in CSV body")]
InvalidUtf8,
#[error("unterminated quoted field")]
UnterminatedQuote,
}
/// Tokenize a CSV body. Returns the header (column names from
/// the first record) and the data rows. Each cell preserves a
/// `was_quoted` flag so the caller can distinguish an empty
/// unquoted field (NULL) from an empty quoted field (`""`).
pub(crate) fn parse_csv(body: &str) -> Result<ParsedCsv, CsvError> {
let mut records: Vec<Vec<RawCell>> = Vec::new();
let mut current: Vec<RawCell> = Vec::new();
let bytes = body.as_bytes();
let mut i = 0;
let n = bytes.len();
while i < n {
let (cell, advance) = parse_field(&bytes[i..])?;
i += advance;
current.push(cell);
match bytes.get(i) {
Some(&b',') => i += 1,
Some(&b'\n') => {
i += 1;
records.push(std::mem::take(&mut current));
}
Some(&b'\r') => {
i += 1;
if bytes.get(i) == Some(&b'\n') {
i += 1;
}
records.push(std::mem::take(&mut current));
}
None => {
records.push(std::mem::take(&mut current));
}
Some(&other) => {
// A non-structural byte after a quoted field —
// shouldn't happen with our well-formed writer.
// Treat as part of an unquoted continuation by
// appending to the last cell. We choose to
// tolerate rather than error since the most
// common cause is a trailing space, which we
// can roll into the cell.
let last = current
.last_mut()
.or_else(|| records.last_mut().and_then(|r| r.last_mut()));
if let Some(c) = last {
c.content.push(other as char);
}
i += 1;
}
}
}
if !current.is_empty() {
records.push(current);
}
if records.is_empty() {
return Err(CsvError::Empty);
}
let header_record = records.remove(0);
let header: Vec<String> = header_record.into_iter().map(|c| c.content).collect();
Ok(ParsedCsv {
header,
rows: records,
})
}
fn parse_field(bytes: &[u8]) -> Result<(RawCell, usize), CsvError> {
if bytes.first() == Some(&b'"') {
let mut content_bytes: Vec<u8> = Vec::new();
let mut i = 1;
while i < bytes.len() {
match bytes[i] {
b'"' => {
if bytes.get(i + 1) == Some(&b'"') {
content_bytes.push(b'"');
i += 2;
} else {
let content =
String::from_utf8(content_bytes).map_err(|_| CsvError::InvalidUtf8)?;
return Ok((
RawCell {
content,
was_quoted: true,
},
i + 1,
));
}
}
other => {
content_bytes.push(other);
i += 1;
}
}
}
Err(CsvError::UnterminatedQuote)
} else {
let mut i = 0;
while i < bytes.len() {
match bytes[i] {
b',' | b'\n' | b'\r' => break,
_ => i += 1,
}
}
let content =
String::from_utf8(bytes[..i].to_vec()).map_err(|_| CsvError::InvalidUtf8)?;
Ok((
RawCell {
content,
was_quoted: false,
},
i,
))
}
}
/// Decode one parsed cell into a `CellValue` per the column's
/// declared type. Returns an error string the caller can
/// embed in a fatal banner per ADR-0015 §7 ("unable to load
/// row N from data/T.csv into table T: …").
pub(crate) fn decode_cell(ty: Type, cell: &RawCell) -> Result<CellValue, String> {
if !cell.was_quoted && cell.content.is_empty() {
return Ok(CellValue::Null);
}
match ty {
Type::Text | Type::Date | Type::DateTime | Type::Decimal | Type::ShortId => {
Ok(CellValue::Text(cell.content.clone()))
}
Type::Int | Type::Serial => cell
.content
.parse::<i64>()
.map(CellValue::Integer)
.map_err(|_| format!("expected an integer, got `{}`", cell.content)),
Type::Real => cell
.content
.parse::<f64>()
.map(CellValue::Real)
.map_err(|_| format!("expected a real number, got `{}`", cell.content)),
Type::Bool => match cell.content.as_str() {
"true" => Ok(CellValue::Integer(1)),
"false" => Ok(CellValue::Integer(0)),
other => Err(format!("expected `true` or `false`, got `{other}`")),
},
Type::Blob => base64::engine::general_purpose::STANDARD
.decode(cell.content.as_bytes())
.map(CellValue::Blob)
.map_err(|e| format!("invalid base64 blob: {e}")),
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -290,6 +472,95 @@ mod tests {
assert!(s.contains("2026-05-07,2026-05-07T14:30:12Z")); assert!(s.contains("2026-05-07,2026-05-07T14:30:12Z"));
} }
#[test]
fn parse_round_trips_simple_table() {
let table = TableSnapshot {
name: "Customers".to_string(),
columns: vec![col("id", Type::Serial), col("Name", Type::Text)],
rows: vec![
vec![CellValue::Integer(1), CellValue::Text("Alice".to_string())],
vec![CellValue::Integer(2), CellValue::Text("Bob".to_string())],
],
};
let body = serialize_table(&table).unwrap();
let parsed = parse_csv(std::str::from_utf8(&body).unwrap()).unwrap();
assert_eq!(parsed.header, vec!["id", "Name"]);
assert_eq!(parsed.rows.len(), 2);
assert_eq!(parsed.rows[0][0].content, "1");
assert_eq!(parsed.rows[0][1].content, "Alice");
assert_eq!(parsed.rows[1][1].content, "Bob");
}
#[test]
fn parse_distinguishes_null_from_empty_string() {
// Header "Name", then two rows: NULL (empty unquoted)
// and "" (empty quoted).
let body = "Name\n\n\"\"\n";
let parsed = parse_csv(body).unwrap();
assert_eq!(parsed.rows.len(), 2);
assert!(!parsed.rows[0][0].was_quoted);
assert_eq!(parsed.rows[0][0].content, "");
assert!(parsed.rows[1][0].was_quoted);
assert_eq!(parsed.rows[1][0].content, "");
let null = decode_cell(Type::Text, &parsed.rows[0][0]).unwrap();
let empty = decode_cell(Type::Text, &parsed.rows[1][0]).unwrap();
assert!(matches!(null, CellValue::Null));
assert!(matches!(empty, CellValue::Text(s) if s.is_empty()));
}
#[test]
fn parse_handles_rfc4180_escapes() {
let body = "Name\n\"hello, world\"\n\"she said \"\"hi\"\"\"\n";
let parsed = parse_csv(body).unwrap();
assert_eq!(parsed.rows[0][0].content, "hello, world");
assert_eq!(parsed.rows[1][0].content, "she said \"hi\"");
}
#[test]
fn parse_decodes_per_type() {
// Rows match the round-trip produced by serialize_table.
let table = TableSnapshot {
name: "T".to_string(),
columns: vec![
col("n", Type::Int),
col("r", Type::Real),
col("b", Type::Bool),
col("blob", Type::Blob),
],
rows: vec![vec![
CellValue::Integer(42),
CellValue::Real(std::f64::consts::PI),
CellValue::Integer(1),
CellValue::Blob(b"hi".to_vec()),
]],
};
let body = serialize_table(&table).unwrap();
let parsed = parse_csv(std::str::from_utf8(&body).unwrap()).unwrap();
let row = &parsed.rows[0];
assert!(matches!(decode_cell(Type::Int, &row[0]).unwrap(), CellValue::Integer(42)));
match decode_cell(Type::Real, &row[1]).unwrap() {
CellValue::Real(f) => assert!((f - std::f64::consts::PI).abs() < 1e-12),
other => panic!("got {other:?}"),
}
assert!(matches!(decode_cell(Type::Bool, &row[2]).unwrap(), CellValue::Integer(1)));
assert!(matches!(decode_cell(Type::Blob, &row[3]).unwrap(), CellValue::Blob(b) if b == b"hi"));
}
#[test]
fn parse_rejects_unterminated_quotes() {
let err = parse_csv("Name\n\"oops").expect_err("must error");
assert!(matches!(err, CsvError::UnterminatedQuote));
}
#[test]
fn decode_cell_reports_friendly_error_for_bad_int() {
let cell = RawCell { content: "abc".to_string(), was_quoted: false };
let err = decode_cell(Type::Int, &cell).expect_err("must error");
assert!(err.contains("integer"));
assert!(err.contains("abc"));
}
#[test] #[test]
fn row_width_mismatch_errors() { fn row_width_mismatch_errors() {
let err = serialize_table(&TableSnapshot { let err = serialize_table(&TableSnapshot {
+5
View File
@@ -22,10 +22,15 @@ use crate::dsl::action::ReferentialAction;
use crate::dsl::types::Type; use crate::dsl::types::Type;
use crate::project::{DATA_DIR, HISTORY_LOG, PROJECT_YAML}; use crate::project::{DATA_DIR, HISTORY_LOG, PROJECT_YAML};
// Submodules are private; the few items the db worker needs
// during rebuild (ADR-0015 §7) are re-exported below.
mod csv_io; mod csv_io;
mod history; mod history;
mod yaml; mod yaml;
pub(crate) use csv_io::{decode_cell, parse_csv};
pub(crate) use yaml::parse_schema;
/// Owns persistence to a single project on disk. Cheap to /// Owns persistence to a single project on disk. Cheap to
/// move; the db worker holds one instance for its lifetime. /// move; the db worker holds one instance for its lifetime.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
+212 -3
View File
@@ -1,16 +1,27 @@
//! Hand-rolled `project.yaml` writer (ADR-0015 §3). //! `project.yaml` writer (hand-rolled, ADR-0015 §3) and
//! reader (`serde_yml`, ADR-0015 §7).
//! //!
//! The schema YAML uses a small, fixed set of structures — //! The schema YAML uses a small, fixed set of structures —
//! tables, columns, relationships — and the values it carries //! tables, columns, relationships — and the values it carries
//! are all known-safe (identifiers from the DSL, types from //! are all known-safe (identifiers from the DSL, types from
//! the fixed `Type` enum, action names from `ReferentialAction`). //! the fixed `Type` enum, action names from `ReferentialAction`).
//! Hand-rolling the writer avoids pulling a YAML serializer //! Hand-rolling the writer avoids pulling a YAML serializer
//! dep just for this file. The reader (Iteration 3) will use //! dep just for the write path; the read path uses
//! a real YAML parser. //! `serde_yml` because we need to handle whatever the user
//! (or a future migrator, or a hand-edit) puts in there.
//
// `pub(crate)` items in this private submodule are
// re-exported from `persistence::mod.rs`; that path is what
// the db worker uses. Clippy's `redundant_pub_crate` lint
// flags this pattern, but it's load-bearing here.
#![allow(clippy::redundant_pub_crate)]
use std::fmt::Write as _; use std::fmt::Write as _;
use serde::Deserialize;
use crate::dsl::action::ReferentialAction; use crate::dsl::action::ReferentialAction;
use crate::dsl::types::Type;
use super::{ColumnSchema, RelationshipSchema, SchemaSnapshot, TableSchema}; use super::{ColumnSchema, RelationshipSchema, SchemaSnapshot, TableSchema};
@@ -144,6 +155,133 @@ const fn is_safe_yaml_char(c: char) -> bool {
c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':') c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':')
} }
/// Parse a `project.yaml` body into a `SchemaSnapshot`.
///
/// The wire types below mirror the format `serialize_schema`
/// emits. Anything outside that shape produces a structured
/// error — callers (the rebuild path) translate those into a
/// fatal banner per ADR-0015 §8.
pub(crate) fn parse_schema(body: &str) -> Result<SchemaSnapshot, YamlError> {
let raw: RawProject =
serde_yml::from_str(body).map_err(|e| YamlError::Syntax(e.to_string()))?;
if raw.version != 1 {
return Err(YamlError::UnsupportedVersion(raw.version));
}
let mut tables: Vec<TableSchema> = Vec::with_capacity(raw.tables.len());
for t in raw.tables {
let mut columns: Vec<ColumnSchema> = Vec::with_capacity(t.columns.len());
for c in t.columns {
let user_type = c.user_type.parse::<Type>().map_err(|_| {
YamlError::UnknownType {
table: t.name.clone(),
column: c.name.clone(),
raw: c.user_type.clone(),
}
})?;
columns.push(ColumnSchema {
name: c.name,
user_type,
});
}
tables.push(TableSchema {
name: t.name,
primary_key: t.primary_key,
columns,
});
}
let mut relationships: Vec<RelationshipSchema> = Vec::with_capacity(raw.relationships.len());
for r in raw.relationships {
let on_delete = parse_action(&r.on_delete)
.ok_or_else(|| YamlError::UnknownAction(r.on_delete.clone()))?;
let on_update = parse_action(&r.on_update)
.ok_or_else(|| YamlError::UnknownAction(r.on_update.clone()))?;
relationships.push(RelationshipSchema {
name: r.name,
parent_table: r.parent.table,
parent_column: r.parent.column,
child_table: r.child.table,
child_column: r.child.column,
on_delete,
on_update,
});
}
Ok(SchemaSnapshot {
created_at: raw.project.created_at,
tables,
relationships,
})
}
#[derive(Debug, thiserror::Error)]
pub(crate) enum YamlError {
#[error("project.yaml syntax error: {0}")]
Syntax(String),
#[error("unsupported project.yaml version: {0} (expected 1)")]
UnsupportedVersion(u32),
#[error("unknown user-facing column type `{raw}` for `{table}.{column}`")]
UnknownType {
table: String,
column: String,
raw: String,
},
#[error("unknown referential action `{0}`")]
UnknownAction(String),
}
fn parse_action(s: &str) -> Option<ReferentialAction> {
match s {
"no_action" => Some(ReferentialAction::NoAction),
"restrict" => Some(ReferentialAction::Restrict),
"set_null" => Some(ReferentialAction::SetNull),
"cascade" => Some(ReferentialAction::Cascade),
_ => None,
}
}
#[derive(Deserialize)]
struct RawProject {
version: u32,
project: RawProjectMeta,
#[serde(default)]
tables: Vec<RawTable>,
#[serde(default)]
relationships: Vec<RawRelationship>,
}
#[derive(Deserialize)]
struct RawProjectMeta {
created_at: String,
}
#[derive(Deserialize)]
struct RawTable {
name: String,
primary_key: Vec<String>,
columns: Vec<RawColumn>,
}
#[derive(Deserialize)]
struct RawColumn {
name: String,
#[serde(rename = "type")]
user_type: String,
}
#[derive(Deserialize)]
struct RawRelationship {
name: String,
parent: RawEndpoint,
child: RawEndpoint,
on_delete: String,
on_update: String,
}
#[derive(Deserialize)]
struct RawEndpoint {
table: String,
column: String,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -235,6 +373,77 @@ mod tests {
assert_eq!(quote_if_needed("with\"quote"), "\"with\\\"quote\""); assert_eq!(quote_if_needed("with\"quote"), "\"with\\\"quote\"");
} }
#[test]
fn write_then_read_round_trips() {
let original = snapshot();
let body = serialize_schema(&original);
let parsed = parse_schema(&body).expect("parse schema");
assert_eq!(parsed, original);
}
#[test]
fn parses_minimal_yaml_with_no_tables() {
let body = "\
version: 1
project:
created_at: 2026-05-07T14:30:12Z
tables: []
relationships: []
";
let parsed = parse_schema(body).expect("parse minimal");
assert_eq!(parsed.tables.len(), 0);
assert_eq!(parsed.relationships.len(), 0);
assert_eq!(parsed.created_at, "2026-05-07T14:30:12Z");
}
#[test]
fn rejects_unknown_version() {
let body = "version: 9\nproject:\n created_at: x\ntables: []\nrelationships: []\n";
match parse_schema(body) {
Err(YamlError::UnsupportedVersion(9)) => {}
other => panic!("expected UnsupportedVersion(9), got {other:?}"),
}
}
#[test]
fn rejects_unknown_column_type() {
let body = "\
version: 1
project:
created_at: x
tables:
- name: T
primary_key: [id]
columns:
- { name: id, type: bogus }
relationships: []
";
match parse_schema(body) {
Err(YamlError::UnknownType { raw, .. }) => assert_eq!(raw, "bogus"),
other => panic!("expected UnknownType, got {other:?}"),
}
}
#[test]
fn rejects_unknown_action() {
let body = "\
version: 1
project:
created_at: x
tables: []
relationships:
- name: R
parent: { table: A, column: id }
child: { table: B, column: aid }
on_delete: blow_up
on_update: no_action
";
match parse_schema(body) {
Err(YamlError::UnknownAction(s)) => assert_eq!(s, "blow_up"),
other => panic!("expected UnknownAction, got {other:?}"),
}
}
#[test] #[test]
fn preserves_compound_primary_key_order() { fn preserves_compound_primary_key_order() {
let body = serialize_schema(&SchemaSnapshot { let body = serialize_schema(&SchemaSnapshot {
+21
View File
@@ -47,8 +47,29 @@ pub async fn run(args: Args) -> Result<()> {
let db_path = project.db_path(); let db_path = project.db_path();
let display_name = project.display_name().to_string(); let display_name = project.display_name().to_string();
let persistence = crate::persistence::Persistence::new(project.path().to_path_buf()); let persistence = crate::persistence::Persistence::new(project.path().to_path_buf());
// Capture whether the .db file existed BEFORE we open it —
// sqlite creates it on connect, so this is the only honest
// signal that we need to rebuild from text (ADR-0015 §7).
let db_existed = db_path.exists();
let database = Database::open_with_persistence(db_path.as_path(), persistence) let database = Database::open_with_persistence(db_path.as_path(), persistence)
.context("open database")?; .context("open database")?;
if !db_existed
&& let Err(e) = database.rebuild_from_text(project.path().to_path_buf()).await
{
// The terminal is still in cooked mode here (we haven't
// entered the alternate screen yet), so writing to
// stderr lands directly in the user's shell. Drop the
// project to release the lock first.
drop(project);
if matches!(
e,
DbError::PersistenceFatal { .. } | DbError::RebuildRowFailed { .. }
) {
eprintln!("rdbms-playground: {}", e.friendly_message());
return Ok(());
}
return Err(anyhow::anyhow!(e.friendly_message())).context("rebuild from text");
}
let mut terminal = setup_terminal().context("setup terminal")?; let mut terminal = setup_terminal().context("setup terminal")?;
let result = run_loop(&mut terminal, args.theme, database, display_name).await; let result = run_loop(&mut terminal, args.theme, database, display_name).await;
+393
View File
@@ -0,0 +1,393 @@
//! Iteration-3 integration tests: rebuild from text on a
//! missing `.db` (ADR-0015 §7).
//!
//! These tests:
//!
//! 1. Build a populated project via Iteration 2's write-through
//! path so YAML and CSVs end up on disk.
//! 2. Delete `playground.db`.
//! 3. Re-open the project and call `rebuild_from_text`.
//! 4. Verify the schema, relationships, and row data round-trip.
use std::fs;
use rdbms_playground::db::Database;
use rdbms_playground::dsl::{ColumnSpec, ReferentialAction, Type, Value};
use rdbms_playground::persistence::Persistence;
use rdbms_playground::project::{self, PLAYGROUND_DB};
fn tempdir() -> tempfile::TempDir {
tempfile::tempdir().expect("create tempdir")
}
fn rt() -> tokio::runtime::Runtime {
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.expect("tokio rt")
}
#[test]
fn rebuild_restores_schema_only_project() {
let data = tempdir();
// Phase 1: populate via write-through.
let project_path = {
let project = project::open_or_create(None, Some(data.path())).unwrap();
let path = project.path().to_path_buf();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(path.clone()),
)
.unwrap();
rt().block_on(async {
db.create_table(
"Customers".to_string(),
vec![
ColumnSpec { name: "id".to_string(), ty: Type::Serial },
ColumnSpec { name: "Name".to_string(), ty: Type::Text },
],
vec!["id".to_string()],
Some("create table Customers with pk id:serial".to_string()),
)
.await
.unwrap();
});
drop(db);
drop(project);
path
};
// Phase 2: delete the .db so the next open triggers rebuild.
fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap();
// Phase 3: reopen and rebuild.
let project = project::Project::open(&project_path).unwrap();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(project.path().to_path_buf()),
)
.unwrap();
rt().block_on(async {
db.rebuild_from_text(project.path().to_path_buf())
.await
.expect("rebuild");
});
// Phase 4: confirm Customers exists with the right shape.
let desc = rt()
.block_on(async { db.describe_table("Customers".to_string(), None).await })
.expect("describe_table");
assert_eq!(desc.name, "Customers");
let cols: Vec<&str> = desc.columns.iter().map(|c| c.name.as_str()).collect();
assert_eq!(cols, vec!["id", "Name"]);
}
#[test]
fn rebuild_restores_rows_from_csv() {
let data = tempdir();
let project_path = {
let project = project::open_or_create(None, Some(data.path())).unwrap();
let path = project.path().to_path_buf();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(path.clone()),
)
.unwrap();
rt().block_on(async {
db.create_table(
"Customers".to_string(),
vec![
ColumnSpec { name: "id".to_string(), ty: Type::Serial },
ColumnSpec { name: "Name".to_string(), ty: Type::Text },
],
vec!["id".to_string()],
Some("create".to_string()),
)
.await
.unwrap();
db.insert(
"Customers".to_string(),
None,
vec![Value::Text("Alice".to_string())],
Some("insert".to_string()),
)
.await
.unwrap();
db.insert(
"Customers".to_string(),
None,
vec![Value::Text("Bob".to_string())],
Some("insert".to_string()),
)
.await
.unwrap();
});
drop(db);
drop(project);
path
};
fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap();
let project = project::Project::open(&project_path).unwrap();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(project.path().to_path_buf()),
)
.unwrap();
rt().block_on(async {
db.rebuild_from_text(project.path().to_path_buf())
.await
.expect("rebuild");
});
let rows = rt()
.block_on(async { db.query_data("Customers".to_string(), None).await })
.expect("query_data");
assert_eq!(rows.rows.len(), 2);
let names: Vec<Option<String>> = rows.rows.iter().map(|r| r[1].clone()).collect();
assert_eq!(names[0].as_deref(), Some("Alice"));
assert_eq!(names[1].as_deref(), Some("Bob"));
}
#[test]
fn rebuild_restores_relationships_and_cascade_behaviour() {
let data = tempdir();
let project_path = {
let project = project::open_or_create(None, Some(data.path())).unwrap();
let path = project.path().to_path_buf();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(path.clone()),
)
.unwrap();
rt().block_on(async {
db.create_table(
"Customers".to_string(),
vec![ColumnSpec { name: "id".to_string(), ty: Type::Serial }],
vec!["id".to_string()],
Some("create".to_string()),
)
.await
.unwrap();
db.create_table(
"Orders".to_string(),
vec![
ColumnSpec { name: "id".to_string(), ty: Type::Serial },
ColumnSpec { name: "CustId".to_string(), ty: Type::Int },
],
vec!["id".to_string()],
Some("create".to_string()),
)
.await
.unwrap();
db.add_relationship(
None,
"Customers".to_string(),
"id".to_string(),
"Orders".to_string(),
"CustId".to_string(),
ReferentialAction::Cascade,
ReferentialAction::NoAction,
false,
Some("rel".to_string()),
)
.await
.unwrap();
db.insert(
"Customers".to_string(),
Some(vec!["id".to_string()]),
vec![Value::Number("1".to_string())],
Some("insert".to_string()),
)
.await
.unwrap();
db.insert(
"Orders".to_string(),
Some(vec!["CustId".to_string()]),
vec![Value::Number("1".to_string())],
Some("insert".to_string()),
)
.await
.unwrap();
});
drop(db);
drop(project);
path
};
fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap();
let project = project::Project::open(&project_path).unwrap();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(project.path().to_path_buf()),
)
.unwrap();
rt().block_on(async {
db.rebuild_from_text(project.path().to_path_buf())
.await
.expect("rebuild");
});
// Relationship is back: cascade-delete from Customers
// should also clean Orders.
let result = rt()
.block_on(async {
db.delete(
"Customers".to_string(),
rdbms_playground::dsl::RowFilter::AllRows,
Some("delete".to_string()),
)
.await
})
.expect("delete");
assert_eq!(result.rows_affected, 1);
assert_eq!(result.cascade.len(), 1, "expected one cascade entry: {result:?}");
assert_eq!(result.cascade[0].child_table, "Orders");
}
#[test]
fn rebuild_reports_fatal_error_on_bad_csv_row() {
let data = tempdir();
// Create a project, populate, then corrupt the CSV.
let project_path = {
let project = project::open_or_create(None, Some(data.path())).unwrap();
let path = project.path().to_path_buf();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(path.clone()),
)
.unwrap();
rt().block_on(async {
db.create_table(
"Numbers".to_string(),
vec![
ColumnSpec { name: "id".to_string(), ty: Type::Serial },
ColumnSpec { name: "n".to_string(), ty: Type::Int },
],
vec!["id".to_string()],
Some("create".to_string()),
)
.await
.unwrap();
db.insert(
"Numbers".to_string(),
Some(vec!["n".to_string()]),
vec![Value::Number("1".to_string())],
Some("insert".to_string()),
)
.await
.unwrap();
});
drop(db);
drop(project);
path
};
// Hand-corrupt the CSV: replace the int with a non-number.
let csv_path = project_path.join("data").join("Numbers.csv");
let body = fs::read_to_string(&csv_path).unwrap();
let corrupt = body.replace(",1\n", ",not-a-number\n");
fs::write(&csv_path, corrupt).unwrap();
fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap();
let project = project::Project::open(&project_path).unwrap();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(project.path().to_path_buf()),
)
.unwrap();
let err = rt()
.block_on(async {
db.rebuild_from_text(project.path().to_path_buf()).await
})
.expect_err("must fail with row-level error");
let msg = format!("{err}");
assert!(msg.contains("row 2"), "msg should name the row: {msg}");
assert!(msg.contains("Numbers"), "msg should name the table: {msg}");
assert!(msg.contains("integer"), "msg should explain the type mismatch: {msg}");
}
#[test]
fn rebuild_preserves_created_at_from_yaml() {
let data = tempdir();
let project_path = {
let project = project::open_or_create(None, Some(data.path())).unwrap();
let path = project.path().to_path_buf();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(path.clone()),
)
.unwrap();
rt().block_on(async {
db.create_table(
"T".to_string(),
vec![ColumnSpec { name: "id".to_string(), ty: Type::Serial }],
vec!["id".to_string()],
Some("create".to_string()),
)
.await
.unwrap();
});
drop(db);
drop(project);
path
};
// Substitute a recognizable timestamp into project.yaml.
let yaml_path = project_path.join("project.yaml");
let body = fs::read_to_string(&yaml_path).unwrap();
let edited = body
.lines()
.map(|l| {
if l.trim_start().starts_with("created_at:") {
" created_at: 2020-01-02T03:04:05Z".to_string()
} else {
l.to_string()
}
})
.collect::<Vec<_>>()
.join("\n");
fs::write(&yaml_path, format!("{edited}\n")).unwrap();
// Delete the .db, rebuild from text.
fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap();
let project = project::Project::open(&project_path).unwrap();
let db = Database::open_with_persistence(
project.db_path(),
Persistence::new(project.path().to_path_buf()),
)
.unwrap();
rt().block_on(async {
db.rebuild_from_text(project.path().to_path_buf())
.await
.expect("rebuild");
});
// Trigger any successful command so project.yaml is
// rewritten from the now-rebuilt db state.
rt().block_on(async {
db.describe_table("T".to_string(), Some("show table T".to_string()))
.await
.unwrap();
// describe is read-only; force a rewrite by adding a column.
db.add_column(
"T".to_string(),
"Note".to_string(),
Type::Text,
Some("add column".to_string()),
)
.await
.unwrap();
});
let final_yaml = fs::read_to_string(&yaml_path).unwrap();
assert!(
final_yaml.contains("created_at: 2020-01-02T03:04:05Z"),
"yaml should preserve the edited created_at:\n{final_yaml}",
);
}