diff --git a/Cargo.lock b/Cargo.lock index 52634be..ee98f6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -796,6 +796,16 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "libyml" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3302702afa434ffa30847a83305f0a69d6abd74293b6554c18ec85c7ef30c980" +dependencies = [ + "anyhow", + "version_check", +] + [[package]] name = "line-clipping" version = "0.3.7" @@ -1360,6 +1370,8 @@ dependencies = [ "rand 0.10.1", "ratatui", "rusqlite", + "serde", + "serde_yml", "sysinfo", "tempfile", "thiserror 2.0.18", @@ -1531,6 +1543,21 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_yml" +version = "0.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" +dependencies = [ + "indexmap", + "itoa", + "libyml", + "memchr", + "ryu", + "serde", + "version_check", +] + [[package]] name = "sha2" version = "0.10.9" diff --git a/Cargo.toml b/Cargo.toml index 402e63c..ecb5730 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,8 @@ gethostname = "1.1.0" rand = "0.10.1" ratatui = "0.30.0" rusqlite = { version = "0.39.0", features = ["bundled"] } +serde = { version = "1.0.228", features = ["derive"] } +serde_yml = "0.0.12" sysinfo = { version = "0.39.0", default-features = false, features = ["system"] } thiserror = "2.0.18" tokio = { version = "1.52.2", features = ["full"] } diff --git a/src/db.rs b/src/db.rs index 28013e2..2faed94 100644 --- a/src/db.rs +++ b/src/db.rs @@ -39,8 +39,9 @@ use crate::dsl::types::Type; use crate::dsl::value::{Bound, Value, ValueError}; use crate::persistence::{ CellValue, ColumnSchema, Persistence, PersistenceError, RelationshipSchema, SchemaSnapshot, - TableSchema, TableSnapshot, + TableSchema, TableSnapshot, decode_cell, parse_csv, parse_schema, }; +use crate::project::{DATA_DIR, PROJECT_YAML}; /// Inbox capacity. The worker is fast enough that this rarely /// matters; `64` is a generous head-room for bursts. @@ -117,6 +118,16 @@ pub enum DbError { path: std::path::PathBuf, message: String, }, + #[error( + "unable to load row {row_number} from `{}` into table `{table}`: {detail}", + csv_path.display() + )] + RebuildRowFailed { + table: String, + csv_path: std::path::PathBuf, + row_number: usize, + detail: String, + }, #[error("database worker is no longer available")] WorkerGone, #[error("io error: {0}")] @@ -213,7 +224,7 @@ impl DbError { /// surfaces these as fatal banners. #[must_use] pub const fn is_fatal(&self) -> bool { - matches!(self, Self::PersistenceFatal { .. }) + matches!(self, Self::PersistenceFatal { .. } | Self::RebuildRowFailed { .. }) } } @@ -309,6 +320,14 @@ enum Request { source: Option, reply: oneshot::Sender>, }, + /// Rebuild the database from `project.yaml` + `data/` + /// (ADR-0015 §7). Used by the runtime when the `.db` file + /// is missing on project open. Iteration 4's `rebuild` + /// app-level command will reuse the same request. + RebuildFromText { + project_path: std::path::PathBuf, + reply: oneshot::Sender>, + }, } impl Database { @@ -523,6 +542,23 @@ impl Database { recv.await.map_err(|_| DbError::WorkerGone)? } + /// Rebuild the database from `project.yaml` + `data/` + /// (ADR-0015 §7). Called by the runtime on a missing `.db` + /// at startup; Iteration 4 will also expose this via the + /// `rebuild` app-level command. + pub async fn rebuild_from_text( + &self, + project_path: std::path::PathBuf, + ) -> Result<(), DbError> { + let (reply, recv) = oneshot::channel(); + self.send(Request::RebuildFromText { + project_path, + reply, + }) + .await?; + recv.await.map_err(|_| DbError::WorkerGone)? + } + pub async fn query_data( &self, table: String, @@ -785,6 +821,9 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req &table, )); } + Request::RebuildFromText { project_path, reply } => { + let _ = reply.send(do_rebuild_from_text(conn, &project_path)); + } } } @@ -2393,6 +2432,278 @@ fn read_relationships_inbound( Ok(out) } +/// Rebuild the database from `project.yaml` + `data/.csv` +/// (ADR-0015 §7). +/// +/// The on-disk text is the authoritative source: this function +/// recreates schema, metadata, and rows so the resulting `.db` +/// reflects them exactly. Persistence callbacks are NOT invoked; +/// we're loading, not changing user-visible state. +/// +/// FK enforcement is disabled for the load and re-enabled at +/// the end (regardless of success). A `foreign_key_check` +/// before commit verifies the loaded data is consistent — any +/// violation aborts with a fatal error. +fn do_rebuild_from_text(conn: &Connection, project_path: &Path) -> Result<(), DbError> { + let yaml_path = project_path.join(PROJECT_YAML); + let data_dir = project_path.join(DATA_DIR); + + let yaml_body = + std::fs::read_to_string(&yaml_path).map_err(|e| DbError::PersistenceFatal { + operation: "read", + path: yaml_path.clone(), + message: e.to_string(), + })?; + let snapshot = parse_schema(&yaml_body).map_err(|e| DbError::PersistenceFatal { + operation: "parse", + path: yaml_path.clone(), + message: e.to_string(), + })?; + + conn.execute_batch("PRAGMA foreign_keys = OFF;") + .map_err(DbError::from_rusqlite)?; + + let result = (|| -> Result<(), DbError> { + let tx = conn + .unchecked_transaction() + .map_err(DbError::from_rusqlite)?; + + // 1. Recreate user tables with FK constraints inline. + for table in &snapshot.tables { + let read_schema = build_read_schema(table, &snapshot.relationships); + let ddl = schema_to_ddl(&table.name, &read_schema); + tx.execute_batch(&ddl).map_err(DbError::from_rusqlite)?; + } + + // 2. Column-type metadata. + { + let mut stmt = tx + .prepare(&format!( + "INSERT INTO {META_TABLE} (table_name, column_name, user_type) \ + VALUES (?1, ?2, ?3);" + )) + .map_err(DbError::from_rusqlite)?; + for table in &snapshot.tables { + for col in &table.columns { + stmt.execute([ + table.name.as_str(), + col.name.as_str(), + col.user_type.keyword(), + ]) + .map_err(DbError::from_rusqlite)?; + } + } + } + + // 3. Relationship metadata. + { + let mut stmt = tx + .prepare(&format!( + "INSERT INTO {REL_TABLE} \ + (name, parent_table, parent_column, child_table, child_column, \ + on_delete, on_update) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);" + )) + .map_err(DbError::from_rusqlite)?; + for rel in &snapshot.relationships { + stmt.execute([ + rel.name.as_str(), + rel.parent_table.as_str(), + rel.parent_column.as_str(), + rel.child_table.as_str(), + rel.child_column.as_str(), + rel.on_delete.keyword(), + rel.on_update.keyword(), + ]) + .map_err(DbError::from_rusqlite)?; + } + } + + // 4. Project metadata: overwrite the configure-time + // `created_at` with the YAML's authoritative value. + tx.execute( + &format!( + "INSERT INTO {META_PROJECT_TABLE} (key, value) VALUES ('created_at', ?1) \ + ON CONFLICT(key) DO UPDATE SET value = excluded.value;" + ), + [snapshot.created_at.as_str()], + ) + .map_err(DbError::from_rusqlite)?; + + // 5. Load each table's rows (if a CSV is present). + for table in &snapshot.tables { + let csv_path = data_dir.join(format!("{}.csv", table.name)); + if !csv_path.exists() { + continue; + } + load_table_csv(&tx, table, &csv_path)?; + } + + // 6. Verify FK consistency before committing. + { + let mut check = tx + .prepare("PRAGMA foreign_key_check;") + .map_err(DbError::from_rusqlite)?; + let mut rows = check.query([]).map_err(DbError::from_rusqlite)?; + if rows.next().map_err(DbError::from_rusqlite)?.is_some() { + return Err(DbError::PersistenceFatal { + operation: "rebuild", + path: yaml_path.clone(), + message: "rebuilt data violates foreign-key constraints".to_string(), + }); + } + } + + tx.commit().map_err(DbError::from_rusqlite)?; + Ok(()) + })(); + + let pragma_result = conn + .execute_batch("PRAGMA foreign_keys = ON;") + .map_err(DbError::from_rusqlite); + result.and(pragma_result) +} + +/// Build a `ReadSchema` for `table` that includes any +/// relationships from the snapshot in which `table` is the +/// child. The output drives `schema_to_ddl` so the resulting +/// CREATE TABLE has the FKs inline. +fn build_read_schema(table: &TableSchema, relationships: &[RelationshipSchema]) -> ReadSchema { + let columns: Vec = table + .columns + .iter() + .map(|c| ReadColumn { + name: c.name.clone(), + sqlite_type: c.user_type.sqlite_strict_type().to_string(), + notnull: false, + primary_key: table.primary_key.contains(&c.name), + user_type: Some(c.user_type), + }) + .collect(); + let foreign_keys: Vec = relationships + .iter() + .filter(|r| r.child_table == table.name) + .map(|r| ReadForeignKey { + parent_table: r.parent_table.clone(), + parent_column: r.parent_column.clone(), + child_column: r.child_column.clone(), + on_delete: r.on_delete, + on_update: r.on_update, + }) + .collect(); + ReadSchema { + columns, + primary_key: table.primary_key.clone(), + foreign_keys, + } +} + +/// Read `csv_path` and INSERT each row into `table.name`. +/// Failures are wrapped in `DbError::RebuildRowFailed` with +/// row number and table name per ADR-0015 §7. +fn load_table_csv( + tx: &rusqlite::Transaction<'_>, + table: &TableSchema, + csv_path: &Path, +) -> Result<(), DbError> { + let body = std::fs::read_to_string(csv_path).map_err(|e| DbError::PersistenceFatal { + operation: "read", + path: csv_path.to_path_buf(), + message: e.to_string(), + })?; + let parsed = parse_csv(&body).map_err(|e| DbError::PersistenceFatal { + operation: "parse", + path: csv_path.to_path_buf(), + message: e.to_string(), + })?; + + if parsed.rows.is_empty() { + return Ok(()); + } + + // Header sanity check: column names must match the YAML + // schema's column order. A mismatch is a hand-edit hazard; + // surfacing it as a fatal error is better than silently + // mis-aligning columns. + let expected: Vec<&str> = table.columns.iter().map(|c| c.name.as_str()).collect(); + let header_strs: Vec<&str> = parsed.header.iter().map(String::as_str).collect(); + if header_strs != expected { + return Err(DbError::PersistenceFatal { + operation: "validate", + path: csv_path.to_path_buf(), + message: format!( + "CSV header {:?} does not match table columns {:?}", + parsed.header, expected, + ), + }); + } + + let cols_csv = table + .columns + .iter() + .map(|c| quote_ident(&c.name)) + .collect::>() + .join(", "); + let placeholders = (1..=table.columns.len()) + .map(|i| format!("?{i}")) + .collect::>() + .join(", "); + let sql = format!( + "INSERT INTO {ident} ({cols_csv}) VALUES ({placeholders});", + ident = quote_ident(&table.name), + ); + let mut stmt = tx.prepare(&sql).map_err(DbError::from_rusqlite)?; + + for (idx, raw_row) in parsed.rows.iter().enumerate() { + // Row number reported as a 1-based file line: header + // is line 1, so the first data row is line 2. + let row_number = idx + 2; + if raw_row.len() != table.columns.len() { + return Err(DbError::RebuildRowFailed { + table: table.name.clone(), + csv_path: csv_path.to_path_buf(), + row_number, + detail: format!( + "row has {} field(s) but table has {} column(s)", + raw_row.len(), + table.columns.len(), + ), + }); + } + let mut params: Vec = Vec::with_capacity(raw_row.len()); + for (col, raw_cell) in table.columns.iter().zip(raw_row.iter()) { + let cell = decode_cell(col.user_type, raw_cell).map_err(|detail| { + DbError::RebuildRowFailed { + table: table.name.clone(), + csv_path: csv_path.to_path_buf(), + row_number, + detail: format!("column `{}`: {detail}", col.name), + } + })?; + params.push(cell_value_to_sqlite(&cell)); + } + stmt.execute(rusqlite::params_from_iter(params.iter())) + .map_err(|e| DbError::RebuildRowFailed { + table: table.name.clone(), + csv_path: csv_path.to_path_buf(), + row_number, + detail: e.to_string(), + })?; + } + Ok(()) +} + +fn cell_value_to_sqlite(cell: &CellValue) -> rusqlite::types::Value { + use rusqlite::types::Value; + match cell { + CellValue::Null => Value::Null, + CellValue::Integer(n) => Value::Integer(*n), + CellValue::Real(f) => Value::Real(*f), + CellValue::Text(s) => Value::Text(s.clone()), + CellValue::Blob(b) => Value::Blob(b.clone()), + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/persistence/csv_io.rs b/src/persistence/csv_io.rs index c336cca..3edfea6 100644 --- a/src/persistence/csv_io.rs +++ b/src/persistence/csv_io.rs @@ -16,6 +16,12 @@ //! quote, newline). We handle the empty-string-vs-null //! distinction manually by always quoting non-null empty //! Text and never quoting Null. +// +// `pub(crate)` items below are re-exported from +// `persistence::mod.rs`; the db worker reaches them via that +// path. Clippy's `redundant_pub_crate` lint flags this +// pattern, but it's load-bearing here. +#![allow(clippy::redundant_pub_crate)] use std::io::Write as _; @@ -172,6 +178,182 @@ fn format_real(f: f64) -> String { } } +/// Parsed CSV records: header row + zero or more data rows. +/// +/// Each cell records whether it was syntactically quoted in +/// the source — that's the bit we need to distinguish NULL +/// (empty unquoted) from `""` (empty quoted). The `csv` +/// crate doesn't expose this, which is why we hand-roll the +/// reader to pair with the hand-rolled writer above. +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct ParsedCsv { + pub header: Vec, + pub rows: Vec>, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub(crate) struct RawCell { + pub content: String, + pub was_quoted: bool, +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum CsvError { + #[error("CSV is empty")] + Empty, + #[error("invalid UTF-8 in CSV body")] + InvalidUtf8, + #[error("unterminated quoted field")] + UnterminatedQuote, +} + +/// Tokenize a CSV body. Returns the header (column names from +/// the first record) and the data rows. Each cell preserves a +/// `was_quoted` flag so the caller can distinguish an empty +/// unquoted field (NULL) from an empty quoted field (`""`). +pub(crate) fn parse_csv(body: &str) -> Result { + let mut records: Vec> = Vec::new(); + let mut current: Vec = Vec::new(); + let bytes = body.as_bytes(); + let mut i = 0; + let n = bytes.len(); + + while i < n { + let (cell, advance) = parse_field(&bytes[i..])?; + i += advance; + current.push(cell); + match bytes.get(i) { + Some(&b',') => i += 1, + Some(&b'\n') => { + i += 1; + records.push(std::mem::take(&mut current)); + } + Some(&b'\r') => { + i += 1; + if bytes.get(i) == Some(&b'\n') { + i += 1; + } + records.push(std::mem::take(&mut current)); + } + None => { + records.push(std::mem::take(&mut current)); + } + Some(&other) => { + // A non-structural byte after a quoted field — + // shouldn't happen with our well-formed writer. + // Treat as part of an unquoted continuation by + // appending to the last cell. We choose to + // tolerate rather than error since the most + // common cause is a trailing space, which we + // can roll into the cell. + let last = current + .last_mut() + .or_else(|| records.last_mut().and_then(|r| r.last_mut())); + if let Some(c) = last { + c.content.push(other as char); + } + i += 1; + } + } + } + if !current.is_empty() { + records.push(current); + } + + if records.is_empty() { + return Err(CsvError::Empty); + } + let header_record = records.remove(0); + let header: Vec = header_record.into_iter().map(|c| c.content).collect(); + Ok(ParsedCsv { + header, + rows: records, + }) +} + +fn parse_field(bytes: &[u8]) -> Result<(RawCell, usize), CsvError> { + if bytes.first() == Some(&b'"') { + let mut content_bytes: Vec = Vec::new(); + let mut i = 1; + while i < bytes.len() { + match bytes[i] { + b'"' => { + if bytes.get(i + 1) == Some(&b'"') { + content_bytes.push(b'"'); + i += 2; + } else { + let content = + String::from_utf8(content_bytes).map_err(|_| CsvError::InvalidUtf8)?; + return Ok(( + RawCell { + content, + was_quoted: true, + }, + i + 1, + )); + } + } + other => { + content_bytes.push(other); + i += 1; + } + } + } + Err(CsvError::UnterminatedQuote) + } else { + let mut i = 0; + while i < bytes.len() { + match bytes[i] { + b',' | b'\n' | b'\r' => break, + _ => i += 1, + } + } + let content = + String::from_utf8(bytes[..i].to_vec()).map_err(|_| CsvError::InvalidUtf8)?; + Ok(( + RawCell { + content, + was_quoted: false, + }, + i, + )) + } +} + +/// Decode one parsed cell into a `CellValue` per the column's +/// declared type. Returns an error string the caller can +/// embed in a fatal banner per ADR-0015 §7 ("unable to load +/// row N from data/T.csv into table T: …"). +pub(crate) fn decode_cell(ty: Type, cell: &RawCell) -> Result { + if !cell.was_quoted && cell.content.is_empty() { + return Ok(CellValue::Null); + } + match ty { + Type::Text | Type::Date | Type::DateTime | Type::Decimal | Type::ShortId => { + Ok(CellValue::Text(cell.content.clone())) + } + Type::Int | Type::Serial => cell + .content + .parse::() + .map(CellValue::Integer) + .map_err(|_| format!("expected an integer, got `{}`", cell.content)), + Type::Real => cell + .content + .parse::() + .map(CellValue::Real) + .map_err(|_| format!("expected a real number, got `{}`", cell.content)), + Type::Bool => match cell.content.as_str() { + "true" => Ok(CellValue::Integer(1)), + "false" => Ok(CellValue::Integer(0)), + other => Err(format!("expected `true` or `false`, got `{other}`")), + }, + Type::Blob => base64::engine::general_purpose::STANDARD + .decode(cell.content.as_bytes()) + .map(CellValue::Blob) + .map_err(|e| format!("invalid base64 blob: {e}")), + } +} + #[cfg(test)] mod tests { use super::*; @@ -290,6 +472,95 @@ mod tests { assert!(s.contains("2026-05-07,2026-05-07T14:30:12Z")); } + #[test] + fn parse_round_trips_simple_table() { + let table = TableSnapshot { + name: "Customers".to_string(), + columns: vec![col("id", Type::Serial), col("Name", Type::Text)], + rows: vec![ + vec![CellValue::Integer(1), CellValue::Text("Alice".to_string())], + vec![CellValue::Integer(2), CellValue::Text("Bob".to_string())], + ], + }; + let body = serialize_table(&table).unwrap(); + let parsed = parse_csv(std::str::from_utf8(&body).unwrap()).unwrap(); + assert_eq!(parsed.header, vec!["id", "Name"]); + assert_eq!(parsed.rows.len(), 2); + assert_eq!(parsed.rows[0][0].content, "1"); + assert_eq!(parsed.rows[0][1].content, "Alice"); + assert_eq!(parsed.rows[1][1].content, "Bob"); + } + + #[test] + fn parse_distinguishes_null_from_empty_string() { + // Header "Name", then two rows: NULL (empty unquoted) + // and "" (empty quoted). + let body = "Name\n\n\"\"\n"; + let parsed = parse_csv(body).unwrap(); + assert_eq!(parsed.rows.len(), 2); + assert!(!parsed.rows[0][0].was_quoted); + assert_eq!(parsed.rows[0][0].content, ""); + assert!(parsed.rows[1][0].was_quoted); + assert_eq!(parsed.rows[1][0].content, ""); + + let null = decode_cell(Type::Text, &parsed.rows[0][0]).unwrap(); + let empty = decode_cell(Type::Text, &parsed.rows[1][0]).unwrap(); + assert!(matches!(null, CellValue::Null)); + assert!(matches!(empty, CellValue::Text(s) if s.is_empty())); + } + + #[test] + fn parse_handles_rfc4180_escapes() { + let body = "Name\n\"hello, world\"\n\"she said \"\"hi\"\"\"\n"; + let parsed = parse_csv(body).unwrap(); + assert_eq!(parsed.rows[0][0].content, "hello, world"); + assert_eq!(parsed.rows[1][0].content, "she said \"hi\""); + } + + #[test] + fn parse_decodes_per_type() { + // Rows match the round-trip produced by serialize_table. + let table = TableSnapshot { + name: "T".to_string(), + columns: vec![ + col("n", Type::Int), + col("r", Type::Real), + col("b", Type::Bool), + col("blob", Type::Blob), + ], + rows: vec![vec![ + CellValue::Integer(42), + CellValue::Real(std::f64::consts::PI), + CellValue::Integer(1), + CellValue::Blob(b"hi".to_vec()), + ]], + }; + let body = serialize_table(&table).unwrap(); + let parsed = parse_csv(std::str::from_utf8(&body).unwrap()).unwrap(); + let row = &parsed.rows[0]; + assert!(matches!(decode_cell(Type::Int, &row[0]).unwrap(), CellValue::Integer(42))); + match decode_cell(Type::Real, &row[1]).unwrap() { + CellValue::Real(f) => assert!((f - std::f64::consts::PI).abs() < 1e-12), + other => panic!("got {other:?}"), + } + assert!(matches!(decode_cell(Type::Bool, &row[2]).unwrap(), CellValue::Integer(1))); + assert!(matches!(decode_cell(Type::Blob, &row[3]).unwrap(), CellValue::Blob(b) if b == b"hi")); + } + + #[test] + fn parse_rejects_unterminated_quotes() { + let err = parse_csv("Name\n\"oops").expect_err("must error"); + assert!(matches!(err, CsvError::UnterminatedQuote)); + } + + #[test] + fn decode_cell_reports_friendly_error_for_bad_int() { + let cell = RawCell { content: "abc".to_string(), was_quoted: false }; + let err = decode_cell(Type::Int, &cell).expect_err("must error"); + assert!(err.contains("integer")); + assert!(err.contains("abc")); + } + #[test] fn row_width_mismatch_errors() { let err = serialize_table(&TableSnapshot { diff --git a/src/persistence/mod.rs b/src/persistence/mod.rs index 40a2c6c..f745f8f 100644 --- a/src/persistence/mod.rs +++ b/src/persistence/mod.rs @@ -22,10 +22,15 @@ use crate::dsl::action::ReferentialAction; use crate::dsl::types::Type; use crate::project::{DATA_DIR, HISTORY_LOG, PROJECT_YAML}; +// Submodules are private; the few items the db worker needs +// during rebuild (ADR-0015 §7) are re-exported below. mod csv_io; mod history; mod yaml; +pub(crate) use csv_io::{decode_cell, parse_csv}; +pub(crate) use yaml::parse_schema; + /// Owns persistence to a single project on disk. Cheap to /// move; the db worker holds one instance for its lifetime. #[derive(Debug, Clone)] diff --git a/src/persistence/yaml.rs b/src/persistence/yaml.rs index 12b62c5..9b9c256 100644 --- a/src/persistence/yaml.rs +++ b/src/persistence/yaml.rs @@ -1,16 +1,27 @@ -//! Hand-rolled `project.yaml` writer (ADR-0015 §3). +//! `project.yaml` writer (hand-rolled, ADR-0015 §3) and +//! reader (`serde_yml`, ADR-0015 §7). //! //! The schema YAML uses a small, fixed set of structures — //! tables, columns, relationships — and the values it carries //! are all known-safe (identifiers from the DSL, types from //! the fixed `Type` enum, action names from `ReferentialAction`). //! Hand-rolling the writer avoids pulling a YAML serializer -//! dep just for this file. The reader (Iteration 3) will use -//! a real YAML parser. +//! dep just for the write path; the read path uses +//! `serde_yml` because we need to handle whatever the user +//! (or a future migrator, or a hand-edit) puts in there. +// +// `pub(crate)` items in this private submodule are +// re-exported from `persistence::mod.rs`; that path is what +// the db worker uses. Clippy's `redundant_pub_crate` lint +// flags this pattern, but it's load-bearing here. +#![allow(clippy::redundant_pub_crate)] use std::fmt::Write as _; +use serde::Deserialize; + use crate::dsl::action::ReferentialAction; +use crate::dsl::types::Type; use super::{ColumnSchema, RelationshipSchema, SchemaSnapshot, TableSchema}; @@ -144,6 +155,133 @@ const fn is_safe_yaml_char(c: char) -> bool { c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.' | ':') } +/// Parse a `project.yaml` body into a `SchemaSnapshot`. +/// +/// The wire types below mirror the format `serialize_schema` +/// emits. Anything outside that shape produces a structured +/// error — callers (the rebuild path) translate those into a +/// fatal banner per ADR-0015 §8. +pub(crate) fn parse_schema(body: &str) -> Result { + let raw: RawProject = + serde_yml::from_str(body).map_err(|e| YamlError::Syntax(e.to_string()))?; + if raw.version != 1 { + return Err(YamlError::UnsupportedVersion(raw.version)); + } + let mut tables: Vec = Vec::with_capacity(raw.tables.len()); + for t in raw.tables { + let mut columns: Vec = Vec::with_capacity(t.columns.len()); + for c in t.columns { + let user_type = c.user_type.parse::().map_err(|_| { + YamlError::UnknownType { + table: t.name.clone(), + column: c.name.clone(), + raw: c.user_type.clone(), + } + })?; + columns.push(ColumnSchema { + name: c.name, + user_type, + }); + } + tables.push(TableSchema { + name: t.name, + primary_key: t.primary_key, + columns, + }); + } + let mut relationships: Vec = Vec::with_capacity(raw.relationships.len()); + for r in raw.relationships { + let on_delete = parse_action(&r.on_delete) + .ok_or_else(|| YamlError::UnknownAction(r.on_delete.clone()))?; + let on_update = parse_action(&r.on_update) + .ok_or_else(|| YamlError::UnknownAction(r.on_update.clone()))?; + relationships.push(RelationshipSchema { + name: r.name, + parent_table: r.parent.table, + parent_column: r.parent.column, + child_table: r.child.table, + child_column: r.child.column, + on_delete, + on_update, + }); + } + Ok(SchemaSnapshot { + created_at: raw.project.created_at, + tables, + relationships, + }) +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum YamlError { + #[error("project.yaml syntax error: {0}")] + Syntax(String), + #[error("unsupported project.yaml version: {0} (expected 1)")] + UnsupportedVersion(u32), + #[error("unknown user-facing column type `{raw}` for `{table}.{column}`")] + UnknownType { + table: String, + column: String, + raw: String, + }, + #[error("unknown referential action `{0}`")] + UnknownAction(String), +} + +fn parse_action(s: &str) -> Option { + match s { + "no_action" => Some(ReferentialAction::NoAction), + "restrict" => Some(ReferentialAction::Restrict), + "set_null" => Some(ReferentialAction::SetNull), + "cascade" => Some(ReferentialAction::Cascade), + _ => None, + } +} + +#[derive(Deserialize)] +struct RawProject { + version: u32, + project: RawProjectMeta, + #[serde(default)] + tables: Vec, + #[serde(default)] + relationships: Vec, +} + +#[derive(Deserialize)] +struct RawProjectMeta { + created_at: String, +} + +#[derive(Deserialize)] +struct RawTable { + name: String, + primary_key: Vec, + columns: Vec, +} + +#[derive(Deserialize)] +struct RawColumn { + name: String, + #[serde(rename = "type")] + user_type: String, +} + +#[derive(Deserialize)] +struct RawRelationship { + name: String, + parent: RawEndpoint, + child: RawEndpoint, + on_delete: String, + on_update: String, +} + +#[derive(Deserialize)] +struct RawEndpoint { + table: String, + column: String, +} + #[cfg(test)] mod tests { use super::*; @@ -235,6 +373,77 @@ mod tests { assert_eq!(quote_if_needed("with\"quote"), "\"with\\\"quote\""); } + #[test] + fn write_then_read_round_trips() { + let original = snapshot(); + let body = serialize_schema(&original); + let parsed = parse_schema(&body).expect("parse schema"); + assert_eq!(parsed, original); + } + + #[test] + fn parses_minimal_yaml_with_no_tables() { + let body = "\ +version: 1 +project: + created_at: 2026-05-07T14:30:12Z +tables: [] +relationships: [] +"; + let parsed = parse_schema(body).expect("parse minimal"); + assert_eq!(parsed.tables.len(), 0); + assert_eq!(parsed.relationships.len(), 0); + assert_eq!(parsed.created_at, "2026-05-07T14:30:12Z"); + } + + #[test] + fn rejects_unknown_version() { + let body = "version: 9\nproject:\n created_at: x\ntables: []\nrelationships: []\n"; + match parse_schema(body) { + Err(YamlError::UnsupportedVersion(9)) => {} + other => panic!("expected UnsupportedVersion(9), got {other:?}"), + } + } + + #[test] + fn rejects_unknown_column_type() { + let body = "\ +version: 1 +project: + created_at: x +tables: + - name: T + primary_key: [id] + columns: + - { name: id, type: bogus } +relationships: [] +"; + match parse_schema(body) { + Err(YamlError::UnknownType { raw, .. }) => assert_eq!(raw, "bogus"), + other => panic!("expected UnknownType, got {other:?}"), + } + } + + #[test] + fn rejects_unknown_action() { + let body = "\ +version: 1 +project: + created_at: x +tables: [] +relationships: + - name: R + parent: { table: A, column: id } + child: { table: B, column: aid } + on_delete: blow_up + on_update: no_action +"; + match parse_schema(body) { + Err(YamlError::UnknownAction(s)) => assert_eq!(s, "blow_up"), + other => panic!("expected UnknownAction, got {other:?}"), + } + } + #[test] fn preserves_compound_primary_key_order() { let body = serialize_schema(&SchemaSnapshot { diff --git a/src/runtime.rs b/src/runtime.rs index fe6195b..e298118 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -47,8 +47,29 @@ pub async fn run(args: Args) -> Result<()> { let db_path = project.db_path(); let display_name = project.display_name().to_string(); let persistence = crate::persistence::Persistence::new(project.path().to_path_buf()); + // Capture whether the .db file existed BEFORE we open it — + // sqlite creates it on connect, so this is the only honest + // signal that we need to rebuild from text (ADR-0015 §7). + let db_existed = db_path.exists(); let database = Database::open_with_persistence(db_path.as_path(), persistence) .context("open database")?; + if !db_existed + && let Err(e) = database.rebuild_from_text(project.path().to_path_buf()).await + { + // The terminal is still in cooked mode here (we haven't + // entered the alternate screen yet), so writing to + // stderr lands directly in the user's shell. Drop the + // project to release the lock first. + drop(project); + if matches!( + e, + DbError::PersistenceFatal { .. } | DbError::RebuildRowFailed { .. } + ) { + eprintln!("rdbms-playground: {}", e.friendly_message()); + return Ok(()); + } + return Err(anyhow::anyhow!(e.friendly_message())).context("rebuild from text"); + } let mut terminal = setup_terminal().context("setup terminal")?; let result = run_loop(&mut terminal, args.theme, database, display_name).await; diff --git a/tests/iteration3_rebuild.rs b/tests/iteration3_rebuild.rs new file mode 100644 index 0000000..20fd228 --- /dev/null +++ b/tests/iteration3_rebuild.rs @@ -0,0 +1,393 @@ +//! Iteration-3 integration tests: rebuild from text on a +//! missing `.db` (ADR-0015 §7). +//! +//! These tests: +//! +//! 1. Build a populated project via Iteration 2's write-through +//! path so YAML and CSVs end up on disk. +//! 2. Delete `playground.db`. +//! 3. Re-open the project and call `rebuild_from_text`. +//! 4. Verify the schema, relationships, and row data round-trip. + +use std::fs; + +use rdbms_playground::db::Database; +use rdbms_playground::dsl::{ColumnSpec, ReferentialAction, Type, Value}; +use rdbms_playground::persistence::Persistence; +use rdbms_playground::project::{self, PLAYGROUND_DB}; + +fn tempdir() -> tempfile::TempDir { + tempfile::tempdir().expect("create tempdir") +} + +fn rt() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio rt") +} + +#[test] +fn rebuild_restores_schema_only_project() { + let data = tempdir(); + + // Phase 1: populate via write-through. + let project_path = { + let project = project::open_or_create(None, Some(data.path())).unwrap(); + let path = project.path().to_path_buf(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(path.clone()), + ) + .unwrap(); + rt().block_on(async { + db.create_table( + "Customers".to_string(), + vec![ + ColumnSpec { name: "id".to_string(), ty: Type::Serial }, + ColumnSpec { name: "Name".to_string(), ty: Type::Text }, + ], + vec!["id".to_string()], + Some("create table Customers with pk id:serial".to_string()), + ) + .await + .unwrap(); + }); + drop(db); + drop(project); + path + }; + + // Phase 2: delete the .db so the next open triggers rebuild. + fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap(); + + // Phase 3: reopen and rebuild. + let project = project::Project::open(&project_path).unwrap(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(project.path().to_path_buf()), + ) + .unwrap(); + rt().block_on(async { + db.rebuild_from_text(project.path().to_path_buf()) + .await + .expect("rebuild"); + }); + + // Phase 4: confirm Customers exists with the right shape. + let desc = rt() + .block_on(async { db.describe_table("Customers".to_string(), None).await }) + .expect("describe_table"); + assert_eq!(desc.name, "Customers"); + let cols: Vec<&str> = desc.columns.iter().map(|c| c.name.as_str()).collect(); + assert_eq!(cols, vec!["id", "Name"]); +} + +#[test] +fn rebuild_restores_rows_from_csv() { + let data = tempdir(); + let project_path = { + let project = project::open_or_create(None, Some(data.path())).unwrap(); + let path = project.path().to_path_buf(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(path.clone()), + ) + .unwrap(); + rt().block_on(async { + db.create_table( + "Customers".to_string(), + vec![ + ColumnSpec { name: "id".to_string(), ty: Type::Serial }, + ColumnSpec { name: "Name".to_string(), ty: Type::Text }, + ], + vec!["id".to_string()], + Some("create".to_string()), + ) + .await + .unwrap(); + db.insert( + "Customers".to_string(), + None, + vec![Value::Text("Alice".to_string())], + Some("insert".to_string()), + ) + .await + .unwrap(); + db.insert( + "Customers".to_string(), + None, + vec![Value::Text("Bob".to_string())], + Some("insert".to_string()), + ) + .await + .unwrap(); + }); + drop(db); + drop(project); + path + }; + + fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap(); + + let project = project::Project::open(&project_path).unwrap(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(project.path().to_path_buf()), + ) + .unwrap(); + rt().block_on(async { + db.rebuild_from_text(project.path().to_path_buf()) + .await + .expect("rebuild"); + }); + + let rows = rt() + .block_on(async { db.query_data("Customers".to_string(), None).await }) + .expect("query_data"); + assert_eq!(rows.rows.len(), 2); + let names: Vec> = rows.rows.iter().map(|r| r[1].clone()).collect(); + assert_eq!(names[0].as_deref(), Some("Alice")); + assert_eq!(names[1].as_deref(), Some("Bob")); +} + +#[test] +fn rebuild_restores_relationships_and_cascade_behaviour() { + let data = tempdir(); + let project_path = { + let project = project::open_or_create(None, Some(data.path())).unwrap(); + let path = project.path().to_path_buf(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(path.clone()), + ) + .unwrap(); + rt().block_on(async { + db.create_table( + "Customers".to_string(), + vec![ColumnSpec { name: "id".to_string(), ty: Type::Serial }], + vec!["id".to_string()], + Some("create".to_string()), + ) + .await + .unwrap(); + db.create_table( + "Orders".to_string(), + vec![ + ColumnSpec { name: "id".to_string(), ty: Type::Serial }, + ColumnSpec { name: "CustId".to_string(), ty: Type::Int }, + ], + vec!["id".to_string()], + Some("create".to_string()), + ) + .await + .unwrap(); + db.add_relationship( + None, + "Customers".to_string(), + "id".to_string(), + "Orders".to_string(), + "CustId".to_string(), + ReferentialAction::Cascade, + ReferentialAction::NoAction, + false, + Some("rel".to_string()), + ) + .await + .unwrap(); + db.insert( + "Customers".to_string(), + Some(vec!["id".to_string()]), + vec![Value::Number("1".to_string())], + Some("insert".to_string()), + ) + .await + .unwrap(); + db.insert( + "Orders".to_string(), + Some(vec!["CustId".to_string()]), + vec![Value::Number("1".to_string())], + Some("insert".to_string()), + ) + .await + .unwrap(); + }); + drop(db); + drop(project); + path + }; + + fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap(); + + let project = project::Project::open(&project_path).unwrap(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(project.path().to_path_buf()), + ) + .unwrap(); + rt().block_on(async { + db.rebuild_from_text(project.path().to_path_buf()) + .await + .expect("rebuild"); + }); + + // Relationship is back: cascade-delete from Customers + // should also clean Orders. + let result = rt() + .block_on(async { + db.delete( + "Customers".to_string(), + rdbms_playground::dsl::RowFilter::AllRows, + Some("delete".to_string()), + ) + .await + }) + .expect("delete"); + assert_eq!(result.rows_affected, 1); + assert_eq!(result.cascade.len(), 1, "expected one cascade entry: {result:?}"); + assert_eq!(result.cascade[0].child_table, "Orders"); +} + +#[test] +fn rebuild_reports_fatal_error_on_bad_csv_row() { + let data = tempdir(); + + // Create a project, populate, then corrupt the CSV. + let project_path = { + let project = project::open_or_create(None, Some(data.path())).unwrap(); + let path = project.path().to_path_buf(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(path.clone()), + ) + .unwrap(); + rt().block_on(async { + db.create_table( + "Numbers".to_string(), + vec![ + ColumnSpec { name: "id".to_string(), ty: Type::Serial }, + ColumnSpec { name: "n".to_string(), ty: Type::Int }, + ], + vec!["id".to_string()], + Some("create".to_string()), + ) + .await + .unwrap(); + db.insert( + "Numbers".to_string(), + Some(vec!["n".to_string()]), + vec![Value::Number("1".to_string())], + Some("insert".to_string()), + ) + .await + .unwrap(); + }); + drop(db); + drop(project); + path + }; + + // Hand-corrupt the CSV: replace the int with a non-number. + let csv_path = project_path.join("data").join("Numbers.csv"); + let body = fs::read_to_string(&csv_path).unwrap(); + let corrupt = body.replace(",1\n", ",not-a-number\n"); + fs::write(&csv_path, corrupt).unwrap(); + + fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap(); + + let project = project::Project::open(&project_path).unwrap(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(project.path().to_path_buf()), + ) + .unwrap(); + let err = rt() + .block_on(async { + db.rebuild_from_text(project.path().to_path_buf()).await + }) + .expect_err("must fail with row-level error"); + let msg = format!("{err}"); + assert!(msg.contains("row 2"), "msg should name the row: {msg}"); + assert!(msg.contains("Numbers"), "msg should name the table: {msg}"); + assert!(msg.contains("integer"), "msg should explain the type mismatch: {msg}"); +} + +#[test] +fn rebuild_preserves_created_at_from_yaml() { + let data = tempdir(); + let project_path = { + let project = project::open_or_create(None, Some(data.path())).unwrap(); + let path = project.path().to_path_buf(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(path.clone()), + ) + .unwrap(); + rt().block_on(async { + db.create_table( + "T".to_string(), + vec![ColumnSpec { name: "id".to_string(), ty: Type::Serial }], + vec!["id".to_string()], + Some("create".to_string()), + ) + .await + .unwrap(); + }); + drop(db); + drop(project); + path + }; + + // Substitute a recognizable timestamp into project.yaml. + let yaml_path = project_path.join("project.yaml"); + let body = fs::read_to_string(&yaml_path).unwrap(); + let edited = body + .lines() + .map(|l| { + if l.trim_start().starts_with("created_at:") { + " created_at: 2020-01-02T03:04:05Z".to_string() + } else { + l.to_string() + } + }) + .collect::>() + .join("\n"); + fs::write(&yaml_path, format!("{edited}\n")).unwrap(); + + // Delete the .db, rebuild from text. + fs::remove_file(project_path.join(PLAYGROUND_DB)).unwrap(); + let project = project::Project::open(&project_path).unwrap(); + let db = Database::open_with_persistence( + project.db_path(), + Persistence::new(project.path().to_path_buf()), + ) + .unwrap(); + rt().block_on(async { + db.rebuild_from_text(project.path().to_path_buf()) + .await + .expect("rebuild"); + }); + + // Trigger any successful command so project.yaml is + // rewritten from the now-rebuilt db state. + rt().block_on(async { + db.describe_table("T".to_string(), Some("show table T".to_string())) + .await + .unwrap(); + // describe is read-only; force a rewrite by adding a column. + db.add_column( + "T".to_string(), + "Note".to_string(), + Type::Text, + Some("add column".to_string()), + ) + .await + .unwrap(); + }); + + let final_yaml = fs::read_to_string(&yaml_path).unwrap(); + assert!( + final_yaml.contains("created_at: 2020-01-02T03:04:05Z"), + "yaml should preserve the edited created_at:\n{final_yaml}", + ); +}