Iteration 3: existence-only load + rebuild from text on missing .db
When the runtime opens a project whose playground.db is missing,
it now rebuilds the database from project.yaml + data/<table>.csv
per ADR-0015 §7. The rebuild path:
1. Parses project.yaml (serde_yml). Unknown versions / types /
actions surface as PersistenceFatal.
2. Recreates each user table with FK constraints inline
(PRAGMA foreign_keys=OFF), then populates the column-type,
relationship, and project metadata tables.
3. Loads each table's CSV via a hand-rolled reader that
preserves the NULL-vs-empty distinction (the csv crate
doesn't expose whether a field was quoted; ours does).
4. Runs PRAGMA foreign_key_check before commit; any violation
aborts.
5. Restores foreign_keys=ON regardless of success.
Row-level failures get DbError::RebuildRowFailed with row
number, file, table, and a friendly per-type detail. They land
in the runtime as a fatal stderr message ("unable to load row N
from `data/T.csv` into table `T`: ...") before the alternate
screen is entered.
created_at from project.yaml overwrites the configure-time
placeholder so timestamps round-trip stably.
Tests: 307 passing (267 lib + 9 + 5 new + 9 + 17), 0 failing,
0 skipped. Clippy clean with nursery lints.
This commit is contained in:
@@ -16,6 +16,12 @@
|
||||
//! quote, newline). We handle the empty-string-vs-null
|
||||
//! distinction manually by always quoting non-null empty
|
||||
//! Text and never quoting Null.
|
||||
//
|
||||
// `pub(crate)` items below are re-exported from
|
||||
// `persistence::mod.rs`; the db worker reaches them via that
|
||||
// path. Clippy's `redundant_pub_crate` lint flags this
|
||||
// pattern, but it's load-bearing here.
|
||||
#![allow(clippy::redundant_pub_crate)]
|
||||
|
||||
use std::io::Write as _;
|
||||
|
||||
@@ -172,6 +178,182 @@ fn format_real(f: f64) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsed CSV records: header row + zero or more data rows.
|
||||
///
|
||||
/// Each cell records whether it was syntactically quoted in
|
||||
/// the source — that's the bit we need to distinguish NULL
|
||||
/// (empty unquoted) from `""` (empty quoted). The `csv`
|
||||
/// crate doesn't expose this, which is why we hand-roll the
|
||||
/// reader to pair with the hand-rolled writer above.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParsedCsv {
|
||||
pub header: Vec<String>,
|
||||
pub rows: Vec<Vec<RawCell>>,
|
||||
}
|
||||
|
||||
/// A single CSV field exactly as the tokenizer saw it, before any
/// type-specific decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct RawCell {
    /// Field text with the surrounding quotes removed and doubled quotes
    /// collapsed to one.
    pub content: String,
    /// `true` when the field was enclosed in double quotes in the source.
    pub was_quoted: bool,
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum CsvError {
|
||||
#[error("CSV is empty")]
|
||||
Empty,
|
||||
#[error("invalid UTF-8 in CSV body")]
|
||||
InvalidUtf8,
|
||||
#[error("unterminated quoted field")]
|
||||
UnterminatedQuote,
|
||||
}
|
||||
|
||||
/// Tokenize a CSV body. Returns the header (column names from
|
||||
/// the first record) and the data rows. Each cell preserves a
|
||||
/// `was_quoted` flag so the caller can distinguish an empty
|
||||
/// unquoted field (NULL) from an empty quoted field (`""`).
|
||||
pub(crate) fn parse_csv(body: &str) -> Result<ParsedCsv, CsvError> {
|
||||
let mut records: Vec<Vec<RawCell>> = Vec::new();
|
||||
let mut current: Vec<RawCell> = Vec::new();
|
||||
let bytes = body.as_bytes();
|
||||
let mut i = 0;
|
||||
let n = bytes.len();
|
||||
|
||||
while i < n {
|
||||
let (cell, advance) = parse_field(&bytes[i..])?;
|
||||
i += advance;
|
||||
current.push(cell);
|
||||
match bytes.get(i) {
|
||||
Some(&b',') => i += 1,
|
||||
Some(&b'\n') => {
|
||||
i += 1;
|
||||
records.push(std::mem::take(&mut current));
|
||||
}
|
||||
Some(&b'\r') => {
|
||||
i += 1;
|
||||
if bytes.get(i) == Some(&b'\n') {
|
||||
i += 1;
|
||||
}
|
||||
records.push(std::mem::take(&mut current));
|
||||
}
|
||||
None => {
|
||||
records.push(std::mem::take(&mut current));
|
||||
}
|
||||
Some(&other) => {
|
||||
// A non-structural byte after a quoted field —
|
||||
// shouldn't happen with our well-formed writer.
|
||||
// Treat as part of an unquoted continuation by
|
||||
// appending to the last cell. We choose to
|
||||
// tolerate rather than error since the most
|
||||
// common cause is a trailing space, which we
|
||||
// can roll into the cell.
|
||||
let last = current
|
||||
.last_mut()
|
||||
.or_else(|| records.last_mut().and_then(|r| r.last_mut()));
|
||||
if let Some(c) = last {
|
||||
c.content.push(other as char);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !current.is_empty() {
|
||||
records.push(current);
|
||||
}
|
||||
|
||||
if records.is_empty() {
|
||||
return Err(CsvError::Empty);
|
||||
}
|
||||
let header_record = records.remove(0);
|
||||
let header: Vec<String> = header_record.into_iter().map(|c| c.content).collect();
|
||||
Ok(ParsedCsv {
|
||||
header,
|
||||
rows: records,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_field(bytes: &[u8]) -> Result<(RawCell, usize), CsvError> {
|
||||
if bytes.first() == Some(&b'"') {
|
||||
let mut content_bytes: Vec<u8> = Vec::new();
|
||||
let mut i = 1;
|
||||
while i < bytes.len() {
|
||||
match bytes[i] {
|
||||
b'"' => {
|
||||
if bytes.get(i + 1) == Some(&b'"') {
|
||||
content_bytes.push(b'"');
|
||||
i += 2;
|
||||
} else {
|
||||
let content =
|
||||
String::from_utf8(content_bytes).map_err(|_| CsvError::InvalidUtf8)?;
|
||||
return Ok((
|
||||
RawCell {
|
||||
content,
|
||||
was_quoted: true,
|
||||
},
|
||||
i + 1,
|
||||
));
|
||||
}
|
||||
}
|
||||
other => {
|
||||
content_bytes.push(other);
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(CsvError::UnterminatedQuote)
|
||||
} else {
|
||||
let mut i = 0;
|
||||
while i < bytes.len() {
|
||||
match bytes[i] {
|
||||
b',' | b'\n' | b'\r' => break,
|
||||
_ => i += 1,
|
||||
}
|
||||
}
|
||||
let content =
|
||||
String::from_utf8(bytes[..i].to_vec()).map_err(|_| CsvError::InvalidUtf8)?;
|
||||
Ok((
|
||||
RawCell {
|
||||
content,
|
||||
was_quoted: false,
|
||||
},
|
||||
i,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode one parsed cell into a `CellValue` per the column's
|
||||
/// declared type. Returns an error string the caller can
|
||||
/// embed in a fatal banner per ADR-0015 §7 ("unable to load
|
||||
/// row N from data/T.csv into table T: …").
|
||||
pub(crate) fn decode_cell(ty: Type, cell: &RawCell) -> Result<CellValue, String> {
|
||||
if !cell.was_quoted && cell.content.is_empty() {
|
||||
return Ok(CellValue::Null);
|
||||
}
|
||||
match ty {
|
||||
Type::Text | Type::Date | Type::DateTime | Type::Decimal | Type::ShortId => {
|
||||
Ok(CellValue::Text(cell.content.clone()))
|
||||
}
|
||||
Type::Int | Type::Serial => cell
|
||||
.content
|
||||
.parse::<i64>()
|
||||
.map(CellValue::Integer)
|
||||
.map_err(|_| format!("expected an integer, got `{}`", cell.content)),
|
||||
Type::Real => cell
|
||||
.content
|
||||
.parse::<f64>()
|
||||
.map(CellValue::Real)
|
||||
.map_err(|_| format!("expected a real number, got `{}`", cell.content)),
|
||||
Type::Bool => match cell.content.as_str() {
|
||||
"true" => Ok(CellValue::Integer(1)),
|
||||
"false" => Ok(CellValue::Integer(0)),
|
||||
other => Err(format!("expected `true` or `false`, got `{other}`")),
|
||||
},
|
||||
Type::Blob => base64::engine::general_purpose::STANDARD
|
||||
.decode(cell.content.as_bytes())
|
||||
.map(CellValue::Blob)
|
||||
.map_err(|e| format!("invalid base64 blob: {e}")),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -290,6 +472,95 @@ mod tests {
|
||||
assert!(s.contains("2026-05-07,2026-05-07T14:30:12Z"));
|
||||
}
|
||||
|
||||
#[test]
fn parse_round_trips_simple_table() {
    // Write a two-row table with the serializer, then read it back
    // through the tokenizer.
    let columns = vec![col("id", Type::Serial), col("Name", Type::Text)];
    let rows = vec![
        vec![CellValue::Integer(1), CellValue::Text("Alice".to_string())],
        vec![CellValue::Integer(2), CellValue::Text("Bob".to_string())],
    ];
    let snapshot = TableSnapshot {
        name: "Customers".to_string(),
        columns,
        rows,
    };

    let bytes = serialize_table(&snapshot).unwrap();
    let text = std::str::from_utf8(&bytes).unwrap();
    let parsed = parse_csv(text).unwrap();

    assert_eq!(parsed.header, vec!["id", "Name"]);
    assert_eq!(parsed.rows.len(), 2);
    let first = &parsed.rows[0];
    assert_eq!(first[0].content, "1");
    assert_eq!(first[1].content, "Alice");
    assert_eq!(parsed.rows[1][1].content, "Bob");
}
|
||||
|
||||
#[test]
fn parse_distinguishes_null_from_empty_string() {
    // After the `Name` header come a blank line (empty unquoted => NULL)
    // and a `""` line (empty quoted => empty text).
    let parsed = parse_csv("Name\n\n\"\"\n").unwrap();
    assert_eq!(parsed.rows.len(), 2);

    let null_cell = &parsed.rows[0][0];
    let empty_cell = &parsed.rows[1][0];
    assert!(!null_cell.was_quoted);
    assert_eq!(null_cell.content, "");
    assert!(empty_cell.was_quoted);
    assert_eq!(empty_cell.content, "");

    let decoded_null = decode_cell(Type::Text, null_cell).unwrap();
    let decoded_empty = decode_cell(Type::Text, empty_cell).unwrap();
    assert!(matches!(decoded_null, CellValue::Null));
    assert!(matches!(decoded_empty, CellValue::Text(s) if s.is_empty()));
}
|
||||
|
||||
#[test]
fn parse_handles_rfc4180_escapes() {
    // Row 0 has a comma inside quotes; row 1 uses doubled quotes as
    // escapes.
    let parsed = parse_csv("Name\n\"hello, world\"\n\"she said \"\"hi\"\"\"\n").unwrap();
    let with_comma = &parsed.rows[0][0];
    let with_quotes = &parsed.rows[1][0];
    assert_eq!(with_comma.content, "hello, world");
    assert_eq!(with_quotes.content, "she said \"hi\"");
}
|
||||
|
||||
#[test]
fn parse_decodes_per_type() {
    // One row covering the non-text types, produced by `serialize_table`
    // so the input is exactly what the writer emits.
    let columns = vec![
        col("n", Type::Int),
        col("r", Type::Real),
        col("b", Type::Bool),
        col("blob", Type::Blob),
    ];
    let values = vec![
        CellValue::Integer(42),
        CellValue::Real(std::f64::consts::PI),
        CellValue::Integer(1),
        CellValue::Blob(b"hi".to_vec()),
    ];
    let snapshot = TableSnapshot {
        name: "T".to_string(),
        columns,
        rows: vec![values],
    };

    let bytes = serialize_table(&snapshot).unwrap();
    let parsed = parse_csv(std::str::from_utf8(&bytes).unwrap()).unwrap();
    let cells = &parsed.rows[0];

    assert!(matches!(
        decode_cell(Type::Int, &cells[0]).unwrap(),
        CellValue::Integer(42)
    ));
    match decode_cell(Type::Real, &cells[1]).unwrap() {
        CellValue::Real(f) => assert!((f - std::f64::consts::PI).abs() < 1e-12),
        other => panic!("got {other:?}"),
    }
    assert!(matches!(
        decode_cell(Type::Bool, &cells[2]).unwrap(),
        CellValue::Integer(1)
    ));
    assert!(matches!(
        decode_cell(Type::Blob, &cells[3]).unwrap(),
        CellValue::Blob(b) if b == b"hi"
    ));
}
|
||||
|
||||
#[test]
fn parse_rejects_unterminated_quotes() {
    // The quoted field opens on the second line and never closes.
    let outcome = parse_csv("Name\n\"oops");
    let err = outcome.expect_err("must error");
    assert!(matches!(err, CsvError::UnterminatedQuote));
}
|
||||
|
||||
#[test]
fn decode_cell_reports_friendly_error_for_bad_int() {
    // The message must name both the expected type and the bad input.
    let bad = RawCell {
        content: String::from("abc"),
        was_quoted: false,
    };
    let message = decode_cell(Type::Int, &bad).expect_err("must error");
    for needle in ["integer", "abc"] {
        assert!(message.contains(needle));
    }
}
|
||||
|
||||
#[test]
|
||||
fn row_width_mismatch_errors() {
|
||||
let err = serialize_table(&TableSnapshot {
|
||||
|
||||
Reference in New Issue
Block a user