Iteration 2: per-command write-through to project.yaml, CSVs, history.log
Every successful user command now persists through to YAML, the
affected CSVs, and history.log inside the same SQLite transaction,
with the commit-db-last ordering from ADR-0015 §6: validate ->
mutate -> stage text + fsync -> atomic rename -> append history ->
commit. A failure in any text-write step rolls back the SQLite tx,
so disk state is unchanged on failure. Persistence failures are
routed through a new AppEvent::PersistenceFatal which sets a
fatal_message on the App, emits Action::Quit, and is printed to
stderr after terminal teardown so the banner remains above the
shell prompt (ADR-0015 §8).
New persistence module owns the file formats: hand-rolled YAML
schema writer, per-type CSV encoder (RFC 4180, NULL distinct from
empty string, base64 blobs), append-only history.log with ISO-8601
timestamps and successful-only entries. Atomic per-file writes via
tmp + fsync + rename.
The db worker holds an Option<Persistence>; tests still use
Database::open(":memory:") with no persistence. Action::ExecuteDsl
gains a source field carrying the user-typed text, threaded
through to history.log.
Tests: 289 passing (256 lib + 7 new integration + 9 lifecycle + 17
walking-skeleton), 0 failing, 0 skipped. Clippy clean with nursery
lints.
This commit is contained in:
@@ -0,0 +1,303 @@
|
||||
//! Per-type CSV writer (ADR-0015 §4).
//!
//! Encoding rules per type are exactly as specified in the
//! ADR; the cell-level encoder lives in `encode_cell`.
//! Record framing and RFC 4180 quoting are done by hand in
//! this module (`write_record` / `write_quoted`) rather than
//! through the `csv` crate's writer, so that the
//! NULL-vs-empty distinction stays under our control.
//!
//! NULL representation: an empty unquoted field.
//! `CellValue::Null` maps to that, while
//! `CellValue::Text(String::new())` maps to a *quoted* empty
//! field (`""`), so the two stay distinguishable on disk.
//!
//! Quoting policy: fields are quoted only when needed
//! (separator, quote, newline) — comparable to the `csv`
//! crate's default `QuoteStyle::Necessary` — with one
//! deliberate exception: we always quote non-null empty
//! Text and never quote Null.
|
||||
|
||||
use std::io::Write as _;
|
||||
|
||||
use base64::Engine as _;
|
||||
|
||||
use crate::dsl::types::Type;
|
||||
|
||||
use super::{CellValue, TableSnapshot};
|
||||
|
||||
/// Serialize a `TableSnapshot` to a CSV body. Returns the raw
|
||||
/// bytes (UTF-8) ready to be written to disk.
|
||||
pub(super) fn serialize_table(table: &TableSnapshot) -> Result<Vec<u8>, String> {
|
||||
// We bypass the `csv` crate for cell-level emission so the
|
||||
// NULL-vs-empty distinction stays under our control. The
|
||||
// header and per-line framing are still simple enough to
|
||||
// emit directly.
|
||||
let mut out: Vec<u8> = Vec::new();
|
||||
write_record(
|
||||
&mut out,
|
||||
table.columns.iter().map(|c| Cell::Plain(c.name.clone())),
|
||||
)?;
|
||||
for row in &table.rows {
|
||||
if row.len() != table.columns.len() {
|
||||
return Err(format!(
|
||||
"row width {} does not match column count {} for table `{}`",
|
||||
row.len(),
|
||||
table.columns.len(),
|
||||
table.name,
|
||||
));
|
||||
}
|
||||
let mut cells: Vec<Cell> = Vec::with_capacity(row.len());
|
||||
for (col, value) in table.columns.iter().zip(row.iter()) {
|
||||
cells.push(encode_cell(col.user_type, value)?);
|
||||
}
|
||||
write_record(&mut out, cells.into_iter())?;
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// A single field on its way into a CSV record.
///
/// The variant decides the quoting policy, which is how the
/// writer keeps NULL (empty, unquoted) apart from an empty
/// string (empty, quoted).
enum Cell {
    /// Written bare; quoted only if the content requires it.
    Plain(String),
    /// Always wrapped in RFC 4180 double quotes.
    Quoted(String),
}
|
||||
|
||||
/// Emit a record (header or row) to `out`. Adds the trailing
|
||||
/// `\n` (RFC 4180 says CRLF, but `\n` is universally accepted
|
||||
/// and matches what every CSV reader on every platform
|
||||
/// handles cleanly; line endings are deliberately uniform
|
||||
/// across our generated artefacts).
|
||||
fn write_record<I: Iterator<Item = Cell>>(out: &mut Vec<u8>, cells: I) -> Result<(), String> {
|
||||
let mut first = true;
|
||||
for cell in cells {
|
||||
if !first {
|
||||
out.push(b',');
|
||||
}
|
||||
first = false;
|
||||
match cell {
|
||||
Cell::Plain(s) => {
|
||||
if needs_quoting(&s) {
|
||||
write_quoted(out, &s);
|
||||
} else {
|
||||
out.write_all(s.as_bytes()).map_err(|e| e.to_string())?;
|
||||
}
|
||||
}
|
||||
Cell::Quoted(s) => write_quoted(out, &s),
|
||||
}
|
||||
}
|
||||
out.push(b'\n');
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Append `s` to `out` as an RFC 4180 quoted field: wrapped in
/// double quotes, with every embedded `"` doubled.
fn write_quoted(out: &mut Vec<u8>, s: &str) {
    out.push(b'"');

    // Splitting on the quote character yields the runs between
    // quotes; re-joining them with `""` doubles each quote.
    let mut runs = s.split('"');
    if let Some(head) = runs.next() {
        out.extend_from_slice(head.as_bytes());
    }
    for run in runs {
        out.extend_from_slice(b"\"\"");
        out.extend_from_slice(run.as_bytes());
    }

    out.push(b'"');
}
|
||||
|
||||
/// Report whether `s` contains a byte that forces quoting: the
/// field separator, a double quote, or a line break (`\n` or
/// `\r`).
fn needs_quoting(s: &str) -> bool {
    const SPECIAL: &[u8] = b",\"\n\r";
    s.as_bytes().iter().any(|byte| SPECIAL.contains(byte))
}
|
||||
|
||||
/// Encode a single cell per type (ADR-0015 §4 table). Returns
|
||||
/// the cell wrapped in `Plain` or `Quoted` as appropriate for
|
||||
/// the NULL/empty distinction.
|
||||
fn encode_cell(ty: Type, value: &CellValue) -> Result<Cell, String> {
|
||||
if matches!(value, CellValue::Null) {
|
||||
return Ok(Cell::Plain(String::new()));
|
||||
}
|
||||
match ty {
|
||||
Type::Text => match value {
|
||||
CellValue::Text(s) if s.is_empty() => Ok(Cell::Quoted(String::new())),
|
||||
CellValue::Text(s) => Ok(Cell::Plain(s.clone())),
|
||||
other => Err(format!("expected text, got {other:?}")),
|
||||
},
|
||||
Type::Int => match value {
|
||||
CellValue::Integer(n) => Ok(Cell::Plain(n.to_string())),
|
||||
other => Err(format!("expected int, got {other:?}")),
|
||||
},
|
||||
Type::Real => match value {
|
||||
CellValue::Real(f) => Ok(Cell::Plain(format_real(*f))),
|
||||
other => Err(format!("expected real, got {other:?}")),
|
||||
},
|
||||
Type::Decimal => match value {
|
||||
// Decimals are stored as TEXT to preserve precision.
|
||||
CellValue::Text(s) if s.is_empty() => Ok(Cell::Quoted(String::new())),
|
||||
CellValue::Text(s) => Ok(Cell::Plain(s.clone())),
|
||||
other => Err(format!("expected decimal (text), got {other:?}")),
|
||||
},
|
||||
Type::Bool => match value {
|
||||
CellValue::Integer(0) => Ok(Cell::Plain("false".to_string())),
|
||||
CellValue::Integer(1) => Ok(Cell::Plain("true".to_string())),
|
||||
other => Err(format!("expected bool (0 or 1), got {other:?}")),
|
||||
},
|
||||
Type::Date | Type::DateTime => match value {
|
||||
CellValue::Text(s) if s.is_empty() => Ok(Cell::Quoted(String::new())),
|
||||
CellValue::Text(s) => Ok(Cell::Plain(s.clone())),
|
||||
other => Err(format!("expected date/datetime (text), got {other:?}")),
|
||||
},
|
||||
Type::Blob => match value {
|
||||
CellValue::Blob(bytes) => Ok(Cell::Plain(base64::engine::general_purpose::STANDARD.encode(bytes))),
|
||||
other => Err(format!("expected blob, got {other:?}")),
|
||||
},
|
||||
Type::Serial => match value {
|
||||
CellValue::Integer(n) => Ok(Cell::Plain(n.to_string())),
|
||||
other => Err(format!("expected serial (int), got {other:?}")),
|
||||
},
|
||||
Type::ShortId => match value {
|
||||
CellValue::Text(s) if s.is_empty() => Ok(Cell::Quoted(String::new())),
|
||||
CellValue::Text(s) => Ok(Cell::Plain(s.clone())),
|
||||
other => Err(format!("expected shortid (text), got {other:?}")),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Render an `f64` for a `Real` column.
///
/// Finite values use the default `{}` formatting on `f64`, which
/// emits a shortest round-tripping decimal — what the ADR asks
/// for. Non-finite values use the fixed lowercase tokens `nan`,
/// `inf` and `-inf`.
fn format_real(f: f64) -> String {
    if f.is_finite() {
        return format!("{f}");
    }

    let token = if f.is_nan() {
        "nan"
    } else if f.is_sign_positive() {
        "inf"
    } else {
        "-inf"
    };
    token.to_string()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::persistence::ColumnSchema;

    /// Shorthand for a column definition.
    fn col(name: &str, ty: Type) -> ColumnSchema {
        ColumnSchema { name: name.to_string(), user_type: ty }
    }

    /// Shorthand for a non-null text cell.
    fn text(s: &str) -> CellValue {
        CellValue::Text(s.to_string())
    }

    /// Assemble a snapshot from its parts.
    fn snapshot(
        name: &str,
        columns: Vec<ColumnSchema>,
        rows: Vec<Vec<CellValue>>,
    ) -> TableSnapshot {
        TableSnapshot { name: name.to_string(), columns, rows }
    }

    /// Serialize a table that is expected to succeed and decode
    /// the body as UTF-8.
    fn render(name: &str, columns: Vec<ColumnSchema>, rows: Vec<Vec<CellValue>>) -> String {
        let body = serialize_table(&snapshot(name, columns, rows)).unwrap();
        String::from_utf8(body).unwrap()
    }

    #[test]
    fn empty_table_emits_header_only() {
        let csv = render(
            "Customers",
            vec![col("id", Type::Serial), col("Name", Type::Text)],
            vec![],
        );
        assert_eq!(csv, "id,Name\n");
    }

    #[test]
    fn null_is_empty_unquoted_field() {
        let csv = render("T", vec![col("Name", Type::Text)], vec![vec![CellValue::Null]]);
        assert_eq!(csv, "Name\n\n");
    }

    #[test]
    fn empty_string_is_double_quoted() {
        let csv = render("T", vec![col("Name", Type::Text)], vec![vec![text("")]]);
        assert_eq!(csv, "Name\n\"\"\n");
    }

    #[test]
    fn text_with_comma_or_quote_is_rfc4180_quoted() {
        let csv = render(
            "T",
            vec![col("Name", Type::Text)],
            vec![vec![text("hello, world")], vec![text("she said \"hi\"")]],
        );
        assert!(csv.contains("\"hello, world\""));
        assert!(csv.contains("\"she said \"\"hi\"\"\""));
    }

    #[test]
    fn ints_and_reals_round_trip_simply() {
        let csv = render(
            "T",
            vec![col("n", Type::Int), col("r", Type::Real)],
            vec![
                vec![CellValue::Integer(42), CellValue::Real(std::f64::consts::PI)],
                vec![CellValue::Integer(-7), CellValue::Real(0.0)],
            ],
        );
        let lines: Vec<&str> = csv.trim_end().lines().collect();
        assert_eq!(lines[0], "n,r");
        assert!(lines[1].starts_with("42,"));
        assert_eq!(lines[2], "-7,0");
    }

    #[test]
    fn bools_use_words_not_digits() {
        let csv = render(
            "T",
            vec![col("b", Type::Bool)],
            vec![vec![CellValue::Integer(1)], vec![CellValue::Integer(0)]],
        );
        assert_eq!(csv, "b\ntrue\nfalse\n");
    }

    #[test]
    fn blobs_use_base64() {
        let csv = render(
            "T",
            vec![col("blob", Type::Blob)],
            vec![vec![CellValue::Blob(b"hello".to_vec())]],
        );
        assert!(csv.contains("aGVsbG8="));
    }

    #[test]
    fn dates_and_datetimes_pass_through() {
        let csv = render(
            "T",
            vec![col("d", Type::Date), col("ts", Type::DateTime)],
            vec![vec![text("2026-05-07"), text("2026-05-07T14:30:12Z")]],
        );
        assert!(csv.contains("2026-05-07,2026-05-07T14:30:12Z"));
    }

    #[test]
    fn row_width_mismatch_errors() {
        let table = snapshot(
            "T",
            vec![col("a", Type::Int), col("b", Type::Int)],
            vec![vec![CellValue::Integer(1)]],
        );
        let err = serialize_table(&table).unwrap_err();
        assert!(err.contains("row width"));
    }
}
|
||||
Reference in New Issue
Block a user