rdbms-playground/src/persistence/mod.rs

//! Per-command persistence to `project.yaml`, `data/*.csv`,
//! and `history.log` (ADR-0015 §3–§6).
//!
//! Iteration 2 wiring: every successful user command, after
//! its SQLite mutations are staged but before the transaction
//! commits, asks `Persistence` to write the affected text
//! targets atomically (write-temp + fsync + rename). The
//! commit-db-last ordering (ADR-0015 §6) is enforced in
//! `db.rs`; this module owns the file-format details and the
//! atomic-write primitive.
//!
//! Failure semantics: any write or rename failure produces a
//! `PersistenceError`. The caller (the db worker) is
//! responsible for translating that into a fatal error and
//! letting the SQLite tx roll back.

use std::fs;
use std::io::Write as _;
use std::path::{Path, PathBuf};

use crate::dsl::action::ReferentialAction;
use crate::dsl::types::Type;
use crate::project::{DATA_DIR, HISTORY_LOG, PROJECT_YAML};

// Submodules are private; the few items the db worker needs
// during rebuild (ADR-0015 §7) are re-exported below.
mod csv_io;
mod history;
pub mod migrations;
mod yaml;

pub(crate) use csv_io::{decode_cell, parse_csv};
pub(crate) use yaml::parse_schema;

/// Owns persistence to a single project on disk. Cheap to
/// move; the db worker holds one instance for its lifetime.
#[derive(Debug, Clone)]
pub struct Persistence {
    project_path: PathBuf,
}

#[derive(Debug, thiserror::Error)]
pub enum PersistenceError {
    #[error("could not {operation} `{path}`: {source}")]
    Io {
        operation: &'static str,
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    #[error("could not encode {kind} for `{path}`: {message}")]
    Encode {
        kind: &'static str,
        path: PathBuf,
        message: String,
    },
}

impl PersistenceError {
    /// Path the failure was associated with.
    #[must_use]
    pub fn path(&self) -> &Path {
        match self {
            Self::Io { path, .. } | Self::Encode { path, .. } => path,
        }
    }

    /// Short label for the kind of operation that failed,
    /// suitable for the fatal banner.
    #[must_use]
    pub const fn operation(&self) -> &'static str {
        match self {
            Self::Io { operation, .. } => operation,
            Self::Encode { .. } => "encode",
        }
    }
}

/// Snapshot of the full schema as it is to be written to
/// `project.yaml`.
///
/// Read from the database after the in-flight mutation has
/// staged its changes (within the same SQLite tx) so the YAML
/// reflects the post-mutation state.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SchemaSnapshot {
    pub created_at: String,
    pub tables: Vec<TableSchema>,
    pub relationships: Vec<RelationshipSchema>,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableSchema {
    pub name: String,
    pub primary_key: Vec<String>,
    pub columns: Vec<ColumnSchema>,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnSchema {
    pub name: String,
    pub user_type: Type,
    /// Whether this column carries a single-column UNIQUE
    /// constraint (ADR-0018 §4). Stored explicitly in the
    /// project YAML so that a `serial → int` round-trip
    /// (which leaves UNIQUE in place) is preserved across a
    /// save/load cycle. Defaults to `false` when missing in
    /// older project files.
    pub unique: bool,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RelationshipSchema {
    pub name: String,
    pub parent_table: String,
    pub parent_column: String,
    pub child_table: String,
    pub child_column: String,
    pub on_delete: ReferentialAction,
    pub on_update: ReferentialAction,
}

/// Snapshot of one table's full row data, for writing
/// `data/<table>.csv`. The column order matches the table's
/// declaration order; the row tuples are aligned to it.
#[derive(Debug, Clone, PartialEq)]
pub struct TableSnapshot {
    pub name: String,
    pub columns: Vec<ColumnSchema>,
    pub rows: Vec<Vec<CellValue>>,
}

/// A scalar cell value, in the small ADT understood by the
/// CSV encoder.
///
/// `Null` and `Text("")` are distinct. `Eq` is intentionally
/// NOT derived because `Real(f64)` does not satisfy it (NaN);
/// use `PartialEq` for comparison.
#[derive(Debug, Clone, PartialEq)]
pub enum CellValue {
    Null,
    Integer(i64),
    Real(f64),
    Text(String),
    Blob(Vec<u8>),
}

impl Persistence {
    #[must_use]
    pub const fn new(project_path: PathBuf) -> Self {
        Self { project_path }
    }

    /// Project root directory. Used in tests and diagnostics.
    #[must_use]
    pub fn project_path(&self) -> &Path {
        &self.project_path
    }

    /// Write `project.yaml` from a full schema snapshot.
    /// Atomic: writes to `project.yaml.tmp`, fsyncs, then
    /// renames over the destination.
    pub fn write_schema(&self, schema: &SchemaSnapshot) -> Result<(), PersistenceError> {
        let body = yaml::serialize_schema(schema);
        atomic_write(&self.project_path.join(PROJECT_YAML), body.as_bytes())
    }

    /// Write `data/<table>.csv` from a table snapshot. Atomic
    /// per file. Creates the `data/` directory if missing
    /// (tolerant of fresh projects).
    ///
    /// **Empty tables produce no CSV.** A header-only file
    /// would carry no information beyond what `project.yaml`
    /// already records, so an empty snapshot is treated
    /// identically to "drop this table's data file": the CSV
    /// is removed if it exists, no file is created if it
    /// doesn't. This keeps the rule "data lives in CSV; no
    /// data, no CSV" consistent and avoids surprising users
    /// with files they didn't ask for.
    pub fn write_table_data(&self, table: &TableSnapshot) -> Result<(), PersistenceError> {
        if table.rows.is_empty() {
            return self.delete_table_data(&table.name);
        }
        let data_dir = self.project_path.join(DATA_DIR);
        fs::create_dir_all(&data_dir).map_err(|source| PersistenceError::Io {
            operation: "create",
            path: data_dir.clone(),
            source,
        })?;
        let body =
            csv_io::serialize_table(table).map_err(|message| PersistenceError::Encode {
                kind: "CSV",
                path: data_dir.join(format!("{}.csv", table.name)),
                message,
            })?;
        atomic_write(&data_dir.join(format!("{}.csv", table.name)), &body)
    }

    /// Remove `data/<table>.csv` if present. Used when a
    /// table is dropped so stale CSVs don't linger.
    pub fn delete_table_data(&self, table_name: &str) -> Result<(), PersistenceError> {
        let path = self
            .project_path
            .join(DATA_DIR)
            .join(format!("{table_name}.csv"));
        match fs::remove_file(&path) {
            Ok(()) => Ok(()),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
            Err(source) => Err(PersistenceError::Io {
                operation: "delete",
                path,
                source,
            }),
        }
    }

    /// Append one record to `history.log`.
    pub fn append_history(&self, command_text: &str) -> Result<(), PersistenceError> {
        let path = self.project_path.join(HISTORY_LOG);
        let line = history::format_record(command_text, history::utc_iso8601_now());
        history::append(&path, &line)
    }

    /// Read the most-recent `max_n` sources out of
    /// `history.log` for input-history hydration on project
    /// open (ADR-0015 §12). Returned in chronological order
    /// (oldest first). A missing file is `Ok(Vec::new())`.
    pub fn read_recent_history(&self, max_n: usize) -> Result<Vec<String>, PersistenceError> {
        let path = self.project_path.join(HISTORY_LOG);
        history::read_recent_sources(&path, max_n)
    }
}

/// Write `body` to `path` atomically via temp file + fsync +
/// rename. The temp file is named `<final>.tmp` in the same
/// directory so the rename stays on the same filesystem.
fn atomic_write(path: &Path, body: &[u8]) -> Result<(), PersistenceError> {
    let tmp_path = path.with_extension(extension_with_tmp(path));
    {
        let mut tmp = fs::File::create(&tmp_path).map_err(|source| PersistenceError::Io {
            operation: "create",
            path: tmp_path.clone(),
            source,
        })?;
        tmp.write_all(body).map_err(|source| PersistenceError::Io {
            operation: "write",
            path: tmp_path.clone(),
            source,
        })?;
        tmp.sync_all().map_err(|source| PersistenceError::Io {
            operation: "fsync",
            path: tmp_path.clone(),
            source,
        })?;
    }
    fs::rename(&tmp_path, path).map_err(|source| PersistenceError::Io {
        operation: "rename",
        path: path.to_path_buf(),
        source,
    })?;
    Ok(())
}

/// Build the `.tmp` extension for a path.
///
/// If the path already has an extension (`project.yaml`), the
/// tmp variant is `project.yaml.tmp`. If the path has no
/// extension, the extension becomes plain `tmp`.
fn extension_with_tmp(path: &Path) -> String {
    path.extension().map_or_else(
        || "tmp".to_string(),
        |ext| format!("{}.tmp", ext.to_string_lossy()),
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    fn tempdir() -> tempfile::TempDir {
        tempfile::tempdir().expect("create tempdir")
    }

    #[test]
    fn extension_with_tmp_appends_to_existing_extension() {
        assert_eq!(extension_with_tmp(Path::new("a/b/project.yaml")), "yaml.tmp");
        assert_eq!(extension_with_tmp(Path::new("a/b/Customers.csv")), "csv.tmp");
        assert_eq!(extension_with_tmp(Path::new("a/b/lockfile")), "tmp");
    }

    #[test]
    fn atomic_write_roundtrips() {
        let dir = tempdir();
        let target = dir.path().join("file.txt");
        atomic_write(&target, b"hello\n").unwrap();
        assert_eq!(fs::read_to_string(&target).unwrap(), "hello\n");

        // Calling again replaces atomically — no .tmp left behind.
        atomic_write(&target, b"world\n").unwrap();
        assert_eq!(fs::read_to_string(&target).unwrap(), "world\n");
        assert!(!target.with_extension("txt.tmp").exists());
    }

    #[test]
    fn write_schema_writes_yaml() {
        let dir = tempdir();
        let p = Persistence::new(dir.path().to_path_buf());
        let schema = SchemaSnapshot {
            created_at: "2026-05-07T14:30:12Z".to_string(),
            tables: vec![],
            relationships: vec![],
        };
        p.write_schema(&schema).unwrap();
        let body = fs::read_to_string(dir.path().join(PROJECT_YAML)).unwrap();
        assert!(body.contains("version: 1"));
        assert!(body.contains("created_at:"));
    }

    #[test]
    fn write_and_delete_table_data() {
        let dir = tempdir();
        let p = Persistence::new(dir.path().to_path_buf());
        let table = TableSnapshot {
            name: "Customers".to_string(),
            columns: vec![ColumnSchema {
                name: "Name".to_string(),
                user_type: Type::Text,
                unique: false,
            }],
            rows: vec![vec![CellValue::Text("Alice".to_string())]],
        };
        p.write_table_data(&table).unwrap();
        let csv_path = dir.path().join(DATA_DIR).join("Customers.csv");
        assert!(csv_path.exists());
        let body = fs::read_to_string(&csv_path).unwrap();
        assert!(body.contains("Name"));
        assert!(body.contains("Alice"));

        p.delete_table_data("Customers").unwrap();
        assert!(!csv_path.exists());

        // Idempotent on a missing file.
        p.delete_table_data("Customers").unwrap();
    }

    #[test]
    fn append_history_creates_and_appends() {
        let dir = tempdir();
        let p = Persistence::new(dir.path().to_path_buf());
        p.append_history("create table Foo with pk id:serial").unwrap();
        p.append_history("insert into Foo (1)").unwrap();
        let body = fs::read_to_string(dir.path().join(HISTORY_LOG)).unwrap();
        let lines: Vec<&str> = body.trim_end().lines().collect();
        assert_eq!(lines.len(), 2);
        assert!(lines[0].ends_with("|ok|create table Foo with pk id:serial"));
        assert!(lines[1].ends_with("|ok|insert into Foo (1)"));
    }
}