//! Pure fake-data generation library for the `seed` command (ADR-0048). //! //! This module is the **generation half** of `seed`: given a column's //! shape (name, type, constraints), it chooses a *generator* and turns //! a seeded RNG into plausible [`Value`]s. It is deliberately decoupled //! from `db.rs` — it knows nothing about SQLite, the worker thread, or //! persistence — so it stays pure and unit-testable, with exact-value //! assertions made possible by the seedable RNG (ADR-0048 D4). //! //! The executor (`db.rs::do_seed`) adapts the real schema into //! [`ColumnSpec`]s, calls [`choose_generator`] per column, and then //! [`generate_value`] per row — except for the *stateful* markers //! ([`Generator::IdentitySequential`], [`Generator::ForeignKeySample`]) //! which need database context (existing rows, the running sequence) //! and so are resolved by the executor, not here. //! //! Layout: //! - this file — the public types ([`ColumnSpec`], [`Generator`], //! [`SeedRng`]) and the RNG constructor. //! - [`heuristics`] — [`choose_generator`] + the name-aware catalogue //! (D7), table-context disambiguation (D11), identifier (D10) and //! enum-ish (D12) detection. //! - [`generators`] — [`generate_value`]: per-generator value //! production, the hand-rolled `product` generator (D9) and the //! bounded date windows (D8). mod generators; mod heuristics; pub use generators::generate_value; pub use heuristics::{choose_generator, is_enum_ish}; use rand::rngs::StdRng; use rand::{RngExt, SeedableRng}; use crate::dsl::types::Type; /// The RNG that drives all seed generation. /// /// A single seeded `StdRng` feeds both `fake`'s `fake_with_rng` and the /// hand-rolled generators, so a `--seed` value fully determines the /// output (ADR-0048 D4). `rand 0.10`'s `StdRng` satisfies `fake`'s /// `RngExt` bound (it re-exports `rand::RngExt`), so the same handle /// works on both sides. pub type SeedRng = StdRng; /// Build the seed RNG. 
///
/// With `Some(seed)` the stream is reproducible; with `None` it is
/// seeded from entropy (via the thread RNG) so each run differs.
/// Seeding `StdRng` from a single `u64` in both cases keeps
/// construction uniform and avoids `rand`'s churn-prone from-entropy
/// constructors.
#[must_use]
pub fn make_rng(seed: Option<u64>) -> SeedRng {
    // No explicit seed: draw one `u64` from the thread RNG so both paths
    // end in the same `seed_from_u64` constructor.
    let seed = seed.unwrap_or_else(|| rand::rng().random::<u64>());
    StdRng::seed_from_u64(seed)
}

/// A column described in just enough detail to choose and run a
/// generator. Built by the executor from the real schema; kept
/// independent of `db.rs`'s `ReadColumn` so this library stays pure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnSpec {
    /// The column's name — the primary signal for generator choice.
    pub name: String,
    /// The user-facing playground type — gates every name heuristic.
    pub ty: Type,
    /// `NOT NULL` — the executor uses this for the block guard (D1);
    /// generation always produces a value, so it is informational here.
    pub not_null: bool,
    /// Part of the table's primary key.
    pub primary_key: bool,
    /// Carries a `UNIQUE` constraint (or is a single-column PK).
    pub unique: bool,
    /// A foreign-key column — generation is the executor's job
    /// (sample an existing parent row, D14), so [`choose_generator`]
    /// returns [`Generator::ForeignKeySample`].
    pub is_foreign_key: bool,
    /// Values parsed from a simple `col IN ('a', 'b', …)` CHECK
    /// constraint (D17). When present, generation draws from them so
    /// the common enum-as-CHECK pattern "just works".
    pub check_in_values: Option<Vec<String>>,
}

impl ColumnSpec {
    /// Convenience constructor for a plain, unconstrained column —
    /// used heavily in tests.
    ///
    /// Every constraint flag is `false` and `check_in_values` is `None`.
    #[cfg(test)]
    #[must_use]
    pub fn plain(name: &str, ty: Type) -> Self {
        Self {
            name: name.to_string(),
            ty,
            not_null: false,
            primary_key: false,
            unique: false,
            is_foreign_key: false,
            check_in_values: None,
        }
    }
}

/// The chosen generation strategy for a column.
///
/// Most variants are *stateless* — [`generate_value`] turns them into a
/// [`Value`] from the RNG alone. Two are *stateful markers* that the
/// executor must intercept (they need database context):
/// [`Self::IdentitySequential`] (the running `MAX+offset` sequence,
/// D10) and [`Self::ForeignKeySample`] (draw from existing parent
/// rows, D14). For safety [`generate_value`] treats an un-intercepted
/// marker as [`Self::Generic`] rather than panicking.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Generator {
    // — Person —
    FirstName,
    LastName,
    /// A full person name (table-context default for `name`/`title`).
    FullName,
    Email,
    Username,
    Password,
    Phone,

    // — Address —
    City,
    Country,
    StateName,
    Street,
    ZipCode,

    // — Organisation / commerce —
    Company,
    JobTitle,
    /// Hand-rolled `{adjective} {material} {noun}` (D9) — `fake` has no
    /// commerce module.
    ProductName,

    // — Free text —
    Sentence,
    Paragraph,
    Url,
    HexColor,

    // — Numeric —
    /// A money-shaped amount (whole for `int`, two-decimal otherwise).
    CurrencyAmount,
    /// A plausible human age (18–80).
    Age,
    /// A small positive integer (quantities, counts).
    SmallInt,

    // — Temporal (bounded windows, D8) —
    /// A date within the last few years.
    DateRecent,
    /// A date in an adult birth window (≈18–80 years ago) — for `dob`.
    DateAdult,
    /// A datetime within the last few years.
    DateTimeRecent,

    // — Boolean —
    Boolean,

    // — Stateful markers (executor-resolved) —
    /// Unique sequential identifier (D10): the executor supplies
    /// `MAX(col)+offset`. Chosen for identifier-named non-FK columns.
    IdentitySequential,
    /// FK column (D14): the executor samples an existing parent key.
    ForeignKeySample,

    // — List / fallback —
    /// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
    /// enum, or a future `set in (…)` override.
    PickFrom(Vec<String>),
    /// Type-based fallback (D8) when no name heuristic matches.
    Generic,
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Two RNGs built from the same explicit seed must emit the same values.
    #[test]
    fn same_seed_yields_identical_rng_streams() {
        let mut a = make_rng(Some(42));
        let mut b = make_rng(Some(42));
        let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
        let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
        assert_eq!(xs, ys, "a fixed seed must reproduce the stream");
    }

    /// Distinct explicit seeds must not produce the same first eight values.
    #[test]
    fn different_seeds_yield_different_streams() {
        let mut a = make_rng(Some(1));
        let mut b = make_rng(Some(2));
        let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
        let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
        assert_ne!(xs, ys);
    }

    /// The `None` path only needs to construct and yield a value.
    #[test]
    fn unseeded_rng_constructs_without_panicking() {
        // Entropy-seeded path: just exercise it.
        let mut rng = make_rng(None);
        let _ = rng.random::<u64>();
    }
}