5bb0a147f0
Generalises serial and shortid beyond their previous restricted forms: - `serial` is no longer restricted to single-column PK. Non-PK serial columns get an emitted UNIQUE constraint and use application-side MAX(col)+1 at INSERT time (rowid alias still drives the PK case for free; per ADR-0010 worker-thread serialisation, the read-then-insert sequence is safe). - `shortid` columns auto-fill existing null cells when the column is materialised — `add column T: x (shortid)` on a non-empty table no longer leaves rows in a not-really-valid NULL state. - `int -> serial` joins the type-change matrix as always-clean identity (closes the asymmetry vs `text -> shortid`); other sources are refused with a route-via-int hint. - `change column T: x (serial|shortid)` fills null source cells with sequence / generated values in the same rebuild transaction. Internal infrastructure: - ReadColumn gains `unique: bool`; read_schema detects single- column UNIQUE indexes via pragma_index_list / pragma_index_info; schema_to_ddl emits inline UNIQUE for non-PK columns. - ColumnSchema (persistence) gains `unique: bool` so the flag survives YAML round-trip and rebuild-from-text reconstructs it faithfully — preserves the "serial -> int leaves UNIQUE in place" promise across save/load cycles. - ChangeColumnTypeResult.client_side now carries `auto_filled` + `auto_fill_kind` alongside `transformed` + `lossy`; the app handler renders separate note lines when both apply. - AddColumnResult is a new return type carrying pre-rendered [client-side] note lines for the auto-fill paths. Tests: 519 -> 534 (+15). Clippy clean.
1178 lines
39 KiB
Rust
1178 lines
39 KiB
Rust
//! Per-cell type-change transformer matrix (ADR-0017).
|
|
//!
|
|
//! When a `change column T: c (newtype)` invocation runs, every
|
|
//! cell of `c` is classified into one of three outcomes:
|
|
//!
|
|
//! - **Clean** — the transformer produced a value the new type
|
|
//! accepts without information loss.
|
|
//! - **Lossy** — valid output, but some property (precision,
|
|
//! fractional part, time component, …) was discarded.
|
|
//! - **Incompatible** — no transformer for this pair can produce
|
|
//! a valid output for this cell.
|
|
//!
|
|
//! The classification is a property of (source type, target type,
|
|
//! cell value) — the same pair yields different outcomes for
|
|
//! different cell values. NULL passes through any transformer
|
|
//! unchanged (always Clean).
|
|
//!
|
|
//! Pairs not present in the matrix are statically refused via
|
|
//! [`static_refusal`] before any per-cell pass runs. Same-type
|
|
//! identity, any non-`int` source → `serial`, anything ↔ `blob`, and
|
|
//! `date` ↔ `datetime` direct are all statically refused.
|
|
|
|
use rusqlite::types::Value;
|
|
|
|
use crate::dsl::Type;
|
|
use crate::dsl::shortid;
|
|
use crate::output_render::{Alignment, numeric_alignment_for};
|
|
|
|
/// Outcome of attempting to transform one cell's value to fit a
|
|
/// new column type.
|
|
#[derive(Debug, Clone, PartialEq)]
|
|
pub enum CellOutcome {
|
|
/// Transformer produced a value the new type accepts without
|
|
/// information loss.
|
|
Clean(Value),
|
|
/// Valid output, but some property of the original cell was
|
|
/// discarded.
|
|
Lossy { new: Value, reason: String },
|
|
/// No transformer for this pair can produce a valid value
|
|
/// for this cell.
|
|
Incompatible { reason: String },
|
|
}
|
|
|
|
/// Whether a (source, target) type pair is statically refused
|
|
/// before per-cell classification. Returns `Some(reason)` when
|
|
/// refused; `None` when the per-cell matrix should be consulted.
|
|
///
|
|
/// Static refusals cover: same-type identity, anything →
|
|
/// `serial`, anything ↔ `blob`, `date` ↔ `datetime` direct, and
|
|
/// any cross-domain pair not present in the matrix
|
|
/// (e.g. `bool` → `date`).
|
|
#[must_use]
|
|
pub fn static_refusal(src: Type, target: Type) -> Option<String> {
|
|
if src == target {
|
|
return Some(format!("column is already `{src}`."));
|
|
}
|
|
// ADR-0018 §8: `int → serial` is allowed via the matrix.
|
|
// Other sources to serial are refused — the user routes
|
|
// via int first if needed.
|
|
if matches!(target, Type::Serial) && !matches!(src, Type::Int) {
|
|
return Some(format!(
|
|
"to convert from `{src}` to `serial`, change the column to `int` \
|
|
first; only `int → serial` is supported directly."
|
|
));
|
|
}
|
|
if matches!(src, Type::Blob) || matches!(target, Type::Blob) {
|
|
return Some(format!(
|
|
"conversion between `{src}` and `{target}` is not supported \
|
|
in this version."
|
|
));
|
|
}
|
|
if matches!(
|
|
(src, target),
|
|
(Type::Date, Type::DateTime) | (Type::DateTime, Type::Date)
|
|
) {
|
|
return Some(format!(
|
|
"direct `{src}` to `{target}` conversion is not supported in \
|
|
this version; route via `text` if needed."
|
|
));
|
|
}
|
|
if !is_in_matrix(src, target) {
|
|
return Some(format!(
|
|
"no conversion is defined from `{src}` to `{target}`."
|
|
));
|
|
}
|
|
None
|
|
}
|
|
|
|
const fn is_in_matrix(src: Type, target: Type) -> bool {
|
|
use Type::{
|
|
Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text,
|
|
};
|
|
matches!(
|
|
(src, target),
|
|
// Always-clean transformers
|
|
(Int | Serial, Real | Decimal | Text)
|
|
// serial -> int: ADR-0017 §4.1's canonical allowed
|
|
// case ("removing auto-increment while preserving
|
|
// stored values"). Storage stays INTEGER; we treat
|
|
// it as an identity transformer.
|
|
| (Serial, Int)
|
|
// int -> serial: ADR-0018 §8. Storage stays
|
|
// INTEGER, the metadata flips to "auto-generated"
|
|
// and the column gains UNIQUE if non-PK. The
|
|
// matrix entry is identity at the value level;
|
|
// uniqueness and auto-fill of nulls happen at the
|
|
// change-column orchestration layer, not in the
|
|
// per-cell transformer.
|
|
| (Int, Serial)
|
|
| (Bool, Int | Real | Decimal | Text)
|
|
| (Decimal | Date | DateTime | ShortId | Real, Text)
|
|
// Per-cell-classified
|
|
| (Real, Int | Decimal | Bool)
|
|
| (Decimal, Int | Real | Bool)
|
|
| (Int, Bool)
|
|
| (Text, Int | Real | Decimal | Bool | Date | DateTime | ShortId)
|
|
)
|
|
}
|
|
|
|
/// Transform a single cell's value through the matrix.
|
|
///
|
|
/// Caller is responsible for first checking [`static_refusal`];
|
|
/// calling this with a statically-refused pair returns
|
|
/// `Incompatible` for every value. `value` is the cell as read
|
|
/// from SQLite — its variant matches the source type's storage
|
|
/// class (e.g. `Type::Real` ⇒ `Value::Real`). `Value::Null` is
|
|
/// always returned `Clean(Null)`.
|
|
#[must_use]
|
|
pub fn transform_cell(src: Type, target: Type, value: &Value) -> CellOutcome {
|
|
if matches!(value, Value::Null) {
|
|
return CellOutcome::Clean(Value::Null);
|
|
}
|
|
use Type::{
|
|
Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text,
|
|
};
|
|
match (src, target) {
|
|
// ---- Always-clean: int / serial source ----
|
|
(Int | Serial, Real) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)),
|
|
other => unexpected_storage("int/serial", other),
|
|
},
|
|
(Int | Serial, Decimal | Text) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Text(i.to_string())),
|
|
other => unexpected_storage("int/serial", other),
|
|
},
|
|
// serial -> int: identity at the storage class level;
|
|
// the conversion drops auto-increment semantics from the
|
|
// column metadata. ADR-0017 §4.1.
|
|
(Serial, Int) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
|
|
other => unexpected_storage("serial", other),
|
|
},
|
|
// int -> serial: identity at the storage class level
|
|
// (both INTEGER); the conversion adds the auto-
|
|
// generated contract and (for non-PK columns) UNIQUE.
|
|
// Per-cell transformer is identity; the change-column
|
|
// orchestrator handles null auto-fill and uniqueness.
|
|
// ADR-0018 §8.
|
|
(Int, Serial) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
|
|
other => unexpected_storage("int", other),
|
|
},
|
|
|
|
// ---- Always-clean: bool source (stored as INTEGER 0/1) ----
|
|
(Bool, Int) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
|
|
other => unexpected_storage("bool", other),
|
|
},
|
|
(Bool, Real) => match value {
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)),
|
|
other => unexpected_storage("bool", other),
|
|
},
|
|
(Bool, Decimal) => match value {
|
|
Value::Integer(i) => {
|
|
CellOutcome::Clean(Value::Text(if *i == 0 { "0".into() } else { "1".into() }))
|
|
}
|
|
other => unexpected_storage("bool", other),
|
|
},
|
|
(Bool, Text) => match value {
|
|
// "true" / "false" matches the DSL boolean grammar
|
|
// (ADR-0014 §5), not raw 0/1 stringification.
|
|
Value::Integer(i) => CellOutcome::Clean(Value::Text(
|
|
if *i == 0 { "false".into() } else { "true".into() },
|
|
)),
|
|
other => unexpected_storage("bool", other),
|
|
},
|
|
|
|
// ---- Always-clean: text-backed source -> text ----
|
|
(Decimal | Date | DateTime | ShortId, Text) => match value {
|
|
Value::Text(s) => CellOutcome::Clean(Value::Text(s.clone())),
|
|
other => unexpected_storage("text-backed", other),
|
|
},
|
|
|
|
// ---- Always-clean: real -> text ----
|
|
(Real, Text) => match value {
|
|
Value::Real(r) => CellOutcome::Clean(Value::Text(format_real(*r))),
|
|
other => unexpected_storage("real", other),
|
|
},
|
|
|
|
// ---- Per-cell: real -> int ----
|
|
(Real, Int) => match value {
|
|
Value::Real(r) => real_to_int(*r),
|
|
other => unexpected_storage("real", other),
|
|
},
|
|
|
|
// ---- Per-cell: real -> decimal ----
|
|
// f64 -> shortest-round-trip decimal string. The f64 was
|
|
// already at the storage precision; representing it as a
|
|
// decimal preserves what we have. Always-clean in
|
|
// practice; the ADR's "lossy otherwise" provision exists
|
|
// for theoretical higher-precision sources we don't
|
|
// currently produce.
|
|
(Real, Decimal) => match value {
|
|
Value::Real(r) => {
|
|
if r.is_finite() {
|
|
CellOutcome::Clean(Value::Text(format_real(*r)))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{r}` is not finite"),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("real", other),
|
|
},
|
|
|
|
// ---- Per-cell: real -> bool ----
|
|
(Real, Bool) => match value {
|
|
Value::Real(r) => {
|
|
if *r == 0.0 {
|
|
CellOutcome::Clean(Value::Integer(0))
|
|
} else if *r == 1.0 {
|
|
CellOutcome::Clean(Value::Integer(1))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{}` is not 0 or 1", format_real(*r)),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("real", other),
|
|
},
|
|
|
|
// ---- Per-cell: decimal -> int ----
|
|
(Decimal, Int) => match value {
|
|
Value::Text(s) => decimal_text_to_int(s),
|
|
other => unexpected_storage("decimal", other),
|
|
},
|
|
|
|
// ---- Per-cell: decimal -> real ----
|
|
(Decimal, Real) => match value {
|
|
Value::Text(s) => decimal_text_to_real(s),
|
|
other => unexpected_storage("decimal", other),
|
|
},
|
|
|
|
// ---- Per-cell: decimal -> bool ----
|
|
(Decimal, Bool) => match value {
|
|
Value::Text(s) => {
|
|
let trimmed = s.trim();
|
|
// Accept "0" / "1" plus their decimal equivalents
|
|
// (e.g. "0.0", "1.0", "1.00") since those are the
|
|
// exact-zero / exact-one cases per ADR-0017 §3.
|
|
trimmed.parse::<f64>().map_or_else(
|
|
|_| CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not 0 or 1"),
|
|
},
|
|
|parsed| {
|
|
if parsed == 0.0 {
|
|
CellOutcome::Clean(Value::Integer(0))
|
|
} else if parsed == 1.0 {
|
|
CellOutcome::Clean(Value::Integer(1))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not 0 or 1"),
|
|
}
|
|
}
|
|
},
|
|
)
|
|
}
|
|
other => unexpected_storage("decimal", other),
|
|
},
|
|
|
|
// ---- Per-cell: int -> bool ----
|
|
(Int, Bool) => match value {
|
|
Value::Integer(i) => {
|
|
if *i == 0 {
|
|
CellOutcome::Clean(Value::Integer(0))
|
|
} else if *i == 1 {
|
|
CellOutcome::Clean(Value::Integer(1))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{i}` is not 0 or 1"),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("int", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> int ----
|
|
(Text, Int) => match value {
|
|
Value::Text(s) => text_to_int(s),
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> real ----
|
|
(Text, Real) => match value {
|
|
Value::Text(s) => match s.trim().parse::<f64>() {
|
|
Ok(r) if r.is_finite() => CellOutcome::Clean(Value::Real(r)),
|
|
Ok(_) => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a finite real number"),
|
|
},
|
|
Err(_) => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a valid real number"),
|
|
},
|
|
},
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> decimal ----
|
|
(Text, Decimal) => match value {
|
|
Value::Text(s) => {
|
|
if s.trim().parse::<f64>().is_ok() {
|
|
CellOutcome::Clean(Value::Text(s.clone()))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a valid decimal number"),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> bool ----
|
|
(Text, Bool) => match value {
|
|
Value::Text(s) => {
|
|
let lowered = s.trim().to_ascii_lowercase();
|
|
if lowered == "true" {
|
|
CellOutcome::Clean(Value::Integer(1))
|
|
} else if lowered == "false" {
|
|
CellOutcome::Clean(Value::Integer(0))
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not `true` or `false`"),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> date ----
|
|
(Text, Date) => match value {
|
|
Value::Text(s) => match crate::dsl::value::validate_date(s) {
|
|
Ok(()) => CellOutcome::Clean(Value::Text(s.clone())),
|
|
Err(_) => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a date in `YYYY-MM-DD` form"),
|
|
},
|
|
},
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> datetime ----
|
|
(Text, DateTime) => match value {
|
|
Value::Text(s) => {
|
|
if crate::dsl::value::validate_datetime(s).is_ok() {
|
|
CellOutcome::Clean(Value::Text(s.clone()))
|
|
} else if crate::dsl::value::validate_date(s).is_ok() {
|
|
let promoted = format!("{s}T00:00:00Z");
|
|
CellOutcome::Lossy {
|
|
new: Value::Text(promoted),
|
|
reason: "bare date promoted to `T00:00:00Z`".to_string(),
|
|
}
|
|
} else {
|
|
CellOutcome::Incompatible {
|
|
reason: format!(
|
|
"`{s}` is not a datetime in `YYYY-MM-DDTHH:MM:SS` form"
|
|
),
|
|
}
|
|
}
|
|
}
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// ---- Per-cell: text -> shortid ----
|
|
(Text, ShortId) => match value {
|
|
Value::Text(s) => match shortid::validate(s) {
|
|
Ok(()) => CellOutcome::Clean(Value::Text(s.clone())),
|
|
Err(message) => CellOutcome::Incompatible { reason: message },
|
|
},
|
|
other => unexpected_storage("text", other),
|
|
},
|
|
|
|
// Pairs caught by `static_refusal` before reaching here.
|
|
_ => CellOutcome::Incompatible {
|
|
reason: format!("no transformer defined for `{src}` to `{target}`"),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Map a user-facing type to the column alignment used in
|
|
/// diagnostic tables (ADR-0017 §7). Numeric types right-align,
|
|
/// everything else left-aligns; matches ADR-0016 §2.
|
|
#[must_use]
|
|
pub const fn is_in_matrix_alignment(ty: Type) -> Alignment {
|
|
numeric_alignment_for(ty)
|
|
}
|
|
|
|
/// Whether the transformed value is materially different from
|
|
/// the source (i.e. counts as a "client-side transformation"
|
|
/// for the §6 note).
|
|
///
|
|
/// Two values are considered the same only when both are NULL.
|
|
/// Different storage classes (e.g. `Text("42")` vs
|
|
/// `Integer(42)`) count as transformations even though the
|
|
/// human reading is identical, because the database now stores
|
|
/// a different shape.
|
|
#[must_use]
|
|
pub fn is_non_identity(original: &Value, transformed: &Value) -> bool {
|
|
match (original, transformed) {
|
|
(Value::Null, Value::Null) => false,
|
|
(a, b) => !values_equal(a, b),
|
|
}
|
|
}
|
|
|
|
fn values_equal(a: &Value, b: &Value) -> bool {
|
|
match (a, b) {
|
|
(Value::Null, Value::Null) => true,
|
|
(Value::Integer(x), Value::Integer(y)) => x == y,
|
|
(Value::Real(x), Value::Real(y)) => x.to_bits() == y.to_bits(),
|
|
(Value::Text(x), Value::Text(y)) => x == y,
|
|
(Value::Blob(x), Value::Blob(y)) => x == y,
|
|
_ => false,
|
|
}
|
|
}
|
|
|
|
fn real_to_int(r: f64) -> CellOutcome {
|
|
if !r.is_finite() {
|
|
return CellOutcome::Incompatible {
|
|
reason: format!("`{r}` is not finite"),
|
|
};
|
|
}
|
|
let i64_min = i64::MIN as f64;
|
|
let i64_max = i64::MAX as f64;
|
|
if r < i64_min || r > i64_max {
|
|
return CellOutcome::Incompatible {
|
|
reason: format!("`{}` is out of int range", format_real(r)),
|
|
};
|
|
}
|
|
if r.fract() == 0.0 {
|
|
CellOutcome::Clean(Value::Integer(r as i64))
|
|
} else {
|
|
let truncated = r.trunc() as i64;
|
|
let discarded = r - r.trunc();
|
|
CellOutcome::Lossy {
|
|
new: Value::Integer(truncated),
|
|
reason: format!(
|
|
"truncated; would discard {}",
|
|
format_real(discarded)
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn decimal_text_to_int(s: &str) -> CellOutcome {
|
|
let trimmed = s.trim();
|
|
if let Ok(i) = trimmed.parse::<i64>() {
|
|
return CellOutcome::Clean(Value::Integer(i));
|
|
}
|
|
match trimmed.parse::<f64>() {
|
|
Ok(r) if r.is_finite() => real_to_int(r),
|
|
_ => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a valid number"),
|
|
},
|
|
}
|
|
}
|
|
|
|
fn decimal_text_to_real(s: &str) -> CellOutcome {
|
|
match s.trim().parse::<f64>() {
|
|
Ok(r) if r.is_finite() => {
|
|
// If the f64's shortest round-trip form matches the
|
|
// source string (after canonicalising whitespace), we
|
|
// haven't lost precision. Otherwise this is a lossy
|
|
// conversion (typical for long decimals).
|
|
let canonical = format_real(r);
|
|
if canonical == s.trim() || equal_after_normalising(&canonical, s.trim()) {
|
|
CellOutcome::Clean(Value::Real(r))
|
|
} else {
|
|
CellOutcome::Lossy {
|
|
new: Value::Real(r),
|
|
reason: format!("precision reduced from `{}` to `{}`", s.trim(), canonical),
|
|
}
|
|
}
|
|
}
|
|
_ => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a valid number"),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Compare two numeric strings while ignoring notation differences
/// that carry no information: surrounding whitespace and trailing
/// zeros after the decimal point (e.g. "3" vs "3.0", "3.10" vs
/// "3.1").
fn equal_after_normalising(a: &str, b: &str) -> bool {
    /// Strip whitespace and trailing fractional zeros; drop the dot
    /// altogether when nothing is left after it.
    fn canonical_form(raw: &str) -> String {
        let raw = raw.trim();
        raw.split_once('.').map_or_else(
            || raw.to_string(),
            |(whole, fraction)| {
                let significant = fraction.trim_end_matches('0');
                if significant.is_empty() {
                    whole.to_string()
                } else {
                    format!("{whole}.{significant}")
                }
            },
        )
    }

    canonical_form(a) == canonical_form(b)
}
|
|
|
|
fn text_to_int(s: &str) -> CellOutcome {
|
|
let trimmed = s.trim();
|
|
if let Ok(i) = trimmed.parse::<i64>() {
|
|
return CellOutcome::Clean(Value::Integer(i));
|
|
}
|
|
match trimmed.parse::<f64>() {
|
|
Ok(r) if r.is_finite() => match real_to_int(r) {
|
|
// For text -> int via real: a clean integer parse
|
|
// of the float (e.g. "3.0" -> 3) still counts as
|
|
// lossy because the source carried decimal notation
|
|
// the int can't preserve. Promote Clean -> Lossy.
|
|
CellOutcome::Clean(value) => CellOutcome::Lossy {
|
|
new: value,
|
|
reason: format!("parsed as real then narrowed; source was `{s}`"),
|
|
},
|
|
other => other,
|
|
},
|
|
_ => CellOutcome::Incompatible {
|
|
reason: format!("`{s}` is not a valid int"),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Render an `f64` using Rust's default `Display` formatting,
/// which yields a string that parses back to the same `f64`.
///
/// Whole numbers come out without a dot ("3" rather than "3.0").
/// That is acceptable for the two uses here, storing into a
/// TEXT-backed decimal column and rendering in a diagnostic, and
/// it matches what a user would type for a whole-number literal.
fn format_real(r: f64) -> String {
    r.to_string()
}
|
|
|
|
fn unexpected_storage(label: &str, value: &Value) -> CellOutcome {
|
|
CellOutcome::Incompatible {
|
|
reason: format!(
|
|
"internal: cell stored unexpectedly for `{label}` source ({value:?})"
|
|
),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
#[allow(clippy::approx_constant)] // 3.14 is a convenient lossy fixture, not PI
|
|
mod tests {
|
|
use super::*;
|
|
use pretty_assertions::assert_eq;
|
|
|
|
fn int(i: i64) -> Value {
|
|
Value::Integer(i)
|
|
}
|
|
fn real(r: f64) -> Value {
|
|
Value::Real(r)
|
|
}
|
|
fn text(s: &str) -> Value {
|
|
Value::Text(s.to_string())
|
|
}
|
|
|
|
// ---- Static refusals ----
|
|
|
|
#[test]
|
|
fn same_type_is_statically_refused() {
|
|
for ty in Type::all() {
|
|
assert!(static_refusal(*ty, *ty).is_some(), "{ty:?}");
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn non_int_sources_to_serial_are_statically_refused() {
|
|
// ADR-0018 §8: only `int → serial` is allowed directly.
|
|
// Other sources have to route via int. (Same-type
|
|
// `serial → serial` is the no-op identity refusal.)
|
|
for &src in Type::all() {
|
|
if matches!(src, Type::Int | Type::Serial) {
|
|
continue;
|
|
}
|
|
assert!(static_refusal(src, Type::Serial).is_some(), "{src:?}");
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn int_to_serial_is_allowed() {
|
|
// ADR-0018 §8: `int → serial` joins the matrix as
|
|
// always-clean identity. Non-null values pass through.
|
|
assert!(static_refusal(Type::Int, Type::Serial).is_none());
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Serial, &int(42)),
|
|
CellOutcome::Clean(int(42))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn anything_involving_blob_is_statically_refused() {
|
|
for &other in Type::all() {
|
|
if other != Type::Blob {
|
|
assert!(static_refusal(Type::Blob, other).is_some(), "{other:?}");
|
|
assert!(static_refusal(other, Type::Blob).is_some(), "{other:?}");
|
|
}
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn date_to_datetime_direct_is_statically_refused() {
|
|
assert!(static_refusal(Type::Date, Type::DateTime).is_some());
|
|
assert!(static_refusal(Type::DateTime, Type::Date).is_some());
|
|
}
|
|
|
|
#[test]
|
|
fn cross_domain_unmapped_pair_is_statically_refused() {
|
|
for (src, target) in [
|
|
(Type::Bool, Type::Date),
|
|
(Type::Bool, Type::DateTime),
|
|
(Type::Bool, Type::ShortId),
|
|
(Type::Real, Type::DateTime),
|
|
(Type::Int, Type::ShortId),
|
|
(Type::Int, Type::Date),
|
|
(Type::Date, Type::Int),
|
|
(Type::ShortId, Type::Int),
|
|
] {
|
|
assert!(static_refusal(src, target).is_some(), "{src:?} -> {target:?}");
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn matrix_pairs_pass_static_check() {
|
|
// A representative subset; all-pairs coverage is via
|
|
// is_in_matrix's match.
|
|
let pairs = [
|
|
(Type::Int, Type::Real),
|
|
(Type::Int, Type::Text),
|
|
(Type::Serial, Type::Int),
|
|
(Type::Serial, Type::Text),
|
|
(Type::Real, Type::Int),
|
|
(Type::Text, Type::ShortId),
|
|
(Type::Text, Type::DateTime),
|
|
(Type::Bool, Type::Text),
|
|
];
|
|
for (s, t) in pairs {
|
|
assert!(static_refusal(s, t).is_none(), "{s:?} -> {t:?}");
|
|
}
|
|
}
|
|
|
|
// ---- NULL ----
|
|
|
|
#[test]
|
|
fn null_passes_through_clean_for_any_pair() {
|
|
let pairs = [
|
|
(Type::Int, Type::Text),
|
|
(Type::Real, Type::Int),
|
|
(Type::Text, Type::Date),
|
|
(Type::Bool, Type::Real),
|
|
];
|
|
for (s, t) in pairs {
|
|
assert_eq!(transform_cell(s, t, &Value::Null), CellOutcome::Clean(Value::Null));
|
|
}
|
|
}
|
|
|
|
// ---- Always-clean: int / serial source ----
|
|
|
|
#[test]
|
|
fn int_to_real_is_clean() {
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Real, &int(7)),
|
|
CellOutcome::Clean(real(7.0))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn int_to_text_is_clean() {
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Text, &int(42)),
|
|
CellOutcome::Clean(text("42"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn int_to_decimal_is_clean() {
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Decimal, &int(-12)),
|
|
CellOutcome::Clean(text("-12"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn serial_to_real_or_text_is_clean() {
|
|
// The (Int | Serial, Real | Decimal | Text) transformer
|
|
// handles serial sources symmetrically with int.
|
|
assert_eq!(
|
|
transform_cell(Type::Serial, Type::Real, &int(3)),
|
|
CellOutcome::Clean(real(3.0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Serial, Type::Text, &int(3)),
|
|
CellOutcome::Clean(text("3"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn serial_to_int_is_identity_clean() {
|
|
// ADR-0017 §4.1's canonical "drop auto-increment" case:
|
|
// storage class is unchanged, value passes through.
|
|
assert_eq!(
|
|
transform_cell(Type::Serial, Type::Int, &int(42)),
|
|
CellOutcome::Clean(int(42))
|
|
);
|
|
assert!(static_refusal(Type::Serial, Type::Int).is_none());
|
|
}
|
|
|
|
// ---- Always-clean: bool source ----
|
|
|
|
#[test]
|
|
fn bool_to_int_is_clean_for_zero_and_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Int, &int(0)),
|
|
CellOutcome::Clean(int(0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Int, &int(1)),
|
|
CellOutcome::Clean(int(1))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn bool_to_text_uses_dsl_keywords() {
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Text, &int(0)),
|
|
CellOutcome::Clean(text("false"))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Text, &int(1)),
|
|
CellOutcome::Clean(text("true"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn bool_to_decimal_yields_zero_or_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Decimal, &int(0)),
|
|
CellOutcome::Clean(text("0"))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Decimal, &int(1)),
|
|
CellOutcome::Clean(text("1"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn bool_to_real_yields_zero_or_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Real, &int(0)),
|
|
CellOutcome::Clean(real(0.0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Bool, Type::Real, &int(1)),
|
|
CellOutcome::Clean(real(1.0))
|
|
);
|
|
}
|
|
|
|
// ---- Always-clean: text-backed source -> text ----
|
|
|
|
#[test]
|
|
fn decimal_to_text_passes_through() {
|
|
assert_eq!(
|
|
transform_cell(Type::Decimal, Type::Text, &text("3.14")),
|
|
CellOutcome::Clean(text("3.14"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn date_to_text_passes_through() {
|
|
assert_eq!(
|
|
transform_cell(Type::Date, Type::Text, &text("2025-01-15")),
|
|
CellOutcome::Clean(text("2025-01-15"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn datetime_to_text_passes_through() {
|
|
assert_eq!(
|
|
transform_cell(Type::DateTime, Type::Text, &text("2025-01-15T14:30:00")),
|
|
CellOutcome::Clean(text("2025-01-15T14:30:00"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn shortid_to_text_passes_through() {
|
|
assert_eq!(
|
|
transform_cell(Type::ShortId, Type::Text, &text("23456789Ab")),
|
|
CellOutcome::Clean(text("23456789Ab"))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn real_to_text_uses_shortest_round_trip() {
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Text, &real(3.14)),
|
|
CellOutcome::Clean(text("3.14"))
|
|
);
|
|
// Whole numbers print without trailing zero, matching
|
|
// Rust's default Display.
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Text, &real(3.0)),
|
|
CellOutcome::Clean(text("3"))
|
|
);
|
|
}
|
|
|
|
// ---- Per-cell: real -> int ----
|
|
|
|
#[test]
|
|
fn real_to_int_clean_for_whole_numbers() {
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Int, &real(3.0)),
|
|
CellOutcome::Clean(int(3))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Int, &real(-7.0)),
|
|
CellOutcome::Clean(int(-7))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn real_to_int_lossy_for_fractional() {
|
|
match transform_cell(Type::Real, Type::Int, &real(3.14)) {
|
|
CellOutcome::Lossy { new, reason } => {
|
|
assert_eq!(new, int(3));
|
|
assert!(reason.contains("truncated"), "{reason}");
|
|
}
|
|
other => panic!("unexpected: {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn real_to_int_incompatible_for_nan_or_inf() {
|
|
assert!(matches!(
|
|
transform_cell(Type::Real, Type::Int, &real(f64::NAN)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
assert!(matches!(
|
|
transform_cell(Type::Real, Type::Int, &real(f64::INFINITY)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
}
|
|
|
|
// ---- Per-cell: real -> bool ----
|
|
|
|
#[test]
|
|
fn real_to_bool_clean_for_zero_and_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Bool, &real(0.0)),
|
|
CellOutcome::Clean(int(0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Bool, &real(1.0)),
|
|
CellOutcome::Clean(int(1))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn real_to_bool_incompatible_for_other_values() {
|
|
assert!(matches!(
|
|
transform_cell(Type::Real, Type::Bool, &real(0.5)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
assert!(matches!(
|
|
transform_cell(Type::Real, Type::Bool, &real(2.0)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
}
|
|
|
|
// ---- Per-cell: real -> decimal ----
|
|
|
|
#[test]
|
|
fn real_to_decimal_is_clean_for_finite() {
|
|
assert_eq!(
|
|
transform_cell(Type::Real, Type::Decimal, &real(3.14)),
|
|
CellOutcome::Clean(text("3.14"))
|
|
);
|
|
}
|
|
|
|
// ---- Per-cell: decimal -> int ----
|
|
|
|
#[test]
|
|
fn decimal_to_int_clean_for_integer_values() {
|
|
assert_eq!(
|
|
transform_cell(Type::Decimal, Type::Int, &text("42")),
|
|
CellOutcome::Clean(int(42))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn decimal_to_int_lossy_for_fractional() {
|
|
match transform_cell(Type::Decimal, Type::Int, &text("3.14")) {
|
|
CellOutcome::Lossy { new, reason } => {
|
|
assert_eq!(new, int(3));
|
|
assert!(reason.contains("truncated"), "{reason}");
|
|
}
|
|
other => panic!("unexpected: {other:?}"),
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn decimal_to_int_incompatible_for_garbage() {
|
|
assert!(matches!(
|
|
transform_cell(Type::Decimal, Type::Int, &text("notanumber")),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
}
|
|
|
|
// ---- Per-cell: decimal -> bool ----
|
|
|
|
#[test]
|
|
fn decimal_to_bool_clean_for_exact_zero_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Decimal, Type::Bool, &text("0")),
|
|
CellOutcome::Clean(int(0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Decimal, Type::Bool, &text("1.0")),
|
|
CellOutcome::Clean(int(1))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn decimal_to_bool_incompatible_for_other() {
|
|
assert!(matches!(
|
|
transform_cell(Type::Decimal, Type::Bool, &text("0.5")),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
}
|
|
|
|
// ---- Per-cell: int -> bool ----
|
|
|
|
#[test]
|
|
fn int_to_bool_clean_for_zero_one() {
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Bool, &int(0)),
|
|
CellOutcome::Clean(int(0))
|
|
);
|
|
assert_eq!(
|
|
transform_cell(Type::Int, Type::Bool, &int(1)),
|
|
CellOutcome::Clean(int(1))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn int_to_bool_incompatible_for_other() {
|
|
assert!(matches!(
|
|
transform_cell(Type::Int, Type::Bool, &int(2)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
assert!(matches!(
|
|
transform_cell(Type::Int, Type::Bool, &int(-1)),
|
|
CellOutcome::Incompatible { .. }
|
|
));
|
|
}
|
|
|
|
// ---- Per-cell: text -> int ----
|
|
|
|
#[test]
|
|
fn text_to_int_clean_for_integer_string() {
|
|
assert_eq!(
|
|
transform_cell(Type::Text, Type::Int, &text("42")),
|
|
CellOutcome::Clean(int(42))
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn text_to_int_lossy_via_real_for_fractional_string() {
|
|
match transform_cell(Type::Text, Type::Int, &text("3.14")) {
|
|
CellOutcome::Lossy { new, .. } => assert_eq!(new, int(3)),
|
|
other => panic!("unexpected: {other:?}"),
|
|
}
|
|
}
|
|
|
|
/// `text -> int` on "3.0" must be lossy even though the number is whole.
#[test]
fn text_to_int_lossy_for_real_lookalike_whole_number() {
    // ADR-0017 §3: text "3.0" -> int goes via real-then-narrow and is
    // lossy, because the decimal notation carried by the source is
    // discarded by the int representation.
    let narrowed = match transform_cell(Type::Text, Type::Int, &text("3.0")) {
        CellOutcome::Lossy { new, .. } => new,
        other => panic!("unexpected: {other:?}"),
    };
    assert_eq!(narrowed, int(3));
}
|
|
|
|
/// `text -> int` must refuse non-numeric text and the empty string.
#[test]
fn text_to_int_incompatible_for_garbage() {
    for garbage in ["abc", ""] {
        let outcome = transform_cell(Type::Text, Type::Int, &text(garbage));
        assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
    }
}
|
|
|
|
// ---- Per-cell: text -> real / decimal ----
|
|
|
|
/// `text -> real` is clean for a numeric string and yields the parsed real.
#[test]
fn text_to_real_clean_for_numeric_string() {
    let outcome = transform_cell(Type::Text, Type::Real, &text("3.14"));
    let expected = CellOutcome::Clean(real(3.14));
    assert_eq!(outcome, expected);
}
|
|
|
|
/// `text -> real` must refuse text that is not a number.
#[test]
fn text_to_real_incompatible_for_garbage() {
    let outcome = transform_cell(Type::Text, Type::Real, &text("xyz"));
    assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
}
|
|
|
|
/// `text -> decimal` is clean for a numeric string; the expected new value
/// is the same text, "3.14".
#[test]
fn text_to_decimal_clean_for_numeric_string() {
    let outcome = transform_cell(Type::Text, Type::Decimal, &text("3.14"));
    let expected = CellOutcome::Clean(text("3.14"));
    assert_eq!(outcome, expected);
}
|
|
|
|
/// `text -> decimal` must refuse text that is not a number.
#[test]
fn text_to_decimal_incompatible_for_garbage() {
    let outcome = transform_cell(Type::Text, Type::Decimal, &text("xyz"));
    assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
}
|
|
|
|
// ---- Per-cell: text -> bool ----
|
|
|
|
/// `text -> bool` accepts the words "true" / "false" in the listed
/// casings, mapping them to the integers 1 and 0 respectively.
#[test]
fn text_to_bool_accepts_true_false_case_insensitively() {
    // (source spelling, expected integer); truthy spellings are checked
    // first, then falsy ones. The spelling is the failure message.
    let spellings = [
        ("true", 1),
        ("True", 1),
        ("TRUE", 1),
        ("tRue", 1),
        ("false", 0),
        ("False", 0),
        ("FALSE", 0),
    ];
    for (s, expected) in spellings {
        let outcome = transform_cell(Type::Text, Type::Bool, &text(s));
        assert_eq!(outcome, CellOutcome::Clean(int(expected)), "{s}");
    }
}
|
|
|
|
/// `text -> bool` must not treat the strings "0" and "1" as booleans.
#[test]
fn text_to_bool_refuses_zero_one_strings() {
    // ADR-0017 §3: "no implicit 0/1 parse — matches the DSL
    // boolean grammar."
    for digit in ["0", "1"] {
        let outcome = transform_cell(Type::Text, Type::Bool, &text(digit));
        assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
    }
}
|
|
|
|
// ---- Per-cell: text -> date ----
|
|
|
|
/// `text -> date` is clean for a `YYYY-MM-DD` string and keeps the text as-is.
#[test]
fn text_to_date_clean_for_iso() {
    let outcome = transform_cell(Type::Text, Type::Date, &text("2025-01-15"));
    let expected = CellOutcome::Clean(text("2025-01-15"));
    assert_eq!(outcome, expected);
}
|
|
|
|
/// `text -> date` must refuse a slash-separated date such as "2025/01/15".
#[test]
fn text_to_date_incompatible_for_other_formats() {
    let outcome = transform_cell(Type::Text, Type::Date, &text("2025/01/15"));
    assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
}
|
|
|
|
// ---- Per-cell: text -> datetime ----
|
|
|
|
/// `text -> datetime` is clean for "2025-01-15T14:30:00" and keeps the
/// text unchanged.
#[test]
fn text_to_datetime_clean_for_iso_datetime() {
    let outcome = transform_cell(Type::Text, Type::DateTime, &text("2025-01-15T14:30:00"));
    let expected = CellOutcome::Clean(text("2025-01-15T14:30:00"));
    assert_eq!(outcome, expected);
}
|
|
|
|
/// `text -> datetime` on a bare date must be lossy: the new value is the
/// date at "T00:00:00Z" and the reason mentions "promoted".
#[test]
fn text_to_datetime_lossy_for_bare_date() {
    // Destructure the Lossy variant up front; any other outcome fails
    // the test with the outcome's Debug rendering.
    let (widened, reason) = match transform_cell(Type::Text, Type::DateTime, &text("2025-01-15")) {
        CellOutcome::Lossy { new, reason } => (new, reason),
        other => panic!("unexpected: {other:?}"),
    };
    assert_eq!(widened, text("2025-01-15T00:00:00Z"));
    assert!(reason.contains("promoted"), "{reason}");
}
|
|
|
|
/// `text -> datetime` must refuse text that is not a date or datetime.
#[test]
fn text_to_datetime_incompatible_for_garbage() {
    let outcome = transform_cell(Type::Text, Type::DateTime, &text("not a date"));
    assert!(matches!(outcome, CellOutcome::Incompatible { .. }));
}
|
|
|
|
// ---- Per-cell: text -> shortid ----
|
|
|
|
/// `text -> shortid` is clean for the string "23456789Ab" and keeps the
/// text unchanged.
#[test]
fn text_to_shortid_clean_for_valid_shortid() {
    let outcome = transform_cell(Type::Text, Type::ShortId, &text("23456789Ab"));
    let expected = CellOutcome::Clean(text("23456789Ab"));
    assert_eq!(outcome, expected);
}
|
|
|
|
/// `text -> shortid` must refuse strings that are not valid shortids.
#[test]
fn text_to_shortid_incompatible_for_invalid() {
    // Too long.
    let too_long = transform_cell(Type::Text, Type::ShortId, &text("toolong_xyz_more"));
    assert!(matches!(too_long, CellOutcome::Incompatible { .. }));

    // Ambiguous chars (per shortid alphabet).
    let ambiguous = transform_cell(Type::Text, Type::ShortId, &text("0OIl234567"));
    assert!(matches!(ambiguous, CellOutcome::Incompatible { .. }));
}
|
|
|
|
// ---- is_non_identity ----
|
|
|
|
/// A null cell that stays null is not counted as a non-identity change.
#[test]
fn null_to_null_is_identity() {
    let (before, after) = (Value::Null, Value::Null);
    assert!(!is_non_identity(&before, &after));
}
|
|
|
|
/// A value whose storage class changes is a non-identity change, even
/// when it reads the same to a human.
#[test]
fn storage_class_change_counts_as_non_identity() {
    // Same human reading, different storage class: counts.
    let text_became_int = is_non_identity(&text("42"), &int(42));
    assert!(text_became_int);

    let int_became_real = is_non_identity(&int(3), &real(3.0));
    assert!(int_became_real);
}
|
|
|
|
/// Equal values of the same kind (text/text, int/int) are identity changes.
#[test]
fn identical_value_is_identity() {
    let text_changed = is_non_identity(&text("hi"), &text("hi"));
    assert!(!text_changed);

    let int_changed = is_non_identity(&int(42), &int(42));
    assert!(!int_changed);
}
|
|
}
|