Files
rdbms-playground/src/type_change.rs
T
claude@clouddev1 5bb0a147f0 ADR-0018 implementation: auto-fill contracts for serial and shortid
Generalises serial and shortid beyond their previous restricted
forms:

- `serial` is no longer restricted to single-column PK. Non-PK
  serial columns get an emitted UNIQUE constraint and use
  application-side MAX(col)+1 at INSERT time (rowid alias still
  drives the PK case for free; per ADR-0010 worker-thread
  serialisation, the read-then-insert sequence is safe).
- `shortid` columns auto-fill existing null cells when the
  column is materialised — `add column T: x (shortid)` on a
  non-empty table no longer leaves rows in a not-really-valid
  NULL state.
- `int -> serial` joins the type-change matrix as always-clean
  identity (closes the asymmetry vs `text -> shortid`); other
  sources are refused with a route-via-int hint.
- `change column T: x (serial|shortid)` fills null source
  cells with sequence / generated values in the same rebuild
  transaction.

Internal infrastructure:

- ReadColumn gains `unique: bool`; read_schema detects single-
  column UNIQUE indexes via pragma_index_list /
  pragma_index_info; schema_to_ddl emits inline UNIQUE for
  non-PK columns.
- ColumnSchema (persistence) gains `unique: bool` so the flag
  survives YAML round-trip and rebuild-from-text reconstructs
  it faithfully — preserves the "serial -> int leaves UNIQUE
  in place" promise across save/load cycles.
- ChangeColumnTypeResult.client_side now carries `auto_filled`
  + `auto_fill_kind` alongside `transformed` + `lossy`; the
  app handler renders separate note lines when both apply.
- AddColumnResult is a new return type carrying pre-rendered
  [client-side] note lines for the auto-fill paths.

Tests: 519 -> 534 (+15). Clippy clean.
2026-05-08 14:32:19 +00:00

1178 lines
39 KiB
Rust

//! Per-cell type-change transformer matrix (ADR-0017).
//!
//! When a `change column T: c (newtype)` invocation runs, every
//! cell of `c` is classified into one of three outcomes:
//!
//! - **Clean** — the transformer produced a value the new type
//! accepts without information loss.
//! - **Lossy** — valid output, but some property (precision,
//! fractional part, time component, …) was discarded.
//! - **Incompatible** — no transformer for this pair can produce
//! a valid output for this cell.
//!
//! The classification is a property of (source type, target type,
//! cell value) — the same pair yields different outcomes for
//! different cell values. NULL passes through any transformer
//! unchanged (always Clean).
//!
//! Pairs not present in the matrix are statically refused via
//! [`static_refusal`] before any per-cell pass runs. Same-type
//! identity, any non-`int` source → `serial` (ADR-0018 §8 admits
//! only `int` → `serial`), anything ↔ `blob`, and `date` ↔
//! `datetime` direct are all statically refused.
use rusqlite::types::Value;
use crate::dsl::Type;
use crate::dsl::shortid;
use crate::output_render::{Alignment, numeric_alignment_for};
/// Outcome of attempting to transform one cell's value to fit a
/// new column type.
///
/// Produced per cell by [`transform_cell`]; the same (source,
/// target) pair can yield different variants for different cell
/// values.
#[derive(Debug, Clone, PartialEq)]
pub enum CellOutcome {
/// Transformer produced a value the new type accepts without
/// information loss. The payload is the value to store.
Clean(Value),
/// Valid output, but some property of the original cell was
/// discarded. `new` is the value to store; `reason` is a
/// human-readable description of what was lost.
Lossy { new: Value, reason: String },
/// No transformer for this pair can produce a valid value
/// for this cell. `reason` is a human-readable explanation.
Incompatible { reason: String },
}
/// Decide up front whether a column type change from `src` to
/// `target` is refused outright, without inspecting any cell.
///
/// Returns `Some(reason)` — a user-facing sentence explaining the
/// refusal — or `None` when the pair is in the matrix and the
/// per-cell pass ([`transform_cell`]) should classify each cell.
///
/// The checks run in this order and the first match wins:
///
/// 1. same-type identity (`src == target`);
/// 2. any source other than `int` targeting `serial` (ADR-0018 §8
///    admits only `int → serial` directly);
/// 3. `blob` on either side;
/// 4. `date` ↔ `datetime` direct;
/// 5. any remaining pair absent from the matrix
///    (e.g. `bool` → `date`).
#[must_use]
pub fn static_refusal(src: Type, target: Type) -> Option<String> {
    let reason = if src == target {
        format!("column is already `{src}`.")
    } else if target == Type::Serial && src != Type::Int {
        // ADR-0018 §8: the user routes every other source via
        // `int` first.
        format!(
            "to convert from `{src}` to `serial`, change the column to `int` \
             first; only `int → serial` is supported directly."
        )
    } else if src == Type::Blob || target == Type::Blob {
        format!(
            "conversion between `{src}` and `{target}` is not supported \
             in this version."
        )
    } else if matches!(
        (src, target),
        (Type::Date, Type::DateTime) | (Type::DateTime, Type::Date)
    ) {
        format!(
            "direct `{src}` to `{target}` conversion is not supported in \
             this version; route via `text` if needed."
        )
    } else if !is_in_matrix(src, target) {
        format!("no conversion is defined from `{src}` to `{target}`.")
    } else {
        // Pair is in the matrix: defer to per-cell classification.
        return None;
    };
    Some(reason)
}
const fn is_in_matrix(src: Type, target: Type) -> bool {
use Type::{
Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text,
};
matches!(
(src, target),
// Always-clean transformers
(Int | Serial, Real | Decimal | Text)
// serial -> int: ADR-0017 §4.1's canonical allowed
// case ("removing auto-increment while preserving
// stored values"). Storage stays INTEGER; we treat
// it as an identity transformer.
| (Serial, Int)
// int -> serial: ADR-0018 §8. Storage stays
// INTEGER, the metadata flips to "auto-generated"
// and the column gains UNIQUE if non-PK. The
// matrix entry is identity at the value level;
// uniqueness and auto-fill of nulls happen at the
// change-column orchestration layer, not in the
// per-cell transformer.
| (Int, Serial)
| (Bool, Int | Real | Decimal | Text)
| (Decimal | Date | DateTime | ShortId | Real, Text)
// Per-cell-classified
| (Real, Int | Decimal | Bool)
| (Decimal, Int | Real | Bool)
| (Int, Bool)
| (Text, Int | Real | Decimal | Bool | Date | DateTime | ShortId)
)
}
/// Transform a single cell's value through the matrix.
///
/// Caller is responsible for first checking [`static_refusal`];
/// calling this with a statically-refused pair returns
/// `Incompatible` for every non-NULL value. `value` is the cell
/// as read from SQLite — its variant is expected to match the
/// source type's storage class (e.g. `Type::Real` ⇒
/// `Value::Real`); a mismatch yields an `Incompatible` outcome
/// with an "internal" reason rather than a panic.
///
/// `Value::Null` is always returned as `Clean(Null)`, whatever
/// the pair.
#[must_use]
pub fn transform_cell(src: Type, target: Type, value: &Value) -> CellOutcome {
    use Type::{
        Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text,
    };
    if matches!(value, Value::Null) {
        return CellOutcome::Clean(Value::Null);
    }
    match (src, target) {
        // ---- Always-clean: int / serial source ----
        (Int | Serial, Real) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)),
            other => unexpected_storage("int/serial", other),
        },
        (Int | Serial, Decimal | Text) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Text(i.to_string())),
            other => unexpected_storage("int/serial", other),
        },
        // serial -> int: identity at the storage class level;
        // the conversion drops auto-increment semantics from the
        // column metadata. ADR-0017 §4.1.
        (Serial, Int) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
            other => unexpected_storage("serial", other),
        },
        // int -> serial: identity at the storage class level
        // (both INTEGER); the conversion adds the auto-
        // generated contract and (for non-PK columns) UNIQUE.
        // Per-cell transformer is identity; the change-column
        // orchestrator handles null auto-fill and uniqueness.
        // ADR-0018 §8.
        (Int, Serial) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
            other => unexpected_storage("int", other),
        },
        // ---- Always-clean: bool source (stored as INTEGER 0/1) ----
        (Bool, Int) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)),
            other => unexpected_storage("bool", other),
        },
        (Bool, Real) => match value {
            Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)),
            other => unexpected_storage("bool", other),
        },
        (Bool, Decimal) => match value {
            Value::Integer(i) => {
                CellOutcome::Clean(Value::Text(if *i == 0 { "0".into() } else { "1".into() }))
            }
            other => unexpected_storage("bool", other),
        },
        (Bool, Text) => match value {
            // "true" / "false" matches the DSL boolean grammar
            // (ADR-0014 §5), not raw 0/1 stringification.
            Value::Integer(i) => CellOutcome::Clean(Value::Text(
                if *i == 0 { "false".into() } else { "true".into() },
            )),
            other => unexpected_storage("bool", other),
        },
        // ---- Always-clean: text-backed source -> text ----
        (Decimal | Date | DateTime | ShortId, Text) => match value {
            Value::Text(s) => CellOutcome::Clean(Value::Text(s.clone())),
            other => unexpected_storage("text-backed", other),
        },
        // ---- Always-clean: real -> text ----
        (Real, Text) => match value {
            Value::Real(r) => CellOutcome::Clean(Value::Text(format_real(*r))),
            other => unexpected_storage("real", other),
        },
        // ---- Per-cell: real -> int ----
        (Real, Int) => match value {
            Value::Real(r) => real_to_int(*r),
            other => unexpected_storage("real", other),
        },
        // ---- Per-cell: real -> decimal ----
        // f64 -> shortest-round-trip decimal string. The f64 was
        // already at the storage precision; representing it as a
        // decimal preserves what we have. Always-clean in
        // practice; the ADR's "lossy otherwise" provision exists
        // for theoretical higher-precision sources we don't
        // currently produce.
        (Real, Decimal) => match value {
            Value::Real(r) => {
                if r.is_finite() {
                    CellOutcome::Clean(Value::Text(format_real(*r)))
                } else {
                    CellOutcome::Incompatible {
                        reason: format!("`{r}` is not finite"),
                    }
                }
            }
            other => unexpected_storage("real", other),
        },
        // ---- Per-cell: real -> bool ----
        (Real, Bool) => match value {
            Value::Real(r) => {
                if *r == 0.0 {
                    CellOutcome::Clean(Value::Integer(0))
                } else if *r == 1.0 {
                    CellOutcome::Clean(Value::Integer(1))
                } else {
                    CellOutcome::Incompatible {
                        reason: format!("`{}` is not 0 or 1", format_real(*r)),
                    }
                }
            }
            other => unexpected_storage("real", other),
        },
        // ---- Per-cell: decimal -> int ----
        (Decimal, Int) => match value {
            Value::Text(s) => decimal_text_to_int(s),
            other => unexpected_storage("decimal", other),
        },
        // ---- Per-cell: decimal -> real ----
        (Decimal, Real) => match value {
            Value::Text(s) => decimal_text_to_real(s),
            other => unexpected_storage("decimal", other),
        },
        // ---- Per-cell: decimal -> bool ----
        (Decimal, Bool) => match value {
            Value::Text(s) => {
                let trimmed = s.trim();
                // Accept "0" / "1" plus their decimal equivalents
                // (e.g. "0.0", "1.0", "1.00") since those are the
                // exact-zero / exact-one cases per ADR-0017 §3.
                trimmed.parse::<f64>().map_or_else(
                    |_| CellOutcome::Incompatible {
                        reason: format!("`{s}` is not 0 or 1"),
                    },
                    |parsed| {
                        if parsed == 0.0 {
                            CellOutcome::Clean(Value::Integer(0))
                        } else if parsed == 1.0 {
                            CellOutcome::Clean(Value::Integer(1))
                        } else {
                            CellOutcome::Incompatible {
                                reason: format!("`{s}` is not 0 or 1"),
                            }
                        }
                    },
                )
            }
            other => unexpected_storage("decimal", other),
        },
        // ---- Per-cell: int -> bool ----
        (Int, Bool) => match value {
            Value::Integer(i) => {
                if *i == 0 {
                    CellOutcome::Clean(Value::Integer(0))
                } else if *i == 1 {
                    CellOutcome::Clean(Value::Integer(1))
                } else {
                    CellOutcome::Incompatible {
                        reason: format!("`{i}` is not 0 or 1"),
                    }
                }
            }
            other => unexpected_storage("int", other),
        },
        // ---- Per-cell: text -> int ----
        (Text, Int) => match value {
            Value::Text(s) => text_to_int(s),
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> real ----
        (Text, Real) => match value {
            Value::Text(s) => match s.trim().parse::<f64>() {
                Ok(r) if r.is_finite() => CellOutcome::Clean(Value::Real(r)),
                Ok(_) => CellOutcome::Incompatible {
                    reason: format!("`{s}` is not a finite real number"),
                },
                Err(_) => CellOutcome::Incompatible {
                    reason: format!("`{s}` is not a valid real number"),
                },
            },
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> decimal ----
        // The source string is kept verbatim when it is numeric.
        // `f64::from_str` also accepts "inf", "infinity" and
        // "nan" (any case, optionally signed); none of those is
        // a decimal number, so the parse must be finite as well
        // as successful — matching the finiteness checks in the
        // text -> real and decimal-source arms.
        (Text, Decimal) => match value {
            Value::Text(s) => match s.trim().parse::<f64>() {
                Ok(parsed) if parsed.is_finite() => {
                    CellOutcome::Clean(Value::Text(s.clone()))
                }
                _ => CellOutcome::Incompatible {
                    reason: format!("`{s}` is not a valid decimal number"),
                },
            },
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> bool ----
        (Text, Bool) => match value {
            Value::Text(s) => {
                let lowered = s.trim().to_ascii_lowercase();
                if lowered == "true" {
                    CellOutcome::Clean(Value::Integer(1))
                } else if lowered == "false" {
                    CellOutcome::Clean(Value::Integer(0))
                } else {
                    CellOutcome::Incompatible {
                        reason: format!("`{s}` is not `true` or `false`"),
                    }
                }
            }
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> date ----
        (Text, Date) => match value {
            Value::Text(s) => match crate::dsl::value::validate_date(s) {
                Ok(()) => CellOutcome::Clean(Value::Text(s.clone())),
                Err(_) => CellOutcome::Incompatible {
                    reason: format!("`{s}` is not a date in `YYYY-MM-DD` form"),
                },
            },
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> datetime ----
        // A full datetime passes through; a bare date is promoted
        // to midnight and reported as lossy.
        (Text, DateTime) => match value {
            Value::Text(s) => {
                if crate::dsl::value::validate_datetime(s).is_ok() {
                    CellOutcome::Clean(Value::Text(s.clone()))
                } else if crate::dsl::value::validate_date(s).is_ok() {
                    let promoted = format!("{s}T00:00:00Z");
                    CellOutcome::Lossy {
                        new: Value::Text(promoted),
                        reason: "bare date promoted to `T00:00:00Z`".to_string(),
                    }
                } else {
                    CellOutcome::Incompatible {
                        reason: format!(
                            "`{s}` is not a datetime in `YYYY-MM-DDTHH:MM:SS` form"
                        ),
                    }
                }
            }
            other => unexpected_storage("text", other),
        },
        // ---- Per-cell: text -> shortid ----
        (Text, ShortId) => match value {
            Value::Text(s) => match shortid::validate(s) {
                Ok(()) => CellOutcome::Clean(Value::Text(s.clone())),
                Err(message) => CellOutcome::Incompatible { reason: message },
            },
            other => unexpected_storage("text", other),
        },
        // Pairs caught by `static_refusal` before reaching here.
        _ => CellOutcome::Incompatible {
            reason: format!("no transformer defined for `{src}` to `{target}`"),
        },
    }
}
/// Map a user-facing type to the column alignment used in
/// diagnostic tables (ADR-0017 §7). Numeric types right-align,
/// everything else left-aligns; matches ADR-0016 §2.
///
/// This is a thin `const` wrapper: the decision is delegated
/// entirely to [`numeric_alignment_for`].
///
/// NOTE(review): the `is_in_matrix_` prefix reads like a
/// predicate, yet the function returns an [`Alignment`] and does
/// not consult the matrix — confirm the name is intentional.
#[must_use]
pub const fn is_in_matrix_alignment(ty: Type) -> Alignment {
numeric_alignment_for(ty)
}
/// Report whether `transformed` differs materially from
/// `original`, i.e. whether the cell counts as a "client-side
/// transformation" for the §6 note.
///
/// Two values are the same only when they share a storage class
/// and compare equal within it (reals are compared bit-for-bit);
/// NULL equals only NULL. A change of storage class (e.g.
/// `Text("42")` to `Integer(42)`) therefore counts as a
/// transformation even though the human reading is identical,
/// because the database now stores a different shape.
#[must_use]
pub fn is_non_identity(original: &Value, transformed: &Value) -> bool {
    // NULL -> NULL is the pass-through case: never a transformation.
    if matches!((original, transformed), (Value::Null, Value::Null)) {
        return false;
    }
    !values_equal(original, transformed)
}
/// Strict equality between two SQLite values: the storage classes
/// must match and the payloads must be equal. Reals are compared
/// by bit pattern rather than with `==`.
fn values_equal(a: &Value, b: &Value) -> bool {
    match a {
        Value::Null => matches!(b, Value::Null),
        Value::Integer(lhs) => matches!(b, Value::Integer(rhs) if lhs == rhs),
        Value::Real(lhs) => {
            matches!(b, Value::Real(rhs) if lhs.to_bits() == rhs.to_bits())
        }
        Value::Text(lhs) => matches!(b, Value::Text(rhs) if lhs == rhs),
        Value::Blob(lhs) => matches!(b, Value::Blob(rhs) if lhs == rhs),
    }
}
/// Narrow an `f64` to an `i64` cell value.
///
/// - NaN and ±infinity are `Incompatible`.
/// - Values outside the `i64` range are `Incompatible`.
/// - Whole numbers convert `Clean`.
/// - Anything else is truncated toward zero and reported `Lossy`,
///   with the discarded fractional part in the reason.
fn real_to_int(r: f64) -> CellOutcome {
    // `i64::MIN` (-2^63) is exactly representable as an f64, so it
    // is a valid inclusive lower bound.
    const LOWER_INCLUSIVE: f64 = -9_223_372_036_854_775_808.0;
    // `i64::MAX` (2^63 - 1) is NOT representable: `i64::MAX as f64`
    // rounds up to 2^63, which is already out of range. The upper
    // bound must therefore be exclusive; with an inclusive
    // comparison 2^63 would slip through and the saturating
    // `as i64` cast would silently store `i64::MAX` as "clean".
    const UPPER_EXCLUSIVE: f64 = 9_223_372_036_854_775_808.0;

    if !r.is_finite() {
        return CellOutcome::Incompatible {
            reason: format!("`{r}` is not finite"),
        };
    }
    if r < LOWER_INCLUSIVE || r >= UPPER_EXCLUSIVE {
        return CellOutcome::Incompatible {
            reason: format!("`{}` is out of int range", format_real(r)),
        };
    }
    if r.fract() == 0.0 {
        // In range and integral: the cast is exact.
        CellOutcome::Clean(Value::Integer(r as i64))
    } else {
        let truncated = r.trunc() as i64;
        let discarded = r - r.trunc();
        CellOutcome::Lossy {
            new: Value::Integer(truncated),
            reason: format!(
                "truncated; would discard {}",
                format_real(discarded)
            ),
        }
    }
}
/// Convert the text of a decimal cell to an int outcome.
///
/// Surrounding whitespace is ignored. A string that parses
/// directly as `i64` is `Clean`; otherwise it is parsed as `f64`
/// and, when finite, narrowed by [`real_to_int`]. Anything else
/// is `Incompatible`.
///
/// NOTE(review): the fallback goes through `f64`, so a decimal
/// with more significant digits than an `f64` holds is rounded
/// before narrowing — confirm that is acceptable for large
/// magnitudes.
fn decimal_text_to_int(s: &str) -> CellOutcome {
    let digits = s.trim();
    match digits.parse::<i64>() {
        Ok(exact) => CellOutcome::Clean(Value::Integer(exact)),
        Err(_) => match digits.parse::<f64>() {
            Ok(approx) if approx.is_finite() => real_to_int(approx),
            _ => CellOutcome::Incompatible {
                reason: format!("`{s}` is not a valid number"),
            },
        },
    }
}
/// Convert the text of a decimal cell to a real outcome.
///
/// The trimmed string must parse to a finite `f64`, otherwise the
/// cell is `Incompatible`. The conversion is `Clean` when the
/// f64's shortest round-trip rendering matches the source, either
/// exactly or after ignoring trailing zeros; otherwise precision
/// was lost and the cell is `Lossy` (typical for long decimals).
fn decimal_text_to_real(s: &str) -> CellOutcome {
    let source = s.trim();
    let parsed = match source.parse::<f64>() {
        Ok(r) if r.is_finite() => r,
        _ => {
            return CellOutcome::Incompatible {
                reason: format!("`{s}` is not a valid number"),
            };
        }
    };
    let canonical = format_real(parsed);
    let lossless = canonical == source || equal_after_normalising(&canonical, source);
    if lossless {
        return CellOutcome::Clean(Value::Real(parsed));
    }
    CellOutcome::Lossy {
        new: Value::Real(parsed),
        reason: format!("precision reduced from `{}` to `{}`", source, canonical),
    }
}
/// Compare two numeric strings while ignoring notation
/// differences that carry no information: surrounding whitespace
/// and trailing zeros after the decimal point
/// (e.g. "3" vs "3.0", "3.10" vs "3.1").
fn equal_after_normalising(a: &str, b: &str) -> bool {
    /// Strip trailing fractional zeros, and the dot itself when
    /// no fractional digits remain.
    fn canonical(raw: &str) -> String {
        let raw = raw.trim();
        match raw.split_once('.') {
            None => raw.to_string(),
            Some((whole, fraction)) => {
                let fraction = fraction.trim_end_matches('0');
                if fraction.is_empty() {
                    whole.to_string()
                } else {
                    format!("{whole}.{fraction}")
                }
            }
        }
    }
    canonical(a) == canonical(b)
}
/// Convert a text cell to an int outcome.
///
/// Surrounding whitespace is ignored. Only a string that parses
/// directly as `i64` is `Clean`. A string that parses as a finite
/// `f64` is narrowed by [`real_to_int`] and is never better than
/// `Lossy`. Everything else is `Incompatible`.
fn text_to_int(s: &str) -> CellOutcome {
    let candidate = s.trim();
    if let Ok(exact) = candidate.parse::<i64>() {
        return CellOutcome::Clean(Value::Integer(exact));
    }
    let parsed = match candidate.parse::<f64>() {
        Ok(r) if r.is_finite() => r,
        _ => {
            return CellOutcome::Incompatible {
                reason: format!("`{s}` is not a valid int"),
            };
        }
    };
    match real_to_int(parsed) {
        // A whole-number float (e.g. "3.0" -> 3) is still lossy
        // for text -> int: the source carried decimal notation
        // that the int cannot preserve. Promote Clean -> Lossy.
        CellOutcome::Clean(narrowed) => CellOutcome::Lossy {
            new: narrowed,
            reason: format!("parsed as real then narrowed; source was `{s}`"),
        },
        unchanged => unchanged,
    }
}
/// Render an `f64` in its shortest round-trip form, using the
/// standard `Display` implementation: the resulting string always
/// parses back to the exact same `f64`.
///
/// Integral values print without a dot ("3" rather than "3.0").
/// That is deliberate: for storing in a TEXT-backed decimal
/// column or rendering in a diagnostic, the no-dot form is fine
/// and matches what the user would type for a whole-number real
/// literal.
fn format_real(r: f64) -> String {
    r.to_string()
}
/// Build the `Incompatible` outcome used when a cell's SQLite
/// storage class does not match what its declared source type
/// implies. `label` names the source type family for the message;
/// `value` is included in its `Debug` form.
fn unexpected_storage(label: &str, value: &Value) -> CellOutcome {
    let reason =
        format!("internal: cell stored unexpectedly for `{label}` source ({value:?})");
    CellOutcome::Incompatible { reason }
}
// Unit tests for the type-change matrix. They are grouped in the
// same order as the matrix itself: static refusals, NULL
// pass-through, always-clean transformers, per-cell classified
// transformers, and finally `is_non_identity`.
#[cfg(test)]
#[allow(clippy::approx_constant)] // 3.14 is a convenient lossy fixture, not PI
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// Fixture: an INTEGER-class cell.
fn int(i: i64) -> Value {
Value::Integer(i)
}
// Fixture: a REAL-class cell.
fn real(r: f64) -> Value {
Value::Real(r)
}
// Fixture: a TEXT-class cell (owned copy of `s`).
fn text(s: &str) -> Value {
Value::Text(s.to_string())
}
// ---- Static refusals ----
// Every type listed by `Type::all()` is refused against itself.
#[test]
fn same_type_is_statically_refused() {
for ty in Type::all() {
assert!(static_refusal(*ty, *ty).is_some(), "{ty:?}");
}
}
#[test]
fn non_int_sources_to_serial_are_statically_refused() {
// ADR-0018 §8: only `int → serial` is allowed directly.
// Other sources have to route via int. (Same-type
// `serial → serial` is the no-op identity refusal.)
for &src in Type::all() {
if matches!(src, Type::Int | Type::Serial) {
continue;
}
assert!(static_refusal(src, Type::Serial).is_some(), "{src:?}");
}
}
#[test]
fn int_to_serial_is_allowed() {
// ADR-0018 §8: `int → serial` joins the matrix as
// always-clean identity. Non-null values pass through.
assert!(static_refusal(Type::Int, Type::Serial).is_none());
assert_eq!(
transform_cell(Type::Int, Type::Serial, &int(42)),
CellOutcome::Clean(int(42))
);
}
// Blob is refused in both directions against every other type.
#[test]
fn anything_involving_blob_is_statically_refused() {
for &other in Type::all() {
if other != Type::Blob {
assert!(static_refusal(Type::Blob, other).is_some(), "{other:?}");
assert!(static_refusal(other, Type::Blob).is_some(), "{other:?}");
}
}
}
#[test]
fn date_to_datetime_direct_is_statically_refused() {
assert!(static_refusal(Type::Date, Type::DateTime).is_some());
assert!(static_refusal(Type::DateTime, Type::Date).is_some());
}
// Pairs that have no dedicated refusal rule but are simply
// absent from the matrix.
#[test]
fn cross_domain_unmapped_pair_is_statically_refused() {
for (src, target) in [
(Type::Bool, Type::Date),
(Type::Bool, Type::DateTime),
(Type::Bool, Type::ShortId),
(Type::Real, Type::DateTime),
(Type::Int, Type::ShortId),
(Type::Int, Type::Date),
(Type::Date, Type::Int),
(Type::ShortId, Type::Int),
] {
assert!(static_refusal(src, target).is_some(), "{src:?} -> {target:?}");
}
}
#[test]
fn matrix_pairs_pass_static_check() {
// A representative subset; all-pairs coverage is via
// is_in_matrix's match.
let pairs = [
(Type::Int, Type::Real),
(Type::Int, Type::Text),
(Type::Serial, Type::Int),
(Type::Serial, Type::Text),
(Type::Real, Type::Int),
(Type::Text, Type::ShortId),
(Type::Text, Type::DateTime),
(Type::Bool, Type::Text),
];
for (s, t) in pairs {
assert!(static_refusal(s, t).is_none(), "{s:?} -> {t:?}");
}
}
// ---- NULL ----
// NULL is returned as Clean(Null) before the pair is examined.
#[test]
fn null_passes_through_clean_for_any_pair() {
let pairs = [
(Type::Int, Type::Text),
(Type::Real, Type::Int),
(Type::Text, Type::Date),
(Type::Bool, Type::Real),
];
for (s, t) in pairs {
assert_eq!(transform_cell(s, t, &Value::Null), CellOutcome::Clean(Value::Null));
}
}
// ---- Always-clean: int / serial source ----
#[test]
fn int_to_real_is_clean() {
assert_eq!(
transform_cell(Type::Int, Type::Real, &int(7)),
CellOutcome::Clean(real(7.0))
);
}
#[test]
fn int_to_text_is_clean() {
assert_eq!(
transform_cell(Type::Int, Type::Text, &int(42)),
CellOutcome::Clean(text("42"))
);
}
// Decimal is TEXT-backed, so the expected cell is a text value.
#[test]
fn int_to_decimal_is_clean() {
assert_eq!(
transform_cell(Type::Int, Type::Decimal, &int(-12)),
CellOutcome::Clean(text("-12"))
);
}
#[test]
fn serial_to_real_or_text_is_clean() {
// The (Int | Serial, Real | Decimal | Text) transformer
// handles serial sources symmetrically with int.
assert_eq!(
transform_cell(Type::Serial, Type::Real, &int(3)),
CellOutcome::Clean(real(3.0))
);
assert_eq!(
transform_cell(Type::Serial, Type::Text, &int(3)),
CellOutcome::Clean(text("3"))
);
}
#[test]
fn serial_to_int_is_identity_clean() {
// ADR-0017 §4.1's canonical "drop auto-increment" case:
// storage class is unchanged, value passes through.
assert_eq!(
transform_cell(Type::Serial, Type::Int, &int(42)),
CellOutcome::Clean(int(42))
);
assert!(static_refusal(Type::Serial, Type::Int).is_none());
}
// ---- Always-clean: bool source ----
#[test]
fn bool_to_int_is_clean_for_zero_and_one() {
assert_eq!(
transform_cell(Type::Bool, Type::Int, &int(0)),
CellOutcome::Clean(int(0))
);
assert_eq!(
transform_cell(Type::Bool, Type::Int, &int(1)),
CellOutcome::Clean(int(1))
);
}
// bool -> text yields the words "false"/"true", not "0"/"1".
#[test]
fn bool_to_text_uses_dsl_keywords() {
assert_eq!(
transform_cell(Type::Bool, Type::Text, &int(0)),
CellOutcome::Clean(text("false"))
);
assert_eq!(
transform_cell(Type::Bool, Type::Text, &int(1)),
CellOutcome::Clean(text("true"))
);
}
#[test]
fn bool_to_decimal_yields_zero_or_one() {
assert_eq!(
transform_cell(Type::Bool, Type::Decimal, &int(0)),
CellOutcome::Clean(text("0"))
);
assert_eq!(
transform_cell(Type::Bool, Type::Decimal, &int(1)),
CellOutcome::Clean(text("1"))
);
}
#[test]
fn bool_to_real_yields_zero_or_one() {
assert_eq!(
transform_cell(Type::Bool, Type::Real, &int(0)),
CellOutcome::Clean(real(0.0))
);
assert_eq!(
transform_cell(Type::Bool, Type::Real, &int(1)),
CellOutcome::Clean(real(1.0))
);
}
// ---- Always-clean: text-backed source -> text ----
// These sources are stored as text, so the string is expected
// to come back unchanged.
#[test]
fn decimal_to_text_passes_through() {
assert_eq!(
transform_cell(Type::Decimal, Type::Text, &text("3.14")),
CellOutcome::Clean(text("3.14"))
);
}
#[test]
fn date_to_text_passes_through() {
assert_eq!(
transform_cell(Type::Date, Type::Text, &text("2025-01-15")),
CellOutcome::Clean(text("2025-01-15"))
);
}
#[test]
fn datetime_to_text_passes_through() {
assert_eq!(
transform_cell(Type::DateTime, Type::Text, &text("2025-01-15T14:30:00")),
CellOutcome::Clean(text("2025-01-15T14:30:00"))
);
}
#[test]
fn shortid_to_text_passes_through() {
assert_eq!(
transform_cell(Type::ShortId, Type::Text, &text("23456789Ab")),
CellOutcome::Clean(text("23456789Ab"))
);
}
#[test]
fn real_to_text_uses_shortest_round_trip() {
assert_eq!(
transform_cell(Type::Real, Type::Text, &real(3.14)),
CellOutcome::Clean(text("3.14"))
);
// Whole numbers print without trailing zero, matching
// Rust's default Display.
assert_eq!(
transform_cell(Type::Real, Type::Text, &real(3.0)),
CellOutcome::Clean(text("3"))
);
}
// ---- Per-cell: real -> int ----
#[test]
fn real_to_int_clean_for_whole_numbers() {
assert_eq!(
transform_cell(Type::Real, Type::Int, &real(3.0)),
CellOutcome::Clean(int(3))
);
assert_eq!(
transform_cell(Type::Real, Type::Int, &real(-7.0)),
CellOutcome::Clean(int(-7))
);
}
// A fractional real is truncated and flagged lossy; only the
// presence of "truncated" in the reason is pinned.
#[test]
fn real_to_int_lossy_for_fractional() {
match transform_cell(Type::Real, Type::Int, &real(3.14)) {
CellOutcome::Lossy { new, reason } => {
assert_eq!(new, int(3));
assert!(reason.contains("truncated"), "{reason}");
}
other => panic!("unexpected: {other:?}"),
}
}
#[test]
fn real_to_int_incompatible_for_nan_or_inf() {
assert!(matches!(
transform_cell(Type::Real, Type::Int, &real(f64::NAN)),
CellOutcome::Incompatible { .. }
));
assert!(matches!(
transform_cell(Type::Real, Type::Int, &real(f64::INFINITY)),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: real -> bool ----
#[test]
fn real_to_bool_clean_for_zero_and_one() {
assert_eq!(
transform_cell(Type::Real, Type::Bool, &real(0.0)),
CellOutcome::Clean(int(0))
);
assert_eq!(
transform_cell(Type::Real, Type::Bool, &real(1.0)),
CellOutcome::Clean(int(1))
);
}
#[test]
fn real_to_bool_incompatible_for_other_values() {
assert!(matches!(
transform_cell(Type::Real, Type::Bool, &real(0.5)),
CellOutcome::Incompatible { .. }
));
assert!(matches!(
transform_cell(Type::Real, Type::Bool, &real(2.0)),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: real -> decimal ----
#[test]
fn real_to_decimal_is_clean_for_finite() {
assert_eq!(
transform_cell(Type::Real, Type::Decimal, &real(3.14)),
CellOutcome::Clean(text("3.14"))
);
}
// ---- Per-cell: decimal -> int ----
#[test]
fn decimal_to_int_clean_for_integer_values() {
assert_eq!(
transform_cell(Type::Decimal, Type::Int, &text("42")),
CellOutcome::Clean(int(42))
);
}
#[test]
fn decimal_to_int_lossy_for_fractional() {
match transform_cell(Type::Decimal, Type::Int, &text("3.14")) {
CellOutcome::Lossy { new, reason } => {
assert_eq!(new, int(3));
assert!(reason.contains("truncated"), "{reason}");
}
other => panic!("unexpected: {other:?}"),
}
}
#[test]
fn decimal_to_int_incompatible_for_garbage() {
assert!(matches!(
transform_cell(Type::Decimal, Type::Int, &text("notanumber")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: decimal -> bool ----
// Decimal spellings of one ("1.0") are accepted as well as "0".
#[test]
fn decimal_to_bool_clean_for_exact_zero_one() {
assert_eq!(
transform_cell(Type::Decimal, Type::Bool, &text("0")),
CellOutcome::Clean(int(0))
);
assert_eq!(
transform_cell(Type::Decimal, Type::Bool, &text("1.0")),
CellOutcome::Clean(int(1))
);
}
#[test]
fn decimal_to_bool_incompatible_for_other() {
assert!(matches!(
transform_cell(Type::Decimal, Type::Bool, &text("0.5")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: int -> bool ----
#[test]
fn int_to_bool_clean_for_zero_one() {
assert_eq!(
transform_cell(Type::Int, Type::Bool, &int(0)),
CellOutcome::Clean(int(0))
);
assert_eq!(
transform_cell(Type::Int, Type::Bool, &int(1)),
CellOutcome::Clean(int(1))
);
}
#[test]
fn int_to_bool_incompatible_for_other() {
assert!(matches!(
transform_cell(Type::Int, Type::Bool, &int(2)),
CellOutcome::Incompatible { .. }
));
assert!(matches!(
transform_cell(Type::Int, Type::Bool, &int(-1)),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> int ----
#[test]
fn text_to_int_clean_for_integer_string() {
assert_eq!(
transform_cell(Type::Text, Type::Int, &text("42")),
CellOutcome::Clean(int(42))
);
}
#[test]
fn text_to_int_lossy_via_real_for_fractional_string() {
match transform_cell(Type::Text, Type::Int, &text("3.14")) {
CellOutcome::Lossy { new, .. } => assert_eq!(new, int(3)),
other => panic!("unexpected: {other:?}"),
}
}
#[test]
fn text_to_int_lossy_for_real_lookalike_whole_number() {
// ADR-0017 §3: text "3.0" -> int via real-then-narrow is
// lossy because the source carried decimal notation the
// int representation discards.
match transform_cell(Type::Text, Type::Int, &text("3.0")) {
CellOutcome::Lossy { new, .. } => assert_eq!(new, int(3)),
other => panic!("unexpected: {other:?}"),
}
}
// Both non-numeric text and the empty string are incompatible.
#[test]
fn text_to_int_incompatible_for_garbage() {
assert!(matches!(
transform_cell(Type::Text, Type::Int, &text("abc")),
CellOutcome::Incompatible { .. }
));
assert!(matches!(
transform_cell(Type::Text, Type::Int, &text("")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> real / decimal ----
#[test]
fn text_to_real_clean_for_numeric_string() {
assert_eq!(
transform_cell(Type::Text, Type::Real, &text("3.14")),
CellOutcome::Clean(real(3.14))
);
}
#[test]
fn text_to_real_incompatible_for_garbage() {
assert!(matches!(
transform_cell(Type::Text, Type::Real, &text("xyz")),
CellOutcome::Incompatible { .. }
));
}
// text -> decimal keeps the source string as-is when numeric.
#[test]
fn text_to_decimal_clean_for_numeric_string() {
assert_eq!(
transform_cell(Type::Text, Type::Decimal, &text("3.14")),
CellOutcome::Clean(text("3.14"))
);
}
#[test]
fn text_to_decimal_incompatible_for_garbage() {
assert!(matches!(
transform_cell(Type::Text, Type::Decimal, &text("xyz")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> bool ----
#[test]
fn text_to_bool_accepts_true_false_case_insensitively() {
for s in ["true", "True", "TRUE", "tRue"] {
assert_eq!(
transform_cell(Type::Text, Type::Bool, &text(s)),
CellOutcome::Clean(int(1)),
"{s}"
);
}
for s in ["false", "False", "FALSE"] {
assert_eq!(
transform_cell(Type::Text, Type::Bool, &text(s)),
CellOutcome::Clean(int(0)),
"{s}"
);
}
}
#[test]
fn text_to_bool_refuses_zero_one_strings() {
// ADR-0017 §3: "no implicit 0/1 parse — matches the DSL
// boolean grammar."
assert!(matches!(
transform_cell(Type::Text, Type::Bool, &text("0")),
CellOutcome::Incompatible { .. }
));
assert!(matches!(
transform_cell(Type::Text, Type::Bool, &text("1")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> date ----
#[test]
fn text_to_date_clean_for_iso() {
assert_eq!(
transform_cell(Type::Text, Type::Date, &text("2025-01-15")),
CellOutcome::Clean(text("2025-01-15"))
);
}
// Slash-separated dates are not accepted.
#[test]
fn text_to_date_incompatible_for_other_formats() {
assert!(matches!(
transform_cell(Type::Text, Type::Date, &text("2025/01/15")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> datetime ----
#[test]
fn text_to_datetime_clean_for_iso_datetime() {
assert_eq!(
transform_cell(Type::Text, Type::DateTime, &text("2025-01-15T14:30:00")),
CellOutcome::Clean(text("2025-01-15T14:30:00"))
);
}
// A bare date gains a `T00:00:00Z` suffix and is reported lossy.
#[test]
fn text_to_datetime_lossy_for_bare_date() {
match transform_cell(Type::Text, Type::DateTime, &text("2025-01-15")) {
CellOutcome::Lossy { new, reason } => {
assert_eq!(new, text("2025-01-15T00:00:00Z"));
assert!(reason.contains("promoted"), "{reason}");
}
other => panic!("unexpected: {other:?}"),
}
}
#[test]
fn text_to_datetime_incompatible_for_garbage() {
assert!(matches!(
transform_cell(Type::Text, Type::DateTime, &text("not a date")),
CellOutcome::Incompatible { .. }
));
}
// ---- Per-cell: text -> shortid ----
#[test]
fn text_to_shortid_clean_for_valid_shortid() {
assert_eq!(
transform_cell(Type::Text, Type::ShortId, &text("23456789Ab")),
CellOutcome::Clean(text("23456789Ab"))
);
}
#[test]
fn text_to_shortid_incompatible_for_invalid() {
// Too long.
assert!(matches!(
transform_cell(Type::Text, Type::ShortId, &text("toolong_xyz_more")),
CellOutcome::Incompatible { .. }
));
// Ambiguous chars (per shortid alphabet).
assert!(matches!(
transform_cell(Type::Text, Type::ShortId, &text("0OIl234567")),
CellOutcome::Incompatible { .. }
));
}
// ---- is_non_identity ----
#[test]
fn null_to_null_is_identity() {
assert!(!is_non_identity(&Value::Null, &Value::Null));
}
#[test]
fn storage_class_change_counts_as_non_identity() {
// Same human reading, different storage class: counts.
assert!(is_non_identity(&text("42"), &int(42)));
assert!(is_non_identity(&int(3), &real(3.0)));
}
// Same storage class and same payload: not a transformation.
#[test]
fn identical_value_is_identity() {
assert!(!is_non_identity(&text("hi"), &text("hi")));
assert!(!is_non_identity(&int(42), &int(42)));
}
}