From 3d4a0fd45e7a29d91b7ba6a9b3a5ba1370640e80 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Fri, 12 Jun 2026 14:42:22 +0000 Subject: [PATCH] fix(render): trim IEEE-754 noise from displayed decimal arithmetic (#32) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `decimal` is stored as exact TEXT, but SQLite has no native decimal type, so arithmetic/aggregation implicitly coerces it to an IEEE-754 double. The computed result carries no playground type, so `sum(price * qty)` rendered the double's full noise — `298.59999999999997` for `298.60` — a confusing, off-topic float lesson for a teaching tool. Add `format_real_display`: round REAL values to 15 significant figures (a double's reliable precision) then take the shortest round-tripping form, collapsing `298.59999999999997` to `298.6`. Wired into `format_cell` (result-set / `show data` cells) only — the sole surface where the noise appears, since it arises from arithmetic. Every other f64->string path keeps full precision for semantic, not cosmetic, reasons: CSV persistence stays byte-exact for round-trip; `render_value` is a canonical identity key for the uniqueness dry-runs (dry_run_unique, check_uniqueness_collisions), where rounding would report collisions the exact-valued engine wouldn't; FK-key matching and EXPLAIN-SQL literals likewise stay exact. ADR-0005 Amendment 1; +7 tests. --- docs/adr/0005-column-type-vocabulary.md | 62 ++++++++++++ docs/adr/README.md | 2 +- src/db.rs | 121 +++++++++++++++++++++++- tests/it/sql_select.rs | 61 ++++++++++++ 4 files changed, 244 insertions(+), 2 deletions(-) diff --git a/docs/adr/0005-column-type-vocabulary.md b/docs/adr/0005-column-type-vocabulary.md index 3714ec1..91a67bf 100644 --- a/docs/adr/0005-column-type-vocabulary.md +++ b/docs/adr/0005-column-type-vocabulary.md @@ -70,3 +70,65 @@ True UUIDs are intentionally **not** in the type set. - Learners who later need a true UUID column will find that the app does not provide one; this is a deliberate trade-off in favour of TUI legibility. + +## Amendment 1 — display rounding of coerced doubles (2026-06-12) + +Issue #32. The Decision keeps `decimal` exact by storing it as +TEXT, noting that "numeric ops require casts" — the engine has no +native decimal/BCD type (SQLite's storage classes are only NULL / +INTEGER / REAL / TEXT / BLOB; `NUMERIC` is an affinity, not a +type). What the original wording did not anticipate is that the +engine performs that cast **implicitly**: `sum(price * qty)` over +TEXT decimals coerces to an IEEE-754 double with no explicit cast, +and the computed result carries no playground type (ADR-0030 §6), +so it rendered with the double's full noise — +`298.59999999999997` for `298.60`. For a teaching tool that is a +confusing, off-topic lesson about float representation. + +### Decision + +**Round floating-point values to 15 significant figures for +display only.** A double carries ~15–17 significant decimal digits +and the noise lives in the last one or two; rounding to 15 then +taking the shortest round-tripping form of the rounded value +collapses `298.59999999999997` → `298.6` and +`0.30000000000000004` → `0.3`. A clean value rounds to itself, so +the result is never longer than before; non-finite values pass +through. Implemented as `format_real_display` in `db.rs`. + +The rounding is wired into **exactly one place — `format_cell`, +the result-set / `show data` cell formatter** — because that is +the only surface where the IEEE-754 noise actually appears: noise +arises from *arithmetic/aggregation*, whose results flow through +`format_cell`. Every other `f64`-to-string path deliberately keeps +full precision, and the distinction is **semantic, not cosmetic**: + +- **Persistence stays exact.** The CSV encoder + (`persistence::csv_io::format_real`) keeps the shortest + round-tripping form so a stored `real` survives save/load + byte-for-byte — rounding there would corrupt data. +- **Uniqueness dry-runs key on exact values.** `render_value` + (the diagnostic/echo formatter) is reused as a *canonical + identity key* by `dry_run_unique` (ADR-0029 §5) and + `check_uniqueness_collisions` (ADR-0017 §4.3): they group rows + by this string to predict the duplicates the engine would + reject. Rounding there would merge two distinct doubles into one + key and report a collision the engine — which compares exact + values — would not. So `render_value` keeps `format!("{r}")`. + (It also never displays a *computed* value, so it has no noise + to trim.) +- **FK-key matching and EXPLAIN-SQL literals keep full + precision** — neither is a data-cell display. + +Within `format_cell` the rounding applies to **all** REAL cells +(stored `real` columns and computed results alike), for one +consistent rule; the lost digits are at the double's precision +limit, not real information, and a stored `real` typed by the user +is itself noise-free so its display is unchanged in practice. Raw +`decimal` columns are unaffected — they are TEXT and render +verbatim, trailing zeros and all (`100.10`). Exact decimal +*arithmetic* (a SQLite extension exposing +`decimal_mul`/`decimal_sum`) was considered and rejected: it would +require rewriting the user's standard-SQL operators into function +calls, defeating both the "validated SQL runs verbatim" model and +the goal of teaching ordinary SQL. diff --git a/docs/adr/README.md b/docs/adr/README.md index 0ded724..2e15522 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -10,7 +10,7 @@ This directory contains the project's ADRs, recorded per - [ADR-0002 — Database engine](0002-database-engine.md) - [ADR-0003 — Input modes and command dispatch](0003-input-modes-and-command-dispatch.md) — the persistent `Simple`/`Advanced` mode and the `:` one-shot escape. The **startup mode is no longer always `simple`**: it is restored from the project's stored mode and overridable with `--mode` (see **ADR-0015 Amendment 1**, issue #14). The app-command registry gains **`copy`** (ADR-0041, issue #11) - [ADR-0004 — Project file format](0004-project-file-format.md) -- [ADR-0005 — Column type vocabulary](0005-column-type-vocabulary.md) +- [ADR-0005 — Column type vocabulary](0005-column-type-vocabulary.md) — the ten-type set (`text`/`int`/`real`/`decimal`/`bool`/`date`/`datetime`/`blob`/`serial`/`shortid`), compound PKs, no true UUIDs; `decimal` stored as exact TEXT. **Amendment 1, 2026-06-12** (issue #32): SQLite has no native decimal/BCD type, so arithmetic/aggregation over a TEXT `decimal` is implicitly coerced to an IEEE-754 double and the computed (typeless) result leaked float noise (`298.59999999999997` for `298.60`); floating-point values are now rounded to **15 significant figures for display only** (`format_real_display` in `db.rs`, wired into `format_cell` — the result-set/`show data` cell formatter, the only surface where arithmetic noise surfaces) while every other f64→string path keeps full precision because the distinction is *semantic*: persistence (`csv_io::format_real`) stays byte-exact for round-trip; `render_value` is a *canonical identity key* for the uniqueness dry-runs (`dry_run_unique` ADR-0029 §5, `check_uniqueness_collisions` ADR-0017 §4.3) so rounding it would report collisions the exact-valued engine wouldn't; FK-key matching and EXPLAIN-SQL literals likewise stay exact — so stored `real`/`decimal` round-trips stay byte-exact and raw `decimal` columns render verbatim - [ADR-0006 — Undo snapshots and replay log](0006-undo-snapshots-and-replay-log.md) — **Accepted**. The **replay/journal half** (U3/U4) shipped via ADR-0034; the **undo/snapshot half** (U1/U2) is settled by **Amendment 1 (2026-05-24)** and **implemented 2026-05-24** (plan: `docs/plans/20260524-adr-0006-undo-snapshots.md`; ring in `src/undo.rs`, worker hook in `src/db.rs`). Amendment 1 **supersedes the original "snapshots only before destructive operations" model**: a snapshot is taken before **every** data/schema mutation (DSL + SQL) for familiar single-step (Ctrl-Z) undo — so the confirmation collapses to *naming the one command being undone* (no db-diff). Snapshot is a **hybrid whole-project copy** — database via the online backup API **plus** `project.yaml`/`data/*.csv` as files — reconciling this ADR with ADR-0015's "text is authoritative, db is derived"; undo restores all three directly. Staged before the mutation's transaction, finalised after the db commit (preserves ADR-0015 §6 commit-db-last); rolled-back ops leave no snapshot. **Persisted** ring under `.snapshots/`, **N = 50** (raised from 10), git-ignored + export-excluded + temp-cleanup-aware. `redo` supported, **redo stack discarded on new work**. **Batch ops record one undo step** (`replay` + future batch via a Begin/EndBatch worker primitive); **`import` is outside undo** (it switches projects per ADR-0015 §11, leaving the current project untouched). A **`--no-undo` CLI flag** disables snapshotting (hardware escape hatch). Adds the `backup` feature to `rusqlite` - [ADR-0007 — Sharing and export](0007-sharing-and-export.md) - [ADR-0008 — Testing approach](0008-testing-approach.md) diff --git a/src/db.rs b/src/db.rs index 19e4d07..a057ebe 100644 --- a/src/db.rs +++ b/src/db.rs @@ -6041,6 +6041,16 @@ fn render_value(v: &rusqlite::types::Value) -> String { match v { RV::Null => "(null)".to_string(), RV::Integer(i) => i.to_string(), + // Full-precision, shortest round-trip — NOT the issue-#32 + // display rounding. `render_value` is reused as a *canonical + // identity key* by the uniqueness dry-runs (`dry_run_unique`, + // `check_uniqueness_collisions`, ADR-0017 §4.3 / ADR-0029 §5): + // they group rows by this string to detect duplicates the + // engine would reject. Rounding here would merge two distinct + // doubles into one key and report a collision the engine — + // which compares exact values — would not. Display rounding + // lives in `format_cell` (query / `show data` cells) only, + // where no value is ever used as a key. RV::Real(r) => format!("{r}"), RV::Text(s) => s.clone(), RV::Blob(_) => "".to_string(), @@ -10809,12 +10819,49 @@ fn format_cell(value: rusqlite::types::Value, ty: Option) -> Option Some(format!("{r}")), + V::Real(r) => Some(format_real_display(r)), V::Text(s) => Some(s), V::Blob(b) => Some(format!("", b.len())), } } +/// Render a floating-point value for **display**, trimming the +/// IEEE-754 noise that surfaces when the engine coerces a +/// `decimal` (stored as exact TEXT, ADR-0005) to a double for +/// arithmetic or aggregation (issue #32): `sum(price * qty)` +/// would otherwise show `298.59999999999997` for `298.60`. +/// +/// A double carries ~15–17 significant decimal digits and the +/// noise lives in the last one or two, so we round to 15 +/// significant figures and then take the *shortest* round-tripping +/// form of the rounded value. That collapses +/// `298.59999999999997` → `298.6` and `0.30000000000000004` → +/// `0.3`. A clean value rounds to itself, so the result is never +/// longer than the previous `format!("{r}")` — the magnitude and +/// whatever digit expansion `Display` already chose are preserved +/// (Rust's `Display` for `f64` never uses scientific notation, so +/// a very large value still expands to full digits, exactly as +/// before). Non-finite values pass through unchanged. +/// +/// DISPLAY ONLY, and only for genuine display cells. The CSV +/// encoder (`persistence::csv_io::format_real`) keeps the exact +/// shortest round-trip so a stored `real` survives save/load +/// byte-for-byte; the uniqueness dry-runs key on the full-precision +/// `render_value`; FK-key matching and EXPLAIN-SQL literals keep +/// full precision too. Rounding any of those would change behaviour, +/// not just looks. This is wired only into `format_cell`. +fn format_real_display(r: f64) -> String { + if !r.is_finite() { + return format!("{r}"); + } + // `{:.14e}` is a 15-significant-figure scientific form (one + // mantissa digit + 14 after the point); parsing it back and + // letting `Display` pick the shortest exact form drops the + // trailing IEEE-754 noise. + let rounded: f64 = format!("{r:.14e}").parse().unwrap_or(r); + format!("{rounded}") +} + /// Capture the query plan for an explainable command /// (ADR-0028 §2). Matches the inner command, builds the exact /// SQL it would otherwise run via the shared `build_*_sql` @@ -11442,6 +11489,78 @@ mod tests { Database::open(":memory:").expect("open in-memory") } + // ---- Issue #32 — display rounding of coerced doubles ---- + + #[test] + fn format_real_display_trims_ieee754_noise() { + // The reported case and the classic float artifacts. + assert_eq!(format_real_display(298.599_999_999_999_97), "298.6"); + assert_eq!(format_real_display(0.1 + 0.2), "0.3"); + assert_eq!(format_real_display(59.97), "59.97"); + } + + #[test] + fn format_real_display_keeps_whole_numbers_and_sign() { + assert_eq!(format_real_display(5.0), "5"); + assert_eq!(format_real_display(0.0), "0"); + assert_eq!(format_real_display(-298.599_999_999_999_97), "-298.6"); + assert_eq!(format_real_display(1.5e-12), "0.0000000000015"); + } + + #[test] + fn format_real_display_preserves_clean_values_no_regression() { + // A value with no IEEE-754 noise rounds to itself, so the + // helper is never *worse* (or longer) than the previous + // `format!("{r}")` — including a very large magnitude, which + // `Display` expands to full digits both before and after. + for v in [1e300_f64, 1.25_f64, 1.5e-12_f64, 42.0_f64, -0.5_f64] { + assert_eq!( + format_real_display(v), + format!("{v}"), + "clean value {v} must format exactly as before", + ); + } + } + + #[test] + fn render_value_keeps_full_precision_for_identity_grouping() { + // DA guard (issue #32): `render_value` must NOT adopt the + // display rounding — the uniqueness dry-runs key on its output + // to detect duplicates the engine (which compares exact + // values) would reject. Two adjacent doubles that agree to 15 + // significant figures must still render to *distinct* strings, + // or `dry_run_unique` / `check_uniqueness_collisions` would + // report a false collision. + use rusqlite::types::Value as V; + let a = 1.0_f64; + let b = 1.000_000_000_000_000_2_f64; // 1.0.next_up() + assert_ne!(a, b, "the two doubles are genuinely distinct"); + assert_eq!(render_value(&V::Real(a)), "1"); + assert_eq!(render_value(&V::Real(b)), "1.0000000000000002"); + assert_ne!( + render_value(&V::Real(a)), + render_value(&V::Real(b)), + "render_value must keep distinct doubles distinct (identity key)", + ); + // And the display formatter *does* round both to the same + // string — which is exactly why it must not be used as a key. + assert_eq!(format_real_display(a), format_real_display(b)); + } + + #[test] + fn format_real_display_passes_through_non_finite() { + assert_eq!(format_real_display(f64::INFINITY), "inf"); + assert_eq!(format_real_display(f64::NEG_INFINITY), "-inf"); + assert_eq!(format_real_display(f64::NAN), "NaN"); + } + + #[test] + fn format_real_display_rounds_to_fifteen_significant_figures() { + // 1/3 collapses to 15 sig figs (the noise past that is the + // double's representation limit, not real precision). + assert_eq!(format_real_display(1.0 / 3.0), "0.333333333333333"); + } + fn col(name: &str, ty: Type) -> ColumnSpec { ColumnSpec::new(name, ty) } diff --git a/tests/it/sql_select.rs b/tests/it/sql_select.rs index 791b5d8..eb79c9f 100644 --- a/tests/it/sql_select.rs +++ b/tests/it/sql_select.rs @@ -175,6 +175,67 @@ fn open_project_db() -> (project::Project, Database, tempfile::TempDir) { (project, db, dir) } +#[test] +fn decimal_aggregation_display_trims_ieee754_noise() { + // Issue #32: `decimal` is stored as exact TEXT, but SQLite + // coerces it to an IEEE-754 double for arithmetic/aggregation, + // so `sum(price * qty)` would render `298.59999999999997` for + // `298.60`. The display layer rounds computed REAL cells to ~15 + // significant figures, trimming that noise — while raw decimal + // columns stay byte-exact (TEXT, untouched). + let (_p, db, _dir) = open_project_db(); + let rt = rt(); + rt.block_on(async { + db.create_table( + "Products".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("price", Type::Decimal), + ColumnSpec::new("qty", Type::Int), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create"); + for (price, qty) in [("19.99", 3), ("5.49", 7), ("100.10", 2)] { + db.insert( + "Products".to_string(), + Some(vec!["price".to_string(), "qty".to_string()]), + vec![ + Value::Number(price.to_string()), + Value::Number(qty.to_string()), + ], + None, + ) + .await + .expect("insert"); + } + }); + + // The reported case: the aggregate no longer leaks float noise. + let agg = rt + .block_on(db.run_select("select sum(price * qty) from Products".to_string(), None)) + .expect("aggregate select"); + assert_eq!( + agg.rows[0][0].as_deref(), + Some("298.6"), + "sum(price*qty) must trim IEEE-754 noise (298.60), not show 298.59999999999997", + ); + + // Raw decimal column is still exact — TEXT storage preserves + // the input string verbatim, including the trailing zero. + let raw = rt + .block_on(db.run_select("select price from Products".to_string(), None)) + .expect("raw decimal select"); + let prices: Vec<&str> = raw.rows.iter().map(|r| r[0].as_deref().unwrap()).collect(); + assert_eq!( + prices, + vec!["19.99", "5.49", "100.10"], + "raw decimal column must stay byte-exact (TEXT storage untouched)", + ); +} + #[test] fn database_run_select_constant_returns_a_single_row() { let (_p, db, _dir) = open_project_db();