diff --git a/docs/adr/0017-column-type-change-compatibility.md b/docs/adr/0017-column-type-change-compatibility.md index f9d3b3b..70cc613 100644 --- a/docs/adr/0017-column-type-change-compatibility.md +++ b/docs/adr/0017-column-type-change-compatibility.md @@ -108,6 +108,7 @@ statically refused (carried over from B2/C2). Anything ↔ | `int` / `serial` | `real` | widening; precision caveat for ¦v¦ > 2⁵³ noted in docs but not policed | | `int` / `serial` | `decimal` | exact decimal representation | | `int` / `serial` | `text` | stringify | +| `serial` | `int` | identity at the storage class level (both store as INTEGER); drops the auto-increment metadata. The canonical PK conversion enabled by §4.1's `fk_target_type`-aware refinement. | | `bool` | `int` | 0/1 | | `bool` | `real` | 0.0/1.0 | | `bool` | `decimal` | "0"/"1" | @@ -359,14 +360,14 @@ and these conversion diagnostics. Cannot change `T.col` from real to int: 50 row(s) would discard information. -┌─────┬───────┬─────┬───────────────────────────────────┐ -│ Row │ From │ To │ Reason │ -├─────┼───────┼─────┼───────────────────────────────────┤ -│ 5 │ 3.14 │ 3 │ truncated; would discard 0.14 │ -│ 12 │ 2.71 │ 2 │ truncated; would discard 0.71 │ -│ 18 │ 1.5 │ 1 │ truncated; would discard 0.5 │ -│ … │ … │ … │ … and 47 more │ -└─────┴───────┴─────┴───────────────────────────────────┘ +┌─────────┬───────┬─────┬───────────────────────────────────┐ +│ id (PK) │ From │ To │ Reason │ +├─────────┼───────┼─────┼───────────────────────────────────┤ +│ 5 │ 3.14 │ 3 │ truncated; would discard 0.14 │ +│ 12 │ 2.71 │ 2 │ truncated; would discard 0.71 │ +│ 18 │ 1.5 │ 1 │ truncated; would discard 0.5 │ +│ … │ … │ … │ … and 47 more │ +└─────────┴───────┴─────┴───────────────────────────────────┘ if you want to execute this conversion in spite of the problems, re-run with `--force-conversion`. @@ -378,13 +379,13 @@ problems, re-run with `--force-conversion`. Cannot change `T.col` from text to int: 3 row(s) cannot be converted. -┌─────┬───────┬───────────────────────┐ -│ Row │ Value │ Reason │ -├─────┼───────┼───────────────────────┤ -│ 3 │ abc │ not a valid int │ -│ 7 │ x42 │ not a valid int │ -│ 12 │ │ not a valid int │ -└─────┴───────┴───────────────────────┘ +┌─────────┬───────┬───────────────────────┐ +│ id (PK) │ Value │ Reason │ +├─────────┼───────┼───────────────────────┤ +│ 3 │ abc │ not a valid int │ +│ 7 │ x42 │ not a valid int │ +│ 12 │ │ not a valid int │ +└─────────┴───────┴───────────────────────┘ ``` The trailing `--force-conversion` hint is omitted for @@ -397,11 +398,11 @@ forward-look — would re-introduce one). Cannot change `T.col` from real to int: 1 collision(s) would violate uniqueness. -┌─────────┬─────────────────┬──────────────────┐ -│ Becomes │ Source rows │ Source values │ -├─────────┼─────────────────┼──────────────────┤ -│ 3 │ row 5, row 12 │ 3.14, 3.7 │ -└─────────┴─────────────────┴──────────────────┘ +┌─────────┬──────────────────┬──────────────────┐ +│ Becomes │ Source rows (id) │ Source values │ +├─────────┼──────────────────┼──────────────────┤ +│ 3 │ 5, 12 │ 3.14, 3.7 │ +└─────────┴──────────────────┴──────────────────┘ ``` #### Common rules @@ -411,9 +412,33 @@ would violate uniqueness. literal text "and N more" inside the row is rendered inside the table — not as a footer line. Keeps the bordered shape intact. -- Row indices are 1-based to match how learners count - rows; the tool counts internally from 0. -- Numeric "Row" / "Becomes" columns inherit numeric +- Rows are identified by their **primary-key value(s)**, not + by positional indices. SQLite returns rows in unspecified + order without `ORDER BY`, so a positional "row 5" would + not be reproducible or addressable by the user. The PK is + the natural row identifier in a relational setting and is + what the user would type in a `where` clause to find or + fix the offending cell. + - Single PK: rendered as one column whose header is the + PK column name with a trailing `(PK)` marker + (e.g. `id (PK)`); cells carry the raw PK value with no + `column=` prefix. The marker appears once per table, in + the header. + - Compound PK: one column per PK component, each header + annotated `(PK)` (e.g. `a (PK)`, `b (PK)`); cells carry + the raw component values. + - Uniqueness-collision tables list the colliding rows' + PK values comma-separated inside a single `Source rows` + cell whose header carries the PK column name(s) in + parentheses (e.g. `Source rows (id)` or `Source rows + (a, b)`). Compound-PK source rows render as tuples: + `(1,2), (1,3)`. +- The change-column command always operates on a table + with at least one PK column (every `create table` in v1 + produces a PK; the AST permits PK-less tables, but no + grammar produces one today). If a PK-less surface ever + lands, this section will be revisited. +- Numeric PK and "Becomes" columns inherit numeric right-alignment from ADR-0016 §2. - Cells that would render multi-line content (for `text →` conversions where source values contain newlines) honour diff --git a/src/app.rs b/src/app.rs index 1f50969..b8e30ef 100644 --- a/src/app.rs +++ b/src/app.rs @@ -13,7 +13,8 @@ use tracing::{trace, warn}; use crate::action::Action; use crate::db::{ - CascadeEffect, DataResult, DeleteResult, InsertResult, TableDescription, UpdateResult, + CascadeEffect, ChangeColumnTypeResult, DataResult, DeleteResult, InsertResult, + TableDescription, UpdateResult, }; use crate::dsl::{Command, ParseError, parse_command}; use crate::event::AppEvent; @@ -312,6 +313,10 @@ impl App { self.handle_dsl_delete_success(&command, &result); Vec::new() } + AppEvent::DslChangeColumnSucceeded { command, result } => { + self.handle_dsl_change_column_success(&command, result); + Vec::new() + } AppEvent::DslFailed { command, error } => { self.handle_dsl_failure(&command, &error); Vec::new() @@ -761,6 +766,43 @@ impl App { } } + fn handle_dsl_change_column_success( + &mut self, + command: &Command, + result: ChangeColumnTypeResult, + ) { + let summary = format!("[ok] {} {}", command.verb(), command.display_subject()); + self.note_system(summary); + if let Some(note) = result.client_side { + // ADR-0017 §6: pedagogical hook telling the learner + // "the tool did this for you; raw SQL would need a + // CAST or application-level code." Lossy variant + // adds the lossy count when --force-conversion was + // used. + let line = if note.lossy > 0 { + format!( + "[client-side] {n} row(s) transformed; {l} of those discarded \ + information (lossy). In raw SQL this would need an explicit \ + `CAST` or application-level code.", + n = note.transformed, + l = note.lossy + ) + } else { + format!( + "[client-side] {n} row(s) were transformed before being stored. \ + In raw SQL this would need an explicit `CAST` or \ + application-level code.", + n = note.transformed + ) + }; + self.note_system(line); + } + for line in crate::output_render::render_structure(&result.description) { + self.note_system(line); + } + self.current_table = Some(result.description); + } + fn handle_dsl_delete_success(&mut self, command: &Command, result: &DeleteResult) { self.note_system(format!( "[ok] {} {}", diff --git a/src/db.rs b/src/db.rs index 4d18107..b0d49d5 100644 --- a/src/db.rs +++ b/src/db.rs @@ -32,11 +32,13 @@ use tokio::sync::{mpsc, oneshot}; use tracing::{debug, info, warn}; use crate::dsl::action::ReferentialAction; -use crate::dsl::command::{RelationshipSelector, RowFilter}; +use crate::dsl::command::{ChangeColumnMode, RelationshipSelector, RowFilter}; use crate::dsl::ColumnSpec; use crate::dsl::shortid; use crate::dsl::types::Type; use crate::dsl::value::{Bound, Value, ValueError}; +use crate::output_render::{Alignment, render_diagnostic_table}; +use crate::type_change; use crate::persistence::{ CellValue, ColumnSchema, Persistence, PersistenceError, RelationshipSchema, SchemaSnapshot, TableSchema, TableSnapshot, decode_cell, parse_csv, parse_schema, @@ -160,6 +162,34 @@ pub struct InsertResult { pub data: DataResult, } +/// Outcome of a successful `change column …` (ADR-0017 §6). +/// +/// `description` is the post-rebuild table structure (used for +/// the auto-show that follows DDL). `client_side` carries the +/// pedagogical note that fires whenever the playground rewrote +/// any cell value before storing it — the moment that tells the +/// learner "the tool did this for you; raw SQL would need a +/// `CAST` or application code." `None` when the change was +/// pure metadata (storage class unchanged for every row). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ChangeColumnTypeResult { + pub description: TableDescription, + pub client_side: Option, +} + +/// Counts feeding the `[client-side]` success line. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ClientSideNote { + /// Cells whose stored value differs from what the user + /// originally inserted (any non-identity transformation, + /// including pure storage-class changes like + /// `Text("42")` → `Integer(42)`). + pub transformed: usize, + /// Subset of `transformed` where information was discarded + /// (only non-zero when `--force-conversion` was used). + pub lossy: usize, +} + /// Outcome of a successful UPDATE — a count plus the rows that /// matched (and were updated). Captured by rowid so that even an /// UPDATE which changes the WHERE-target column still finds the @@ -295,8 +325,9 @@ enum Request { table: String, column: String, ty: Type, + mode: ChangeColumnMode, source: Option, - reply: oneshot::Sender>, + reply: oneshot::Sender>, }, ListTables { reply: oneshot::Sender, DbError>>, @@ -491,12 +522,14 @@ impl Database { table: String, column: String, ty: Type, + mode: ChangeColumnMode, source: Option, - ) -> Result { + ) -> Result { let (reply, recv) = oneshot::channel(); self.send(Request::ChangeColumnType { table, column, + mode, ty, source, reply, @@ -834,6 +867,7 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req table, column, ty, + mode, source, reply, } => { @@ -844,6 +878,7 @@ fn handle_request(conn: &Connection, persistence: Option<&Persistence>, req: Req &table, &column, ty, + mode, )); } Request::ListTables { reply } => { @@ -1552,28 +1587,37 @@ fn do_rename_column( /// Change a column's type. /// -/// SQLite's `ALTER TABLE` cannot change column types, so -/// this routes through the rebuild-table primitive -/// (ADR-0013). The new schema differs from the old in -/// exactly one column's `sqlite_type` and `user_type`; data -/// copies via `INSERT INTO new SELECT FROM old`. STRICT -/// typing means any cell value that doesn't fit the new -/// type triggers a SQLite error and the transaction rolls -/// back. +/// Change a column's user-facing type, per ADR-0017's per-cell +/// transformer model. /// -/// V1 refuses two cases that would require cascading -/// changes outside the local table: +/// SQLite's `ALTER TABLE` cannot change column types, so this +/// routes through the rebuild-table primitive (ADR-0013) with a +/// row-by-row transformation step inserted between read-out and +/// write-back. /// -/// - PK columns (FKs in other tables target this column; -/// changing its type would invalidate their target -/// types per ADR-0011). -/// - Columns involved in a declared relationship -/// (parent or child side) — drop the relationship -/// first. +/// Refusal preconditions (in order): /// -/// The `serial` type is also refused as a target since -/// `serial` carries auto-increment semantics that only -/// apply at create-table time. +/// 1. Target is `serial` (auto-increment is create-table-only). +/// 2. Source ↔ target is statically refused per the matrix +/// (same-type, blob, date↔datetime, undefined cross-domain). +/// 3. Column is the *child* side of any relationship (outbound +/// FK) — drop the relationship first. +/// 4. Column has any inbound FK (parent side) and the new type +/// would change `fk_target_type()` (ADR-0017 §4.1) — which +/// means referencing FK columns in other tables would need a +/// cascading type change v1 doesn't perform. +/// +/// Then per the mode: +/// +/// - `Default`: dry-run; refuse on any incompatible cell; +/// refuse on any lossy cell (with a hint about +/// `--force-conversion`); enforce uniqueness for PK / shortid +/// columns; rebuild with transformed values. +/// - `ForceConversion`: same as Default except lossy cells are +/// accepted (incompatible cells still refuse). +/// - `DontConvert`: skip the entire client-side layer, identity +/// copy via `INSERT…SELECT`; engine-level errors are wrapped +/// in friendly form (ADR-0002 user-facing posture). fn do_change_column_type( conn: &Connection, persistence: Option<&Persistence>, @@ -1581,15 +1625,8 @@ fn do_change_column_type( table: &str, column: &str, ty: Type, -) -> Result { - if ty == Type::Serial { - return Err(DbError::Unsupported( - "the 'serial' type carries auto-increment primary-key semantics \ - that only apply at create-table time; pick a different target \ - type (e.g. `int`)." - .to_string(), - )); - } + mode: ChangeColumnMode, +) -> Result { let old_schema = read_schema(conn, table)?; let col_info = old_schema .columns @@ -1599,33 +1636,54 @@ fn do_change_column_type( message: format!("no such column: {table}.{column}"), kind: SqliteErrorKind::NoSuchColumn, })?; - if col_info.primary_key { - return Err(DbError::Unsupported(format!( - "cannot change the type of primary-key column `{table}.{column}` \ - in v1; PK type changes would cascade to FK target types in \ - other tables." - ))); + let src_ty = col_info.user_type.ok_or_else(|| { + DbError::Unsupported(format!( + "cannot change type of `{table}.{column}`: column has no \ + user-facing type metadata." + )) + })?; + + // (1) + (2): static refusals via the matrix. + if let Some(reason) = type_change::static_refusal(src_ty, ty) { + return Err(DbError::Unsupported(reason)); } - let rel_count: i64 = conn + + // (3): outbound FK (column is child side of any relationship). + let outbound_count: i64 = conn .query_row( &format!( "SELECT COUNT(*) FROM {REL_TABLE} \ - WHERE (parent_table = ?1 AND parent_column = ?2) \ - OR (child_table = ?1 AND child_column = ?2);" + WHERE child_table = ?1 AND child_column = ?2;" ), rusqlite::params![table, column], |row| row.get(0), ) .map_err(DbError::from_rusqlite)?; - if rel_count > 0 { + if outbound_count > 0 { return Err(DbError::Unsupported(format!( "cannot change type of `{table}.{column}` while a relationship \ - references it; drop the relationship first." + uses it as a foreign key; drop the relationship first." ))); } - if col_info.user_type == Some(ty) { + + // (4): inbound FKs (column is parent / target of any FK). + let inbound_count: i64 = conn + .query_row( + &format!( + "SELECT COUNT(*) FROM {REL_TABLE} \ + WHERE parent_table = ?1 AND parent_column = ?2;" + ), + rusqlite::params![table, column], + |row| row.get(0), + ) + .map_err(DbError::from_rusqlite)?; + if inbound_count > 0 && src_ty.fk_target_type() != ty.fk_target_type() { return Err(DbError::Unsupported(format!( - "column `{table}.{column}` is already `{ty}`." + "`{table}.{column}` is referenced by {inbound_count} relationship(s); \ + changing its type to `{ty}` would change the type that \ + referencing columns require — drop those relationships first \ + or pick a target type whose foreign-key shape matches the \ + current one." ))); } @@ -1633,16 +1691,18 @@ fn do_change_column_type( // user_type / sqlite_type are updated. PK and notnull // flags carry over unchanged. let mut new_schema = old_schema.clone(); - let target = new_schema - .columns - .iter_mut() - .find(|c| c.name == column) - .expect("column existence checked above"); - target.user_type = Some(ty); - target.sqlite_type = ty.sqlite_strict_type().to_string(); + { + let target = new_schema + .columns + .iter_mut() + .find(|c| c.name == column) + .expect("column existence checked above"); + target.user_type = Some(ty); + target.sqlite_type = ty.sqlite_strict_type().to_string(); + } let ty_keyword = ty.keyword(); - rebuild_table(conn, table, &old_schema, &new_schema, |tx| { + let metadata_updates = |tx: &rusqlite::Transaction<'_>| -> Result<(), DbError> { tx.execute( &format!( "UPDATE {META_TABLE} SET user_type = ?1 \ @@ -1658,9 +1718,566 @@ fn do_change_column_type( }; finalize_persistence(tx, persistence, source, &changes)?; Ok(()) - })?; + }; - do_describe_table(conn, table) + let client_side = match mode { + ChangeColumnMode::DontConvert => { + // Skip the client-side layer entirely. The engine's + // STRICT typing will accept or refuse cells; we wrap + // any error in a friendly message (ADR-0017 §5, + // ADR-0002 user-facing posture). + rebuild_table(conn, table, &old_schema, &new_schema, metadata_updates) + .map_err(|e| friendly_change_column_engine_error(table, column, src_ty, ty, e))?; + None + } + ChangeColumnMode::Default | ChangeColumnMode::ForceConversion => Some( + run_change_column_with_dry_run( + conn, + table, + column, + src_ty, + ty, + mode, + &old_schema, + &new_schema, + metadata_updates, + )?, + ), + }; + // Client-side note fires only when at least one cell was + // materially transformed. A pure metadata change (e.g. + // identity-clean transformer over zero rows, or zero + // non-identity outcomes) emits no note (ADR-0017 §6). + let client_side = client_side.filter(|note| note.transformed > 0); + + let description = do_describe_table(conn, table)?; + Ok(ChangeColumnTypeResult { + description, + client_side, + }) +} + +/// Read every row from the source table, classify the target +/// column's cell via the transformer matrix, refuse on +/// incompatibles or lossy (per `mode`), check uniqueness for +/// PK / shortid, and finally rebuild the table with the +/// transformed values bound row-by-row. +/// +/// Returns the per-row counts feeding the `[client-side]` +/// success note (ADR-0017 §6). +#[allow(clippy::too_many_arguments)] +fn run_change_column_with_dry_run( + conn: &Connection, + table: &str, + column: &str, + src_ty: Type, + target_ty: Type, + mode: ChangeColumnMode, + old_schema: &ReadSchema, + new_schema: &ReadSchema, + metadata_updates: M, +) -> Result +where + M: FnOnce(&rusqlite::Transaction<'_>) -> Result<(), DbError>, +{ + use rusqlite::types::Value as RV; + use type_change::CellOutcome; + + let pk_columns: Vec = old_schema + .columns + .iter() + .filter(|c| c.primary_key) + .map(|c| c.name.clone()) + .collect(); + if pk_columns.is_empty() { + return Err(DbError::Unsupported(format!( + "cannot run change-column dry run on `{table}` — table has no \ + primary key; use `--dont-convert` to bypass the client-side \ + layer." + ))); + } + + // Read every row's data: the target column's value, plus the + // PK column values (used for diagnostic identifiers and for + // re-binding the row during rebuild). + let all_columns: Vec = old_schema.columns.iter().map(|c| c.name.clone()).collect(); + let target_idx = all_columns + .iter() + .position(|c| c == column) + .expect("column existence checked above"); + let pk_indices: Vec = pk_columns + .iter() + .map(|pk| all_columns.iter().position(|c| c == pk).expect("pk in cols")) + .collect(); + + let select_cols = all_columns + .iter() + .map(|c| quote_ident(c)) + .collect::>() + .join(", "); + let order_by = pk_columns + .iter() + .map(|c| quote_ident(c)) + .collect::>() + .join(", "); + let select_sql = format!( + "SELECT {select_cols} FROM {tbl} ORDER BY {order_by};", + tbl = quote_ident(table), + ); + + let mut rows: Vec> = Vec::new(); + { + let mut stmt = conn.prepare(&select_sql).map_err(DbError::from_rusqlite)?; + let mut sql_rows = stmt.query([]).map_err(DbError::from_rusqlite)?; + while let Some(r) = sql_rows.next().map_err(DbError::from_rusqlite)? { + let mut row: Vec = Vec::with_capacity(all_columns.len()); + for i in 0..all_columns.len() { + let v: RV = r.get(i).map_err(DbError::from_rusqlite)?; + row.push(v); + } + rows.push(row); + } + } + + // Classify every row's target cell. + let mut outcomes: Vec = Vec::with_capacity(rows.len()); + for row in &rows { + let pk_values: Vec = pk_indices.iter().map(|i| row[*i].clone()).collect(); + let original = row[target_idx].clone(); + let outcome = type_change::transform_cell(src_ty, target_ty, &original); + outcomes.push(Outcome { + pk_values, + original, + outcome, + }); + } + + // Refuse on incompatibles. + let incompatibles: Vec<&Outcome> = outcomes + .iter() + .filter(|o| matches!(o.outcome, CellOutcome::Incompatible { .. })) + .collect(); + if !incompatibles.is_empty() { + return Err(DbError::Unsupported(render_incompatible_diagnostic( + table, + column, + src_ty, + target_ty, + &pk_columns, + old_schema, + &incompatibles, + ))); + } + + // Refuse on lossy unless --force-conversion. + let lossies: Vec<&Outcome> = outcomes + .iter() + .filter(|o| matches!(o.outcome, CellOutcome::Lossy { .. })) + .collect(); + if !lossies.is_empty() && mode != ChangeColumnMode::ForceConversion { + return Err(DbError::Unsupported(render_lossy_diagnostic( + table, + column, + src_ty, + target_ty, + &pk_columns, + old_schema, + &lossies, + ))); + } + + // Uniqueness check for PK / shortid columns. + let target_col_info = old_schema + .columns + .iter() + .find(|c| c.name == column) + .expect("target column exists"); + let uniqueness_required = target_col_info.primary_key || target_ty == Type::ShortId; + if uniqueness_required + && let Some(error_msg) = check_uniqueness_collisions( + table, + column, + src_ty, + target_ty, + &pk_columns, + old_schema, + &outcomes, + ) + { + return Err(DbError::Unsupported(error_msg)); + } + + // All cells classified Clean or Lossy-with-force. Build the + // transformed values list, indexed by row, and rebuild the + // table with row-by-row binding. + let transformed_values: Vec = outcomes + .iter() + .map(|o| match &o.outcome { + CellOutcome::Clean(v) | CellOutcome::Lossy { new: v, .. } => v.clone(), + CellOutcome::Incompatible { .. } => { + unreachable!("incompatibles refused above") + } + }) + .collect(); + + let copy_data = |tx: &rusqlite::Transaction<'_>, + temp_name: &str, + _orig: &str| + -> Result<(), DbError> { + if rows.is_empty() { + return Ok(()); + } + let cols_csv = all_columns + .iter() + .map(|c| quote_ident(c)) + .collect::>() + .join(", "); + let placeholders = (0..all_columns.len()) + .map(|i| format!("?{}", i + 1)) + .collect::>() + .join(", "); + let insert_sql = format!( + "INSERT INTO {temp} ({cols}) VALUES ({ph});", + temp = quote_ident(temp_name), + cols = cols_csv, + ph = placeholders, + ); + let mut stmt = tx.prepare(&insert_sql).map_err(DbError::from_rusqlite)?; + for (row_idx, row) in rows.iter().enumerate() { + let mut bound: Vec = row.clone(); + bound[target_idx] = transformed_values[row_idx].clone(); + let params: Vec<&dyn rusqlite::ToSql> = + bound.iter().map(|v| v as &dyn rusqlite::ToSql).collect(); + stmt.execute(rusqlite::params_from_iter(params)) + .map_err(DbError::from_rusqlite)?; + } + Ok(()) + }; + + rebuild_table_with_copy(conn, table, new_schema, copy_data, metadata_updates)?; + + // Tally counts for the [client-side] note. A cell counts + // as "transformed" if its stored value materially differs + // from the original (per ADR-0017 §6 / DA #5: storage class + // changes count). Lossy is a strict subset. + let mut transformed = 0usize; + let mut lossy = 0usize; + for (idx, o) in outcomes.iter().enumerate() { + let new = &transformed_values[idx]; + if type_change::is_non_identity(&o.original, new) { + transformed += 1; + } + if matches!(o.outcome, CellOutcome::Lossy { .. }) { + lossy += 1; + } + } + Ok(ClientSideNote { transformed, lossy }) +} + +/// Wrap an engine-level error from the `--dont-convert` path +/// into a friendly form per ADR-0017 §5 / ADR-0002. Avoids +/// surfacing engine vocabulary (SQLite, STRICT, PRAGMA, …) by +/// recognising the common kinds and producing a generic +/// abstract phrasing. +fn friendly_change_column_engine_error( + table: &str, + column: &str, + src_ty: Type, + target_ty: Type, + err: DbError, +) -> DbError { + let detail = match &err { + DbError::Sqlite { message, .. } => { + let lower = message.to_ascii_lowercase(); + if lower.contains("constraint") || lower.contains("not null") { + "the database refused at least one cell." + } else if lower.contains("type") || lower.contains("non-text") || lower.contains("non-integer") { + "the database refused at least one cell as the wrong shape \ + for the new type." + } else { + "the database refused the operation." + } + } + _ => return err, + }; + DbError::Unsupported(format!( + "Cannot change `{table}.{column}` from {src_ty} to {target_ty} with \ + `--dont-convert`: {detail} Re-run without the flag to see per-row \ + diagnostics." + )) +} + +/// Maximum diagnostic rows rendered per refusal (ADR-0017 §7). +/// Rows beyond this collapse into a trailing `… and N more` row. +const DIAGNOSTIC_ROW_CAP: usize = 100; + +#[allow(clippy::too_many_arguments)] +fn render_lossy_diagnostic( + table: &str, + column: &str, + src_ty: Type, + target_ty: Type, + pk_columns: &[String], + old_schema: &ReadSchema, + lossies: &[&Outcome], +) -> String { + let mut headers = pk_header_cells(pk_columns); + headers.extend(["From".to_string(), "To".to_string(), "Reason".to_string()]); + + let mut alignments = pk_header_alignments(pk_columns, old_schema); + alignments.extend([ + type_change::is_in_matrix_alignment(src_ty), + type_change::is_in_matrix_alignment(target_ty), + Alignment::Left, + ]); + + let total = lossies.len(); + let visible = total.min(DIAGNOSTIC_ROW_CAP); + let mut rows: Vec> = Vec::with_capacity(visible + 1); + for o in lossies.iter().take(visible) { + let mut cells = pk_value_cells(&o.pk_values); + let (new_str, reason) = match &o.outcome { + type_change::CellOutcome::Lossy { new, reason } => { + (render_value(new), reason.clone()) + } + _ => unreachable!("filtered to Lossy"), + }; + cells.push(render_value(&o.original)); + cells.push(new_str); + cells.push(reason); + rows.push(cells); + } + if total > visible { + rows.push(more_row(headers.len(), total - visible)); + } + + let mut out = format!( + "Cannot change `{table}.{column}` from {src_ty} to {target_ty}: \ + {total} row(s) would discard information.\n\n" + ); + for line in render_diagnostic_table(&headers, &rows, &alignments) { + out.push_str(&line); + out.push('\n'); + } + out.push('\n'); + out.push_str( + "if you want to execute this conversion in spite of the problems, \ + re-run with `--force-conversion`.", + ); + out +} + +#[allow(clippy::too_many_arguments)] +fn render_incompatible_diagnostic( + table: &str, + column: &str, + src_ty: Type, + target_ty: Type, + pk_columns: &[String], + old_schema: &ReadSchema, + incompatibles: &[&Outcome], +) -> String { + let mut headers = pk_header_cells(pk_columns); + headers.extend(["Value".to_string(), "Reason".to_string()]); + + let mut alignments = pk_header_alignments(pk_columns, old_schema); + alignments.extend([ + type_change::is_in_matrix_alignment(src_ty), + Alignment::Left, + ]); + + let total = incompatibles.len(); + let visible = total.min(DIAGNOSTIC_ROW_CAP); + let mut rows: Vec> = Vec::with_capacity(visible + 1); + for o in incompatibles.iter().take(visible) { + let reason = match &o.outcome { + type_change::CellOutcome::Incompatible { reason } => reason.clone(), + _ => unreachable!("filtered to Incompatible"), + }; + let mut cells = pk_value_cells(&o.pk_values); + cells.push(render_value(&o.original)); + cells.push(reason); + rows.push(cells); + } + if total > visible { + rows.push(more_row(headers.len(), total - visible)); + } + + let mut out = format!( + "Cannot change `{table}.{column}` from {src_ty} to {target_ty}: \ + {total} row(s) cannot be converted.\n\n" + ); + for line in render_diagnostic_table(&headers, &rows, &alignments) { + out.push_str(&line); + out.push('\n'); + } + out +} + +/// Detect post-transformation duplicates among `outcomes` and +/// produce a refusal diagnostic if any exist. Returns `None` if +/// uniqueness is preserved. Per ADR-0017 §4.3 collisions are +/// classified incompatible — no `--force-conversion` override. +fn check_uniqueness_collisions( + table: &str, + column: &str, + src_ty: Type, + target_ty: Type, + pk_columns: &[String], + old_schema: &ReadSchema, + outcomes: &[Outcome], +) -> Option { + use std::collections::BTreeMap; + + // Keyed by the canonical string form of the transformed + // value; NULL is excluded from uniqueness collision checks + // (the new column may carry NULL until C3 lands NOT NULL + // constraints; treat NULLs as not-colliding). + let mut groups: BTreeMap> = BTreeMap::new(); + for (i, o) in outcomes.iter().enumerate() { + let new = match &o.outcome { + type_change::CellOutcome::Clean(v) => v, + type_change::CellOutcome::Lossy { new, .. } => new, + type_change::CellOutcome::Incompatible { .. } => continue, + }; + if matches!(new, rusqlite::types::Value::Null) { + continue; + } + groups.entry(render_value(new)).or_default().push(i); + } + + let collisions: Vec<(String, Vec)> = groups + .into_iter() + .filter(|(_, idxs)| idxs.len() >= 2) + .collect(); + if collisions.is_empty() { + return None; + } + + let pk_label = pk_columns.join(", "); + let headers = vec![ + "Becomes".to_string(), + format!("Source rows ({pk_label})"), + "Source values".to_string(), + ]; + + let alignments = vec![ + type_change::is_in_matrix_alignment(target_ty), + Alignment::Left, + Alignment::Left, + ]; + + let total = collisions.len(); + let visible = total.min(DIAGNOSTIC_ROW_CAP); + let mut rows: Vec> = Vec::with_capacity(visible + 1); + for (becomes, idxs) in collisions.iter().take(visible) { + let pks_csv = idxs + .iter() + .take(5) + .map(|i| pk_value_cells_inline(&outcomes[*i].pk_values)) + .collect::>() + .join(", "); + let pks_str = if idxs.len() > 5 { + format!("{pks_csv}, …") + } else { + pks_csv + }; + let sources_csv = idxs + .iter() + .take(5) + .map(|i| render_value(&outcomes[*i].original)) + .collect::>() + .join(", "); + let sources_str = if idxs.len() > 5 { + format!("{sources_csv}, …") + } else { + sources_csv + }; + rows.push(vec![becomes.clone(), pks_str, sources_str]); + } + if total > visible { + rows.push(more_row(headers.len(), total - visible)); + } + + // ColumnDescription guarantees old_schema known; not used + // beyond the §4.3 wording so silence the warning. + let _ = old_schema; + + let mut out = format!( + "Cannot change `{table}.{column}` from {src_ty} to {target_ty}: \ + {total} collision(s) would violate uniqueness.\n\n" + ); + for line in render_diagnostic_table(&headers, &rows, &alignments) { + out.push_str(&line); + out.push('\n'); + } + Some(out) +} + +/// Outcome record for the dry-run pass. Mirrors the inner type +/// inside `run_change_column_with_dry_run` so the diagnostic +/// helpers can take it as a slice. +struct Outcome { + pk_values: Vec, + original: rusqlite::types::Value, + outcome: type_change::CellOutcome, +} + +fn pk_header_cells(pk_columns: &[String]) -> Vec { + pk_columns + .iter() + .map(|c| format!("{c} (PK)")) + .collect() +} + +fn pk_header_alignments(pk_columns: &[String], schema: &ReadSchema) -> Vec { + pk_columns + .iter() + .map(|name| { + schema + .columns + .iter() + .find(|c| &c.name == name) + .and_then(|c| c.user_type) + .map_or(Alignment::Left, type_change::is_in_matrix_alignment) + }) + .collect() +} + +fn pk_value_cells(values: &[rusqlite::types::Value]) -> Vec { + values.iter().map(render_value).collect() +} + +/// Single-cell inline rendering of a row's PK values, used in +/// uniqueness-collision diagnostics where one cell lists many +/// PK identifiers. Single-PK forms render as bare values (`5`); +/// compound-PK forms render as tuples (`(1, 2)`). +fn pk_value_cells_inline(values: &[rusqlite::types::Value]) -> String { + if values.len() == 1 { + render_value(&values[0]) + } else { + let parts: Vec = values.iter().map(render_value).collect(); + format!("({})", parts.join(", ")) + } +} + +fn render_value(v: &rusqlite::types::Value) -> String { + use rusqlite::types::Value as RV; + match v { + RV::Null => "(null)".to_string(), + RV::Integer(i) => i.to_string(), + RV::Real(r) => format!("{r}"), + RV::Text(s) => s.clone(), + RV::Blob(_) => "".to_string(), + } +} + +fn more_row(width: usize, more: usize) -> Vec { + let mut row = vec!["…".to_string(); width]; + if let Some(last) = row.last_mut() { + *last = format!("… and {more} more"); + } + row } fn do_list_tables(conn: &Connection) -> Result, DbError> { @@ -1844,24 +2461,31 @@ fn schema_to_ddl(table: &str, schema: &ReadSchema) -> String { } /// The rebuild-table dance. Replaces `table` with a fresh table -/// constructed from `new_schema`, copying data column-by-name. -/// The caller is responsible for any metadata-table updates that -/// need to happen alongside the rebuild — those are passed in as -/// a closure and run inside the same transaction. +/// constructed from `new_schema`, then runs a caller-provided +/// `copy_data` step to populate it from the old table. The +/// `metadata_updates` closure runs inside the same transaction +/// after the data copy. +/// +/// Most callers want the default INSERT…SELECT copy — they +/// should use [`rebuild_table`] which wraps this with that copy +/// strategy. The custom-copy form is reserved for `change column +/// type` (ADR-0017), which transforms one column's values +/// row-by-row before binding them. /// /// Per the official SQLite ALTER-via-rebuild recipe: foreign-key /// enforcement must be temporarily disabled at session level, /// not just deferred, because the connection-level pragma is a /// no-op inside transactions. -fn rebuild_table( +fn rebuild_table_with_copy( conn: &Connection, table: &str, - old_schema: &ReadSchema, new_schema: &ReadSchema, - metadata_updates: F, + copy_data: C, + metadata_updates: M, ) -> Result<(), DbError> where - F: FnOnce(&rusqlite::Transaction<'_>) -> Result<(), DbError>, + C: FnOnce(&rusqlite::Transaction<'_>, &str, &str) -> Result<(), DbError>, + M: FnOnce(&rusqlite::Transaction<'_>) -> Result<(), DbError>, { // foreign_keys=OFF must be set *outside* a transaction. conn.execute_batch("PRAGMA foreign_keys = OFF;") @@ -1885,30 +2509,7 @@ where tx.execute_batch(&create_temp) .map_err(DbError::from_rusqlite)?; - // Copy data: only for columns that exist on both sides. - // Auto-created columns (e.g. via --create-fk) are absent - // from the old table, so they remain NULL in the new one. - let copy_cols: Vec<&str> = new_schema - .columns - .iter() - .filter(|c| old_schema.columns.iter().any(|oc| oc.name == c.name)) - .map(|c| c.name.as_str()) - .collect(); - if !copy_cols.is_empty() { - let cols_csv = copy_cols - .iter() - .map(|c| quote_ident(c)) - .collect::>() - .join(", "); - let copy_sql = format!( - "INSERT INTO {temp} ({cols}) SELECT {cols} FROM {orig};", - temp = quote_ident(&temp_name), - cols = cols_csv, - orig = quote_ident(table), - ); - tx.execute_batch(©_sql) - .map_err(DbError::from_rusqlite)?; - } + copy_data(&tx, &temp_name, table)?; tx.execute_batch(&format!( "DROP TABLE {ident};", @@ -1954,6 +2555,51 @@ where result.and(pragma_result) } +/// The default rebuild-table strategy: column-by-name copy via +/// `INSERT INTO new (cols) SELECT cols FROM old`, where `cols` +/// is the intersection of old and new column names. +fn rebuild_table( + conn: &Connection, + table: &str, + old_schema: &ReadSchema, + new_schema: &ReadSchema, + metadata_updates: F, +) -> Result<(), DbError> +where + F: FnOnce(&rusqlite::Transaction<'_>) -> Result<(), DbError>, +{ + let copy_cols: Vec = new_schema + .columns + .iter() + .filter(|c| old_schema.columns.iter().any(|oc| oc.name == c.name)) + .map(|c| c.name.clone()) + .collect(); + + rebuild_table_with_copy( + conn, + table, + new_schema, + |tx, temp_name, orig| { + if copy_cols.is_empty() { + return Ok(()); + } + let cols_csv = copy_cols + .iter() + .map(|c| quote_ident(c)) + .collect::>() + .join(", "); + let copy_sql = format!( + "INSERT INTO {temp} ({cols}) SELECT {cols} FROM {orig};", + temp = quote_ident(temp_name), + cols = cols_csv, + orig = quote_ident(orig), + ); + tx.execute_batch(©_sql).map_err(DbError::from_rusqlite) + }, + metadata_updates, + ) +} + #[allow(clippy::too_many_arguments)] fn do_add_relationship( conn: &Connection, @@ -3674,28 +4320,467 @@ mod tests { .await .unwrap(); } - let desc = db - .change_column_type("T".to_string(), "Score".to_string(), Type::Int, None) + let result = db + .change_column_type( + "T".to_string(), + "Score".to_string(), + Type::Int, + ChangeColumnMode::Default, + None, + ) .await .unwrap(); - let score = desc.columns.iter().find(|c| c.name == "Score").unwrap(); + let score = result + .description + .columns + .iter() + .find(|c| c.name == "Score") + .unwrap(); assert_eq!(score.user_type, Some(Type::Int)); - // Data preserved via SQLite's coercion (TEXT '1' → INTEGER 1). + // Per ADR-0017 §6, the [client-side] note fires when + // any cell was rewritten (storage class change counts). + let note = result + .client_side + .expect("text -> int rewrites every cell; expected client-side note"); + assert_eq!(note.transformed, 3); + assert_eq!(note.lossy, 0); + // Data preserved via the per-cell transformer. let data = db.query_data("T".to_string(), None).await.unwrap(); assert_eq!(data.rows.len(), 3); } #[tokio::test] - async fn change_column_type_refuses_pk() { + async fn change_column_type_pk_without_inbound_fk_allowed() { + // ADR-0017 §4.1: a PK without inbound FKs may have its + // type changed freely (no cascade to worry about). let db = db(); make_id_table(&db, "T").await; + // serial -> int (canonical "drop auto-increment" case). + db.change_column_type( + "T".to_string(), + "id".to_string(), + Type::Int, + ChangeColumnMode::Default, + None, + ) + .await + .expect("serial -> int on PK without inbound FK should succeed"); + } + + #[tokio::test] + async fn change_column_type_pk_with_inbound_fk_refused_when_target_type_changes() { + // PK referenced by another table's FK; new type would + // change `fk_target_type` (serial -> text means + // fk_target_type goes Int -> Text). Refused. + let db = db(); + make_id_table(&db, "Customers").await; + make_id_table(&db, "Orders").await; + db.add_column( + "Orders".to_string(), + "cust_id".to_string(), + Type::Int, + None, + ) + .await + .unwrap(); + db.add_relationship( + None, + "Customers".to_string(), + "id".to_string(), + "Orders".to_string(), + "cust_id".to_string(), + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .unwrap(); let err = db - .change_column_type("T".to_string(), "id".to_string(), Type::Text, None) + .change_column_type( + "Customers".to_string(), + "id".to_string(), + Type::Text, + ChangeColumnMode::Default, + None, + ) + .await + .unwrap_err(); + match err { + DbError::Unsupported(message) => { + assert!( + message.contains("referenced") || message.contains("relationship"), + "expected FK-cascade language: {message}" + ); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[tokio::test] + async fn change_column_type_pk_with_inbound_fk_allowed_when_target_type_preserved() { + // PK referenced by another table's FK; serial -> int + // preserves `fk_target_type` (both Int). Allowed. + let db = db(); + make_id_table(&db, "Customers").await; + make_id_table(&db, "Orders").await; + db.add_column( + "Orders".to_string(), + "cust_id".to_string(), + Type::Int, + None, + ) + .await + .unwrap(); + db.add_relationship( + None, + "Customers".to_string(), + "id".to_string(), + "Orders".to_string(), + "cust_id".to_string(), + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .unwrap(); + db.change_column_type( + "Customers".to_string(), + "id".to_string(), + Type::Int, + ChangeColumnMode::Default, + None, + ) + .await + .expect("serial -> int on FK-referenced PK should succeed"); + } + + // ---- ADR-0017 dry-run / mode / uniqueness coverage ---- + + #[tokio::test] + async fn change_column_type_refuses_lossy_by_default() { + // real -> int with fractional values: every cell is + // Lossy. Default mode refuses with a friendly diagnostic + // table. + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Score".to_string(), Type::Real, None) + .await + .unwrap(); + for v in ["3.14", "2.71"] { + db.insert( + "T".to_string(), + Some(vec!["Score".to_string()]), + vec![Value::Number(v.to_string())], + None, + ) + .await + .unwrap(); + } + let err = db + .change_column_type( + "T".to_string(), + "Score".to_string(), + Type::Int, + ChangeColumnMode::Default, + None, + ) + .await + .unwrap_err(); + match err { + DbError::Unsupported(message) => { + assert!( + message.contains("discard information"), + "expected lossy header: {message}" + ); + assert!( + message.contains("--force-conversion"), + "expected hint about --force-conversion: {message}" + ); + // Bordered diagnostic table is present. + assert!(message.contains('┌'), "expected bordered table: {message}"); + assert!( + message.contains("(PK)"), + "expected PK header marker: {message}" + ); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[tokio::test] + async fn change_column_type_force_conversion_accepts_lossy() { + // Same setup as above, but with --force-conversion the + // change succeeds and the [client-side] note carries + // the lossy count. + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Score".to_string(), Type::Real, None) + .await + .unwrap(); + for v in ["3.14", "2.71", "5.0"] { + db.insert( + "T".to_string(), + Some(vec!["Score".to_string()]), + vec![Value::Number(v.to_string())], + None, + ) + .await + .unwrap(); + } + let result = db + .change_column_type( + "T".to_string(), + "Score".to_string(), + Type::Int, + ChangeColumnMode::ForceConversion, + None, + ) + .await + .expect("--force-conversion should accept lossy rows"); + let note = result.client_side.expect("transformations happened"); + assert_eq!(note.transformed, 3); + // Two of three rows are fractional (lossy); 5.0 is clean. + assert_eq!(note.lossy, 2); + } + + #[tokio::test] + async fn change_column_type_refuses_incompatible_even_with_force() { + // text "abc" -> int: incompatible. --force-conversion + // does NOT help (per ADR-0017 §5 / §2 step 3). + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Note".to_string(), Type::Text, None) + .await + .unwrap(); + for v in ["abc", "123", "xyz"] { + db.insert( + "T".to_string(), + Some(vec!["Note".to_string()]), + vec![Value::Text(v.to_string())], + None, + ) + .await + .unwrap(); + } + for mode in [ChangeColumnMode::Default, ChangeColumnMode::ForceConversion] { + let err = db + .change_column_type( + "T".to_string(), + "Note".to_string(), + Type::Int, + mode, + None, + ) + .await + .unwrap_err(); + match err { + DbError::Unsupported(message) => { + assert!( + message.contains("cannot be converted"), + "expected incompatible header for {mode:?}: {message}" + ); + assert!( + !message.contains("--force-conversion"), + "incompatible refusal must NOT mention --force-conversion: {message}" + ); + } + other => panic!("unexpected ({mode:?}): {other:?}"), + } + } + } + + #[tokio::test] + async fn change_column_type_dont_convert_skips_client_side() { + // text -> int: under the per-cell matrix, "1"/"2"/"3" + // would all classify Clean and fire the [client-side] + // note. With --dont-convert we bypass that and rely on + // engine coercion; the note is suppressed. + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Score".to_string(), Type::Text, None) + .await + .unwrap(); + for v in ["1", "2", "3"] { + db.insert( + "T".to_string(), + Some(vec!["Score".to_string()]), + vec![Value::Text(v.to_string())], + None, + ) + .await + .unwrap(); + } + let result = db + .change_column_type( + "T".to_string(), + "Score".to_string(), + Type::Int, + ChangeColumnMode::DontConvert, + None, + ) + .await + .expect("text->int with all-clean cells should succeed under --dont-convert"); + assert!( + result.client_side.is_none(), + "--dont-convert must NOT emit a client-side note" + ); + } + + #[tokio::test] + async fn change_column_type_uniqueness_collision_on_pk_refused() { + // PK column with values 3.14 and 3.7: real -> int with + // --force-conversion would collapse both to 3, violating + // PK uniqueness. ADR-0017 §4.3: this is incompatible + // even under --force-conversion. + let db = db(); + db.create_table( + "T".to_string(), + vec![col("id", Type::Real)], + vec!["id".to_string()], + None, + ) + .await + .unwrap(); + for v in ["3.14", "3.7"] { + db.insert( + "T".to_string(), + Some(vec!["id".to_string()]), + vec![Value::Number(v.to_string())], + None, + ) + .await + .unwrap(); + } + let err = db + .change_column_type( + "T".to_string(), + "id".to_string(), + Type::Int, + ChangeColumnMode::ForceConversion, + None, + ) + .await + .unwrap_err(); + match err { + DbError::Unsupported(message) => { + assert!( + message.contains("collision") && message.contains("uniqueness"), + "expected collision diagnostic: {message}" + ); + assert!(message.contains('┌'), "expected bordered table: {message}"); + // Source rows column with PK column name in the header. + assert!( + message.contains("Source rows (id)"), + "expected PK-aware Source rows header: {message}" + ); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[tokio::test] + async fn change_column_type_blob_target_refused_statically() { + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Note".to_string(), Type::Text, None) + .await + .unwrap(); + let err = db + .change_column_type( + "T".to_string(), + "Note".to_string(), + Type::Blob, + ChangeColumnMode::Default, + None, + ) .await .unwrap_err(); assert!(matches!(err, DbError::Unsupported(_)), "got {err:?}"); } + #[tokio::test] + async fn change_column_type_outbound_fk_refused() { + // Child-side FK column: §4.2 says always refuse, + // regardless of target type. + let db = db(); + make_id_table(&db, "Customers").await; + make_id_table(&db, "Orders").await; + db.add_column( + "Orders".to_string(), + "cust_id".to_string(), + Type::Int, + None, + ) + .await + .unwrap(); + db.add_relationship( + None, + "Customers".to_string(), + "id".to_string(), + "Orders".to_string(), + "cust_id".to_string(), + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .unwrap(); + let err = db + .change_column_type( + "Orders".to_string(), + "cust_id".to_string(), + Type::Real, + ChangeColumnMode::Default, + None, + ) + .await + .unwrap_err(); + match err { + DbError::Unsupported(message) => { + assert!( + message.contains("foreign key") || message.contains("relationship"), + "expected outbound-FK refusal: {message}" + ); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[tokio::test] + async fn change_column_type_no_op_when_no_rows() { + // Empty table: no cells to transform, no [client-side] + // note, and the structural change goes through. + let db = db(); + make_id_table(&db, "T").await; + db.add_column("T".to_string(), "Note".to_string(), Type::Text, None) + .await + .unwrap(); + let result = db + .change_column_type( + "T".to_string(), + "Note".to_string(), + Type::Int, + ChangeColumnMode::Default, + None, + ) + .await + .expect("empty table should still allow the type change"); + assert!( + result.client_side.is_none(), + "no rows means no client-side note" + ); + let note_col = result + .description + .columns + .iter() + .find(|c| c.name == "Note") + .unwrap(); + assert_eq!(note_col.user_type, Some(Type::Int)); + } + #[tokio::test] async fn change_column_type_refuses_serial_target() { let db = db(); @@ -3704,7 +4789,7 @@ mod tests { .await .unwrap(); let err = db - .change_column_type("T".to_string(), "A".to_string(), Type::Serial, None) + .change_column_type("T".to_string(), "A".to_string(), Type::Serial, ChangeColumnMode::Default, None) .await .unwrap_err(); assert!(matches!(err, DbError::Unsupported(_)), "got {err:?}"); @@ -3741,6 +4826,7 @@ mod tests { "Orders".to_string(), "cust_id".to_string(), Type::Text, + ChangeColumnMode::Default, None, ) .await @@ -3756,7 +4842,7 @@ mod tests { .await .unwrap(); let err = db - .change_column_type("T".to_string(), "A".to_string(), Type::Int, None) + .change_column_type("T".to_string(), "A".to_string(), Type::Int, ChangeColumnMode::Default, None) .await .unwrap_err(); assert!(matches!(err, DbError::Unsupported(_)), "got {err:?}"); diff --git a/src/dsl/command.rs b/src/dsl/command.rs index fdb8a52..b88abb3 100644 --- a/src/dsl/command.rs +++ b/src/dsl/command.rs @@ -63,15 +63,17 @@ pub enum Command { }, /// Change a column's type. Implemented via the /// rebuild-table primitive (ADR-0013) since SQLite's - /// ALTER TABLE does not support type changes. Refused - /// for PK columns and for columns involved in a declared - /// relationship — those would require cascading FK - /// type updates the v1 surface deliberately doesn't - /// expose. + /// ALTER TABLE does not support type changes. + /// + /// Per ADR-0017 the actual conversion model is a per-cell + /// dry-run against a curated transformer matrix; the two + /// optional flags carried in `mode` let the user opt into + /// lossy conversions or skip the client-side layer entirely. ChangeColumnType { table: String, column: String, ty: Type, + mode: ChangeColumnMode, }, /// Establish a 1:n relationship: parent_table.parent_column /// is the primary-key side; child_table.child_column is the @@ -125,6 +127,21 @@ pub enum Command { }, } +/// Conversion mode for `change column …` (ADR-0017 §5). +/// +/// `Default` runs the per-cell dry-run and refuses on lossy or +/// incompatible cells. `ForceConversion` accepts lossy cells but +/// still refuses incompatibles. `DontConvert` skips the entire +/// client-side layer and lets the database's STRICT typing +/// decide. `ForceConversion` and `DontConvert` are mutually +/// exclusive at the grammar level. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ChangeColumnMode { + Default, + ForceConversion, + DontConvert, +} + /// How an UPDATE / DELETE selects which rows to operate on. /// `Where` is the default safe form. `AllRows` is the explicit /// `--all-rows` flag opt-in for unfiltered operations. diff --git a/src/dsl/mod.rs b/src/dsl/mod.rs index bbef2b7..3c2cbda 100644 --- a/src/dsl/mod.rs +++ b/src/dsl/mod.rs @@ -17,7 +17,9 @@ pub mod types; pub mod value; pub use action::ReferentialAction; -pub use command::{ColumnSpec, Command, RelationshipSelector, RowFilter}; +pub use command::{ + ChangeColumnMode, ColumnSpec, Command, RelationshipSelector, RowFilter, +}; pub use parser::{ParseError, parse_command}; pub use types::Type; pub use value::Value; diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 9a8b904..4f9bb70 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -14,7 +14,9 @@ use chumsky::error::RichReason; use chumsky::prelude::*; use crate::dsl::action::ReferentialAction; -use crate::dsl::command::{ColumnSpec, Command, RelationshipSelector, RowFilter}; +use crate::dsl::command::{ + ChangeColumnMode, ColumnSpec, Command, RelationshipSelector, RowFilter, +}; use crate::dsl::types::Type; use crate::dsl::value::Value; @@ -172,9 +174,10 @@ fn command_parser<'a>() .then(identifier()) .map(|((table, old), new)| Command::RenameColumn { table, old, new }); - // `change column [in] [table] : ()`. - // Same shape as `add column` — the column-and-type - // tuple in parens — but the verb is `change`. + // `change column [in] [table] : () [flags]` + // where `flags` is at most one of `--force-conversion` / + // `--dont-convert` (mutually exclusive at parse time per + // ADR-0017 §5). let change_column = keyword_ci("change") .ignore_then(keyword_ci("column")) .ignore_then(optional_keyword("in")) @@ -185,7 +188,13 @@ fn command_parser<'a>() .then_ignore(just('(').padded()) .then(type_keyword()) .then_ignore(just(')').padded()) - .map(|((table, column), ty)| Command::ChangeColumnType { table, column, ty }); + .then(change_column_flags()) + .map(|(((table, column), ty), mode)| Command::ChangeColumnType { + table, + column, + ty, + mode, + }); let add_relationship = add_relationship_parser(); let drop_relationship = drop_relationship_parser(); @@ -555,6 +564,33 @@ fn create_fk_flag<'a>() .map(|opt| opt.is_some()) } +/// Optional flags for `change column …` (ADR-0017 §5). +/// Allows zero or one of the two mutually-exclusive flags; +/// emits a custom parse error if both are present, naming both +/// flags so the user knows what the conflict is. +fn change_column_flags<'a>() +-> impl Parser<'a, &'a str, ChangeColumnMode, extra::Err>> + Clone { + let force = just("--force-conversion") + .padded() + .to(ChangeColumnMode::ForceConversion); + let dont = just("--dont-convert") + .padded() + .to(ChangeColumnMode::DontConvert); + choice((force, dont)) + .repeated() + .collect::>() + .try_map(|flags, span| match flags.as_slice() { + [] => Ok(ChangeColumnMode::Default), + [single] => Ok(*single), + _ => Err(Rich::custom( + span, + "`--force-conversion` and `--dont-convert` are mutually \ + exclusive — pick one." + .to_string(), + )), + }) +} + /// Parse the optional `with pk []` clause that may follow /// `create table `. Returns the list of (name, type) pairs /// that form the primary key. An absent clause returns an empty @@ -870,6 +906,7 @@ mod tests { table: "Customers".to_string(), column: "Score".to_string(), ty: Type::Int, + mode: ChangeColumnMode::Default, } ); } @@ -882,6 +919,7 @@ mod tests { table: "Customers".to_string(), column: "Score".to_string(), ty: Type::Real, + mode: ChangeColumnMode::Default, } ); } @@ -894,10 +932,71 @@ mod tests { table: "Customers".to_string(), column: "Score".to_string(), ty: Type::Text, + mode: ChangeColumnMode::Default, } ); } + #[test] + fn change_column_with_force_conversion_flag() { + assert_eq!( + ok("change column Customers: Score (int) --force-conversion"), + Command::ChangeColumnType { + table: "Customers".to_string(), + column: "Score".to_string(), + ty: Type::Int, + mode: ChangeColumnMode::ForceConversion, + } + ); + } + + #[test] + fn change_column_with_dont_convert_flag() { + assert_eq!( + ok("change column Customers: Score (int) --dont-convert"), + Command::ChangeColumnType { + table: "Customers".to_string(), + column: "Score".to_string(), + ty: Type::Int, + mode: ChangeColumnMode::DontConvert, + } + ); + } + + #[test] + fn change_column_rejects_both_flags() { + let e = err( + "change column Customers: Score (int) --force-conversion --dont-convert", + ); + match e { + ParseError::Invalid { message, .. } => { + assert!( + message.contains("--force-conversion") + && message.contains("--dont-convert"), + "expected both flag names in error: {message}" + ); + assert!( + message.contains("mutually exclusive") || message.contains("pick one"), + "{message}" + ); + } + ParseError::Empty => panic!("unexpected empty error"), + } + } + + #[test] + fn change_column_rejects_both_flags_in_either_order() { + // Both orderings — and same-flag-twice — should reject + // with a uniform "pick one" signal. + let e = err("change column T: c (int) --dont-convert --force-conversion"); + match e { + ParseError::Invalid { message, .. } => { + assert!(message.contains("mutually exclusive"), "{message}"); + } + ParseError::Empty => panic!("unexpected empty error"), + } + } + #[test] fn drop_table_simple() { assert_eq!( diff --git a/src/dsl/value.rs b/src/dsl/value.rs index a7ccabe..fdaab48 100644 --- a/src/dsl/value.rs +++ b/src/dsl/value.rs @@ -208,7 +208,7 @@ impl Value { } } -fn validate_date(s: &str) -> Result<(), String> { +pub(crate) fn validate_date(s: &str) -> Result<(), String> { // Expect YYYY-MM-DD: 10 chars, two dashes at fixed positions. let bytes = s.as_bytes(); if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' { @@ -231,7 +231,7 @@ fn validate_date(s: &str) -> Result<(), String> { Ok(()) } -fn validate_datetime(s: &str) -> Result<(), String> { +pub(crate) fn validate_datetime(s: &str) -> Result<(), String> { // Minimum: YYYY-MM-DDTHH:MM:SS = 19 chars. Allow optional // fractional seconds (.fff) and optional Z or ±HH:MM offset. if s.len() < 19 { diff --git a/src/event.rs b/src/event.rs index 036558e..6f67a15 100644 --- a/src/event.rs +++ b/src/event.rs @@ -7,7 +7,10 @@ use crossterm::event::KeyEvent; -use crate::db::{DataResult, DeleteResult, InsertResult, TableDescription, UpdateResult}; +use crate::db::{ + ChangeColumnTypeResult, DataResult, DeleteResult, InsertResult, TableDescription, + UpdateResult, +}; use crate::dsl::Command; #[derive(Debug, Clone)] @@ -39,6 +42,13 @@ pub enum AppEvent { command: Command, result: DeleteResult, }, + /// A `change column …` succeeded. `result` carries both the + /// post-rebuild description (for the auto-show) and the + /// optional `[client-side]` note (ADR-0017 §6). + DslChangeColumnSucceeded { + command: Command, + result: ChangeColumnTypeResult, + }, /// A DSL command failed. `error` is already a friendly /// message produced via `DbError::friendly_message`. DslFailed { diff --git a/src/lib.rs b/src/lib.rs index a097435..d3f7a19 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,4 +19,5 @@ pub mod persistence; pub mod project; pub mod runtime; pub mod theme; +pub mod type_change; pub mod ui; diff --git a/src/output_render.rs b/src/output_render.rs index b3fed8f..bde0c25 100644 --- a/src/output_render.rs +++ b/src/output_render.rs @@ -136,11 +136,38 @@ pub fn render_structure(desc: &TableDescription) -> Vec { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Alignment { +pub enum Alignment { Left, Right, } +/// Per-column alignment hint for [`render_diagnostic_table`]. +#[must_use] +pub const fn numeric_alignment_for(ty: Type) -> Alignment { + alignment_for(Some(ty)) +} + +/// Render an arbitrary bordered table from headers + rows + +/// per-column alignments. Used for change-column diagnostic +/// output (ADR-0017 §7) — anything tabular goes through the +/// pretty-table renderer. +/// +/// Cell contents are sanitised the same way as +/// [`render_data_table`]: newlines and control characters +/// become visible markers (display-only). +#[must_use] +pub fn render_diagnostic_table( + headers: &[String], + rows: &[Vec], + alignments: &[Alignment], +) -> Vec { + let body: Vec> = rows + .iter() + .map(|r| r.iter().map(|c| sanitize_cell(c)).collect()) + .collect(); + render_table(headers, &body, alignments) +} + /// Map a user-facing type to its column alignment per /// ADR-0016 §2. `None` (foreign-attached, no metadata) /// falls back to Left. diff --git a/src/runtime.rs b/src/runtime.rs index 65c0c8b..9341599 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -29,7 +29,8 @@ use crate::action::Action; use crate::app::App; use crate::cli::Args; use crate::db::{ - DataResult, Database, DbError, DeleteResult, InsertResult, TableDescription, UpdateResult, + ChangeColumnTypeResult, DataResult, Database, DbError, DeleteResult, InsertResult, + TableDescription, UpdateResult, }; use crate::dsl::Command; use crate::event::AppEvent; @@ -946,6 +947,10 @@ fn spawn_dsl_dispatch( command: command.clone(), result, }, + Ok(CommandOutcome::ChangeColumn(result)) => AppEvent::DslChangeColumnSucceeded { + command: command.clone(), + result, + }, Err(DbError::PersistenceFatal { operation, path, @@ -982,6 +987,7 @@ enum CommandOutcome { Insert(InsertResult), Update(UpdateResult), Delete(DeleteResult), + ChangeColumn(ChangeColumnTypeResult), } /// Execute a parsed user command and return either a typed @@ -1019,10 +1025,15 @@ async fn execute_command_typed( .rename_column(table, old, new, src) .await .map(|d| CommandOutcome::Schema(Some(d))), - Command::ChangeColumnType { table, column, ty } => database - .change_column_type(table, column, ty, src) + Command::ChangeColumnType { + table, + column, + ty, + mode, + } => database + .change_column_type(table, column, ty, mode, src) .await - .map(|d| CommandOutcome::Schema(Some(d))), + .map(CommandOutcome::ChangeColumn), Command::AddRelationship { name, parent_table, diff --git a/src/type_change.rs b/src/type_change.rs new file mode 100644 index 0000000..6e8d6f5 --- /dev/null +++ b/src/type_change.rs @@ -0,0 +1,1141 @@ +//! Per-cell type-change transformer matrix (ADR-0017). +//! +//! When a `change column T: c (newtype)` invocation runs, every +//! cell of `c` is classified into one of three outcomes: +//! +//! - **Clean** — the transformer produced a value the new type +//! accepts without information loss. +//! - **Lossy** — valid output, but some property (precision, +//! fractional part, time component, …) was discarded. +//! - **Incompatible** — no transformer for this pair can produce +//! a valid output for this cell. +//! +//! The classification is a property of (source type, target type, +//! cell value) — the same pair yields different outcomes for +//! different cell values. NULL passes through any transformer +//! unchanged (always Clean). +//! +//! Pairs not present in the matrix are statically refused via +//! [`static_refusal`] before any per-cell pass runs. Same-type +//! identity, anything → `serial`, anything ↔ `blob`, and +//! `date` ↔ `datetime` direct are all statically refused. + +use rusqlite::types::Value; + +use crate::dsl::Type; +use crate::dsl::shortid; +use crate::output_render::{Alignment, numeric_alignment_for}; + +/// Outcome of attempting to transform one cell's value to fit a +/// new column type. +#[derive(Debug, Clone, PartialEq)] +pub enum CellOutcome { + /// Transformer produced a value the new type accepts without + /// information loss. + Clean(Value), + /// Valid output, but some property of the original cell was + /// discarded. + Lossy { new: Value, reason: String }, + /// No transformer for this pair can produce a valid value + /// for this cell. + Incompatible { reason: String }, +} + +/// Whether a (source, target) type pair is statically refused +/// before per-cell classification. Returns `Some(reason)` when +/// refused; `None` when the per-cell matrix should be consulted. +/// +/// Static refusals cover: same-type identity, anything → +/// `serial`, anything ↔ `blob`, `date` ↔ `datetime` direct, and +/// any cross-domain pair not present in the matrix +/// (e.g. `bool` → `date`). +#[must_use] +pub fn static_refusal(src: Type, target: Type) -> Option { + if src == target { + return Some(format!("column is already `{src}`.")); + } + if matches!(target, Type::Serial) { + return Some( + "the 'serial' type carries auto-increment primary-key semantics \ + that only apply at create-table time; pick a different target \ + type (e.g. `int`)." + .to_string(), + ); + } + if matches!(src, Type::Blob) || matches!(target, Type::Blob) { + return Some(format!( + "conversion between `{src}` and `{target}` is not supported \ + in this version." + )); + } + if matches!( + (src, target), + (Type::Date, Type::DateTime) | (Type::DateTime, Type::Date) + ) { + return Some(format!( + "direct `{src}` to `{target}` conversion is not supported in \ + this version; route via `text` if needed." + )); + } + if !is_in_matrix(src, target) { + return Some(format!( + "no conversion is defined from `{src}` to `{target}`." + )); + } + None +} + +const fn is_in_matrix(src: Type, target: Type) -> bool { + use Type::{ + Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text, + }; + matches!( + (src, target), + // Always-clean transformers + (Int | Serial, Real | Decimal | Text) + // serial -> int: ADR-0017 §4.1's canonical allowed + // case ("removing auto-increment while preserving + // stored values"). Storage stays INTEGER; we treat + // it as an identity transformer. + | (Serial, Int) + | (Bool, Int | Real | Decimal | Text) + | (Decimal | Date | DateTime | ShortId | Real, Text) + // Per-cell-classified + | (Real, Int | Decimal | Bool) + | (Decimal, Int | Real | Bool) + | (Int, Bool) + | (Text, Int | Real | Decimal | Bool | Date | DateTime | ShortId) + ) +} + +/// Transform a single cell's value through the matrix. +/// +/// Caller is responsible for first checking [`static_refusal`]; +/// calling this with a statically-refused pair returns +/// `Incompatible` for every value. `value` is the cell as read +/// from SQLite — its variant matches the source type's storage +/// class (e.g. `Type::Real` ⇒ `Value::Real`). `Value::Null` is +/// always returned `Clean(Null)`. +#[must_use] +pub fn transform_cell(src: Type, target: Type, value: &Value) -> CellOutcome { + if matches!(value, Value::Null) { + return CellOutcome::Clean(Value::Null); + } + use Type::{ + Bool, Date, DateTime, Decimal, Int, Real, Serial, ShortId, Text, + }; + match (src, target) { + // ---- Always-clean: int / serial source ---- + (Int | Serial, Real) => match value { + Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)), + other => unexpected_storage("int/serial", other), + }, + (Int | Serial, Decimal | Text) => match value { + Value::Integer(i) => CellOutcome::Clean(Value::Text(i.to_string())), + other => unexpected_storage("int/serial", other), + }, + // serial -> int: identity at the storage class level; + // the conversion drops auto-increment semantics from the + // column metadata. ADR-0017 §4.1. + (Serial, Int) => match value { + Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)), + other => unexpected_storage("serial", other), + }, + + // ---- Always-clean: bool source (stored as INTEGER 0/1) ---- + (Bool, Int) => match value { + Value::Integer(i) => CellOutcome::Clean(Value::Integer(*i)), + other => unexpected_storage("bool", other), + }, + (Bool, Real) => match value { + Value::Integer(i) => CellOutcome::Clean(Value::Real(*i as f64)), + other => unexpected_storage("bool", other), + }, + (Bool, Decimal) => match value { + Value::Integer(i) => { + CellOutcome::Clean(Value::Text(if *i == 0 { "0".into() } else { "1".into() })) + } + other => unexpected_storage("bool", other), + }, + (Bool, Text) => match value { + // "true" / "false" matches the DSL boolean grammar + // (ADR-0014 §5), not raw 0/1 stringification. + Value::Integer(i) => CellOutcome::Clean(Value::Text( + if *i == 0 { "false".into() } else { "true".into() }, + )), + other => unexpected_storage("bool", other), + }, + + // ---- Always-clean: text-backed source -> text ---- + (Decimal | Date | DateTime | ShortId, Text) => match value { + Value::Text(s) => CellOutcome::Clean(Value::Text(s.clone())), + other => unexpected_storage("text-backed", other), + }, + + // ---- Always-clean: real -> text ---- + (Real, Text) => match value { + Value::Real(r) => CellOutcome::Clean(Value::Text(format_real(*r))), + other => unexpected_storage("real", other), + }, + + // ---- Per-cell: real -> int ---- + (Real, Int) => match value { + Value::Real(r) => real_to_int(*r), + other => unexpected_storage("real", other), + }, + + // ---- Per-cell: real -> decimal ---- + // f64 -> shortest-round-trip decimal string. The f64 was + // already at the storage precision; representing it as a + // decimal preserves what we have. Always-clean in + // practice; the ADR's "lossy otherwise" provision exists + // for theoretical higher-precision sources we don't + // currently produce. + (Real, Decimal) => match value { + Value::Real(r) => { + if r.is_finite() { + CellOutcome::Clean(Value::Text(format_real(*r))) + } else { + CellOutcome::Incompatible { + reason: format!("`{r}` is not finite"), + } + } + } + other => unexpected_storage("real", other), + }, + + // ---- Per-cell: real -> bool ---- + (Real, Bool) => match value { + Value::Real(r) => { + if *r == 0.0 { + CellOutcome::Clean(Value::Integer(0)) + } else if *r == 1.0 { + CellOutcome::Clean(Value::Integer(1)) + } else { + CellOutcome::Incompatible { + reason: format!("`{}` is not 0 or 1", format_real(*r)), + } + } + } + other => unexpected_storage("real", other), + }, + + // ---- Per-cell: decimal -> int ---- + (Decimal, Int) => match value { + Value::Text(s) => decimal_text_to_int(s), + other => unexpected_storage("decimal", other), + }, + + // ---- Per-cell: decimal -> real ---- + (Decimal, Real) => match value { + Value::Text(s) => decimal_text_to_real(s), + other => unexpected_storage("decimal", other), + }, + + // ---- Per-cell: decimal -> bool ---- + (Decimal, Bool) => match value { + Value::Text(s) => { + let trimmed = s.trim(); + // Accept "0" / "1" plus their decimal equivalents + // (e.g. "0.0", "1.0", "1.00") since those are the + // exact-zero / exact-one cases per ADR-0017 §3. + trimmed.parse::().map_or_else( + |_| CellOutcome::Incompatible { + reason: format!("`{s}` is not 0 or 1"), + }, + |parsed| { + if parsed == 0.0 { + CellOutcome::Clean(Value::Integer(0)) + } else if parsed == 1.0 { + CellOutcome::Clean(Value::Integer(1)) + } else { + CellOutcome::Incompatible { + reason: format!("`{s}` is not 0 or 1"), + } + } + }, + ) + } + other => unexpected_storage("decimal", other), + }, + + // ---- Per-cell: int -> bool ---- + (Int, Bool) => match value { + Value::Integer(i) => { + if *i == 0 { + CellOutcome::Clean(Value::Integer(0)) + } else if *i == 1 { + CellOutcome::Clean(Value::Integer(1)) + } else { + CellOutcome::Incompatible { + reason: format!("`{i}` is not 0 or 1"), + } + } + } + other => unexpected_storage("int", other), + }, + + // ---- Per-cell: text -> int ---- + (Text, Int) => match value { + Value::Text(s) => text_to_int(s), + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> real ---- + (Text, Real) => match value { + Value::Text(s) => match s.trim().parse::() { + Ok(r) if r.is_finite() => CellOutcome::Clean(Value::Real(r)), + Ok(_) => CellOutcome::Incompatible { + reason: format!("`{s}` is not a finite real number"), + }, + Err(_) => CellOutcome::Incompatible { + reason: format!("`{s}` is not a valid real number"), + }, + }, + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> decimal ---- + (Text, Decimal) => match value { + Value::Text(s) => { + if s.trim().parse::().is_ok() { + CellOutcome::Clean(Value::Text(s.clone())) + } else { + CellOutcome::Incompatible { + reason: format!("`{s}` is not a valid decimal number"), + } + } + } + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> bool ---- + (Text, Bool) => match value { + Value::Text(s) => { + let lowered = s.trim().to_ascii_lowercase(); + if lowered == "true" { + CellOutcome::Clean(Value::Integer(1)) + } else if lowered == "false" { + CellOutcome::Clean(Value::Integer(0)) + } else { + CellOutcome::Incompatible { + reason: format!("`{s}` is not `true` or `false`"), + } + } + } + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> date ---- + (Text, Date) => match value { + Value::Text(s) => match crate::dsl::value::validate_date(s) { + Ok(()) => CellOutcome::Clean(Value::Text(s.clone())), + Err(_) => CellOutcome::Incompatible { + reason: format!("`{s}` is not a date in `YYYY-MM-DD` form"), + }, + }, + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> datetime ---- + (Text, DateTime) => match value { + Value::Text(s) => { + if crate::dsl::value::validate_datetime(s).is_ok() { + CellOutcome::Clean(Value::Text(s.clone())) + } else if crate::dsl::value::validate_date(s).is_ok() { + let promoted = format!("{s}T00:00:00Z"); + CellOutcome::Lossy { + new: Value::Text(promoted), + reason: "bare date promoted to `T00:00:00Z`".to_string(), + } + } else { + CellOutcome::Incompatible { + reason: format!( + "`{s}` is not a datetime in `YYYY-MM-DDTHH:MM:SS` form" + ), + } + } + } + other => unexpected_storage("text", other), + }, + + // ---- Per-cell: text -> shortid ---- + (Text, ShortId) => match value { + Value::Text(s) => match shortid::validate(s) { + Ok(()) => CellOutcome::Clean(Value::Text(s.clone())), + Err(message) => CellOutcome::Incompatible { reason: message }, + }, + other => unexpected_storage("text", other), + }, + + // Pairs caught by `static_refusal` before reaching here. + _ => CellOutcome::Incompatible { + reason: format!("no transformer defined for `{src}` to `{target}`"), + }, + } +} + +/// Map a user-facing type to the column alignment used in +/// diagnostic tables (ADR-0017 §7). Numeric types right-align, +/// everything else left-aligns; matches ADR-0016 §2. +#[must_use] +pub const fn is_in_matrix_alignment(ty: Type) -> Alignment { + numeric_alignment_for(ty) +} + +/// Whether the transformed value is materially different from +/// the source (i.e. counts as a "client-side transformation" +/// for the §6 note). +/// +/// Two values are considered the same only when both are NULL. +/// Different storage classes (e.g. `Text("42")` vs +/// `Integer(42)`) count as transformations even though the +/// human reading is identical, because the database now stores +/// a different shape. +#[must_use] +pub fn is_non_identity(original: &Value, transformed: &Value) -> bool { + match (original, transformed) { + (Value::Null, Value::Null) => false, + (a, b) => !values_equal(a, b), + } +} + +fn values_equal(a: &Value, b: &Value) -> bool { + match (a, b) { + (Value::Null, Value::Null) => true, + (Value::Integer(x), Value::Integer(y)) => x == y, + (Value::Real(x), Value::Real(y)) => x.to_bits() == y.to_bits(), + (Value::Text(x), Value::Text(y)) => x == y, + (Value::Blob(x), Value::Blob(y)) => x == y, + _ => false, + } +} + +fn real_to_int(r: f64) -> CellOutcome { + if !r.is_finite() { + return CellOutcome::Incompatible { + reason: format!("`{r}` is not finite"), + }; + } + let i64_min = i64::MIN as f64; + let i64_max = i64::MAX as f64; + if r < i64_min || r > i64_max { + return CellOutcome::Incompatible { + reason: format!("`{}` is out of int range", format_real(r)), + }; + } + if r.fract() == 0.0 { + CellOutcome::Clean(Value::Integer(r as i64)) + } else { + let truncated = r.trunc() as i64; + let discarded = r - r.trunc(); + CellOutcome::Lossy { + new: Value::Integer(truncated), + reason: format!( + "truncated; would discard {}", + format_real(discarded) + ), + } + } +} + +fn decimal_text_to_int(s: &str) -> CellOutcome { + let trimmed = s.trim(); + if let Ok(i) = trimmed.parse::() { + return CellOutcome::Clean(Value::Integer(i)); + } + match trimmed.parse::() { + Ok(r) if r.is_finite() => real_to_int(r), + _ => CellOutcome::Incompatible { + reason: format!("`{s}` is not a valid number"), + }, + } +} + +fn decimal_text_to_real(s: &str) -> CellOutcome { + match s.trim().parse::() { + Ok(r) if r.is_finite() => { + // If the f64's shortest round-trip form matches the + // source string (after canonicalising whitespace), we + // haven't lost precision. Otherwise this is a lossy + // conversion (typical for long decimals). + let canonical = format_real(r); + if canonical == s.trim() || equal_after_normalising(&canonical, s.trim()) { + CellOutcome::Clean(Value::Real(r)) + } else { + CellOutcome::Lossy { + new: Value::Real(r), + reason: format!("precision reduced from `{}` to `{}`", s.trim(), canonical), + } + } + } + _ => CellOutcome::Incompatible { + reason: format!("`{s}` is not a valid number"), + }, + } +} + +/// Compare two numeric strings ignoring trailing-zero/decimal- +/// notation differences that don't carry information +/// (e.g. "3" vs "3.0", "3.10" vs "3.1"). +fn equal_after_normalising(a: &str, b: &str) -> bool { + fn normalise(s: &str) -> String { + let s = s.trim(); + s.find('.').map_or_else( + || s.to_string(), + |dot| { + let (int_part, frac_part) = s.split_at(dot); + let frac = frac_part[1..].trim_end_matches('0'); + if frac.is_empty() { + int_part.to_string() + } else { + format!("{int_part}.{frac}") + } + }, + ) + } + normalise(a) == normalise(b) +} + +fn text_to_int(s: &str) -> CellOutcome { + let trimmed = s.trim(); + if let Ok(i) = trimmed.parse::() { + return CellOutcome::Clean(Value::Integer(i)); + } + match trimmed.parse::() { + Ok(r) if r.is_finite() => match real_to_int(r) { + // For text -> int via real: a clean integer parse + // of the float (e.g. "3.0" -> 3) still counts as + // lossy because the source carried decimal notation + // the int can't preserve. Promote Clean -> Lossy. + CellOutcome::Clean(value) => CellOutcome::Lossy { + new: value, + reason: format!("parsed as real then narrowed; source was `{s}`"), + }, + other => other, + }, + _ => CellOutcome::Incompatible { + reason: format!("`{s}` is not a valid int"), + }, + } +} + +/// Shortest round-trip f64 representation. Rust's default `{}` +/// for f64 since 1.0 produces a string that always parses back +/// to the exact same f64. +fn format_real(r: f64) -> String { + let s = format!("{r}"); + // Rust prints integral f64 without a dot ("3" rather than + // "3.0"). Some downstream consumers expect to spot reals by + // their dot; for our purposes — storing in a TEXT-backed + // decimal column or rendering in a diagnostic — the no-dot + // form is fine and matches what the user would type for a + // whole-number real literal. + s +} + +fn unexpected_storage(label: &str, value: &Value) -> CellOutcome { + CellOutcome::Incompatible { + reason: format!( + "internal: cell stored unexpectedly for `{label}` source ({value:?})" + ), + } +} + +#[cfg(test)] +#[allow(clippy::approx_constant)] // 3.14 is a convenient lossy fixture, not PI +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + fn int(i: i64) -> Value { + Value::Integer(i) + } + fn real(r: f64) -> Value { + Value::Real(r) + } + fn text(s: &str) -> Value { + Value::Text(s.to_string()) + } + + // ---- Static refusals ---- + + #[test] + fn same_type_is_statically_refused() { + for ty in Type::all() { + assert!(static_refusal(*ty, *ty).is_some(), "{ty:?}"); + } + } + + #[test] + fn anything_to_serial_is_statically_refused() { + for &src in Type::all() { + assert!(static_refusal(src, Type::Serial).is_some(), "{src:?}"); + } + } + + #[test] + fn anything_involving_blob_is_statically_refused() { + for &other in Type::all() { + if other != Type::Blob { + assert!(static_refusal(Type::Blob, other).is_some(), "{other:?}"); + assert!(static_refusal(other, Type::Blob).is_some(), "{other:?}"); + } + } + } + + #[test] + fn date_to_datetime_direct_is_statically_refused() { + assert!(static_refusal(Type::Date, Type::DateTime).is_some()); + assert!(static_refusal(Type::DateTime, Type::Date).is_some()); + } + + #[test] + fn cross_domain_unmapped_pair_is_statically_refused() { + for (src, target) in [ + (Type::Bool, Type::Date), + (Type::Bool, Type::DateTime), + (Type::Bool, Type::ShortId), + (Type::Real, Type::DateTime), + (Type::Int, Type::ShortId), + (Type::Int, Type::Date), + (Type::Date, Type::Int), + (Type::ShortId, Type::Int), + ] { + assert!(static_refusal(src, target).is_some(), "{src:?} -> {target:?}"); + } + } + + #[test] + fn matrix_pairs_pass_static_check() { + // A representative subset; all-pairs coverage is via + // is_in_matrix's match. + let pairs = [ + (Type::Int, Type::Real), + (Type::Int, Type::Text), + (Type::Serial, Type::Int), + (Type::Serial, Type::Text), + (Type::Real, Type::Int), + (Type::Text, Type::ShortId), + (Type::Text, Type::DateTime), + (Type::Bool, Type::Text), + ]; + for (s, t) in pairs { + assert!(static_refusal(s, t).is_none(), "{s:?} -> {t:?}"); + } + } + + // ---- NULL ---- + + #[test] + fn null_passes_through_clean_for_any_pair() { + let pairs = [ + (Type::Int, Type::Text), + (Type::Real, Type::Int), + (Type::Text, Type::Date), + (Type::Bool, Type::Real), + ]; + for (s, t) in pairs { + assert_eq!(transform_cell(s, t, &Value::Null), CellOutcome::Clean(Value::Null)); + } + } + + // ---- Always-clean: int / serial source ---- + + #[test] + fn int_to_real_is_clean() { + assert_eq!( + transform_cell(Type::Int, Type::Real, &int(7)), + CellOutcome::Clean(real(7.0)) + ); + } + + #[test] + fn int_to_text_is_clean() { + assert_eq!( + transform_cell(Type::Int, Type::Text, &int(42)), + CellOutcome::Clean(text("42")) + ); + } + + #[test] + fn int_to_decimal_is_clean() { + assert_eq!( + transform_cell(Type::Int, Type::Decimal, &int(-12)), + CellOutcome::Clean(text("-12")) + ); + } + + #[test] + fn serial_to_real_or_text_is_clean() { + // The (Int | Serial, Real | Decimal | Text) transformer + // handles serial sources symmetrically with int. + assert_eq!( + transform_cell(Type::Serial, Type::Real, &int(3)), + CellOutcome::Clean(real(3.0)) + ); + assert_eq!( + transform_cell(Type::Serial, Type::Text, &int(3)), + CellOutcome::Clean(text("3")) + ); + } + + #[test] + fn serial_to_int_is_identity_clean() { + // ADR-0017 §4.1's canonical "drop auto-increment" case: + // storage class is unchanged, value passes through. + assert_eq!( + transform_cell(Type::Serial, Type::Int, &int(42)), + CellOutcome::Clean(int(42)) + ); + assert!(static_refusal(Type::Serial, Type::Int).is_none()); + } + + // ---- Always-clean: bool source ---- + + #[test] + fn bool_to_int_is_clean_for_zero_and_one() { + assert_eq!( + transform_cell(Type::Bool, Type::Int, &int(0)), + CellOutcome::Clean(int(0)) + ); + assert_eq!( + transform_cell(Type::Bool, Type::Int, &int(1)), + CellOutcome::Clean(int(1)) + ); + } + + #[test] + fn bool_to_text_uses_dsl_keywords() { + assert_eq!( + transform_cell(Type::Bool, Type::Text, &int(0)), + CellOutcome::Clean(text("false")) + ); + assert_eq!( + transform_cell(Type::Bool, Type::Text, &int(1)), + CellOutcome::Clean(text("true")) + ); + } + + #[test] + fn bool_to_decimal_yields_zero_or_one() { + assert_eq!( + transform_cell(Type::Bool, Type::Decimal, &int(0)), + CellOutcome::Clean(text("0")) + ); + assert_eq!( + transform_cell(Type::Bool, Type::Decimal, &int(1)), + CellOutcome::Clean(text("1")) + ); + } + + #[test] + fn bool_to_real_yields_zero_or_one() { + assert_eq!( + transform_cell(Type::Bool, Type::Real, &int(0)), + CellOutcome::Clean(real(0.0)) + ); + assert_eq!( + transform_cell(Type::Bool, Type::Real, &int(1)), + CellOutcome::Clean(real(1.0)) + ); + } + + // ---- Always-clean: text-backed source -> text ---- + + #[test] + fn decimal_to_text_passes_through() { + assert_eq!( + transform_cell(Type::Decimal, Type::Text, &text("3.14")), + CellOutcome::Clean(text("3.14")) + ); + } + + #[test] + fn date_to_text_passes_through() { + assert_eq!( + transform_cell(Type::Date, Type::Text, &text("2025-01-15")), + CellOutcome::Clean(text("2025-01-15")) + ); + } + + #[test] + fn datetime_to_text_passes_through() { + assert_eq!( + transform_cell(Type::DateTime, Type::Text, &text("2025-01-15T14:30:00")), + CellOutcome::Clean(text("2025-01-15T14:30:00")) + ); + } + + #[test] + fn shortid_to_text_passes_through() { + assert_eq!( + transform_cell(Type::ShortId, Type::Text, &text("23456789Ab")), + CellOutcome::Clean(text("23456789Ab")) + ); + } + + #[test] + fn real_to_text_uses_shortest_round_trip() { + assert_eq!( + transform_cell(Type::Real, Type::Text, &real(3.14)), + CellOutcome::Clean(text("3.14")) + ); + // Whole numbers print without trailing zero, matching + // Rust's default Display. + assert_eq!( + transform_cell(Type::Real, Type::Text, &real(3.0)), + CellOutcome::Clean(text("3")) + ); + } + + // ---- Per-cell: real -> int ---- + + #[test] + fn real_to_int_clean_for_whole_numbers() { + assert_eq!( + transform_cell(Type::Real, Type::Int, &real(3.0)), + CellOutcome::Clean(int(3)) + ); + assert_eq!( + transform_cell(Type::Real, Type::Int, &real(-7.0)), + CellOutcome::Clean(int(-7)) + ); + } + + #[test] + fn real_to_int_lossy_for_fractional() { + match transform_cell(Type::Real, Type::Int, &real(3.14)) { + CellOutcome::Lossy { new, reason } => { + assert_eq!(new, int(3)); + assert!(reason.contains("truncated"), "{reason}"); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn real_to_int_incompatible_for_nan_or_inf() { + assert!(matches!( + transform_cell(Type::Real, Type::Int, &real(f64::NAN)), + CellOutcome::Incompatible { .. } + )); + assert!(matches!( + transform_cell(Type::Real, Type::Int, &real(f64::INFINITY)), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: real -> bool ---- + + #[test] + fn real_to_bool_clean_for_zero_and_one() { + assert_eq!( + transform_cell(Type::Real, Type::Bool, &real(0.0)), + CellOutcome::Clean(int(0)) + ); + assert_eq!( + transform_cell(Type::Real, Type::Bool, &real(1.0)), + CellOutcome::Clean(int(1)) + ); + } + + #[test] + fn real_to_bool_incompatible_for_other_values() { + assert!(matches!( + transform_cell(Type::Real, Type::Bool, &real(0.5)), + CellOutcome::Incompatible { .. } + )); + assert!(matches!( + transform_cell(Type::Real, Type::Bool, &real(2.0)), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: real -> decimal ---- + + #[test] + fn real_to_decimal_is_clean_for_finite() { + assert_eq!( + transform_cell(Type::Real, Type::Decimal, &real(3.14)), + CellOutcome::Clean(text("3.14")) + ); + } + + // ---- Per-cell: decimal -> int ---- + + #[test] + fn decimal_to_int_clean_for_integer_values() { + assert_eq!( + transform_cell(Type::Decimal, Type::Int, &text("42")), + CellOutcome::Clean(int(42)) + ); + } + + #[test] + fn decimal_to_int_lossy_for_fractional() { + match transform_cell(Type::Decimal, Type::Int, &text("3.14")) { + CellOutcome::Lossy { new, reason } => { + assert_eq!(new, int(3)); + assert!(reason.contains("truncated"), "{reason}"); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn decimal_to_int_incompatible_for_garbage() { + assert!(matches!( + transform_cell(Type::Decimal, Type::Int, &text("notanumber")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: decimal -> bool ---- + + #[test] + fn decimal_to_bool_clean_for_exact_zero_one() { + assert_eq!( + transform_cell(Type::Decimal, Type::Bool, &text("0")), + CellOutcome::Clean(int(0)) + ); + assert_eq!( + transform_cell(Type::Decimal, Type::Bool, &text("1.0")), + CellOutcome::Clean(int(1)) + ); + } + + #[test] + fn decimal_to_bool_incompatible_for_other() { + assert!(matches!( + transform_cell(Type::Decimal, Type::Bool, &text("0.5")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: int -> bool ---- + + #[test] + fn int_to_bool_clean_for_zero_one() { + assert_eq!( + transform_cell(Type::Int, Type::Bool, &int(0)), + CellOutcome::Clean(int(0)) + ); + assert_eq!( + transform_cell(Type::Int, Type::Bool, &int(1)), + CellOutcome::Clean(int(1)) + ); + } + + #[test] + fn int_to_bool_incompatible_for_other() { + assert!(matches!( + transform_cell(Type::Int, Type::Bool, &int(2)), + CellOutcome::Incompatible { .. } + )); + assert!(matches!( + transform_cell(Type::Int, Type::Bool, &int(-1)), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> int ---- + + #[test] + fn text_to_int_clean_for_integer_string() { + assert_eq!( + transform_cell(Type::Text, Type::Int, &text("42")), + CellOutcome::Clean(int(42)) + ); + } + + #[test] + fn text_to_int_lossy_via_real_for_fractional_string() { + match transform_cell(Type::Text, Type::Int, &text("3.14")) { + CellOutcome::Lossy { new, .. } => assert_eq!(new, int(3)), + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn text_to_int_lossy_for_real_lookalike_whole_number() { + // ADR-0017 §3: text "3.0" -> int via real-then-narrow is + // lossy because the source carried decimal notation the + // int representation discards. + match transform_cell(Type::Text, Type::Int, &text("3.0")) { + CellOutcome::Lossy { new, .. } => assert_eq!(new, int(3)), + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn text_to_int_incompatible_for_garbage() { + assert!(matches!( + transform_cell(Type::Text, Type::Int, &text("abc")), + CellOutcome::Incompatible { .. } + )); + assert!(matches!( + transform_cell(Type::Text, Type::Int, &text("")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> real / decimal ---- + + #[test] + fn text_to_real_clean_for_numeric_string() { + assert_eq!( + transform_cell(Type::Text, Type::Real, &text("3.14")), + CellOutcome::Clean(real(3.14)) + ); + } + + #[test] + fn text_to_real_incompatible_for_garbage() { + assert!(matches!( + transform_cell(Type::Text, Type::Real, &text("xyz")), + CellOutcome::Incompatible { .. } + )); + } + + #[test] + fn text_to_decimal_clean_for_numeric_string() { + assert_eq!( + transform_cell(Type::Text, Type::Decimal, &text("3.14")), + CellOutcome::Clean(text("3.14")) + ); + } + + #[test] + fn text_to_decimal_incompatible_for_garbage() { + assert!(matches!( + transform_cell(Type::Text, Type::Decimal, &text("xyz")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> bool ---- + + #[test] + fn text_to_bool_accepts_true_false_case_insensitively() { + for s in ["true", "True", "TRUE", "tRue"] { + assert_eq!( + transform_cell(Type::Text, Type::Bool, &text(s)), + CellOutcome::Clean(int(1)), + "{s}" + ); + } + for s in ["false", "False", "FALSE"] { + assert_eq!( + transform_cell(Type::Text, Type::Bool, &text(s)), + CellOutcome::Clean(int(0)), + "{s}" + ); + } + } + + #[test] + fn text_to_bool_refuses_zero_one_strings() { + // ADR-0017 §3: "no implicit 0/1 parse — matches the DSL + // boolean grammar." + assert!(matches!( + transform_cell(Type::Text, Type::Bool, &text("0")), + CellOutcome::Incompatible { .. } + )); + assert!(matches!( + transform_cell(Type::Text, Type::Bool, &text("1")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> date ---- + + #[test] + fn text_to_date_clean_for_iso() { + assert_eq!( + transform_cell(Type::Text, Type::Date, &text("2025-01-15")), + CellOutcome::Clean(text("2025-01-15")) + ); + } + + #[test] + fn text_to_date_incompatible_for_other_formats() { + assert!(matches!( + transform_cell(Type::Text, Type::Date, &text("2025/01/15")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> datetime ---- + + #[test] + fn text_to_datetime_clean_for_iso_datetime() { + assert_eq!( + transform_cell(Type::Text, Type::DateTime, &text("2025-01-15T14:30:00")), + CellOutcome::Clean(text("2025-01-15T14:30:00")) + ); + } + + #[test] + fn text_to_datetime_lossy_for_bare_date() { + match transform_cell(Type::Text, Type::DateTime, &text("2025-01-15")) { + CellOutcome::Lossy { new, reason } => { + assert_eq!(new, text("2025-01-15T00:00:00Z")); + assert!(reason.contains("promoted"), "{reason}"); + } + other => panic!("unexpected: {other:?}"), + } + } + + #[test] + fn text_to_datetime_incompatible_for_garbage() { + assert!(matches!( + transform_cell(Type::Text, Type::DateTime, &text("not a date")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- Per-cell: text -> shortid ---- + + #[test] + fn text_to_shortid_clean_for_valid_shortid() { + assert_eq!( + transform_cell(Type::Text, Type::ShortId, &text("23456789Ab")), + CellOutcome::Clean(text("23456789Ab")) + ); + } + + #[test] + fn text_to_shortid_incompatible_for_invalid() { + // Too long. + assert!(matches!( + transform_cell(Type::Text, Type::ShortId, &text("toolong_xyz_more")), + CellOutcome::Incompatible { .. } + )); + // Ambiguous chars (per shortid alphabet). + assert!(matches!( + transform_cell(Type::Text, Type::ShortId, &text("0OIl234567")), + CellOutcome::Incompatible { .. } + )); + } + + // ---- is_non_identity ---- + + #[test] + fn null_to_null_is_identity() { + assert!(!is_non_identity(&Value::Null, &Value::Null)); + } + + #[test] + fn storage_class_change_counts_as_non_identity() { + // Same human reading, different storage class: counts. + assert!(is_non_identity(&text("42"), &int(42))); + assert!(is_non_identity(&int(3), &real(3.0))); + } + + #[test] + fn identical_value_is_identity() { + assert!(!is_non_identity(&text("hi"), &text("hi"))); + assert!(!is_non_identity(&int(42), &int(42))); + } +}