From 1d5534b2bdaec623996024e9f72199ebd5ba8b00 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Tue, 26 May 2026 21:58:25 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20ADR-0036=20Phase=201=20=E2=80=94=20vali?= =?UTF-8?q?date=20advanced-mode=20INSERT=20literals=20+=20show=20the=20val?= =?UTF-8?q?ue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture literal VALUES at parse onto Command::SqlInsert (no grammar change, no reparse); validate them against column types before the still-verbatim insert (reusing impl_value_for for DSL-parity wording); read them in the error enricher so a constraint error names the real value. Execution, auto-fill, and command identity unchanged. Adds run_sql_insert_with_literals (runtime path); run_sql_insert stays the no-capture raw entry. Proven: malformed date 2025/01/15 now refused in advanced-mode SQL; replayed UNIQUE shows the real value. Tests +3 (expression runs, multi-row, natural order) + 2 flipped/strengthened. 1930 pass / 0 fail / 0 skip; clippy clean. --- src/db.rs | 69 ++++++++++++++++++++++ src/dsl/command.rs | 9 +++ src/dsl/grammar/data.rs | 91 +++++++++++++++++++++++++++++ src/runtime.rs | 24 +++++++- tests/replay_command.rs | 13 +++-- tests/sql_dml_e2e.rs | 1 + tests/sql_insert.rs | 126 +++++++++++++++++++++++++++++++++++----- tests/undo_snapshots.rs | 1 + 8 files changed, 312 insertions(+), 22 deletions(-) diff --git a/src/db.rs b/src/db.rs index 8a58569..9246a6a 100644 --- a/src/db.rs +++ b/src/db.rs @@ -696,6 +696,10 @@ enum Request { listed_columns: Vec, row_source: String, returning: bool, + /// Captured literal `VALUES` (per row, per position; `None` = + /// expression) for app-level type validation before the verbatim + /// insert (ADR-0036 Phase 1). + literal_rows: Vec>>, reply: oneshot::Sender>, }, /// Run a grammar-validated SQL `UPDATE` (ADR-0033 §2). The @@ -1444,6 +1448,12 @@ impl Database { /// `sql` is the grammar-validated statement text; `source` is /// the literal submitted line for `history.log`; `target_table` /// is the parsed target whose CSV is re-persisted. + /// Run a grammar-validated SQL `INSERT` with **no** captured literals + /// (no app-level value validation — the verbatim ADR-0033 path). Used + /// by worker-level callers that build the statement directly. The + /// runtime, which has a parsed command, uses + /// [`Self::run_sql_insert_with_literals`] instead so the literals are + /// validated (ADR-0036 Phase 1). pub async fn run_sql_insert( &self, sql: String, @@ -1452,6 +1462,34 @@ impl Database { listed_columns: Vec, row_source: String, returning: bool, + ) -> Result { + self.run_sql_insert_with_literals( + sql, + source, + target_table, + listed_columns, + row_source, + returning, + Vec::new(), + ) + .await + } + + /// As [`Self::run_sql_insert`], plus the literal `VALUES` captured at + /// parse (per row, per position; `None` = expression) so the worker + /// can validate each literal against its column type before the + /// (still verbatim) insert and the error layer can name the offending + /// value (ADR-0036 Phase 1). + #[allow(clippy::too_many_arguments)] + pub async fn run_sql_insert_with_literals( + &self, + sql: String, + source: Option, + target_table: String, + listed_columns: Vec, + row_source: String, + returning: bool, + literal_rows: Vec>>, ) -> Result { let (reply, recv) = oneshot::channel(); self.send(Request::RunSqlInsert { @@ -1461,6 +1499,7 @@ impl Database { listed_columns, row_source, returning, + literal_rows, reply, }) .await?; @@ -2434,6 +2473,7 @@ fn handle_request( listed_columns, row_source, returning, + literal_rows, reply, } => { snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_sql_insert( @@ -2445,6 +2485,7 @@ fn handle_request( &listed_columns, &row_source, returning, + &literal_rows, )); } Request::RunSqlUpdate { @@ -8388,10 +8429,38 @@ fn do_sql_insert( listed_columns: &[String], row_source: &str, returning: bool, + literal_rows: &[Vec>], ) -> Result { debug!(sql = %sql, table = %target_table, returning, "sql_insert"); let canonical_table = require_canonical_table(conn, target_table)?; let target_table = canonical_table.as_str(); + + // ADR-0036 Phase 1: validate captured literal VALUES against their + // column types BEFORE the (still verbatim) insert runs — sharing the + // DSL's per-type validators (`impl_value_for`) for identical wording. + // Only literal positions are checked; expression positions (`None`) + // are left to the engine. Column mapping mirrors the engine's: an + // explicit column list maps by position; natural order maps to the + // schema's columns in definition order. An out-of-range position + // (arity mismatch) is left for the engine / parse-time diagnostic. + // Execution below is unchanged (no binding, no auto-fill change). + if literal_rows.iter().any(|r| r.iter().any(Option::is_some)) { + let schema = read_schema(conn, target_table)?; + let columns: Vec<&str> = if listed_columns.is_empty() { + schema.columns.iter().map(|c| c.name.as_str()).collect() + } else { + listed_columns.iter().map(String::as_str).collect() + }; + for row in literal_rows { + for (idx, slot) in row.iter().enumerate() { + if let Some(value) = slot + && let Some(col) = columns.get(idx) + { + impl_value_for(&schema, col, value)?; + } + } + } + } // The `shortid` auto-fill rewrite reconstructs only `INSERT … // VALUES …` and would drop any trailing clause — `ON CONFLICT …` // (3h) and/or `RETURNING …` (3g). `row_source` is the clean diff --git a/src/dsl/command.rs b/src/dsl/command.rs index 481723b..5c76372 100644 --- a/src/dsl/command.rs +++ b/src/dsl/command.rs @@ -422,6 +422,15 @@ pub enum Command { /// `DataResult` when true; otherwise it surfaces the /// affected-row count (+ auto-show) as before. returning: bool, + /// Captured literal values per `VALUES` row, per position + /// (ADR-0036 Phase 1). `Some(v)` for a bare literal (incl. a + /// signed number); `None` for an expression position (nothing + /// static to validate). Empty when the row source is a + /// `SELECT`/`WITH` query. The worker validates these against the + /// column types before the (still verbatim) insert; the error + /// enricher reads them to show the offending value. Execution + /// itself is unchanged — these are *not* bound. + literal_rows: Vec>>, }, /// A SQL `UPDATE` validated by the walker (ADR-0033 §2, /// advanced mode). Grammar-as-text: the worker executes `sql` diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index cc67b7a..73b5594 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -950,15 +950,106 @@ fn build_sql_insert(path: &MatchedPath, source: &str) -> Result Vec>> { + let mut rows: Vec>> = Vec::new(); + let mut depth: i32 = 0; + let mut cur_row: Vec> = Vec::new(); + let mut pos: Vec<&MatchedItem> = Vec::new(); + for item in &path.items { + if item.span.0 < values_start || item.span.0 >= tail_end { + continue; + } + match &item.kind { + MatchedKind::Word("values") => {} + MatchedKind::Punct('(') => { + depth += 1; + if depth == 1 { + cur_row = Vec::new(); + pos.clear(); + } else { + pos.push(item); + } + } + MatchedKind::Punct(')') => { + if depth == 1 { + cur_row.push(classify_value_position(&pos)); + pos.clear(); + rows.push(std::mem::take(&mut cur_row)); + } else if depth > 1 { + pos.push(item); + } + depth -= 1; + } + MatchedKind::Punct(',') if depth == 1 => { + cur_row.push(classify_value_position(&pos)); + pos.clear(); + } + _ if depth >= 1 => pos.push(item), + _ => {} + } + } + rows +} + +/// Classify one `VALUES` position's matched tokens into `Some(Value)` (a +/// bare literal) or `None` (an expression). A single literal token, or a +/// sign followed by a number, is a literal; anything else is an +/// expression (ADR-0036 §1). +fn classify_value_position(tokens: &[&MatchedItem]) -> Option { + match tokens { + [one] => item_to_value(one), + [sign, num] + if matches!(sign.kind, MatchedKind::Punct('-') | MatchedKind::Punct('+')) + && matches!(num.kind, MatchedKind::NumberLit) => + { + let text = if matches!(sign.kind, MatchedKind::Punct('-')) { + format!("-{}", num.text) + } else { + num.text.clone() + }; + Some(Value::Number(text)) + } + _ => None, + } +} + /// Whether the matched path contains a `RETURNING` clause /// (ADR-0033 §5, sub-phase 3g). Located by the `returning` *Word /// token* in the path — path-based, so a string literal can't be diff --git a/src/runtime.rs b/src/runtime.rs index 48701b1..a2fc4c1 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1570,6 +1570,19 @@ fn user_value_for_column(command: &Command, column: &str) -> Option { + let idx = listed_columns.iter().position(|c| c == column)?; + literal_rows[0].get(idx).cloned().flatten() + } _ => None, } } @@ -2236,8 +2249,17 @@ async fn execute_command_typed( listed_columns, row_source, returning, + literal_rows, } => database - .run_sql_insert(sql, src, target_table, listed_columns, row_source, returning) + .run_sql_insert_with_literals( + sql, + src, + target_table, + listed_columns, + row_source, + returning, + literal_rows, + ) .await .map(CommandOutcome::Insert), // A SQL `UPDATE` (advanced mode; ADR-0033 §2). Grammar-as- diff --git a/tests/replay_command.rs b/tests/replay_command.rs index e26015e..d003ebf 100644 --- a/tests/replay_command.rs +++ b/tests/replay_command.rs @@ -340,17 +340,18 @@ fn replay_constraint_failure_shows_real_names_not_placeholders() { let AppEvent::ReplayFailed { error, .. } = failed else { unreachable!() }; - // No unsubstituted placeholders (the safety net + enrichment). + // No unsubstituted placeholders. assert!( !error.contains("{table}") && !error.contains("{column}") && !error.contains("{value}"), "no unsubstituted placeholders; got: {error}" ); - // The real table + column are shown (resolved from the engine - // message). The offending value is NOT shown: replay parses in - // advanced mode → `SqlInsert`, whose values are raw SQL text (ADR-0033 - // verbatim execution), not retained typed values — so it degrades to - // the neutral "that value" rather than leaking `{value}`. + // The real table + column are shown (from the engine message), and — + // since ADR-0036 Phase 1 retains the captured literal on the SQL + // INSERT command — the **real offending value** is shown too (it used + // to degrade to the neutral "that value" because `SqlInsert` discarded + // its literals). assert!(error.contains("T.email"), "names the real table.column; got: {error}"); + assert!(error.contains("a@b.com"), "shows the real offending value; got: {error}"); } #[test] diff --git a/tests/sql_dml_e2e.rs b/tests/sql_dml_e2e.rs index a1d4a3b..a604a10 100644 --- a/tests/sql_dml_e2e.rs +++ b/tests/sql_dml_e2e.rs @@ -95,6 +95,7 @@ fn run_insert( listed_columns, row_source, returning, + .. } => rt.block_on(db.run_sql_insert( sql, Some(input.to_string()), diff --git a/tests/sql_insert.rs b/tests/sql_insert.rs index df62d08..dc56f04 100644 --- a/tests/sql_insert.rs +++ b/tests/sql_insert.rs @@ -445,6 +445,7 @@ fn run_sqlinsert( listed_columns, row_source, returning, + .. } => rt.block_on(db.run_sql_insert( sql, Some(input.to_string()), @@ -1037,17 +1038,13 @@ fn autofill_preserves_on_conflict_clause() { } #[test] -fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() { - // CHARACTERIZATION of a real divergence (ADR pending — see the - // verbatim-vs-structural DML discussion). The DSL insert path validates - // a value against the playground type system at *bind* time - // (`Value::bind_for_column` → `validate_date`); the verbatim SQL insert - // path hands the literal straight to the engine, whose STRICT `date` - // column is TEXT and accepts any string. `2025/01/15` is a malformed - // date (slashes, not dashes): the DSL rejects it, advanced-mode SQL - // currently accepts it. When the forthcoming ADR routes SQL literal - // values through the same validation, FLIP the SQL assertion to expect - // a rejection. +fn sql_dml_validates_literal_values_like_the_dsl() { + // ADR-0036 Phase 1: advanced-mode SQL `INSERT` now validates each + // literal value against its column type before the (still verbatim) + // insert runs, sharing the DSL's per-type validators. `2025/01/15` is + // a malformed date (slashes, not dashes) — the DSL rejects it at bind + // time, and advanced-mode SQL now refuses it too (it used to splice the + // literal into text and let a STRICT TEXT column accept anything). let (project, db, _d) = open_project_db(); let r = rt(); r.block_on(db.create_table( @@ -1073,7 +1070,7 @@ fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() { "the DSL insert path validates `date` and rejects 2025/01/15; got {dsl:?}" ); - // SQL path (advanced mode, full pipeline) — currently ACCEPTS it. + // SQL path (advanced mode, full pipeline) — now REJECTS it too. std::fs::write( project.path().join("ins.commands"), "insert into T (id, d) values (2, '2025/01/15')\n", @@ -1081,8 +1078,107 @@ fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() { .expect("write script"); let events = r.block_on(run_replay(&db, project.path(), "ins.commands")); assert!( - matches!(events.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1), - "CHARACTERIZATION: verbatim SQL insert skips `date` validation and \ - accepts 2025/01/15 — the gap the ADR will close; events: {events:?}" + matches!(events.last(), Some(AppEvent::ReplayFailed { .. })), + "advanced-mode SQL validates the `date` literal and refuses \ + 2025/01/15 (ADR-0036 Phase 1); events: {events:?}" + ); + // A valid date still inserts (the bound/verbatim path is unaffected). + std::fs::write( + project.path().join("ok.commands"), + "insert into T (id, d) values (3, '2025-01-15')\n", + ) + .expect("write script"); + let ok = r.block_on(run_replay(&db, project.path(), "ok.commands")); + assert!( + matches!(ok.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1), + "a well-formed date still inserts; events: {ok:?}" + ); +} + +#[test] +fn sql_insert_expression_value_is_not_validated_and_runs() { + // An expression position (not a bare literal) is left to the engine — + // ADR-0036 Phase 1 has nothing static to validate, so `1 + 2` into an + // int column computes 3 and inserts; it must not be mis-classified as + // a literal or rejected. + let (project, db, _d) = open_project_db(); + let r = rt(); + r.block_on(db.create_table( + "T".to_string(), + vec![ + ColumnSpec::new("id", Type::Int), + ColumnSpec::new("n", Type::Int), + ], + vec!["id".to_string()], + Some("create table T with pk id(int)".to_string()), + )) + .expect("create T"); + std::fs::write( + project.path().join("e.commands"), + "insert into T (id, n) values (1, 1 + 2)\n", + ) + .expect("write script"); + let events = r.block_on(run_replay(&db, project.path(), "e.commands")); + assert!( + matches!(events.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1), + "the expression value executes (engine computes it); events: {events:?}" + ); +} + +#[test] +fn sql_insert_multi_row_validates_each_literal() { + // Validation applies to every literal row; a malformed `date` in the + // second tuple is caught (ADR-0036 Phase 1 — execution is verbatim, so + // multi-row comes for free). + let (project, db, _d) = open_project_db(); + let r = rt(); + r.block_on(db.create_table( + "T".to_string(), + vec![ + ColumnSpec::new("id", Type::Int), + ColumnSpec::new("d", Type::Date), + ], + vec!["id".to_string()], + Some("create table T with pk id(int)".to_string()), + )) + .expect("create T"); + std::fs::write( + project.path().join("m.commands"), + "insert into T (id, d) values (1, '2025-01-15'), (2, '2025/02/20')\n", + ) + .expect("write script"); + let events = r.block_on(run_replay(&db, project.path(), "m.commands")); + assert!( + matches!(events.last(), Some(AppEvent::ReplayFailed { .. })), + "the malformed date in the second row is caught; events: {events:?}" + ); +} + +#[test] +fn sql_insert_natural_order_validates_against_schema_columns() { + // With no explicit column list, positions map to the schema's columns + // in definition order (engine semantics) — validation must use that + // mapping, not an explicit list (ADR-0036 Phase 1). + let (project, db, _d) = open_project_db(); + let r = rt(); + r.block_on(db.create_table( + "T".to_string(), + vec![ + ColumnSpec::new("id", Type::Int), + ColumnSpec::new("d", Type::Date), + ], + vec!["id".to_string()], + Some("create table T with pk id(int)".to_string()), + )) + .expect("create T"); + std::fs::write( + project.path().join("nat.commands"), + "insert into T values (1, '2025/02/20')\n", + ) + .expect("write script"); + let events = r.block_on(run_replay(&db, project.path(), "nat.commands")); + assert!( + matches!(events.last(), Some(AppEvent::ReplayFailed { .. })), + "natural-order insert validates the date against column `d`; events: {events:?}" ); } diff --git a/tests/undo_snapshots.rs b/tests/undo_snapshots.rs index b1389e3..d78ae8e 100644 --- a/tests/undo_snapshots.rs +++ b/tests/undo_snapshots.rs @@ -273,6 +273,7 @@ async fn sql_insert(db: &Database, input: &str) { listed_columns, row_source, returning, + .. } => { db.run_sql_insert( sql,