feat: ADR-0036 Phase 2 — validate advanced-mode UPDATE SET literals + retain the value

Mirror Phase 1's capture-at-parse technique on the UPDATE SET assignment list. build_sql_update calls the new capture_set_literals (data.rs), which walks the matched tokens (no reparse, no grammar change) and classifies each top-level `SET col = <rhs>` as a literal (Some, incl. signed numbers) or an expression (None), using paren depth so a comma inside a function call or a `where` inside a scalar subquery is not mistaken for a boundary, and the trailing top-level WHERE is excluded. Command::SqlUpdate gains set_literals; do_sql_update validates the literals against their column types via the shared impl_value_for before the still verbatim update; user_value_for_column reads them so a constraint error names the offending value. WHERE stays unvalidated; execution and command identity are unchanged. Also corrects the stale data.rs header comment (DSL typed slots are wired, not "deferred") and flips ADR-0036 + README to Phases 1–2 implemented. Tests: 1934 passing (+4), 0 failed, 0 skipped, 1 ignored; clippy clean.
2026-05-26 22:20:12 +00:00
parent 2f0af31b3b
commit 8c3b13b313
10 changed files with 413 additions and 27 deletions
@@ -3865,6 +3865,7 @@ mod tests {
                sql: "update t set v = 1".to_string(),
                target_table: "t".to_string(),
                returning: false,
+                set_literals: Vec::new(),
            },
            result: crate::db::UpdateResult {
                rows_affected: 2,
@@ -3897,6 +3898,7 @@ mod tests {
                sql: "update t set v = 1".to_string(),
                target_table: "t".to_string(),
                returning: false,
+                set_literals: Vec::new(),
            },
            result: crate::db::UpdateResult {
                rows_affected: 1,
@@ -711,6 +711,10 @@ enum Request {
        source: Option<String>,
        target_table: String,
        returning: bool,
+        /// Captured literal `SET col = <literal>` values (`(col, None)` =
+        /// expression RHS) for app-level type validation before the
+        /// verbatim update (ADR-0036 Phase 2).
+        set_literals: Vec<(String, Option<Value>)>,
        reply: oneshot::Sender<Result<UpdateResult, DbError>>,
    },
    /// Run a grammar-validated SQL `DELETE` (ADR-0033 §1/§7). The
@@ -1506,17 +1510,38 @@ impl Database {
        recv.await.map_err(|_| DbError::WorkerGone)?
    }

-    /// Run a validated SQL `UPDATE` and return the affected-row
-    /// count (ADR-0033 §2, sub-phase 3e). `sql` is the
+    /// Run a validated SQL `UPDATE` with **no** captured literals (no
+    /// app-level value validation — the verbatim ADR-0033 path). Used by
+    /// worker-level callers that build the statement directly. The
+    /// runtime, which has a parsed command, uses
+    /// [`Self::run_sql_update_with_literals`] instead so the `SET`
+    /// literals are validated (ADR-0036 Phase 2). `sql` is the
    /// grammar-validated statement text; `source` is the literal
-    /// submitted line for `history.log`; `target_table` is the
-    /// parsed target whose CSV is re-persisted.
+    /// submitted line for `history.log`; `target_table` is the parsed
+    /// target whose CSV is re-persisted.
    pub async fn run_sql_update(
        &self,
        sql: String,
        source: Option<String>,
        target_table: String,
        returning: bool,
+    ) -> Result<UpdateResult, DbError> {
+        self.run_sql_update_with_literals(sql, source, target_table, returning, Vec::new())
+            .await
+    }
+
+    /// As [`Self::run_sql_update`], plus the literal `SET` values captured
+    /// at parse (`(col, None)` = expression RHS) so the worker can
+    /// validate each literal against its column type before the (still
+    /// verbatim) update and the error layer can name the offending value
+    /// (ADR-0036 Phase 2).
+    pub async fn run_sql_update_with_literals(
+        &self,
+        sql: String,
+        source: Option<String>,
+        target_table: String,
+        returning: bool,
+        set_literals: Vec<(String, Option<Value>)>,
    ) -> Result<UpdateResult, DbError> {
        let (reply, recv) = oneshot::channel();
        self.send(Request::RunSqlUpdate {
@@ -1524,6 +1549,7 @@ impl Database {
            source,
            target_table,
            returning,
+            set_literals,
            reply,
        })
        .await?;
@@ -2493,6 +2519,7 @@ fn handle_request(
            source,
            target_table,
            returning,
+            set_literals,
            reply,
        } => {
            snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_sql_update(
@@ -2502,6 +2529,7 @@ fn handle_request(
                &sql,
                &target_table,
                returning,
+                &set_literals,
            ));
        }
        Request::RunSqlDelete {
@@ -8545,10 +8573,27 @@ fn do_sql_update(
    sql: &str,
    target_table: &str,
    returning: bool,
+    set_literals: &[(String, Option<Value>)],
 ) -> Result<UpdateResult, DbError> {
    debug!(sql = %sql, table = %target_table, returning, "sql_update");
    let canonical_table = require_canonical_table(conn, target_table)?;
    let target_table = canonical_table.as_str();
+
+    // ADR-0036 Phase 2: validate each captured `SET col = <literal>`
+    // against its column type BEFORE the (still verbatim) update runs —
+    // sharing the DSL's per-type validators (`impl_value_for`, the same
+    // helper `build_update_sql` uses) for identical wording. Only literal
+    // assignments are checked; expression positions (`None`) and the
+    // `WHERE` predicate are left to the engine (ADR-0036 §2). Execution
+    // below is unchanged (no binding).
+    if set_literals.iter().any(|(_, v)| v.is_some()) {
+        let schema = read_schema(conn, target_table)?;
+        for (col, slot) in set_literals {
+            if let Some(value) = slot {
+                impl_value_for(&schema, col, value)?;
+            }
+        }
+    }
    let tx = conn
        .unchecked_transaction()
        .map_err(DbError::from_rusqlite)?;
@@ -442,6 +442,16 @@ pub enum Command {
        /// Whether a `RETURNING` clause matched (ADR-0033 §5,
        /// sub-phase 3g).
        returning: bool,
+        /// Captured literal RHS of each top-level `SET col = <literal>`
+        /// assignment (ADR-0036 Phase 2). `(col, Some(v))` for a bare
+        /// literal (incl. a signed number); `(col, None)` for an
+        /// expression RHS (arithmetic, function call, scalar subquery,
+        /// column ref — nothing static to validate). The worker validates
+        /// the `Some` values against their column types before the (still
+        /// verbatim) update; the error enricher reads them to name the
+        /// offending value. Execution itself is unchanged — these are
+        /// *not* bound. `WHERE` is deliberately excluded (ADR-0036 §2).
+        set_literals: Vec<(String, Option<Value>)>,
    },
    /// A SQL `DELETE` validated by the walker (ADR-0033 §1/§7,
    /// advanced mode). Grammar-as-text: the worker executes `sql`,
@@ -4,17 +4,25 @@
 //! show table), `insert`, `update`, `delete`. The walker route
 //! owns these end-to-end.
 //!
-//! Phase D scope deviation note: ADR-0024's Phase D describes
-//! "full schema awareness" via `DynamicSubgrammar
-//! (column_value_list)` that unfolds typed slots per column. This
-//! milestone lands the data commands at functional parity with
-//! the existing chumsky parser — value slots accept any
-//! literal regardless of column type, with type validation
-//! happening at bind time (matching today's behaviour). The
-//! `DynamicSubgrammar` machinery and schema-cache plumbing are
-//! deferred to a follow-up refinement; the trie shape is
-//! ready to consume them when the schema reference flows
-//! through `parse_command`.
+//! Schema awareness (ADR-0024 §Phase D): the DSL value slots are
+//! wired to `DynamicSubgrammar(column_value_list)` /
+//! `current_column_value` (see `INSERT_VALUES_LIST`,
+//! `insert_first_paren`, `PER_COLUMN_VALUE`), so the schema reference
+//! that flows through `parse_command` unfolds a typed slot per column:
+//! numeric-shape mismatch is caught at parse (`int`/`decimal`/`bool`
+//! slots in `shared.rs`) and the full semantic type (`date` / `shortid`
+//! format) is validated at bind time. So the simple-mode DSL gives data
+//! values per-column feedback end-to-end.
+//!
+//! The advanced-mode SQL DML surface (`build_sql_insert` /
+//! `build_sql_update` below) is a separate path: it executes the
+//! validated statement verbatim (ADR-0030 §4) and is NOT yet wired to
+//! the typed slots. ADR-0036 closes the resulting value-feedback gap
+//! without a grammar change by *capturing* each literal value position
+//! at parse (`capture_literal_rows` / `capture_set_literals`) and
+//! validating it against the column type in the worker — Phase 3 will
+//! later swap that capture for the same typed slots used here, adding
+//! live hints/highlighting.

 use crate::dsl::command::{Command, Expr, RowFilter};
 use crate::dsl::grammar::{
@@ -1080,13 +1088,117 @@ fn build_sql_update(path: &MatchedPath, source: &str) -> Result<Command, Validat
        })
        .unwrap_or_default();
    let sql = source.trim().to_string();
+    // Capture the literal RHS of each top-level `SET col = <literal>`
+    // assignment for app-level type validation + error enrichment
+    // (ADR-0036 Phase 2). Purely from the matched tokens — no reparse.
+    let set_literals = capture_set_literals(path);
    Ok(Command::SqlUpdate {
        sql,
        target_table,
        returning: path_has_returning(path),
+        set_literals,
    })
 }

+/// Capture the literal RHS of each top-level `SET col = <literal>`
+/// assignment from the matched path (ADR-0036 Phase 2). Returns
+/// `(col, Some(Value))` for a bare-literal RHS (incl. a signed number)
+/// and `(col, None)` for an expression RHS (arithmetic, function call,
+/// scalar subquery, column ref — nothing static to validate). Works
+/// purely from the tokens the walker already matched (no reparse).
+///
+/// Boundaries: the assignment LHS is the `update_set_column` ident (a
+/// role only ever emitted at the top level of an assignment — expression
+/// column refs carry `sql_expr_ident` / `sql_expr_qualified_ref`, so they
+/// are never confused with it). A *depth-0* comma separates assignments;
+/// a *depth-0* `where` / `returning` keyword (or `;` / end of path) ends
+/// the SET list. Parens raise the depth so a comma, `where`, or `=`
+/// inside a function call or scalar subquery on the RHS is never mistaken
+/// for an assignment / clause boundary or the assignment operator.
+fn capture_set_literals(path: &MatchedPath) -> Vec<(String, Option<Value>)> {
+    let mut out: Vec<(String, Option<Value>)> = Vec::new();
+    let mut after_set = false;
+    let mut depth: i32 = 0;
+    // The assignment currently being accumulated: its column name, its
+    // RHS tokens so far, and whether the assignment `=` has been consumed.
+    let mut cur_col: Option<String> = None;
+    let mut cur_rhs: Vec<&MatchedItem> = Vec::new();
+    let mut seen_eq = false;
+
+    // Finalise the pending assignment (if any) into `out`.
+    fn flush(
+        col: &mut Option<String>,
+        rhs: &mut Vec<&MatchedItem>,
+        out: &mut Vec<(String, Option<Value>)>,
+    ) {
+        if let Some(c) = col.take() {
+            out.push((c, classify_value_position(rhs)));
+        }
+        rhs.clear();
+    }
+
+    for item in &path.items {
+        if !after_set {
+            // Scan only the SET list — skip everything up to (and
+            // including) the `set` keyword. The first `update_set_column`
+            // appears after it.
+            if matches!(item.kind, MatchedKind::Word("set")) {
+                after_set = true;
+            }
+            continue;
+        }
+        // A depth-0 `where` / `returning` / `;` ends the SET list.
+        if depth == 0
+            && matches!(
+                item.kind,
+                MatchedKind::Word("where" | "returning") | MatchedKind::Punct(';')
+            )
+        {
+            break;
+        }
+        match &item.kind {
+            MatchedKind::Punct('(') => {
+                depth += 1;
+                if cur_col.is_some() && seen_eq {
+                    cur_rhs.push(item);
+                }
+            }
+            MatchedKind::Punct(')') => {
+                depth -= 1;
+                if cur_col.is_some() && seen_eq {
+                    cur_rhs.push(item);
+                }
+            }
+            MatchedKind::Ident {
+                role: "update_set_column",
+                ..
+            } if depth == 0 => {
+                // A new assignment begins — finalise the previous one.
+                flush(&mut cur_col, &mut cur_rhs, &mut out);
+                cur_col = Some(item.text.clone());
+                seen_eq = false;
+            }
+            MatchedKind::Punct(',') if depth == 0 => {
+                // Assignment separator — finalise the current assignment;
+                // the next `update_set_column` starts the following one.
+                flush(&mut cur_col, &mut cur_rhs, &mut out);
+            }
+            MatchedKind::Punct('=') if depth == 0 && !seen_eq && cur_col.is_some() => {
+                // The assignment operator — consumed, not part of the RHS.
+                seen_eq = true;
+            }
+            _ => {
+                if cur_col.is_some() && seen_eq {
+                    cur_rhs.push(item);
+                }
+            }
+        }
+    }
+    // Finalise the last assignment (ended by `where`/`returning`/`;`/EOF).
+    flush(&mut cur_col, &mut cur_rhs, &mut out);
+    out
+}
+
 /// Build `Command::SqlDelete` from a validated SQL `DELETE`
 /// (ADR-0033 §1/§7). Extracts the target table from the matched
 /// path so the worker re-persists the right CSV and snapshots the
@@ -1583,6 +1583,14 @@ fn user_value_for_column(command: &Command, column: &str) -> Option<crate::dsl::
            let idx = listed_columns.iter().position(|c| c == column)?;
            literal_rows[0].get(idx).cloned().flatten()
        }
+        // ADR-0036 Phase 2: a SQL UPDATE retains its captured `SET`
+        // literals, so a constraint error can name the real value.
+        // Assignments are explicitly named, so (unlike SqlInsert) there is
+        // no positional/multi-row ambiguity — mirror the DSL `Update` case.
+        Command::SqlUpdate { set_literals, .. } => set_literals
+            .iter()
+            .find(|(c, _)| c == column)
+            .and_then(|(_, v)| v.clone()),
        _ => None,
    }
 }
@@ -2270,8 +2278,9 @@ async fn execute_command_typed(
            sql,
            target_table,
            returning,
+            set_literals,
        } => database
-            .run_sql_update(sql, src, target_table, returning)
+            .run_sql_update_with_literals(sql, src, target_table, returning, set_literals)
            .await
            .map(CommandOutcome::Update),
        // A SQL `DELETE` (advanced mode; ADR-0033 §1/§7). Grammar-