feat: ADR-0036 Phase 2 — validate advanced-mode UPDATE SET literals + retain the value

Mirror Phase 1's capture-at-parse technique on the UPDATE SET assignment
list. build_sql_update calls the new capture_set_literals (data.rs), which
walks the matched tokens (no reparse, no grammar change) and classifies
each top-level `SET col = <rhs>` as a literal (Some, incl. signed numbers)
or an expression (None), using paren depth so a comma inside a function
call or a `where` inside a scalar subquery is not mistaken for a boundary,
and the trailing top-level WHERE is excluded.

Command::SqlUpdate gains set_literals; do_sql_update validates the literals
against their column types via the shared impl_value_for before the still
verbatim update; user_value_for_column reads them so a constraint error
names the offending value. WHERE stays unvalidated; execution and command
identity are unchanged.

Also corrects the stale data.rs header comment (DSL typed slots are wired,
not "deferred") and flips ADR-0036 + README to Phases 1–2 implemented.

Tests: 1934 passing (+4), 0 failed, 0 skipped, 1 ignored; clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-26 22:20:12 +00:00
parent 2f0af31b3b
commit 8c3b13b313
10 changed files with 413 additions and 27 deletions
+20 -4
View File
@@ -11,8 +11,13 @@ consolidating the two modes and a concrete auto-fill difference confirmed
that even the single-row literal case is **not** identical across modes).
**Phase 1 implemented 2026-05-26** (`INSERT … VALUES` literal validation +
offending-value retention; capture-at-parse, no grammar change, execution
unchanged). Phases 2 (`UPDATE … SET` literals) and 3 (completion
hinting/highlighting) pending.
unchanged). **Phase 2 implemented 2026-05-26** (`UPDATE … SET` literal
validation + offending-value retention; the same capture-at-parse technique
on the SET assignment list — `capture_set_literals` in `data.rs`
classifying each top-level RHS literal-vs-expression, validating literals in
`do_sql_update`, and reading them in `user_value_for_column`; `WHERE` is not
validated, execution stays verbatim). Phase 3 (completion
hinting/highlighting — the only part needing a grammar change) pending.
**Augments** **ADR-0030 §4** and **ADR-0033 §10** — it does **not**
supersede them and does **not** change the execution model. Advanced-mode
@@ -246,8 +251,19 @@ execution), only its `Result` is used.
the verbatim insert; the enricher reads them. Covers single- and
multi-row, with or without `RETURNING`/`ON CONFLICT`, because execution
is untouched.
- **Phase 2 — `UPDATE … SET` literal validation** (same technique on the
SET assignment list).
- **Phase 2 (implemented 2026-05-26) — `UPDATE … SET` literal
validation.** The same capture-at-parse technique on the SET assignment
list: `build_sql_update` calls `capture_set_literals`, which walks the
matched tokens (no reparse) and classifies each *top-level* `SET col =
<rhs>` into `(col, Some(Value))` for a bare literal (incl. a signed
number) or `(col, None)` for an expression — using paren depth so a comma
inside a function call or a `where` inside a scalar subquery is never
mistaken for an assignment/clause boundary, and so the trailing top-level
`WHERE` predicate is excluded. `Command::SqlUpdate` gains a
`set_literals` payload; `do_sql_update` validates the literals against
their column types (via the shared `impl_value_for`) before the still
verbatim update; `user_value_for_column` reads them so a constraint error
names the offending value. `WHERE` is deliberately not validated (§2).
- **Phase 3 — completion hinting / highlighting.** This is the *only*
part that needs a grammar change: a `Choice(typed-literal-slot,
sql_expr)` at each value position (reusing the DSL's live
+1 -1
View File
File diff suppressed because one or more lines are too long
+2
View File
@@ -3865,6 +3865,7 @@ mod tests {
sql: "update t set v = 1".to_string(),
target_table: "t".to_string(),
returning: false,
set_literals: Vec::new(),
},
result: crate::db::UpdateResult {
rows_affected: 2,
@@ -3897,6 +3898,7 @@ mod tests {
sql: "update t set v = 1".to_string(),
target_table: "t".to_string(),
returning: false,
set_literals: Vec::new(),
},
result: crate::db::UpdateResult {
rows_affected: 1,
+49 -4
View File
@@ -711,6 +711,10 @@ enum Request {
source: Option<String>,
target_table: String,
returning: bool,
/// Captured literal `SET col = <literal>` values (`(col, None)` =
/// expression RHS) for app-level type validation before the
/// verbatim update (ADR-0036 Phase 2).
set_literals: Vec<(String, Option<Value>)>,
reply: oneshot::Sender<Result<UpdateResult, DbError>>,
},
/// Run a grammar-validated SQL `DELETE` (ADR-0033 §1/§7). The
@@ -1506,17 +1510,38 @@ impl Database {
recv.await.map_err(|_| DbError::WorkerGone)?
}
/// Run a validated SQL `UPDATE` and return the affected-row
/// count (ADR-0033 §2, sub-phase 3e). `sql` is the
/// Run a validated SQL `UPDATE` with **no** captured literals (no
/// app-level value validation — the verbatim ADR-0033 path). Used by
/// worker-level callers that build the statement directly. The
/// runtime, which has a parsed command, uses
/// [`Self::run_sql_update_with_literals`] instead so the `SET`
/// literals are validated (ADR-0036 Phase 2). `sql` is the
/// grammar-validated statement text; `source` is the literal
/// submitted line for `history.log`; `target_table` is the
/// parsed target whose CSV is re-persisted.
/// submitted line for `history.log`; `target_table` is the parsed
/// target whose CSV is re-persisted.
pub async fn run_sql_update(
&self,
sql: String,
source: Option<String>,
target_table: String,
returning: bool,
) -> Result<UpdateResult, DbError> {
self.run_sql_update_with_literals(sql, source, target_table, returning, Vec::new())
.await
}
/// As [`Self::run_sql_update`], plus the literal `SET` values captured
/// at parse (`(col, None)` = expression RHS) so the worker can
/// validate each literal against its column type before the (still
/// verbatim) update and the error layer can name the offending value
/// (ADR-0036 Phase 2).
pub async fn run_sql_update_with_literals(
&self,
sql: String,
source: Option<String>,
target_table: String,
returning: bool,
set_literals: Vec<(String, Option<Value>)>,
) -> Result<UpdateResult, DbError> {
let (reply, recv) = oneshot::channel();
self.send(Request::RunSqlUpdate {
@@ -1524,6 +1549,7 @@ impl Database {
source,
target_table,
returning,
set_literals,
reply,
})
.await?;
@@ -2493,6 +2519,7 @@ fn handle_request(
source,
target_table,
returning,
set_literals,
reply,
} => {
snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_sql_update(
@@ -2502,6 +2529,7 @@ fn handle_request(
&sql,
&target_table,
returning,
&set_literals,
));
}
Request::RunSqlDelete {
@@ -8545,10 +8573,27 @@ fn do_sql_update(
sql: &str,
target_table: &str,
returning: bool,
set_literals: &[(String, Option<Value>)],
) -> Result<UpdateResult, DbError> {
debug!(sql = %sql, table = %target_table, returning, "sql_update");
let canonical_table = require_canonical_table(conn, target_table)?;
let target_table = canonical_table.as_str();
// ADR-0036 Phase 2: validate each captured `SET col = <literal>`
// against its column type BEFORE the (still verbatim) update runs —
// sharing the DSL's per-type validators (`impl_value_for`, the same
// helper `build_update_sql` uses) for identical wording. Only literal
// assignments are checked; expression positions (`None`) and the
// `WHERE` predicate are left to the engine (ADR-0036 §2). Execution
// below is unchanged (no binding).
if set_literals.iter().any(|(_, v)| v.is_some()) {
let schema = read_schema(conn, target_table)?;
for (col, slot) in set_literals {
if let Some(value) = slot {
impl_value_for(&schema, col, value)?;
}
}
}
let tx = conn
.unchecked_transaction()
.map_err(DbError::from_rusqlite)?;
+10
View File
@@ -442,6 +442,16 @@ pub enum Command {
/// Whether a `RETURNING` clause matched (ADR-0033 §5,
/// sub-phase 3g).
returning: bool,
/// Captured literal RHS of each top-level `SET col = <literal>`
/// assignment (ADR-0036 Phase 2). `(col, Some(v))` for a bare
/// literal (incl. a signed number); `(col, None)` for an
/// expression RHS (arithmetic, function call, scalar subquery,
/// column ref — nothing static to validate). The worker validates
/// the `Some` values against their column types before the (still
/// verbatim) update; the error enricher reads them to name the
/// offending value. Execution itself is unchanged — these are
/// *not* bound. `WHERE` is deliberately excluded (ADR-0036 §2).
set_literals: Vec<(String, Option<Value>)>,
},
/// A SQL `DELETE` validated by the walker (ADR-0033 §1/§7,
/// advanced mode). Grammar-as-text: the worker executes `sql`,
+123 -11
View File
@@ -4,17 +4,25 @@
//! show table), `insert`, `update`, `delete`. The walker route
//! owns these end-to-end.
//!
//! Phase D scope deviation note: ADR-0024's Phase D describes
//! "full schema awareness" via `DynamicSubgrammar
//! (column_value_list)` that unfolds typed slots per column. This
//! milestone lands the data commands at functional parity with
//! the existing chumsky parser — value slots accept any
//! literal regardless of column type, with type validation
//! happening at bind time (matching today's behaviour). The
//! `DynamicSubgrammar` machinery and schema-cache plumbing are
//! deferred to a follow-up refinement; the trie shape is
//! ready to consume them when the schema reference flows
//! through `parse_command`.
//! Schema awareness (ADR-0024 §Phase D): the DSL value slots are
//! wired to `DynamicSubgrammar(column_value_list)` /
//! `current_column_value` (see `INSERT_VALUES_LIST`,
//! `insert_first_paren`, `PER_COLUMN_VALUE`), so the schema reference
//! that flows through `parse_command` unfolds a typed slot per column:
//! numeric-shape mismatch is caught at parse (`int`/`decimal`/`bool`
//! slots in `shared.rs`) and the full semantic type (`date` / `shortid`
//! format) is validated at bind time. So the simple-mode DSL gives data
//! values per-column feedback end-to-end.
//!
//! The advanced-mode SQL DML surface (`build_sql_insert` /
//! `build_sql_update` below) is a separate path: it executes the
//! validated statement verbatim (ADR-0030 §4) and is NOT yet wired to
//! the typed slots. ADR-0036 closes the resulting value-feedback gap
//! without a grammar change by *capturing* each literal value position
//! at parse (`capture_literal_rows` / `capture_set_literals`) and
//! validating it against the column type in the worker — Phase 3 will
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.
use crate::dsl::command::{Command, Expr, RowFilter};
use crate::dsl::grammar::{
@@ -1080,13 +1088,117 @@ fn build_sql_update(path: &MatchedPath, source: &str) -> Result<Command, Validat
})
.unwrap_or_default();
let sql = source.trim().to_string();
// Capture the literal RHS of each top-level `SET col = <literal>`
// assignment for app-level type validation + error enrichment
// (ADR-0036 Phase 2). Purely from the matched tokens — no reparse.
let set_literals = capture_set_literals(path);
Ok(Command::SqlUpdate {
sql,
target_table,
returning: path_has_returning(path),
set_literals,
})
}
/// Capture the literal RHS of each top-level `SET col = <literal>`
/// assignment from the matched path (ADR-0036 Phase 2). Returns
/// `(col, Some(Value))` for a bare-literal RHS (incl. a signed number)
/// and `(col, None)` for an expression RHS (arithmetic, function call,
/// scalar subquery, column ref — nothing static to validate). Works
/// purely from the tokens the walker already matched (no reparse).
///
/// Boundaries: the assignment LHS is the `update_set_column` ident (a
/// role only ever emitted at the top level of an assignment — expression
/// column refs carry `sql_expr_ident` / `sql_expr_qualified_ref`, so they
/// are never confused with it). A *depth-0* comma separates assignments;
/// a *depth-0* `where` / `returning` keyword (or `;` / end of path) ends
/// the SET list. Parens raise the depth so a comma, `where`, or `=`
/// inside a function call or scalar subquery on the RHS is never mistaken
/// for an assignment / clause boundary or the assignment operator.
fn capture_set_literals(path: &MatchedPath) -> Vec<(String, Option<Value>)> {
let mut out: Vec<(String, Option<Value>)> = Vec::new();
let mut after_set = false;
let mut depth: i32 = 0;
// The assignment currently being accumulated: its column name, its
// RHS tokens so far, and whether the assignment `=` has been consumed.
let mut cur_col: Option<String> = None;
let mut cur_rhs: Vec<&MatchedItem> = Vec::new();
let mut seen_eq = false;
// Finalise the pending assignment (if any) into `out`.
fn flush(
col: &mut Option<String>,
rhs: &mut Vec<&MatchedItem>,
out: &mut Vec<(String, Option<Value>)>,
) {
if let Some(c) = col.take() {
out.push((c, classify_value_position(rhs)));
}
rhs.clear();
}
for item in &path.items {
if !after_set {
// Scan only the SET list — skip everything up to (and
// including) the `set` keyword. The first `update_set_column`
// appears after it.
if matches!(item.kind, MatchedKind::Word("set")) {
after_set = true;
}
continue;
}
// A depth-0 `where` / `returning` / `;` ends the SET list.
if depth == 0
&& matches!(
item.kind,
MatchedKind::Word("where" | "returning") | MatchedKind::Punct(';')
)
{
break;
}
match &item.kind {
MatchedKind::Punct('(') => {
depth += 1;
if cur_col.is_some() && seen_eq {
cur_rhs.push(item);
}
}
MatchedKind::Punct(')') => {
depth -= 1;
if cur_col.is_some() && seen_eq {
cur_rhs.push(item);
}
}
MatchedKind::Ident {
role: "update_set_column",
..
} if depth == 0 => {
// A new assignment begins — finalise the previous one.
flush(&mut cur_col, &mut cur_rhs, &mut out);
cur_col = Some(item.text.clone());
seen_eq = false;
}
MatchedKind::Punct(',') if depth == 0 => {
// Assignment separator — finalise the current assignment;
// the next `update_set_column` starts the following one.
flush(&mut cur_col, &mut cur_rhs, &mut out);
}
MatchedKind::Punct('=') if depth == 0 && !seen_eq && cur_col.is_some() => {
// The assignment operator — consumed, not part of the RHS.
seen_eq = true;
}
_ => {
if cur_col.is_some() && seen_eq {
cur_rhs.push(item);
}
}
}
}
// Finalise the last assignment (ended by `where`/`returning`/`;`/EOF).
flush(&mut cur_col, &mut cur_rhs, &mut out);
out
}
/// Build `Command::SqlDelete` from a validated SQL `DELETE`
/// (ADR-0033 §1/§7). Extracts the target table from the matched
/// path so the worker re-persists the right CSV and snapshots the
+10 -1
View File
@@ -1583,6 +1583,14 @@ fn user_value_for_column(command: &Command, column: &str) -> Option<crate::dsl::
let idx = listed_columns.iter().position(|c| c == column)?;
literal_rows[0].get(idx).cloned().flatten()
}
// ADR-0036 Phase 2: a SQL UPDATE retains its captured `SET`
// literals, so a constraint error can name the real value.
// Assignments are explicitly named, so (unlike SqlInsert) there is
// no positional/multi-row ambiguity — mirror the DSL `Update` case.
Command::SqlUpdate { set_literals, .. } => set_literals
.iter()
.find(|(c, _)| c == column)
.and_then(|(_, v)| v.clone()),
_ => None,
}
}
@@ -2270,8 +2278,9 @@ async fn execute_command_typed(
sql,
target_table,
returning,
set_literals,
} => database
.run_sql_update(sql, src, target_table, returning)
.run_sql_update_with_literals(sql, src, target_table, returning, set_literals)
.await
.map(CommandOutcome::Update),
// A SQL `DELETE` (advanced mode; ADR-0033 §1/§7). Grammar-
+70
View File
@@ -200,6 +200,76 @@ fn enrich_unique_update_resolves_value_from_assignments() {
});
}
#[test]
fn enrich_unique_sql_update_resolves_value_from_set_literals() {
// ADR-0036 Phase 2: an advanced-mode SQL `UPDATE` now retains its
// `SET` literals, so a UNIQUE violation names the offending value —
// closing the error-value gap for advanced mode, mirroring the DSL
// `Update` case above. The value flows from the parse-captured
// `set_literals` through `user_value_for_column`.
let db = db();
rt().block_on(async {
db.create_table(
"Customers".to_string(),
vec![
ColumnSpec::new("id".to_string(), Type::Int),
ColumnSpec::new("name".to_string(), Type::Text),
],
vec!["id".to_string()],
None,
)
.await
.unwrap();
db.insert(
"Customers".to_string(),
None,
vec![Value::Number("1".to_string()), Value::Text("Alice".to_string())],
None,
)
.await
.unwrap();
db.insert(
"Customers".to_string(),
None,
vec![Value::Number("2".to_string()), Value::Text("Bob".to_string())],
None,
)
.await
.unwrap();
// Advanced-mode SQL: set Bob's id to 1 — collides with Alice.
let input = "update Customers set id = 1 where name = 'Bob'";
let cmd = parse_command(input).expect("parses as advanced-mode SQL update");
let Command::SqlUpdate {
sql,
target_table,
returning,
set_literals,
} = cmd.clone()
else {
panic!("expected Command::SqlUpdate, got {cmd:?}");
};
// The literal `1` is a valid int, so Phase-2 validation passes and
// the engine-level UNIQUE violation is what surfaces.
let err = db
.run_sql_update_with_literals(sql, None, target_table, returning, set_literals)
.await
.unwrap_err();
assert!(matches!(
err,
DbError::Sqlite { kind: SqliteErrorKind::UniqueViolation, .. }
));
let facts = enrich_dsl_failure(&db, &cmd, &err).await;
assert_eq!(facts.column.as_deref(), Some("id"));
assert_eq!(
facts.value.as_deref(),
Some("1"),
"the offending SET value is named (from set_literals)"
);
});
}
// ---- NOT NULL ---------------------------------------------------
#[test]
+8 -2
View File
@@ -114,8 +114,14 @@ fn run_update(
input: &str,
) -> Result<UpdateResult, DbError> {
match parse_command(input).unwrap_or_else(|e| panic!("parse {input:?}: {e:?}")) {
Command::SqlUpdate { sql, target_table, returning } => rt.block_on(
db.run_sql_update(sql, Some(input.to_string()), target_table, returning),
Command::SqlUpdate { sql, target_table, returning, set_literals } => rt.block_on(
db.run_sql_update_with_literals(
sql,
Some(input.to_string()),
target_table,
returning,
set_literals,
),
),
other => panic!("expected Command::SqlUpdate from {input:?}, got {other:?}"),
}
+120 -4
View File
@@ -8,9 +8,11 @@
//! across all rows with no rail (ADR-0030 §12).
use rdbms_playground::db::{Database, DbError, UpdateResult};
use rdbms_playground::dsl::{ColumnSpec, Command, Type, parse_command};
use rdbms_playground::dsl::{ColumnSpec, Command, Type, Value, parse_command};
use rdbms_playground::event::AppEvent;
use rdbms_playground::persistence::Persistence;
use rdbms_playground::project;
use rdbms_playground::runtime::run_replay;
fn rt() -> tokio::runtime::Runtime {
tokio::runtime::Builder::new_current_thread()
@@ -70,9 +72,15 @@ fn run_update(
input: &str,
) -> Result<UpdateResult, DbError> {
match parse_command(input).expect("parse update") {
Command::SqlUpdate { sql, target_table, returning } => {
rt.block_on(db.run_sql_update(sql, Some(input.to_string()), target_table, returning))
}
Command::SqlUpdate { sql, target_table, returning, set_literals } => rt.block_on(
db.run_sql_update_with_literals(
sql,
Some(input.to_string()),
target_table,
returning,
set_literals,
),
),
other => panic!("expected Command::SqlUpdate, got {other:?}"),
}
}
@@ -205,6 +213,114 @@ fn update_appends_literal_line_to_history() {
assert!(body.contains(input), "history records the literal line: {body:?}");
}
// =================================================================
// ADR-0036 Phase 2 — `SET` literal value validation
// =================================================================
#[test]
fn sql_update_validates_set_literals_like_the_dsl() {
// ADR-0036 Phase 2: advanced-mode SQL `UPDATE` now validates each
// literal `SET col = <literal>` value against its column type before
// the (still verbatim) update runs, sharing the DSL's per-type
// validators. `2025/01/15` is a malformed date (slashes, not dashes):
// the DSL update rejects it at bind time, and advanced-mode SQL now
// refuses it too (it used to splice the literal into text and let a
// STRICT TEXT column accept anything).
let (project, db, _dir) = open_project_db();
let rt = rt();
create_cols(&db, &rt, "t", &[("id", Type::Int), ("d", Type::Date)], &["id"]);
seed(&db, &rt, "insert into t (id, d) values (1, '2025-01-15')", "t");
// SQL path (advanced mode, full replay pipeline) — REJECTS the bad date.
std::fs::write(
project.path().join("bad.commands"),
"update t set d = '2025/01/15' where id = 1\n",
)
.expect("write script");
let events = rt.block_on(run_replay(&db, project.path(), "bad.commands"));
assert!(
matches!(events.last(), Some(AppEvent::ReplayFailed { .. })),
"advanced-mode SQL validates the `date` SET literal and refuses \
2025/01/15 (ADR-0036 Phase 2); events: {events:?}"
);
// A well-formed date still updates (the verbatim path is unaffected).
std::fs::write(
project.path().join("ok.commands"),
"update t set d = '2025-02-20' where id = 1\n",
)
.expect("write script");
let ok = rt.block_on(run_replay(&db, project.path(), "ok.commands"));
assert!(
matches!(ok.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1),
"a well-formed date still updates; events: {ok:?}"
);
}
#[test]
fn sql_update_captures_set_literal_classification() {
// ADR-0036 Phase 2 seam (the "one new seam to keep honest"): each
// top-level `SET` RHS is classified — a bare literal (string / signed
// number / bool / null) is captured as `Some`, while an expression
// (arithmetic / scalar subquery / function call / column ref) is
// `None` and left to the engine. Critically, a comma *inside* a
// function call and a `where` *inside* a subquery must NOT be mistaken
// for an assignment separator / SET-list terminator (paren-depth
// guard), and the trailing top-level `WHERE` predicate is not captured.
let cmd = parse_command(
"update t set a = '2025-01-15', b = price * qty, c = -5, \
d = (select max(n) from o where n < 100), e = true, \
f = coalesce(g, 0), h = null where id = 7",
)
.expect("advanced-mode SQL update parses");
match cmd {
Command::SqlUpdate { set_literals, .. } => {
assert_eq!(
set_literals,
vec![
("a".to_string(), Some(Value::Text("2025-01-15".to_string()))),
("b".to_string(), None),
("c".to_string(), Some(Value::Number("-5".to_string()))),
("d".to_string(), None),
("e".to_string(), Some(Value::Bool(true))),
("f".to_string(), None),
("h".to_string(), Some(Value::Null)),
],
"literals captured; arithmetic / subquery (with inner WHERE) / \
function call (with inner comma) skipped; trailing WHERE excluded",
);
}
other => panic!("expected Command::SqlUpdate, got {other:?}"),
}
}
#[test]
fn sql_update_validates_every_assignment_not_just_the_first() {
// A malformed literal in the *second* assignment is caught — the
// validation loop covers every `SET` literal, not only the first
// (ADR-0036 Phase 2). The first assignment (`v = 'ok'`) is well-formed.
let (project, db, _dir) = open_project_db();
let rt = rt();
create_cols(
&db,
&rt,
"t",
&[("id", Type::Int), ("v", Type::Text), ("d", Type::Date)],
&["id"],
);
seed(&db, &rt, "insert into t (id, v, d) values (1, 'a', '2025-01-01')", "t");
std::fs::write(
project.path().join("multi.commands"),
"update t set v = 'ok', d = '2025/01/15' where id = 1\n",
)
.expect("write script");
let events = rt.block_on(run_replay(&db, project.path(), "multi.commands"));
assert!(
matches!(events.last(), Some(AppEvent::ReplayFailed { .. })),
"the malformed date in the second assignment is caught; events: {events:?}"
);
}
// =================================================================
// Sub-phase 3g — RETURNING (ADR-0033 §5)
// =================================================================