feat: ADR-0036 Phase 1 — validate advanced-mode INSERT literals + show the value
Capture literal VALUES at parse onto Command::SqlInsert (no grammar change, no reparse); validate them against column types before the still-verbatim insert (reusing impl_value_for for DSL-parity wording); read them in the error enricher so a constraint error names the real value. Execution, auto-fill, and command identity unchanged. Adds run_sql_insert_with_literals (runtime path); run_sql_insert stays the no-capture raw entry. Proven: malformed date 2025/01/15 now refused in advanced-mode SQL; replayed UNIQUE shows the real value. Tests +3 (expression runs, multi-row, natural order) + 2 flipped/strengthened. 1930 pass / 0 fail / 0 skip; clippy clean.
This commit is contained in:
@@ -696,6 +696,10 @@ enum Request {
|
||||
listed_columns: Vec<String>,
|
||||
row_source: String,
|
||||
returning: bool,
|
||||
/// Captured literal `VALUES` (per row, per position; `None` =
|
||||
/// expression) for app-level type validation before the verbatim
|
||||
/// insert (ADR-0036 Phase 1).
|
||||
literal_rows: Vec<Vec<Option<Value>>>,
|
||||
reply: oneshot::Sender<Result<InsertResult, DbError>>,
|
||||
},
|
||||
/// Run a grammar-validated SQL `UPDATE` (ADR-0033 §2). The
|
||||
@@ -1444,6 +1448,12 @@ impl Database {
|
||||
/// `sql` is the grammar-validated statement text; `source` is
|
||||
/// the literal submitted line for `history.log`; `target_table`
|
||||
/// is the parsed target whose CSV is re-persisted.
|
||||
/// Run a grammar-validated SQL `INSERT` with **no** captured literals
|
||||
/// (no app-level value validation — the verbatim ADR-0033 path). Used
|
||||
/// by worker-level callers that build the statement directly. The
|
||||
/// runtime, which has a parsed command, uses
|
||||
/// [`Self::run_sql_insert_with_literals`] instead so the literals are
|
||||
/// validated (ADR-0036 Phase 1).
|
||||
pub async fn run_sql_insert(
|
||||
&self,
|
||||
sql: String,
|
||||
@@ -1452,6 +1462,34 @@ impl Database {
|
||||
listed_columns: Vec<String>,
|
||||
row_source: String,
|
||||
returning: bool,
|
||||
) -> Result<InsertResult, DbError> {
|
||||
self.run_sql_insert_with_literals(
|
||||
sql,
|
||||
source,
|
||||
target_table,
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
Vec::new(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// As [`Self::run_sql_insert`], plus the literal `VALUES` captured at
|
||||
/// parse (per row, per position; `None` = expression) so the worker
|
||||
/// can validate each literal against its column type before the
|
||||
/// (still verbatim) insert and the error layer can name the offending
|
||||
/// value (ADR-0036 Phase 1).
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn run_sql_insert_with_literals(
|
||||
&self,
|
||||
sql: String,
|
||||
source: Option<String>,
|
||||
target_table: String,
|
||||
listed_columns: Vec<String>,
|
||||
row_source: String,
|
||||
returning: bool,
|
||||
literal_rows: Vec<Vec<Option<Value>>>,
|
||||
) -> Result<InsertResult, DbError> {
|
||||
let (reply, recv) = oneshot::channel();
|
||||
self.send(Request::RunSqlInsert {
|
||||
@@ -1461,6 +1499,7 @@ impl Database {
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
literal_rows,
|
||||
reply,
|
||||
})
|
||||
.await?;
|
||||
@@ -2434,6 +2473,7 @@ fn handle_request(
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
literal_rows,
|
||||
reply,
|
||||
} => {
|
||||
snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_sql_insert(
|
||||
@@ -2445,6 +2485,7 @@ fn handle_request(
|
||||
&listed_columns,
|
||||
&row_source,
|
||||
returning,
|
||||
&literal_rows,
|
||||
));
|
||||
}
|
||||
Request::RunSqlUpdate {
|
||||
@@ -8388,10 +8429,38 @@ fn do_sql_insert(
|
||||
listed_columns: &[String],
|
||||
row_source: &str,
|
||||
returning: bool,
|
||||
literal_rows: &[Vec<Option<Value>>],
|
||||
) -> Result<InsertResult, DbError> {
|
||||
debug!(sql = %sql, table = %target_table, returning, "sql_insert");
|
||||
let canonical_table = require_canonical_table(conn, target_table)?;
|
||||
let target_table = canonical_table.as_str();
|
||||
|
||||
// ADR-0036 Phase 1: validate captured literal VALUES against their
|
||||
// column types BEFORE the (still verbatim) insert runs — sharing the
|
||||
// DSL's per-type validators (`impl_value_for`) for identical wording.
|
||||
// Only literal positions are checked; expression positions (`None`)
|
||||
// are left to the engine. Column mapping mirrors the engine's: an
|
||||
// explicit column list maps by position; natural order maps to the
|
||||
// schema's columns in definition order. An out-of-range position
|
||||
// (arity mismatch) is left for the engine / parse-time diagnostic.
|
||||
// Execution below is unchanged (no binding, no auto-fill change).
|
||||
if literal_rows.iter().any(|r| r.iter().any(Option::is_some)) {
|
||||
let schema = read_schema(conn, target_table)?;
|
||||
let columns: Vec<&str> = if listed_columns.is_empty() {
|
||||
schema.columns.iter().map(|c| c.name.as_str()).collect()
|
||||
} else {
|
||||
listed_columns.iter().map(String::as_str).collect()
|
||||
};
|
||||
for row in literal_rows {
|
||||
for (idx, slot) in row.iter().enumerate() {
|
||||
if let Some(value) = slot
|
||||
&& let Some(col) = columns.get(idx)
|
||||
{
|
||||
impl_value_for(&schema, col, value)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// The `shortid` auto-fill rewrite reconstructs only `INSERT …
|
||||
// VALUES …` and would drop any trailing clause — `ON CONFLICT …`
|
||||
// (3h) and/or `RETURNING …` (3g). `row_source` is the clean
|
||||
|
||||
@@ -422,6 +422,15 @@ pub enum Command {
|
||||
/// `DataResult` when true; otherwise it surfaces the
|
||||
/// affected-row count (+ auto-show) as before.
|
||||
returning: bool,
|
||||
/// Captured literal values per `VALUES` row, per position
|
||||
/// (ADR-0036 Phase 1). `Some(v)` for a bare literal (incl. a
|
||||
/// signed number); `None` for an expression position (nothing
|
||||
/// static to validate). Empty when the row source is a
|
||||
/// `SELECT`/`WITH` query. The worker validates these against the
|
||||
/// column types before the (still verbatim) insert; the error
|
||||
/// enricher reads them to show the offending value. Execution
|
||||
/// itself is unchanged — these are *not* bound.
|
||||
literal_rows: Vec<Vec<Option<Value>>>,
|
||||
},
|
||||
/// A SQL `UPDATE` validated by the walker (ADR-0033 §2,
|
||||
/// advanced mode). Grammar-as-text: the worker executes `sql`
|
||||
|
||||
@@ -950,15 +950,106 @@ fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, Validat
|
||||
// so the validated line runs verbatim (grammar-as-text,
|
||||
// ADR-0030 §4) — no keyword reconstruction.
|
||||
let sql = source.trim().to_string();
|
||||
// Capture literal values per `VALUES` row for app-level type
|
||||
// validation + error enrichment (ADR-0036 Phase 1). Only for a
|
||||
// `VALUES` source (a `SELECT`/`WITH` source has no `values` keyword,
|
||||
// so this stays empty). Bounded to the row-source region by the same
|
||||
// `tail_start` the row_source slice used.
|
||||
let values_start = path
|
||||
.items
|
||||
.iter()
|
||||
.find(|i| matches!(i.kind, MatchedKind::Word("values")))
|
||||
.map(|i| i.span.0);
|
||||
let literal_rows = values_start.map_or_else(Vec::new, |vs| {
|
||||
capture_literal_rows(path, vs, tail_start.unwrap_or(source.len()))
|
||||
});
|
||||
Ok(Command::SqlInsert {
|
||||
sql,
|
||||
target_table,
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning: path_has_returning(path),
|
||||
literal_rows,
|
||||
})
|
||||
}
|
||||
|
||||
/// Capture the literal values of each `VALUES` tuple from the matched
|
||||
/// path (ADR-0036 Phase 1). Each position is `Some(Value)` for a bare
|
||||
/// literal (incl. a signed number — the leading sign is folded into the
|
||||
/// number) and `None` for an expression position (a `func(x)`, `a+1`,
|
||||
/// subquery, column ref — nothing static to validate). Works purely from
|
||||
/// the tokens the walker already matched (no reparse); rows and positions
|
||||
/// are delimited by tuple parens and depth-1 commas. `values_start` is the
|
||||
/// byte offset of the `values` keyword; only items in `[values_start,
|
||||
/// tail_end)` are considered (so any trailing `ON CONFLICT`/`RETURNING`
|
||||
/// clause is excluded).
|
||||
fn capture_literal_rows(
|
||||
path: &MatchedPath,
|
||||
values_start: usize,
|
||||
tail_end: usize,
|
||||
) -> Vec<Vec<Option<Value>>> {
|
||||
let mut rows: Vec<Vec<Option<Value>>> = Vec::new();
|
||||
let mut depth: i32 = 0;
|
||||
let mut cur_row: Vec<Option<Value>> = Vec::new();
|
||||
let mut pos: Vec<&MatchedItem> = Vec::new();
|
||||
for item in &path.items {
|
||||
if item.span.0 < values_start || item.span.0 >= tail_end {
|
||||
continue;
|
||||
}
|
||||
match &item.kind {
|
||||
MatchedKind::Word("values") => {}
|
||||
MatchedKind::Punct('(') => {
|
||||
depth += 1;
|
||||
if depth == 1 {
|
||||
cur_row = Vec::new();
|
||||
pos.clear();
|
||||
} else {
|
||||
pos.push(item);
|
||||
}
|
||||
}
|
||||
MatchedKind::Punct(')') => {
|
||||
if depth == 1 {
|
||||
cur_row.push(classify_value_position(&pos));
|
||||
pos.clear();
|
||||
rows.push(std::mem::take(&mut cur_row));
|
||||
} else if depth > 1 {
|
||||
pos.push(item);
|
||||
}
|
||||
depth -= 1;
|
||||
}
|
||||
MatchedKind::Punct(',') if depth == 1 => {
|
||||
cur_row.push(classify_value_position(&pos));
|
||||
pos.clear();
|
||||
}
|
||||
_ if depth >= 1 => pos.push(item),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
rows
|
||||
}
|
||||
|
||||
/// Classify one `VALUES` position's matched tokens into `Some(Value)` (a
|
||||
/// bare literal) or `None` (an expression). A single literal token, or a
|
||||
/// sign followed by a number, is a literal; anything else is an
|
||||
/// expression (ADR-0036 §1).
|
||||
fn classify_value_position(tokens: &[&MatchedItem]) -> Option<Value> {
|
||||
match tokens {
|
||||
[one] => item_to_value(one),
|
||||
[sign, num]
|
||||
if matches!(sign.kind, MatchedKind::Punct('-') | MatchedKind::Punct('+'))
|
||||
&& matches!(num.kind, MatchedKind::NumberLit) =>
|
||||
{
|
||||
let text = if matches!(sign.kind, MatchedKind::Punct('-')) {
|
||||
format!("-{}", num.text)
|
||||
} else {
|
||||
num.text.clone()
|
||||
};
|
||||
Some(Value::Number(text))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether the matched path contains a `RETURNING` clause
|
||||
/// (ADR-0033 §5, sub-phase 3g). Located by the `returning` *Word
|
||||
/// token* in the path — path-based, so a string literal can't be
|
||||
|
||||
+23
-1
@@ -1570,6 +1570,19 @@ fn user_value_for_column(command: &Command, column: &str) -> Option<crate::dsl::
|
||||
.iter()
|
||||
.find(|(c, _)| c == column)
|
||||
.map(|(_, v)| v.clone()),
|
||||
// ADR-0036 Phase 1: a single-row literal SQL INSERT retains its
|
||||
// captured literals, so a constraint error can name the real
|
||||
// value. Explicit column list only here (natural order needs the
|
||||
// schema); multi-row is skipped (which row conflicted is
|
||||
// ambiguous) — both degrade to the neutral "that value".
|
||||
Command::SqlInsert {
|
||||
listed_columns,
|
||||
literal_rows,
|
||||
..
|
||||
} if literal_rows.len() == 1 && !listed_columns.is_empty() => {
|
||||
let idx = listed_columns.iter().position(|c| c == column)?;
|
||||
literal_rows[0].get(idx).cloned().flatten()
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -2236,8 +2249,17 @@ async fn execute_command_typed(
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
literal_rows,
|
||||
} => database
|
||||
.run_sql_insert(sql, src, target_table, listed_columns, row_source, returning)
|
||||
.run_sql_insert_with_literals(
|
||||
sql,
|
||||
src,
|
||||
target_table,
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
literal_rows,
|
||||
)
|
||||
.await
|
||||
.map(CommandOutcome::Insert),
|
||||
// A SQL `UPDATE` (advanced mode; ADR-0033 §2). Grammar-as-
|
||||
|
||||
@@ -340,17 +340,18 @@ fn replay_constraint_failure_shows_real_names_not_placeholders() {
|
||||
let AppEvent::ReplayFailed { error, .. } = failed else {
|
||||
unreachable!()
|
||||
};
|
||||
// No unsubstituted placeholders (the safety net + enrichment).
|
||||
// No unsubstituted placeholders.
|
||||
assert!(
|
||||
!error.contains("{table}") && !error.contains("{column}") && !error.contains("{value}"),
|
||||
"no unsubstituted placeholders; got: {error}"
|
||||
);
|
||||
// The real table + column are shown (resolved from the engine
|
||||
// message). The offending value is NOT shown: replay parses in
|
||||
// advanced mode → `SqlInsert`, whose values are raw SQL text (ADR-0033
|
||||
// verbatim execution), not retained typed values — so it degrades to
|
||||
// the neutral "that value" rather than leaking `{value}`.
|
||||
// The real table + column are shown (from the engine message), and —
|
||||
// since ADR-0036 Phase 1 retains the captured literal on the SQL
|
||||
// INSERT command — the **real offending value** is shown too (it used
|
||||
// to degrade to the neutral "that value" because `SqlInsert` discarded
|
||||
// its literals).
|
||||
assert!(error.contains("T.email"), "names the real table.column; got: {error}");
|
||||
assert!(error.contains("a@b.com"), "shows the real offending value; got: {error}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -95,6 +95,7 @@ fn run_insert(
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
..
|
||||
} => rt.block_on(db.run_sql_insert(
|
||||
sql,
|
||||
Some(input.to_string()),
|
||||
|
||||
+111
-15
@@ -445,6 +445,7 @@ fn run_sqlinsert(
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
..
|
||||
} => rt.block_on(db.run_sql_insert(
|
||||
sql,
|
||||
Some(input.to_string()),
|
||||
@@ -1037,17 +1038,13 @@ fn autofill_preserves_on_conflict_clause() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() {
|
||||
// CHARACTERIZATION of a real divergence (ADR pending — see the
|
||||
// verbatim-vs-structural DML discussion). The DSL insert path validates
|
||||
// a value against the playground type system at *bind* time
|
||||
// (`Value::bind_for_column` → `validate_date`); the verbatim SQL insert
|
||||
// path hands the literal straight to the engine, whose STRICT `date`
|
||||
// column is TEXT and accepts any string. `2025/01/15` is a malformed
|
||||
// date (slashes, not dashes): the DSL rejects it, advanced-mode SQL
|
||||
// currently accepts it. When the forthcoming ADR routes SQL literal
|
||||
// values through the same validation, FLIP the SQL assertion to expect
|
||||
// a rejection.
|
||||
fn sql_dml_validates_literal_values_like_the_dsl() {
|
||||
// ADR-0036 Phase 1: advanced-mode SQL `INSERT` now validates each
|
||||
// literal value against its column type before the (still verbatim)
|
||||
// insert runs, sharing the DSL's per-type validators. `2025/01/15` is
|
||||
// a malformed date (slashes, not dashes) — the DSL rejects it at bind
|
||||
// time, and advanced-mode SQL now refuses it too (it used to splice the
|
||||
// literal into text and let a STRICT TEXT column accept anything).
|
||||
let (project, db, _d) = open_project_db();
|
||||
let r = rt();
|
||||
r.block_on(db.create_table(
|
||||
@@ -1073,7 +1070,7 @@ fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() {
|
||||
"the DSL insert path validates `date` and rejects 2025/01/15; got {dsl:?}"
|
||||
);
|
||||
|
||||
// SQL path (advanced mode, full pipeline) — currently ACCEPTS it.
|
||||
// SQL path (advanced mode, full pipeline) — now REJECTS it too.
|
||||
std::fs::write(
|
||||
project.path().join("ins.commands"),
|
||||
"insert into T (id, d) values (2, '2025/01/15')\n",
|
||||
@@ -1081,8 +1078,107 @@ fn sql_dml_skips_app_level_value_validation_that_the_dsl_enforces() {
|
||||
.expect("write script");
|
||||
let events = r.block_on(run_replay(&db, project.path(), "ins.commands"));
|
||||
assert!(
|
||||
matches!(events.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1),
|
||||
"CHARACTERIZATION: verbatim SQL insert skips `date` validation and \
|
||||
accepts 2025/01/15 — the gap the ADR will close; events: {events:?}"
|
||||
matches!(events.last(), Some(AppEvent::ReplayFailed { .. })),
|
||||
"advanced-mode SQL validates the `date` literal and refuses \
|
||||
2025/01/15 (ADR-0036 Phase 1); events: {events:?}"
|
||||
);
|
||||
// A valid date still inserts (the bound/verbatim path is unaffected).
|
||||
std::fs::write(
|
||||
project.path().join("ok.commands"),
|
||||
"insert into T (id, d) values (3, '2025-01-15')\n",
|
||||
)
|
||||
.expect("write script");
|
||||
let ok = r.block_on(run_replay(&db, project.path(), "ok.commands"));
|
||||
assert!(
|
||||
matches!(ok.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1),
|
||||
"a well-formed date still inserts; events: {ok:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_insert_expression_value_is_not_validated_and_runs() {
|
||||
// An expression position (not a bare literal) is left to the engine —
|
||||
// ADR-0036 Phase 1 has nothing static to validate, so `1 + 2` into an
|
||||
// int column computes 3 and inserts; it must not be mis-classified as
|
||||
// a literal or rejected.
|
||||
let (project, db, _d) = open_project_db();
|
||||
let r = rt();
|
||||
r.block_on(db.create_table(
|
||||
"T".to_string(),
|
||||
vec![
|
||||
ColumnSpec::new("id", Type::Int),
|
||||
ColumnSpec::new("n", Type::Int),
|
||||
],
|
||||
vec!["id".to_string()],
|
||||
Some("create table T with pk id(int)".to_string()),
|
||||
))
|
||||
.expect("create T");
|
||||
std::fs::write(
|
||||
project.path().join("e.commands"),
|
||||
"insert into T (id, n) values (1, 1 + 2)\n",
|
||||
)
|
||||
.expect("write script");
|
||||
let events = r.block_on(run_replay(&db, project.path(), "e.commands"));
|
||||
assert!(
|
||||
matches!(events.last(), Some(AppEvent::ReplayCompleted { count, .. }) if *count == 1),
|
||||
"the expression value executes (engine computes it); events: {events:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_insert_multi_row_validates_each_literal() {
|
||||
// Validation applies to every literal row; a malformed `date` in the
|
||||
// second tuple is caught (ADR-0036 Phase 1 — execution is verbatim, so
|
||||
// multi-row comes for free).
|
||||
let (project, db, _d) = open_project_db();
|
||||
let r = rt();
|
||||
r.block_on(db.create_table(
|
||||
"T".to_string(),
|
||||
vec![
|
||||
ColumnSpec::new("id", Type::Int),
|
||||
ColumnSpec::new("d", Type::Date),
|
||||
],
|
||||
vec!["id".to_string()],
|
||||
Some("create table T with pk id(int)".to_string()),
|
||||
))
|
||||
.expect("create T");
|
||||
std::fs::write(
|
||||
project.path().join("m.commands"),
|
||||
"insert into T (id, d) values (1, '2025-01-15'), (2, '2025/02/20')\n",
|
||||
)
|
||||
.expect("write script");
|
||||
let events = r.block_on(run_replay(&db, project.path(), "m.commands"));
|
||||
assert!(
|
||||
matches!(events.last(), Some(AppEvent::ReplayFailed { .. })),
|
||||
"the malformed date in the second row is caught; events: {events:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_insert_natural_order_validates_against_schema_columns() {
|
||||
// With no explicit column list, positions map to the schema's columns
|
||||
// in definition order (engine semantics) — validation must use that
|
||||
// mapping, not an explicit list (ADR-0036 Phase 1).
|
||||
let (project, db, _d) = open_project_db();
|
||||
let r = rt();
|
||||
r.block_on(db.create_table(
|
||||
"T".to_string(),
|
||||
vec![
|
||||
ColumnSpec::new("id", Type::Int),
|
||||
ColumnSpec::new("d", Type::Date),
|
||||
],
|
||||
vec!["id".to_string()],
|
||||
Some("create table T with pk id(int)".to_string()),
|
||||
))
|
||||
.expect("create T");
|
||||
std::fs::write(
|
||||
project.path().join("nat.commands"),
|
||||
"insert into T values (1, '2025/02/20')\n",
|
||||
)
|
||||
.expect("write script");
|
||||
let events = r.block_on(run_replay(&db, project.path(), "nat.commands"));
|
||||
assert!(
|
||||
matches!(events.last(), Some(AppEvent::ReplayFailed { .. })),
|
||||
"natural-order insert validates the date against column `d`; events: {events:?}"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -273,6 +273,7 @@ async fn sql_insert(db: &Database, input: &str) {
|
||||
listed_columns,
|
||||
row_source,
|
||||
returning,
|
||||
..
|
||||
} => {
|
||||
db.run_sql_insert(
|
||||
sql,
|
||||
|
||||
Reference in New Issue
Block a user