grammar+db: 3i — insert_arity_mismatch diagnostic (ADR-0033 §8.1)

New dml_insert_arity_diagnostics pass (ERROR): when an explicit
(column_name_list) arity disagrees with a row's arity. VALUES tuples
are checked per-row (each offending tuple emits its own diagnostic on
its span; matched rows stay silent). INSERT … SELECT compares the
first SELECT leg's projection arity, anchored on the first projection
item; a WITH-prefixed row source is skipped (engine still reports it —
a false positive would be worse). No-column-list form deferred
(needs schema; outside the 3i gate).

The VALUES walk stops at the first depth-0 keyword so an ON CONFLICT
(col) conflict target / RETURNING tail is not mis-counted as a value
tuple (caught by the existing upsert_excluded tests during dev).

Catalog key diagnostic.insert_arity_mismatch (engine-neutral).
Tests (+7): single-row + matched + per-row multi-row; INSERT…SELECT
mismatch + matched; ON CONFLICT interaction (only the real tuple
flagged, clean case silent). 1587 pass / 0 fail / 1 ignored. Clippy
clean. Remaining 3i: not_null_missing (needs TableColumn
not_null+default), cross-cut verification, #12 UPSERT DO UPDATE
validation.
This commit is contained in:
claude@clouddev1
2026-05-22 21:50:09 +00:00
parent be63315e61
commit 6db8253c25
3 changed files with 221 additions and 0 deletions
+219
View File
@@ -1172,6 +1172,138 @@ fn dml_auto_column_diagnostics(
diagnostics diagnostics
} }
/// `insert_arity_mismatch` ERROR (ADR-0033 §8.1, sub-phase 3i).
///
/// Walker pre-flight when the explicit `(column_name_list)` arity
/// disagrees with a row's arity — pre-empting the engine's
/// less-helpful "N values for M columns". Two row-source shapes:
///
/// - **VALUES**: each `value_tuple` is checked independently;
/// **each** offending row emits its own diagnostic on that tuple's
/// span (a five-row list with three wrong tuples emits three).
/// - **INSERT … SELECT** (plain, non-`WITH`): the first SELECT leg's
/// projection arity is compared, anchored on the first projection
/// item. A `WITH`-prefixed row source is skipped here (the outer
/// projection isn't the first `select` token) — the engine still
/// reports it; a false positive would be worse than deferring.
///
/// Only fires when an explicit column list is present; the
/// no-column-list form (arity vs the table's full column count)
/// is deferred (needs the schema and is outside the 3i exit gate).
fn dml_insert_arity_diagnostics(path: &MatchedPath) -> Vec<outcome::Diagnostic> {
use crate::dsl::grammar::IdentSource;
use outcome::{Diagnostic, MatchedKind, Severity};
let col_arity = path
.items
.iter()
.filter(|it| {
matches!(
it.kind,
MatchedKind::Ident {
source: IdentSource::Columns,
role: "insert_column",
}
)
})
.count();
if col_arity == 0 {
return Vec::new();
}
// Index of the row-source keyword (first VALUES / SELECT / WITH).
let Some(kw_idx) = path
.items
.iter()
.position(|it| matches!(it.kind, MatchedKind::Word("values" | "select" | "with")))
else {
return Vec::new();
};
let MatchedKind::Word(kw) = path.items[kw_idx].kind else {
return Vec::new();
};
let mut diagnostics = Vec::new();
let tail = &path.items[kw_idx + 1..];
let emit = |span: (usize, usize), actual: usize, diagnostics: &mut Vec<Diagnostic>| {
diagnostics.push(Diagnostic {
severity: Severity::Error,
span,
message: crate::friendly::translate(
"diagnostic.insert_arity_mismatch",
&[
("expected", &col_arity as &dyn std::fmt::Display),
("actual", &actual as &dyn std::fmt::Display),
],
),
});
};
if kw == "values" {
// Per-tuple arity: count value exprs (commas at the tuple's
// interior depth, +1). Nested parens (calls / subqueries)
// sit deeper and their commas are ignored.
let mut depth: i32 = 0;
let mut tuple_arity = 0usize;
let mut tuple_start = 0usize;
for it in tail {
match it.kind {
MatchedKind::Punct('(') => {
if depth == 0 {
tuple_arity = 1;
tuple_start = it.span.0;
}
depth += 1;
}
MatchedKind::Punct(')') => {
depth -= 1;
if depth == 0 && tuple_arity != col_arity {
emit((tuple_start, it.span.1), tuple_arity, &mut diagnostics);
}
}
MatchedKind::Punct(',') if depth == 1 => tuple_arity += 1,
// A depth-0 keyword ends the VALUES clause — the
// tuples are over and what follows (`ON CONFLICT (…)`,
// `RETURNING …`) must NOT be mis-read as a value
// tuple. Value exprs live inside the tuple parens
// (depth ≥ 1), so no legitimate value keyword sits at
// depth 0.
MatchedKind::Word(_) if depth == 0 => break,
_ => {}
}
}
} else if kw == "select" {
// First SELECT leg's projection arity: commas at the leg's
// depth (0 here — INSERT … SELECT is not parenthesised) up to
// the first leg-end / compound keyword. Anchor on the first
// projection item.
let mut depth: i32 = 0;
let mut proj_arity = 1usize;
let mut anchor: Option<(usize, usize)> = None;
for it in tail {
match it.kind {
MatchedKind::Punct('(') => depth += 1,
MatchedKind::Punct(')') => depth -= 1,
MatchedKind::Punct(',') if depth == 0 => proj_arity += 1,
MatchedKind::Word(
"from" | "where" | "group" | "having" | "order" | "limit" | "offset"
| "union" | "intersect" | "except" | "on" | "returning",
) if depth == 0 => break,
_ => {
if depth == 0 && anchor.is_none() {
anchor = Some(it.span);
}
}
}
}
if proj_arity != col_arity {
emit(anchor.unwrap_or(path.items[kw_idx].span), proj_arity, &mut diagnostics);
}
}
diagnostics
}
/// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the /// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the
/// Phase-1 carry-over gap closure). /// Phase-1 carry-over gap closure).
/// ///
@@ -2236,6 +2368,10 @@ fn walk_one_command<'a>(
// ADR-0033 §8.2 — WARNING when a SQL INSERT lists a // ADR-0033 §8.2 — WARNING when a SQL INSERT lists a
// serial/shortid (auto-generated) column explicitly. // serial/shortid (auto-generated) column explicitly.
d.extend(dml_auto_column_diagnostics(&path, ctx.schema)); d.extend(dml_auto_column_diagnostics(&path, ctx.schema));
// ADR-0033 §8.1 — ERROR when a column list's arity disagrees
// with a VALUES tuple (per row) or the INSERT…SELECT
// projection.
d.extend(dml_insert_arity_diagnostics(&path));
// ADR-0032 §10.3 / §11.2 — diagnostics emitted during // ADR-0032 §10.3 / §11.2 — diagnostics emitted during
// the walk by node handlers with direct context the // the walk by node handlers with direct context the
// post-walk passes can't reconstruct (primarily the // post-walk passes can't reconstruct (primarily the
@@ -4059,6 +4195,89 @@ mod tests {
); );
} }
#[test]
fn insert_arity_mismatch_single_row_fires() {
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]);
let diags = diag_keys("sqlinsert into t (a, b) values (1, 2, 3)", &schema);
assert!(
diags.iter().any(|d| d.contains("value(s) are given")),
"3 values for 2 columns should fire; got {diags:?}",
);
}
#[test]
fn insert_arity_match_is_silent() {
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]);
let diags = diag_keys("sqlinsert into t (a, b) values (1, 2)", &schema);
assert!(
!diags.iter().any(|d| d.contains("value(s) are given")),
"matched arity must not fire; got {diags:?}",
);
}
#[test]
fn insert_arity_mismatch_emits_per_row() {
// ADR-0033 §8.1: each offending tuple emits its own
// diagnostic; the matched rows stay silent.
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]);
let diags = diag_keys(
"sqlinsert into t (a, b) values (1, 2), (3, 4, 5), (6), (7, 8)",
&schema,
);
let n = diags.iter().filter(|d| d.contains("value(s) are given")).count();
assert_eq!(n, 2, "rows 2 and 3 mismatch, rows 1 and 4 match; got {diags:?}");
}
#[test]
fn insert_arity_with_on_conflict_flags_only_the_tuple() {
// Regression guard: the VALUES walk must stop at ON CONFLICT —
// its `(col)` conflict target is not a value tuple. A real
// tuple mismatch still fires; the conflict target never does.
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]);
let diags = diag_keys(
"sqlinsert into t (a, b) values (1, 2, 3) on conflict (a) do nothing",
&schema,
);
let n = diags.iter().filter(|d| d.contains("value(s) are given")).count();
assert_eq!(n, 1, "only the 3-value tuple is flagged, not the conflict target; got {diags:?}");
}
#[test]
fn insert_arity_clean_with_on_conflict_is_silent() {
// Matched tuple arity + an ON CONFLICT target must be fully
// silent (the conflict target isn't counted as a tuple).
let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]);
let diags = diag_keys(
"sqlinsert into t (a, b) values (1, 2) on conflict (a) do update set b = excluded.b",
&schema,
);
assert!(
!diags.iter().any(|d| d.contains("value(s) are given")),
"no arity diagnostic expected; got {diags:?}",
);
}
#[test]
fn insert_select_arity_mismatch_fires() {
// INSERT … SELECT: 1 listed column vs a 2-item projection.
let schema = two_table_schema(); // a(id,name), b(id,total)
let diags = diag_keys("sqlinsert into a (id) select id, total from b", &schema);
assert!(
diags.iter().any(|d| d.contains("value(s) are given")),
"2-col projection into 1-col list should fire; got {diags:?}",
);
}
#[test]
fn insert_select_arity_match_is_silent() {
let schema = two_table_schema();
let diags = diag_keys("sqlinsert into a (id, name) select id, total from b", &schema);
assert!(
!diags.iter().any(|d| d.contains("value(s) are given")),
"matched projection arity must not fire; got {diags:?}",
);
}
#[test] #[test]
fn auto_column_overridden_fires_on_explicit_serial() { fn auto_column_overridden_fires_on_explicit_serial() {
// ADR-0033 §8.2: listing a serial column explicitly warns. // ADR-0033 §8.2: listing a serial column explicitly warns.
+1
View File
@@ -44,6 +44,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("diagnostic.cte_arity_mismatch", &["cte", "declared", "actual"]), ("diagnostic.cte_arity_mismatch", &["cte", "declared", "actual"]),
("diagnostic.duplicate_cte", &["name"]), ("diagnostic.duplicate_cte", &["name"]),
("diagnostic.eq_null", &[]), ("diagnostic.eq_null", &[]),
("diagnostic.insert_arity_mismatch", &["expected", "actual"]),
("diagnostic.like_numeric", &["column", "type"]), ("diagnostic.like_numeric", &["column", "type"]),
("diagnostic.projection_alias_misplaced", &["alias", "clause"]), ("diagnostic.projection_alias_misplaced", &["alias", "clause"]),
("diagnostic.type_mismatch", &["column", "type"]), ("diagnostic.type_mismatch", &["column", "type"]),
+1
View File
@@ -503,6 +503,7 @@ diagnostic:
duplicate_cte: "duplicate `WITH` table name: `{name}`" duplicate_cte: "duplicate `WITH` table name: `{name}`"
# ADR-0033 §8 — Phase-3 DML diagnostic keys. # ADR-0033 §8 — Phase-3 DML diagnostic keys.
auto_column_overridden: "column `{column}` is auto-generated (`{type}`); providing an explicit value bypasses the auto-counter and may collide with later auto-generated values" auto_column_overridden: "column `{column}` is auto-generated (`{type}`); providing an explicit value bypasses the auto-counter and may collide with later auto-generated values"
insert_arity_mismatch: "the column list names {expected} column(s) but {actual} value(s) are given"
# Engine-error translations: an engine-rejected SQL statement # Engine-error translations: an engine-rejected SQL statement
# reaches the friendly-error layer (ADR-0019) and these keys # reaches the friendly-error layer (ADR-0019) and these keys