diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index 2c6c662..1543d36 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -1172,6 +1172,138 @@ fn dml_auto_column_diagnostics( diagnostics } +/// `insert_arity_mismatch` ERROR (ADR-0033 §8.1, sub-phase 3i). +/// +/// Walker pre-flight when the explicit `(column_name_list)` arity +/// disagrees with a row's arity — pre-empting the engine's +/// less-helpful "N values for M columns". Two row-source shapes: +/// +/// - **VALUES**: each `value_tuple` is checked independently; +/// **each** offending row emits its own diagnostic on that tuple's +/// span (a five-row list with three wrong tuples emits three). +/// - **INSERT … SELECT** (plain, non-`WITH`): the first SELECT leg's +/// projection arity is compared, anchored on the first projection +/// item. A `WITH`-prefixed row source is skipped here (the outer +/// projection isn't the first `select` token) — the engine still +/// reports it; a false positive would be worse than deferring. +/// +/// Only fires when an explicit column list is present; the +/// no-column-list form (arity vs the table's full column count) +/// is deferred (needs the schema and is outside the 3i exit gate). +fn dml_insert_arity_diagnostics(path: &MatchedPath) -> Vec { + use crate::dsl::grammar::IdentSource; + use outcome::{Diagnostic, MatchedKind, Severity}; + + let col_arity = path + .items + .iter() + .filter(|it| { + matches!( + it.kind, + MatchedKind::Ident { + source: IdentSource::Columns, + role: "insert_column", + } + ) + }) + .count(); + if col_arity == 0 { + return Vec::new(); + } + + // Index of the row-source keyword (first VALUES / SELECT / WITH). + let Some(kw_idx) = path + .items + .iter() + .position(|it| matches!(it.kind, MatchedKind::Word("values" | "select" | "with"))) + else { + return Vec::new(); + }; + let MatchedKind::Word(kw) = path.items[kw_idx].kind else { + return Vec::new(); + }; + + let mut diagnostics = Vec::new(); + let tail = &path.items[kw_idx + 1..]; + + let emit = |span: (usize, usize), actual: usize, diagnostics: &mut Vec| { + diagnostics.push(Diagnostic { + severity: Severity::Error, + span, + message: crate::friendly::translate( + "diagnostic.insert_arity_mismatch", + &[ + ("expected", &col_arity as &dyn std::fmt::Display), + ("actual", &actual as &dyn std::fmt::Display), + ], + ), + }); + }; + + if kw == "values" { + // Per-tuple arity: count value exprs (commas at the tuple's + // interior depth, +1). Nested parens (calls / subqueries) + // sit deeper and their commas are ignored. + let mut depth: i32 = 0; + let mut tuple_arity = 0usize; + let mut tuple_start = 0usize; + for it in tail { + match it.kind { + MatchedKind::Punct('(') => { + if depth == 0 { + tuple_arity = 1; + tuple_start = it.span.0; + } + depth += 1; + } + MatchedKind::Punct(')') => { + depth -= 1; + if depth == 0 && tuple_arity != col_arity { + emit((tuple_start, it.span.1), tuple_arity, &mut diagnostics); + } + } + MatchedKind::Punct(',') if depth == 1 => tuple_arity += 1, + // A depth-0 keyword ends the VALUES clause — the + // tuples are over and what follows (`ON CONFLICT (…)`, + // `RETURNING …`) must NOT be mis-read as a value + // tuple. Value exprs live inside the tuple parens + // (depth ≥ 1), so no legitimate value keyword sits at + // depth 0. + MatchedKind::Word(_) if depth == 0 => break, + _ => {} + } + } + } else if kw == "select" { + // First SELECT leg's projection arity: commas at the leg's + // depth (0 here — INSERT … SELECT is not parenthesised) up to + // the first leg-end / compound keyword. Anchor on the first + // projection item. + let mut depth: i32 = 0; + let mut proj_arity = 1usize; + let mut anchor: Option<(usize, usize)> = None; + for it in tail { + match it.kind { + MatchedKind::Punct('(') => depth += 1, + MatchedKind::Punct(')') => depth -= 1, + MatchedKind::Punct(',') if depth == 0 => proj_arity += 1, + MatchedKind::Word( + "from" | "where" | "group" | "having" | "order" | "limit" | "offset" + | "union" | "intersect" | "except" | "on" | "returning", + ) if depth == 0 => break, + _ => { + if depth == 0 && anchor.is_none() { + anchor = Some(it.span); + } + } + } + } + if proj_arity != col_arity { + emit(anchor.unwrap_or(path.items[kw_idx].span), proj_arity, &mut diagnostics); + } + } + diagnostics +} + /// SQL-expression predicate-warning pass (ADR-0032 §11.6 — the /// Phase-1 carry-over gap closure). /// @@ -2236,6 +2368,10 @@ fn walk_one_command<'a>( // ADR-0033 §8.2 — WARNING when a SQL INSERT lists a // serial/shortid (auto-generated) column explicitly. d.extend(dml_auto_column_diagnostics(&path, ctx.schema)); + // ADR-0033 §8.1 — ERROR when a column list's arity disagrees + // with a VALUES tuple (per row) or the INSERT…SELECT + // projection. + d.extend(dml_insert_arity_diagnostics(&path)); // ADR-0032 §10.3 / §11.2 — diagnostics emitted during // the walk by node handlers with direct context the // post-walk passes can't reconstruct (primarily the @@ -4059,6 +4195,89 @@ mod tests { ); } + #[test] + fn insert_arity_mismatch_single_row_fires() { + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]); + let diags = diag_keys("sqlinsert into t (a, b) values (1, 2, 3)", &schema); + assert!( + diags.iter().any(|d| d.contains("value(s) are given")), + "3 values for 2 columns should fire; got {diags:?}", + ); + } + + #[test] + fn insert_arity_match_is_silent() { + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]); + let diags = diag_keys("sqlinsert into t (a, b) values (1, 2)", &schema); + assert!( + !diags.iter().any(|d| d.contains("value(s) are given")), + "matched arity must not fire; got {diags:?}", + ); + } + + #[test] + fn insert_arity_mismatch_emits_per_row() { + // ADR-0033 §8.1: each offending tuple emits its own + // diagnostic; the matched rows stay silent. + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]); + let diags = diag_keys( + "sqlinsert into t (a, b) values (1, 2), (3, 4, 5), (6), (7, 8)", + &schema, + ); + let n = diags.iter().filter(|d| d.contains("value(s) are given")).count(); + assert_eq!(n, 2, "rows 2 and 3 mismatch, rows 1 and 4 match; got {diags:?}"); + } + + #[test] + fn insert_arity_with_on_conflict_flags_only_the_tuple() { + // Regression guard: the VALUES walk must stop at ON CONFLICT — + // its `(col)` conflict target is not a value tuple. A real + // tuple mismatch still fires; the conflict target never does. + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]); + let diags = diag_keys( + "sqlinsert into t (a, b) values (1, 2, 3) on conflict (a) do nothing", + &schema, + ); + let n = diags.iter().filter(|d| d.contains("value(s) are given")).count(); + assert_eq!(n, 1, "only the 3-value tuple is flagged, not the conflict target; got {diags:?}"); + } + + #[test] + fn insert_arity_clean_with_on_conflict_is_silent() { + // Matched tuple arity + an ON CONFLICT target must be fully + // silent (the conflict target isn't counted as a tuple). + let schema = schema_with("t", &[("a", Type::Int), ("b", Type::Int)]); + let diags = diag_keys( + "sqlinsert into t (a, b) values (1, 2) on conflict (a) do update set b = excluded.b", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("value(s) are given")), + "no arity diagnostic expected; got {diags:?}", + ); + } + + #[test] + fn insert_select_arity_mismatch_fires() { + // INSERT … SELECT: 1 listed column vs a 2-item projection. + let schema = two_table_schema(); // a(id,name), b(id,total) + let diags = diag_keys("sqlinsert into a (id) select id, total from b", &schema); + assert!( + diags.iter().any(|d| d.contains("value(s) are given")), + "2-col projection into 1-col list should fire; got {diags:?}", + ); + } + + #[test] + fn insert_select_arity_match_is_silent() { + let schema = two_table_schema(); + let diags = diag_keys("sqlinsert into a (id, name) select id, total from b", &schema); + assert!( + !diags.iter().any(|d| d.contains("value(s) are given")), + "matched projection arity must not fire; got {diags:?}", + ); + } + #[test] fn auto_column_overridden_fires_on_explicit_serial() { // ADR-0033 §8.2: listing a serial column explicitly warns. diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 72203c9..0863eab 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -44,6 +44,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("diagnostic.cte_arity_mismatch", &["cte", "declared", "actual"]), ("diagnostic.duplicate_cte", &["name"]), ("diagnostic.eq_null", &[]), + ("diagnostic.insert_arity_mismatch", &["expected", "actual"]), ("diagnostic.like_numeric", &["column", "type"]), ("diagnostic.projection_alias_misplaced", &["alias", "clause"]), ("diagnostic.type_mismatch", &["column", "type"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 6fb47bb..e7a0b06 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -503,6 +503,7 @@ diagnostic: duplicate_cte: "duplicate `WITH` table name: `{name}`" # ADR-0033 §8 — Phase-3 DML diagnostic keys. auto_column_overridden: "column `{column}` is auto-generated (`{type}`); providing an explicit value bypasses the auto-counter and may collide with later auto-generated values" + insert_arity_mismatch: "the column list names {expected} column(s) but {actual} value(s) are given" # Engine-error translations: an engine-rejected SQL statement # reaches the friendly-error layer (ADR-0019) and these keys