From 6ff9144c7ae946810b206c288f3b7a2074a67ed1 Mon Sep 17 00:00:00 2001
From: "claude@clouddev1"
Date: Thu, 21 May 2026 22:08:25 +0000
Subject: [PATCH] =?UTF-8?q?grammar:=203c=20=E2=80=94=20INSERT=20=E2=80=A6?=
 =?UTF-8?q?=20SELECT=20row=20source=20(ADR-0033=20=C2=A74)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the INSERT row source a Choice between the VALUES clause and
Subgrammar(&sql_select::SQL_SELECT_COMPOUND). SQL_SELECT_COMPOUND is
itself a Choice that admits a leading WITH, so a WITH-prefixed SELECT
row source (R4) parses through it for free; the two branches start on
disjoint keywords (values vs select/with) so the Choice never
ambiguously commits.

No worker change — do_sql_insert already executes the validated SQL
and re-persists, and the engine handles insert-from-query.

Tests: grammar accept (plain / column-list+projection /
WITH-prefixed / trailing-semi) and reject (__rdbms_* on the SELECT's
FROM slot, incomplete select); integration parse-path lowering +
worker round-trip (rows land, CSV re-persisted) incl. R4 WITH
end-to-end; walker cross-cut that the Phase-2 unknown_column
diagnostic fires on the INSERT…SELECT projection; DA-gate test that a
self-sourced INSERT…SELECT runs as a plain insert (no cascade
summary — that is DELETE-only).

Still behind the dev `sqlinsert` entry word (shared `insert` is 3j).
1493 tests green, clippy clean.
---
 src/dsl/grammar/sql_insert.rs |  56 +++++++++++-
 src/dsl/walker/mod.rs         |  18 ++++
 tests/sql_insert.rs           | 158 ++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+), 2 deletions(-)

diff --git a/src/dsl/grammar/sql_insert.rs b/src/dsl/grammar/sql_insert.rs
index 4fad97b..37974cb 100644
--- a/src/dsl/grammar/sql_insert.rs
+++ b/src/dsl/grammar/sql_insert.rs
@@ -14,7 +14,7 @@
 //! sub-phases.
 
 use crate::dsl::grammar::sql_expr;
-use crate::dsl::grammar::sql_select::reject_internal_table;
+use crate::dsl::grammar::sql_select::{SQL_SELECT_COMPOUND, reject_internal_table};
 use crate::dsl::grammar::{IdentSource, Node, Word};
 
 static COMMA: Node = Node::Punct(',');
@@ -95,11 +95,21 @@ static VALUES_CLAUSE_NODES: &[Node] = &[
 /// `VALUES tuple (',' tuple)*` — single- or multi-row.
 const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES);
 
+static ROW_SOURCE_CHOICES: &[Node] = &[VALUES_CLAUSE, Node::Subgrammar(&SQL_SELECT_COMPOUND)];
+/// The row source: either a `VALUES` clause or a `SELECT`
+/// compound (ADR-0033 §4, sub-phase 3c). `SQL_SELECT_COMPOUND`
+/// is itself a Choice that admits a leading `WITH` (ADR-0032
+/// §10.3), so `INSERT INTO t WITH x AS (…) SELECT …` parses
+/// through this slot for free (R4). The two branches start on
+/// disjoint keywords (`values` vs `select`/`with`), so the
+/// Choice never ambiguously commits.
+const ROW_SOURCE: Node = Node::Choice(ROW_SOURCE_CHOICES);
+
 static SQL_INSERT_TAIL_NODES: &[Node] = &[
     Node::Word(Word::keyword("into")),
     TARGET_TABLE,
     OPTIONAL_COLUMN_LIST,
-    VALUES_CLAUSE,
+    ROW_SOURCE,
     Node::Optional(&Node::Punct(';')),
 ];
 
@@ -180,6 +190,48 @@ mod tests {
         bad("into __rdbms_playground_relationships (a) values (1)");
     }
 
+    #[test]
+    fn select_row_source() {
+        // 3c: the row source is a Choice between VALUES and a
+        // SELECT compound (which itself admits a leading WITH).
+        good("into archive select * from orders");
+        good("into archive select * from orders where created < '2025-01-01'");
+        good("into archive select * from orders;");
+    }
+
+    #[test]
+    fn select_row_source_with_column_list() {
+        good("into target (a, b) select x, y from source");
+        good("into target (id) select id from source");
+    }
+
+    #[test]
+    fn with_prefixed_select_row_source() {
+        // R4 invariant: a WITH-prefixed SELECT row source parses
+        // through SQL_SELECT_COMPOUND's WITH-prefixed branch.
+        good("into archive with t as (select * from orders) select * from t");
+        good(
+            "into summary (id, total) with t as (select * from orders) \
+             select id, total from t",
+        );
+    }
+
+    #[test]
+    fn select_row_source_rejects_internal_from_table() {
+        // DA gate: the SELECT's FROM slot must still reject
+        // `__rdbms_*` tables (Phase-2 gate, not silently dropped on
+        // the DML path).
+        bad("into archive select * from __rdbms_playground_columns");
+    }
+
+    #[test]
+    fn incomplete_select_row_source_rejected() {
+        // An incomplete SELECT (no projection, or a dangling FROM)
+        // is not a complete row source.
+        bad("into archive select");
+        bad("into archive select * from");
+    }
+
     #[test]
     fn structurally_incomplete_or_wrong_rejected() {
         // Missing VALUES.
diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs
index ae0ec65..b200f17 100644
--- a/src/dsl/walker/mod.rs
+++ b/src/dsl/walker/mod.rs
@@ -3915,6 +3915,24 @@ mod tests {
         );
     }
 
+    #[test]
+    fn insert_select_unknown_projection_column_is_error() {
+        // ADR-0033 sub-phase 3c cross-cut: the Phase-2
+        // schema-existence pass fires on a SELECT row source
+        // embedded in an INSERT (no re-implementation needed).
+        // `nonexistent_col` is not a column of `a`.
+        let schema = two_table_schema();
+        let diags = diag_keys(
+            "sqlinsert into b select nonexistent_col from a",
+            &schema,
+        );
+        assert!(
+            diags.iter().any(|d| d.contains("no such column")),
+            "expected unknown_column on the INSERT…SELECT projection; \
+             got {diags:?}",
+        );
+    }
+
     #[test]
     fn cte_name_is_valid_table_source() {
         let schema = schema_with("base", &[("id", Type::Int)]);
diff --git a/tests/sql_insert.rs b/tests/sql_insert.rs
index e47b869..6aa0e6e 100644
--- a/tests/sql_insert.rs
+++ b/tests/sql_insert.rs
@@ -217,3 +217,161 @@ fn parse_path_rejects_internal_target_table() {
         "an internal `__rdbms_*` target must be rejected: {result:?}",
     );
 }
+
+// =================================================================
+// Sub-phase 3c — INSERT … SELECT
+// =================================================================
+
+/// Create a two-column table `name(a int pk, b text)`.
+fn create_named(db: &Database, rt: &tokio::runtime::Runtime, name: &str) {
+    rt.block_on(db.create_table(
+        name.to_string(),
+        vec![
+            ColumnSpec::new("a", Type::Int),
+            ColumnSpec::new("b", Type::Text),
+        ],
+        vec!["a".to_string()],
+        None,
+    ))
+    .unwrap_or_else(|e| panic!("create table {name}: {e:?}"));
+}
+
+#[test]
+fn parse_path_lowers_insert_select_to_command() {
+    let command = parse_command("sqlinsert into archive select * from source")
+        .expect("INSERT … SELECT parses in advanced mode");
+    match command {
+        Command::SqlInsert { sql, target_table } => {
+            assert_eq!(sql, "insert into archive select * from source");
+            assert_eq!(target_table, "archive");
+        }
+        other => panic!("expected Command::SqlInsert, got {other:?}"),
+    }
+}
+
+#[test]
+fn parse_path_lowers_with_prefixed_insert_select() {
+    // R4: a WITH-prefixed SELECT row source lowers verbatim.
+    let command = parse_command(
+        "sqlinsert into archive with t as (select * from orders) select * from t",
+    )
+    .expect("WITH-prefixed INSERT … SELECT parses");
+    match command {
+        Command::SqlInsert { sql, target_table } => {
+            assert_eq!(
+                sql,
+                "insert into archive with t as (select * from orders) select * from t",
+            );
+            assert_eq!(target_table, "archive");
+        }
+        other => panic!("expected Command::SqlInsert, got {other:?}"),
+    }
+}
+
+#[test]
+fn insert_select_copies_rows_and_persists() {
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    create_named(&db, &rt, "source");
+    create_named(&db, &rt, "archive");
+    rt.block_on(db.run_sql_insert(
+        "insert into source (a, b) values (1, 'one'), (2, 'two')".to_string(),
+        None,
+        "source".to_string(),
+    ))
+    .expect("seed source");
+    let result = rt
+        .block_on(db.run_sql_insert(
+            "insert into archive select * from source".to_string(),
+            Some("insert into archive select * from source".to_string()),
+            "archive".to_string(),
+        ))
+        .expect("INSERT … SELECT runs");
+    assert_eq!(result.rows_affected, 2, "both source rows copied");
+    let csv = read_csv(&project, "archive").expect("archive.csv written");
+    assert!(
+        csv.contains("one") && csv.contains("two"),
+        "archive CSV reflects the copied rows: {csv:?}",
+    );
+}
+
+#[test]
+fn insert_select_with_column_list_and_projection_persists() {
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    create_named(&db, &rt, "source");
+    create_named(&db, &rt, "target");
+    rt.block_on(db.run_sql_insert(
+        "insert into source (a, b) values (5, 'five')".to_string(),
+        None,
+        "source".to_string(),
+    ))
+    .expect("seed source");
+    let result = rt
+        .block_on(db.run_sql_insert(
+            "insert into target (a, b) select a, b from source".to_string(),
+            None,
+            "target".to_string(),
+        ))
+        .expect("column-list + projection INSERT … SELECT runs");
+    assert_eq!(result.rows_affected, 1);
+    let csv = read_csv(&project, "target").expect("target.csv written");
+    assert!(csv.contains("five"), "target CSV reflects the row: {csv:?}");
+}
+
+#[test]
+fn with_prefixed_insert_select_runs_and_persists() {
+    // R4 end-to-end: the CTE row source executes and lands rows.
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    create_named(&db, &rt, "orders");
+    create_named(&db, &rt, "archive");
+    rt.block_on(db.run_sql_insert(
+        "insert into orders (a, b) values (1, 'alpha'), (2, 'beta')".to_string(),
+        None,
+        "orders".to_string(),
+    ))
+    .expect("seed orders");
+    let result = rt
+        .block_on(db.run_sql_insert(
+            "insert into archive with t as (select * from orders) select * from t".to_string(),
+            None,
+            "archive".to_string(),
+        ))
+        .expect("WITH-prefixed INSERT … SELECT runs");
+    assert_eq!(result.rows_affected, 2);
+    let csv = read_csv(&project, "archive").expect("archive.csv written");
+    assert!(
+        csv.contains("alpha") && csv.contains("beta"),
+        "archive CSV holds the seeded values (not just header letters): {csv:?}",
+    );
+}
+
+#[test]
+fn insert_select_from_self_runs_as_plain_insert() {
+    // DA gate: INSERT … SELECT where the source is the target
+    // executes as a plain insert (InsertResult — no cascade
+    // summary; cascade output is a DELETE-only concept, 3f).
+    let (project, db, _dir) = open_project_db();
+    let rt = rt();
+    create_named(&db, &rt, "T");
+    rt.block_on(db.run_sql_insert(
+        "insert into T (a, b) values (1, 'x'), (2, 'y')".to_string(),
+        None,
+        "T".to_string(),
+    ))
+    .expect("seed");
+    let result = rt
+        .block_on(db.run_sql_insert(
+            "insert into T select a + 10, b from T".to_string(),
+            None,
+            "T".to_string(),
+        ))
+        .expect("self-sourced INSERT … SELECT runs");
+    assert_eq!(result.rows_affected, 2, "two rows copied with shifted PK");
+    let csv = read_csv(&project, "T").expect("T.csv written");
+    assert!(
+        csv.contains("11") && csv.contains("12"),
+        "the shifted-PK copies landed: {csv:?}",
+    );
+}