grammar: 3c — INSERT … SELECT row source (ADR-0033 §4)

Make the INSERT row source a Choice between the VALUES clause and
Subgrammar(&sql_select::SQL_SELECT_COMPOUND). SQL_SELECT_COMPOUND
is itself a Choice that admits a leading WITH, so a WITH-prefixed
SELECT row source (R4) parses through it for free; the two
branches start on disjoint keywords (values vs select/with) so the
Choice never ambiguously commits. No worker change — do_sql_insert
already executes the validated SQL and re-persists, and the engine
handles insert-from-query.

Tests: grammar accept (plain / column-list+projection / WITH-
prefixed / trailing-semi) and reject (__rdbms_* on the SELECT's
FROM slot, incomplete select); integration parse-path lowering +
worker round-trip (rows land, CSV re-persisted) incl. R4 WITH end-
to-end; walker cross-cut that the Phase-2 unknown_column diagnostic
fires on the INSERT…SELECT projection; DA-gate test that a self-
sourced INSERT…SELECT runs as a plain insert (no cascade summary —
that is DELETE-only). Still behind the dev `sqlinsert` entry word
(shared `insert` is 3j). 1493 tests green, clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-21 22:08:25 +00:00
parent 7f68a53f86
commit 6ff9144c7a
3 changed files with 230 additions and 2 deletions
+54 -2
View File
@@ -14,7 +14,7 @@
//! sub-phases. //! sub-phases.
use crate::dsl::grammar::sql_expr; use crate::dsl::grammar::sql_expr;
use crate::dsl::grammar::sql_select::reject_internal_table; use crate::dsl::grammar::sql_select::{SQL_SELECT_COMPOUND, reject_internal_table};
use crate::dsl::grammar::{IdentSource, Node, Word}; use crate::dsl::grammar::{IdentSource, Node, Word};
static COMMA: Node = Node::Punct(','); static COMMA: Node = Node::Punct(',');
@@ -95,11 +95,21 @@ static VALUES_CLAUSE_NODES: &[Node] = &[
/// `VALUES tuple (',' tuple)*` — single- or multi-row. /// `VALUES tuple (',' tuple)*` — single- or multi-row.
const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES); const VALUES_CLAUSE: Node = Node::Seq(VALUES_CLAUSE_NODES);
/// The row source: either a `VALUES` clause or a `SELECT`
/// compound (ADR-0033 §4, sub-phase 3c). `SQL_SELECT_COMPOUND`
/// is itself a Choice that admits a leading `WITH` (ADR-0032
/// §10.3), so `INSERT INTO t WITH x AS (…) SELECT …` parses
/// through this slot for free (R4). The two branches start on
/// disjoint keywords (`values` vs `select`/`with`), so the
/// Choice never ambiguously commits.
///
/// Alternatives, in order: the inline `VALUES` clause, then the
/// SELECT compound reached through `Node::Subgrammar`.
static ROW_SOURCE_CHOICES: &[Node] = &[VALUES_CLAUSE, Node::Subgrammar(&SQL_SELECT_COMPOUND)];
/// `VALUES …` or `SELECT …` — the `Node::Choice` over
/// `ROW_SOURCE_CHOICES`; this is the node placed in the
/// row-source position of the INSERT tail.
const ROW_SOURCE: Node = Node::Choice(ROW_SOURCE_CHOICES);
static SQL_INSERT_TAIL_NODES: &[Node] = &[ static SQL_INSERT_TAIL_NODES: &[Node] = &[
Node::Word(Word::keyword("into")), Node::Word(Word::keyword("into")),
TARGET_TABLE, TARGET_TABLE,
OPTIONAL_COLUMN_LIST, OPTIONAL_COLUMN_LIST,
VALUES_CLAUSE, ROW_SOURCE,
Node::Optional(&Node::Punct(';')), Node::Optional(&Node::Punct(';')),
]; ];
@@ -180,6 +190,48 @@ mod tests {
bad("into __rdbms_playground_relationships (a) values (1)"); bad("into __rdbms_playground_relationships (a) values (1)");
} }
#[test]
fn select_row_source() {
    // Sub-phase 3c: besides VALUES, the row source may be a SELECT
    // compound. Exercise a bare SELECT, one with a WHERE filter,
    // and one followed by a trailing semicolon.
    let accepted = [
        "into archive select * from orders",
        "into archive select * from orders where created < '2025-01-01'",
        "into archive select * from orders;",
    ];
    for input in accepted {
        good(input);
    }
}
#[test]
fn select_row_source_with_column_list() {
    // A target column list may sit between the table name and the
    // SELECT row source; try a two-column and a one-column list.
    let accepted = [
        "into target (a, b) select x, y from source",
        "into target (id) select id from source",
    ];
    for input in accepted {
        good(input);
    }
}
#[test]
fn with_prefixed_select_row_source() {
    // R4 invariant: a SELECT row source that opens with WITH is
    // accepted, both without and with a target column list.
    let accepted = [
        "into archive with t as (select * from orders) select * from t",
        "into summary (id, total) with t as (select * from orders) select id, total from t",
    ];
    for input in accepted {
        good(input);
    }
}
#[test]
fn select_row_source_rejects_internal_from_table() {
    // DA gate: an `__rdbms_*` table named in the SELECT's FROM slot
    // must be rejected on the DML path too (Phase-2 gate), rather
    // than slipping through because it is nested in an INSERT.
    let internal_source = "into archive select * from __rdbms_playground_columns";
    bad(internal_source);
}
#[test]
fn incomplete_select_row_source_rejected() {
    // A truncated SELECT is not a complete row source: first a bare
    // `select` with no projection, then a `from` with no table.
    let rejected = ["into archive select", "into archive select * from"];
    for input in rejected {
        bad(input);
    }
}
#[test] #[test]
fn structurally_incomplete_or_wrong_rejected() { fn structurally_incomplete_or_wrong_rejected() {
// Missing VALUES. // Missing VALUES.
+18
View File
@@ -3915,6 +3915,24 @@ mod tests {
); );
} }
#[test]
fn insert_select_unknown_projection_column_is_error() {
    // ADR-0033 sub-phase 3c cross-cut: a SELECT row source nested
    // in an INSERT is still covered by the Phase-2
    // schema-existence pass, so nothing is re-implemented here.
    // The projection names `nonexistent_col`, which `a` lacks.
    let schema = two_table_schema();
    let input = "sqlinsert into b select nonexistent_col from a";
    let diags = diag_keys(input, &schema);
    let reported = diags.iter().any(|key| key.contains("no such column"));
    assert!(
        reported,
        "expected unknown_column on the INSERT…SELECT projection; \
         got {diags:?}",
    );
}
#[test] #[test]
fn cte_name_is_valid_table_source() { fn cte_name_is_valid_table_source() {
let schema = schema_with("base", &[("id", Type::Int)]); let schema = schema_with("base", &[("id", Type::Int)]);
+158
View File
@@ -217,3 +217,161 @@ fn parse_path_rejects_internal_target_table() {
"an internal `__rdbms_*` target must be rejected: {result:?}", "an internal `__rdbms_*` target must be rejected: {result:?}",
); );
} }
// =================================================================
// Sub-phase 3c — INSERT … SELECT
// =================================================================
/// Test helper: create table `name` with an `Int` column `a` and a
/// `Text` column `b`, using `a` as the sole primary-key column.
///
/// Blocks on `rt` until the create call finishes and panics with the
/// table name and the error if it fails.
fn create_named(db: &Database, rt: &tokio::runtime::Runtime, name: &str) {
    let columns = vec![
        ColumnSpec::new("a", Type::Int),
        ColumnSpec::new("b", Type::Text),
    ];
    let primary_key = vec!["a".to_string()];
    let outcome = rt.block_on(db.create_table(name.to_string(), columns, primary_key, None));
    if let Err(e) = outcome {
        panic!("create table {name}: {e:?}");
    }
}
#[test]
fn parse_path_lowers_insert_select_to_command() {
    // The `sqlinsert` entry word lowers to a plain `insert`
    // statement carried in `Command::SqlInsert`, with the target
    // table reported alongside it.
    let parsed = parse_command("sqlinsert into archive select * from source")
        .expect("INSERT … SELECT parses in advanced mode");
    let (sql, target_table) = match parsed {
        Command::SqlInsert { sql, target_table } => (sql, target_table),
        other => panic!("expected Command::SqlInsert, got {other:?}"),
    };
    assert_eq!(sql, "insert into archive select * from source");
    assert_eq!(target_table, "archive");
}
#[test]
fn parse_path_lowers_with_prefixed_insert_select() {
    // R4: with a WITH-prefixed SELECT row source, everything after
    // the entry word is carried into the lowered SQL unchanged.
    let parsed = parse_command(
        "sqlinsert into archive with t as (select * from orders) select * from t",
    )
    .expect("WITH-prefixed INSERT … SELECT parses");
    let (sql, target_table) = match parsed {
        Command::SqlInsert { sql, target_table } => (sql, target_table),
        other => panic!("expected Command::SqlInsert, got {other:?}"),
    };
    assert_eq!(
        sql,
        "insert into archive with t as (select * from orders) select * from t",
    );
    assert_eq!(target_table, "archive");
}
#[test]
fn insert_select_copies_rows_and_persists() {
    // Worker round-trip: seed `source`, copy it into `archive` via
    // INSERT … SELECT, then check the affected-row count and the
    // CSV read back for `archive`.
    let (project, db, _dir) = open_project_db();
    let runtime = rt();
    for table in ["source", "archive"] {
        create_named(&db, &runtime, table);
    }

    let seed_sql = "insert into source (a, b) values (1, 'one'), (2, 'two')";
    runtime
        .block_on(db.run_sql_insert(seed_sql.to_string(), None, "source".to_string()))
        .expect("seed source");

    // This call passes the statement text in both the first and the
    // second argument, as the original test did.
    let copy_sql = "insert into archive select * from source";
    let copied = runtime
        .block_on(db.run_sql_insert(
            copy_sql.to_string(),
            Some(copy_sql.to_string()),
            "archive".to_string(),
        ))
        .expect("INSERT … SELECT runs");
    assert_eq!(copied.rows_affected, 2, "both source rows copied");

    let csv = read_csv(&project, "archive").expect("archive.csv written");
    let has_both_rows = csv.contains("one") && csv.contains("two");
    assert!(
        has_both_rows,
        "archive CSV reflects the copied rows: {csv:?}",
    );
}
#[test]
fn insert_select_with_column_list_and_projection_persists() {
    // Same round-trip, but with an explicit target column list and
    // an explicit projection instead of `select *`.
    let (project, db, _dir) = open_project_db();
    let runtime = rt();
    for table in ["source", "target"] {
        create_named(&db, &runtime, table);
    }

    let seed_sql = "insert into source (a, b) values (5, 'five')";
    runtime
        .block_on(db.run_sql_insert(seed_sql.to_string(), None, "source".to_string()))
        .expect("seed source");

    let copy_sql = "insert into target (a, b) select a, b from source";
    let copied = runtime
        .block_on(db.run_sql_insert(copy_sql.to_string(), None, "target".to_string()))
        .expect("column-list + projection INSERT … SELECT runs");
    assert_eq!(copied.rows_affected, 1);

    let csv = read_csv(&project, "target").expect("target.csv written");
    let has_row = csv.contains("five");
    assert!(has_row, "target CSV reflects the row: {csv:?}");
}
#[test]
fn with_prefixed_insert_select_runs_and_persists() {
    // R4 end-to-end: the CTE row source executes and lands rows.
    let (project, db, _dir) = open_project_db();
    let rt = rt();
    create_named(&db, &rt, "orders");
    create_named(&db, &rt, "archive");

    // Seed with multi-character markers. `create_named` names the
    // columns `a` and `b`, so single-character payloads `'a'`/`'b'`
    // could not be told apart from the column names if the
    // persisted CSV carries a header line; the content check below
    // would then pass even when no row had been copied.
    rt.block_on(db.run_sql_insert(
        "insert into orders (a, b) values (1, 'alpha'), (2, 'beta')".to_string(),
        None,
        "orders".to_string(),
    ))
    .expect("seed orders");

    let result = rt
        .block_on(db.run_sql_insert(
            "insert into archive with t as (select * from orders) select * from t".to_string(),
            None,
            "archive".to_string(),
        ))
        .expect("WITH-prefixed INSERT … SELECT runs");
    assert_eq!(result.rows_affected, 2);

    let csv = read_csv(&project, "archive").expect("archive.csv written");
    assert!(
        csv.contains("alpha") && csv.contains("beta"),
        "archive CSV reflects the CTE-sourced rows: {csv:?}",
    );
}
#[test]
fn insert_select_from_self_runs_as_plain_insert() {
    // DA gate: when the SELECT reads from the very table being
    // inserted into, the statement still runs as an ordinary insert
    // and yields a plain insert result. No cascade summary is
    // expected; cascade output belongs to DELETE only (3f).
    let (project, db, _dir) = open_project_db();
    let runtime = rt();
    let table = "T";
    create_named(&db, &runtime, table);

    let seed_sql = "insert into T (a, b) values (1, 'x'), (2, 'y')";
    runtime
        .block_on(db.run_sql_insert(seed_sql.to_string(), None, table.to_string()))
        .expect("seed");

    // Shift the key by 10 so the copies do not collide with the
    // rows they were read from.
    let self_copy_sql = "insert into T select a + 10, b from T";
    let copied = runtime
        .block_on(db.run_sql_insert(self_copy_sql.to_string(), None, table.to_string()))
        .expect("self-sourced INSERT … SELECT runs");
    assert_eq!(copied.rows_affected, 2, "two rows copied with shifted PK");

    let csv = read_csv(&project, table).expect("T.csv written");
    let has_shifted_keys = csv.contains("11") && csv.contains("12");
    assert!(
        has_shifted_keys,
        "the shifted-PK copies landed: {csv:?}",
    );
}