From e6ff63daa2e589a1847b29ab494f823840a19f99 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Thu, 11 Jun 2026 20:44:34 +0000 Subject: [PATCH] perf(seed): single-transaction multi-row insert path (ADR-0048 P1.3d) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit do_seed inserted row-by-row through do_insert, re-writing the whole table CSV each time — O(N^2). Extract do_insert's row core into a new insert_one_row (bind + serial/shortid autofill + FK-enriched execute, no tx/persist), shared by: - do_insert: one row in its own transaction (behaviour unchanged). - do_seed: all rows in ONE transaction, with a single finalize_persistence before the single commit — O(N), preserving ADR-0015 §6 commit-db-last. A mid-batch failure now rolls the whole seed back atomically; the capped preview is read back by rowid. A near-max 10000-row seed drops from ~tens of seconds to well under one. do_insert behaviour unchanged (whole suite green: 2346 pass / 0 fail / 0 skip, clippy clean); seed's existing tests exercise the batch path. --- src/db.rs | 137 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/src/db.rs b/src/db.rs index 3cf658b..e49dbe4 100644 --- a/src/db.rs +++ b/src/db.rs @@ -8999,18 +8999,22 @@ fn do_seed( const MAX_ATTEMPTS: u32 = 200; let mut rng = seed::make_rng(rng_seed); - let mut preview = DataResult { - table_name: table.to_string(), - columns: Vec::new(), - column_types: Vec::new(), - rows: Vec::new(), - }; + let mut preview_rowids: Vec = Vec::new(); let mut accepted: u64 = 0; let mut capped = false; + // All rows insert in a single transaction; persistence (the CSV and + // the one history line) is written once, before the single commit — + // preserving ADR-0015 §6 commit-db-last while staying O(N) instead + // of the O(N^2) of per-row CSV rewrites. A mid-batch failure rolls + // the whole seed back (atomic). 
+ let tx = conn + .unchecked_transaction() + .map_err(DbError::from_rusqlite)?; + while accepted < n { let mut attempt = 0u32; - let inserted = loop { + let rowid = loop { // One sampled parent row per FK for this attempt, so a // compound FK's children stay consistent. let fk_choice: Vec = fk_samples @@ -9056,28 +9060,16 @@ fn do_seed( for (gi, k) in keys.into_iter().enumerate() { used[gi].insert(k); } - // Only the first inserted row carries the `source`, so the - // whole seed writes exactly one `history.log` line. - let row_source = if accepted == 0 { source } else { None }; - break Some(do_insert( - conn, - persistence, - row_source, - table, - Some(&col_names), - &values, - )?); + let (_rows, rowid) = + insert_one_row(conn, table, &schema, Some(&col_names), &values)?; + break Some(rowid); }; - match inserted { - Some(result) => { - // Accumulate the capped preview (D18). - if preview.columns.is_empty() { - preview.columns = result.data.columns; - preview.column_types = result.data.column_types; - } - if preview.rows.len() < SEED_PREVIEW_CAP { - preview.rows.extend(result.data.rows); - preview.rows.truncate(SEED_PREVIEW_CAP); + match rowid { + Some(rowid) => { + // Keep the first `SEED_PREVIEW_CAP` rowids for the + // capped auto-show (D18). + if preview_rowids.len() < SEED_PREVIEW_CAP { + preview_rowids.push(rowid); } accepted += 1; } @@ -9094,43 +9086,66 @@ fn do_seed( ); } + // Persist once (CSV + the single history line), then commit (db-last). + let changes = Changes { + schema_dirty: false, + rewritten_tables: vec![table.to_string()], + ..Changes::default() + }; + finalize_persistence(conn, persistence, source, &changes)?; + tx.commit().map_err(DbError::from_rusqlite)?; + + let data = if preview_rowids.is_empty() { + DataResult { + table_name: table.to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + } + } else { + query_rows_by_rowid(conn, table, &preview_rowids)? 
+ }; + Ok(SeedResult { table: table.to_string(), requested: n, produced: accepted, - data: preview, + data, advisory_columns, }) } -fn do_insert( +/// Build and execute a single-row `INSERT` — column resolution, value +/// binding, `serial`/`shortid` autofill, and the FK-enriched execute — +/// returning `(rows_affected, new rowid)`. +/// +/// It does **not** manage the transaction or persistence: the caller +/// owns those. This lets `do_insert` run one row in its own +/// transaction while `do_seed` runs N rows in a single transaction and +/// persists once (preserving ADR-0015 §6 commit-db-last while staying +/// O(N)). **The caller must hold an open transaction.** `table` must +/// already be canonical and `schema` already read. +fn insert_one_row( conn: &Connection, - persistence: Option<&Persistence>, - source: Option<&str>, table: &str, + schema: &ReadSchema, user_columns: Option<&[String]>, user_values: &[Value], -) -> Result { - debug!(table = %table, "insert"); - let canonical_table = require_canonical_table(conn, table)?; - let table = canonical_table.as_str(); - let schema = read_schema(conn, table)?; - - // Resolve which columns the user is providing values for. - let user_cols: Vec = match user_columns { - Some(cols) => cols.to_vec(), - None => { - // Short form: every non-auto-generated column in - // schema declaration order. Serial and shortid both - // get auto-filled below. +) -> Result<(usize, i64), DbError> { + // Resolve which columns the user is providing values for. The short + // form (None) is every non-auto-generated column in schema + // declaration order; serial and shortid get auto-filled below. 
+ let user_cols: Vec = user_columns.map_or_else( + || { schema .columns .iter() .filter(|c| !matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId))) .map(|c| c.name.clone()) .collect() - } - }; + }, + <[String]>::to_vec, + ); if user_cols.len() != user_values.len() { return Err(DbError::InvalidValue(format!( @@ -9143,7 +9158,7 @@ fn do_insert( let mut bindings: Vec<(String, Bound)> = Vec::with_capacity(user_cols.len()); for (col_name, value) in user_cols.iter().zip(user_values.iter()) { - let bound = impl_value_for(&schema, col_name, value)?; + let bound = impl_value_for(schema, col_name, value)?; bindings.push((col_name.clone(), bound)); } @@ -9214,11 +9229,28 @@ fn do_insert( debug!(sql = %sql, "insert"); let params: Vec = bindings.iter().map(|(_, b)| bound_to_sqlite_value(b)).collect(); + let rows_affected = execute_with_fk_enrichment(conn, table, &sql, &params)?; + let new_rowid = conn.last_insert_rowid(); + Ok((rows_affected, new_rowid)) +} + +fn do_insert( + conn: &Connection, + persistence: Option<&Persistence>, + source: Option<&str>, + table: &str, + user_columns: Option<&[String]>, + user_values: &[Value], +) -> Result { + debug!(table = %table, "insert"); + let canonical_table = require_canonical_table(conn, table)?; + let table = canonical_table.as_str(); + let schema = read_schema(conn, table)?; let tx = conn .unchecked_transaction() .map_err(DbError::from_rusqlite)?; - let rows_affected = execute_with_fk_enrichment(conn, table, &sql, &params)?; - let new_rowid = conn.last_insert_rowid(); + let (rows_affected, new_rowid) = + insert_one_row(conn, table, &schema, user_columns, user_values)?; let data = query_rows_by_rowid(conn, table, &[new_rowid])?; let changes = Changes { schema_dirty: false, @@ -9227,10 +9259,7 @@ fn do_insert( }; finalize_persistence(conn, persistence, source, &changes)?; tx.commit().map_err(DbError::from_rusqlite)?; - Ok(InsertResult { - rows_affected, - data, - }) + Ok(InsertResult { rows_affected, data }) } /// Build the 
parameterised `UPDATE … SET … WHERE …` statement.