diff --git a/Cargo.lock b/Cargo.lock
index 78a8c41..c1fc106 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -419,6 +419,12 @@ dependencies = [
"syn 2.0.117",
]
+[[package]]
+name = "deunicode"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
+
[[package]]
name = "diff"
version = "0.1.13"
@@ -518,6 +524,17 @@ dependencies = [
"num-traits",
]
+[[package]]
+name = "fake"
+version = "5.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453"
+dependencies = [
+ "deunicode",
+ "either",
+ "rand 0.10.1",
+]
+
[[package]]
name = "fallible-iterator"
version = "0.3.0"
@@ -1527,6 +1544,7 @@ dependencies = [
"crossterm",
"csv",
"directories",
+ "fake",
"futures-util",
"gethostname",
"insta",
diff --git a/Cargo.toml b/Cargo.toml
index f3b74d9..10c5fd4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,14 @@ chrono = { version = "0.4.44", default-features = false, features = ["clock"] }
crossterm = { version = "0.29.0", features = ["event-stream"] }
csv = "1.4.0"
directories = "6.0.0"
+# Realistic fake-data generators for the `seed` command (ADR-0048):
+# names, emails, addresses, companies, lorem, etc. Default features
+# only — the basic fakers need no flags; date/datetime values are
+# generated in-house (rand + the existing `chrono`) for the bounded
+# windows ADR-0048 D8 requires, so `fake`'s `chrono` feature is
+# deliberately omitted. No commerce/product module exists, so the
+# `product` generator is hand-rolled (D9).
+fake = "5"
futures-util = "0.3.32"
gethostname = "1.1.0"
rand = "0.10.1"
diff --git a/docs/adr/0048-seed-fake-data-generation.md b/docs/adr/0048-seed-fake-data-generation.md
new file mode 100644
index 0000000..9dfd1ed
--- /dev/null
+++ b/docs/adr/0048-seed-fake-data-generation.md
@@ -0,0 +1,677 @@
+# ADR-0048: `seed` — fake-data generation command (SD1, opens SD2)
+
+## Status
+
+**Accepted (2026-06-11); Phase 1 + Phase 2 implemented (2026-06-11).** Design
+settled with the user across an extended fork dialogue (every decision
+below was escalated and user-chosen), then hardened by a pre-build
+`/runda` Devil's-Advocate pass that found six blockers — undo
+integration (D15), replay semantics (D16), `set` value quoting (D2),
+CHECK-constraint handling (D17), a phase-ordering bug in the advisory
+(D13), and auto-show flooding (D18) — plus refinements (state-relative
+reproducibility, compound-FK tuple sampling, column-fill constraint
+rules, the `fake` dependency scan), all folded in.
+
+**Phase 1 shipped** test-first across commits `202e25a` (generation
+library + `fake` dependency) → `f1e9484` (command skeleton) →
+`73493fa` (FK sampling) → `9c13501` (uniqueness / junction / IN-CHECK)
+→ `0b3ab3c` (`SeedResult` / preview / advisory / count cap) →
+`e6ff63d` (single-transaction O(N) path) → `fbd219b` (`--seed` flag,
+ambient wiring, and a whole-implementation `/runda` pass). The
+post-implementation `/runda` found eight gaps — FK-sampling
+determinism (now `ORDER BY`), shortid reproducibility (now from the
+seeded RNG, so **D4 holds with no exceptions**), and six untested
+ADR decisions (D5/D15/D16/D17 + atomicity + zero-count), all closed.
+**2358 tests pass / 0 fail / 0 skip; clippy clean.**
+
+**Implemented in Phase 1:** the whole-row `seed <table> [count]
+[--seed <n>]` form and every D1–D18 decision *except* the two
+Phase-2 surfaces.
+
+**Phase 2 implemented (2026-06-11):** both remaining surfaces — the
+**`set` override clause** (D2: fixed value / pick-list / named
+generator / range, quoted literals, type-aware) and the
+**`<table>.<column>` column-fill** form (D1 form 2: an UPDATE over
+existing rows, refusing PK/autogen targets, empty-table no-op, one undo
+step). The named-generator vocabulary (D9) lives in `src/seed`
+(`KNOWN_GENERATORS` / `generator_for_name`); a new range `Generator`
+(`src/seed/generators.rs`) backs `between`; the override clause is
+folded from the flat matched path (`build_seed_overrides`,
+`src/dsl/grammar/data.rs`) and applied to the per-column plan
+(`apply_seed_overrides`, `src/db.rs`), with column-fill in
+`do_seed_column_fill`. Full ambient wiring: completion (the generator
+vocabulary after `as`, the `set`/`.col` column slots), highlighting
+(`HighlightClass::Function` → `tok_function`, the generator slot), the
+validity indicator (`IdentSource::Generators` — an unknown name flagged
+`[ERR]`), help, and parse-error pedagogy rows. The D13 advisory now
+carries its Phase-2/3 wording (points at `set` and the column-fill
+repair). A post-implementation `/runda` pass then added one
+user-chosen refinement: a **bounded override on a UNIQUE column** (a
+fixed value / too-short pick-list) is now a **friendly error** rather
+than a silent uniqueness cap (see D2). **2400 tests pass / 0 fail / 0
+skip; clippy clean.** Two
+implementation refinements depart from this ADR's original wording
+while still meeting the user-facing contract: dates in the range form
+are **quoted** (the D2 amendment, below — no date-literal token
+exists); and the `set` value slots reuse `update`'s typed
+`current_column_value` (no spurious column-ref match) rather than the
+raw expression operand.
+
+Further SD2 increments (custom user generators, NULL injection,
+multi-locale, recursive parent auto-seed) remain out of scope (see Out
+of scope).
+
+Closes `requirements.md` **SD1** and delivers the core of **SD2**
+(per-type generators, determinism, the `fake`-backed catalogue). It
+also closes one of the two remaining gaps in **A1** ("all canonical
+app-level commands") — `seed`; the other, `hint` (**H2**), is
+separate.
+
+Builds on: ADR-0014 (data operations, the `Value`/`Bound` value model,
+the auto-show pattern, FK-error enrichment), ADR-0005/0011 (the type
+vocabulary and `Type::fk_target_type()`), ADR-0012/0013 (the column /
+relationship metadata tables, the rebuild-table primitive — *read* by
+seed for schema introspection), ADR-0024 (the unified grammar tree /
+`CommandNode` registration that gives completion, hints, help-id,
+usage-id for free), ADR-0022 (ambient typing assistance — the
+`KNOWN_SQL_FUNCTIONS` curated-vocabulary pattern that the
+generator-name list mirrors), ADR-0026 (the `in (...)` / `between ...
+and ...` expression grammar the override clause reuses), ADR-0027 (the
+validity-indicator diagnostics model), and ADR-0038 (the
+`OutputStyleClass::Hint` styled output used for the post-seed
+advisory). Honours ADR-0003 (both modes, no sigil), ADR-0009 (DSL
+conventions — keyword grammar, `--` flags for opt-in choices, one
+sigil only), ADR-0002 (no engine name in user-facing strings), and
+ADR-0015 (per-command write-through persistence).
+
+## Context
+
+`seed <table> [count]` is the last unbuilt **data-authoring** command
+in the requirements. The pedagogical value is high: a learner who has
+just modelled a schema wants rows to query against *now*, without
+hand-typing dozens of `insert`s. A teacher wants a one-liner that
+fills a demo database with believable data. SD1 commits to "plausible
+fake data; junction tables seeded with valid foreign-key references
+drawn from existing parent rows." SD2 deferred the *how* — "per-type
+generators, locale, determinism, override hooks" — explicitly pending
+this ADR.
+
+The design conversation widened the scope deliberately, with the user
+confirming each step:
+
+- **Realism matters more than minimalism** for a teaching tool. Random
+ `text_a3f9` values teach nothing; `Alice Martinez` /
+ `alice.m@example.com` make queries feel real. → adopt a faker
+ library and make generation **name-aware**.
+- **The column *name* is the strongest signal** for what a value should
+ look like, but it is **ambiguous** without the **table** for the
+ `name`/`title` family (`products.name` ≠ `users.name`).
+- **Heuristics will miss**, so a **manual override** surface is
+ required, not optional — this is SD2's "override hooks", brought
+ forward.
+- **Identifiers and enums** are special: `id`-ish columns want
+ uniqueness; `status`-ish columns have no sensible generic value and
+ should be *flagged*, not guessed.
+
+The novel work is the **generation layer**. Everything downstream —
+type validation, autogen autofill (`serial`/`shortid`), FK
+enforcement, per-command persistence, the auto-show outcome — is
+reused from the existing insert/update machinery as **shared helper
+functions**, per the X5 architecture preference (unique commands, with
+mechanics shared as library functions — *not* by emitting
+`Command::Insert` to borrow `do_insert`).
+
+## Decision
+
+Add a dedicated **`seed`** command (its own AST variant and its own
+`do_seed` worker executor) available in **both modes**, with the
+surface and behaviour below. Generation is realistic, name- and
+table-aware, type-gated, with a manual override clause and a
+reproducibility flag.
+
+**Command classification (important, set by the replay decision
+D16).** Although `requirements.md` A1 lists `seed` among the
+"app-level commands" (meaning: part of the canonical command surface,
+no sigil, both modes), `seed` is architecturally a **data-authoring
+command** — a sibling of `insert`/`update`/`delete`, **not** an
+app-lifecycle `AppCommand`. It is therefore **not** added to
+`is_app_lifecycle_entry_word` / completion's
+`empty_input_offers_app_command_entry_keywords` (those mirror the
+`AppCommand` set and must match — `seed` belongs in neither): `replay`
+re-runs it as a data write (D16).
+
+### D1 — Command surface (fork, user-chosen: "whole-row + column-fill")
+
+Two forms:
+
+1. **Whole-row generation** — `seed <table> [count]`
+ Generates `count` new rows (an INSERT path). `count` **defaults to
+ 20** (D6) when omitted. Every user-fillable column is filled per the
+ generation rules (D7–D12); `serial`/`shortid` autogen columns are
+ left to the existing autofill helpers.
+
+2. **Column-fill on existing rows** — `seed <table>.<column>`
+   Fills `<column>` across the table's **existing** rows (an UPDATE
+ path) — the natural follow-up to `add column`. Combined with the
+ `set` clause (D2) this is also the precise repair for a single
+ mis-guessed column: `seed users.work_addr set work_addr as email`.
+ Column-fill **refuses** PK columns and autogen (`serial`/`shortid`)
+ columns (a friendly error — you don't "fill" an identity column),
+ and **respects** the same UNIQUE / FK / required rules as whole-row
+ generation (a UNIQUE target gets collision-free values; an FK
+ target samples from the parent, D14). On an **empty** table it is a
+ friendly no-op ("no rows to fill").
+
+**Zero / over-cap counts.** `seed <table> 0` is a friendly no-op;
+`count` over the maximum (D6) is a friendly error.
+
+The column-restricted-*insert* form (`seed t (a, b)` — new rows, only
+some columns filled) was considered and **rejected** as marginal and
+constraint-fragile (see Alternatives).
+
+**Required-column block guard (user requirement).** If seed cannot
+produce a value for a `NOT NULL` column — the only real case is a
+`NOT NULL blob` column, which has no DSL value path — it **refuses the
+whole operation with a friendly error** naming the column, rather than
+attempting a NULL insert that would violate the constraint. The check
+is a pre-flight over the resolved per-column plan, before any write.
+
+### D2 — Manual override: the `set` clause (fork, user-chosen: "value + list + generator + range")
+
+An optional, comma-separated `set` clause overrides generation per
+column. Four forms, all reusing existing grammar vocabulary so there
+is nothing new to learn:
+
+| Form | Example | Meaning |
+|---|---|---|
+| Fixed value | `set status = 'pending'` | every row gets the constant |
+| Pick-from-list | `set role in ('admin', 'editor', 'viewer')` | uniform random choice from the list |
+| Explicit generator | `set work_addr as email` | force a named generator (D9) |
+| Range | `set price between 10 and 100` | uniform in range; **also dates** — `set signup between '2023-01-01' and '2024-12-31'` |
+
+Multiple clauses combine: `seed users 20 set role in ('admin',
+'user'), status = 'active', signup between '2023-01-01' and
+'2024-12-31'`.
+
+**Override × UNIQUE capacity (post-implementation `/runda`, user-chosen:
+"friendly error").** A *bounded* override — a fixed value, or a
+pick-list — on a **single-column-UNIQUE** target (a `UNIQUE` column or a
+single-column PK) that offers fewer **distinct** values than the row
+count cannot fill the run; rather than let the D10 uniqueness machinery
+silently cap it (e.g. `seed users 100 set email = 'x'` → 1 row), seed
+**refuses up front** with a friendly error pointing at the fixes (use a
+generator, or a longer list). Generators and ranges are treated as
+effectively unbounded sources — if one genuinely exhausts, the D14
+distinct-combination cap still applies. Compound uniqueness is exempt
+(the *other* key columns can still vary).
+
+**Quoting (fork, user-chosen: "quoted, grammar-consistent").** Text
+values and list items are **quoted string literals** (`'admin'`),
+exactly as everywhere else in the DSL — only **numbers** stay
+unquoted. **Amendment (2026-06-11, Phase 2 build):** the original
+wording said "numbers *and dates* stay unquoted", but this DSL has
+**no date-literal token** — `Value` is `Number`/`Text` only, and a
+date is a **quoted string** validated by `bind_date` (`'2023-01-01'`)
+everywhere else (insert / update / `where`). An unquoted `2023-01-01`
+lexes as `2023`,`-`,`01`,… and cannot parse. So **dates in the range
+form are quoted** (`between '2023-01-01' and '2024-12-31'`) — which is
+in fact *more* faithful to this decision's own "quoted,
+grammar-consistent" principle. Numbers remain unquoted (`NumberLit`).
+This reuses the ADR-0026 expression grammar **unchanged**:
+the DA pass confirmed that the `in (...)` form's operands are typed
+value slots, so a *bare* `admin` would parse as a **column reference**
+(→ "unknown column"), not a string. Quoting is therefore not a style
+preference but a correctness requirement of grammar reuse. The range
+form is **type-aware**: numeric bounds for numeric columns, date
+bounds for date/datetime columns; a type-incompatible bound is a
+friendly error. `=`, `in (...)`, and `between ... and ...` are the
+ADR-0026 expression operators; `set` is the ADR-0014 UPDATE keyword;
+`as` is borrowed from the SQL alias slot. The `as <generator>` operand
+is a bare name from the curated generator vocabulary (D9), not a
+value. The override takes precedence over every heuristic.
+
+### D3 — Generation library: `fake` crate + hand-rolled gaps (fork, user-chosen: "name-aware + realistic")
+
+Add the **`fake`** crate (v5.x at time of writing; English locale for
+v1 per X2) for realistic values: names, emails, usernames, addresses,
+companies, phone numbers, lorem text, dates. Generation is driven by a
+per-column **generator** chosen by the heuristics (D7) or the override
+(D2), falling back to **type-based** generation (D8).
+
+**Implementation-time verifications (resolved 2026-06-11 when the
+dependency was added):**
+
+- **`rand` de-duplication — clean.** `fake` 5.1.0 depends on
+ `rand = "0.10"`, the **same major** as the project's `rand 0.10.1`,
+ so `cargo tree -e normal` resolves a **single** `rand 0.10.1` (no
+ runtime duplication; the `rand 0.8.6` visible to `cargo tree -i
+ rand` is only `fake`'s own dev-dependency, never compiled for us).
+ Consequence for D4: one seeded `rand 0.10` `StdRng` can drive
+ **both** `fake`'s `fake_with_rng` and the hand-rolled generators —
+ determinism is single-RNG, single-version, and shares `shortid.rs`'s
+ `rand` version.
+- **`fake` module inventory / features — confirmed.** Default features
+ (`["either"]`) cover the core string fakers used here
+ (Name/Internet/Address/Company/Lorem/PhoneNumber); `fake`'s `chrono`
+ feature is **deliberately omitted** (dates generated in-house for
+ D8's bounded windows). No commerce/product module exists → `product`
+ is hand-rolled (D9). (The exact faker call sites are pinned when the
+ generation library is built.)
+- **Security (new-dependency posture) — clean.** The `fake` tree (296
+ packages total) scanned clean by **all three** mandated scanners:
+ `osv-scanner` (no issues), `grype` (no vulnerabilities), `trivy fs
+ --scanners vuln` (0). No findings to document or accept.
+
+### D4 — Determinism: `--seed <n>` (fork, user-chosen: "optional flag")
+
+Generation is **random by default**. The optional `--seed <n>` flag
+makes a run **reproducible**: **same database state + same `--seed` →
+identical data**. The "database state" qualifier matters (DA
+refinement) — FK sampling (D14), identifier sequencing (D10), and
+UNIQUE collision-avoidance all *read existing rows*, so reproducibility
+is relative to the data already present, not absolute. Value: teachers
+hand out one dataset; demos are stable; and the feature's own tests
+can assert **exact** output (against a known starting state).
+Implemented with a seedable RNG threaded through every generator (no
+`thread_rng` on the seeded path). `--` flag per ADR-0009 (opt-in
+choice). Naming note: the flag `--seed` and the command `seed` share a
+word but never collide grammatically (`seed users 20 --seed 42` parses
+unambiguously). This flag is also the determinism lever for **replay**
+(D16): a recorded `seed … --seed N` line reproduces on replay; a bare
+`seed …` line regenerates fresh data.
+
+### D5 — Both modes (A1)
+
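+`seed` is part of the canonical command surface ("app-level" in the A1
+sense), available in **simple and advanced** mode, no sigil — like
+`save`/`load`/`export`/`replay`. Architecturally it remains a
+data-authoring command, not an `AppCommand` (see Command
+classification, above).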
+
+### D6 — Default count: 20; bounded maximum
+
+Omitted `count` → **20** rows: enough to make `where`, `group by`,
+`order by`, and `limit` meaningful without flooding the output pane.
+A **maximum** is enforced (proposed 10 000) to prevent a typo
+(`seed t 1000000`) from hanging the app or bloating the project; over
+the cap → friendly error stating the limit.
+
+### D7 — Name-aware heuristics, type-gated (the catalogue)
+
+A column's **name** selects a generator, but a name rule only fires
+when the column's **type** is compatible (a column named `email` typed
+`int` does **not** get a string — it falls through to type-based int).
+Matching is **case-insensitive**, **token-based** (split on `_`,
+camelCase, kebab), **most-specific-first**, with documented
+false-positive guards. The catalogue (representative; full table lives
+with the implementation):
+
+| Column name (tokens) | Generator | Type gate |
+|---|---|---|
+| `first_name`/`fname` · `last_name`/`surname`/`lname` | first / last name | text |
+| `name`/`full_name` · `title` | **table-context** name (D11) | text |
+| `email`/`*_email` | email | text |
+| `username`/`login`/`handle` | username | text |
+| `password`/`pwd` | password | text |
+| `phone`/`mobile`/`cell`/`tel` | phone number | text |
+| `city`/`town` · `country` · `state`/`province` | address parts | text |
+| `street`/`address`/`addr` · `zip`/`postcode`/`postal` | address parts | text |
+| `company`/`employer`/`org` · `job`/`position`/`profession` | company / job | text |
+| `description`/`bio`/`notes`/`summary`/`comment` | sentence / paragraph | text |
+| `url`/`website`/`homepage` · `color`/`colour` | URL / hex colour | text |
+| `price`/`amount`/`cost`/`salary`/`balance`/`total` | currency-range number | numeric |
+| `age` · `quantity`/`qty`/`stock`/`count` | 18–80 · small int | numeric |
+| `date`/`*_date` | date, recent ~3 yr window | date |
+| `dob`/`birthday` | date, adult window (18–80 yr ago) | date |
+| `timestamp`/`datetime` · `created_at`/`updated_at`/`*_at` | datetime, recent window (`updated_at` ≥ `created_at`) | datetime |
+| `is_*`/`has_*`/`active`/`enabled` | boolean | bool |
+| **identifier family** (D10) | unique sequential | int/text |
+| **enum-ish family** (D12) | generic text + flag | (text) |
+
+**False-positive guards (documented):** `username`/`filename`/
+`table_name`/`*_name` are handled before the bare `name` rule so they do
+**not** resolve to person-name; the bare `name`/`title` rule requires a
+standalone token or a recognised `*_name` suffix.
+
+### D8 — Type-based fallback
+
+When no name rule matches (or to satisfy a name rule's type gate),
+generate by **type**: `text`→realistic words/short phrase, `int`→
+bounded random, `real`→random double, `decimal`→formatted number,
+`bool`→random, `date`/`datetime`→**bounded recent** value (never "any
+point in all of history" — per the user's date concern), `serial`/
+`shortid`→omitted (autogen helpers fill them), `blob`→unsupported
+(nullable→NULL; `NOT NULL`→D1 block guard).
+
+### D9 — Named generators + the `product` generator
+
+The generators addressable via `set ... as <generator>` (D2) and
+chosen by D7 form a **curated, named vocabulary** — `name`,
+`first_name`, `last_name`, `email`, `username`, `phone`, `city`,
+`country`, `street`, `zip`, `company`, `job`, `sentence`, `paragraph`,
+`url`, `color`, `price`, `age`, `date`, `datetime`, `bool`, `product`,
+… — the single source of truth shared by the executor, the completion
+source, and the highlighter (mirroring `KNOWN_SQL_FUNCTIONS`,
+ADR-0022 Amendment 6).
+
+**`product`** is **hand-rolled** (the `fake` crate has no
+commerce/product module — D3): `{adjective} {material} {noun}` from
+three small baked-in word lists (~20 each) → "Sleek Bamboo Keyboard",
+"Vintage Leather Backpack". Seedable through the D4 RNG. Always
+addressable as `set <column> as product`, and auto-selected by D11 for
+the `name`/`title` family in product-ish tables.
+
+### D10 — Identifier family → unique by name (fork, user-chosen: "unique sequential")
+
+A column in the identifier family — `id`, `*_id` **that is not an FK**,
+`code`, `sku`, `ref`/`reference`, `number`/`no`, `barcode` — that is
+**not** a serial/shortid autogen column and **not** the PK is treated
+as an identifier and gets **unique** values: **int → sequential**
+(`MAX(col)+1` ascending, reads like real ids, never collides);
+**text → unique short code** (generate-with-retry). Precedence:
+**FK detection wins** over this rule (an FK `user_id` *should* have
+duplicates — many children per parent), so `*_id` only triggers
+uniqueness when the column is not a foreign key.
+
+**Constraint-driven uniqueness is independent and mandatory:** any
+column with a `UNIQUE` constraint (or a user-fillable single-column
+PK) gets guaranteed-unique generation regardless of name — a
+correctness requirement, not a heuristic. Generation for such columns
+uses retry/sequence to guarantee no collision within the batch and
+against existing rows.
+
+### D11 — Table-context disambiguation for `name`/`title` (fork, user-chosen: "table-context-aware")
+
+For the `name`/`title` family **only**, the heuristic also reads the
+**table** name token:
+
+- `product`/`item`/`goods`/`merchandise`/`catalog`/`inventory` →
+ `product` generator (D9)
+- `company`/`companies`/`vendor`/`supplier`/`manufacturer`/`brand` →
+ company name
+- `user`/`customer`/`person`/`people`/`employee`/`member`/`contact`/
+ `author`/`student` → person name
+- unrecognised table → generic word
+
+This resolves the real ambiguity (`products.name` → "Sleek Bamboo
+Keyboard"; `users.name` → "Alice Martinez"; `vendors.name` → "Globex
+Corp"). It is a deliberately **scoped** use of table context — the only
+place the table name influences generation.
+
+### D12 — Enum-ish names → generic + post-seed advisory (fork, user-chosen: "flag enum-ish only")
+
+Enum-ish names — `role`, `status`, `type`, `state`, `kind`,
+`category`, `level`, `tier`, `stage`, `priority`, `gender` — have **no
+sensible generic generator**, so they are **not guessed**: they fall
+through to generic text (they must still be filled — a `NOT NULL`
+status cannot be left empty). Seed then emits a **post-seed advisory**
+(D13) naming them and pointing at the `set ... in (...)` override.
+
+### D13 — Reporting: post-seed advisory (fork, user-chosen: "flag enum-ish only")
+
+After a successful seed, in addition to the normal auto-show outcome
+(row count + the affected rows, per ADR-0014), seed appends a
+**`OutputStyleClass::Hint`** advisory **only** when one or more
+enum-ish columns (D12) — **or columns guarded by a CHECK that seed
+could not derive values from** (D17) — were filled generically.
+
+The wording is **phase-aware** (DA finding: the advisory must not name
+features that ship later). In **Phase 1** (no `set` clause yet) it
+names the columns and explains they were filled generically. From
+**Phase 2/3** it points at the concrete repair:
+
+```
+# Phase 1 wording:
+✓ Seeded 20 rows into users
+ ℹ status, role were filled with generic text — they look like
+ fixed value sets you may want to choose deliberately.
+
+# Phase 2/3 wording (set clause + column-fill exist):
+✓ Seeded 20 rows into users
+ ℹ status, role filled generically. Fix existing rows with
+ seed users.status set status in ('active','inactive'),
+ or pass set … on the next seed.
+```
+
+Note the repair for **already-seeded rows** is the **column-fill**
+form (`seed users.status set …`), not "re-seed" (which would add more
+rows) — DA correction. This is a **result-time** note (cheap, reusing
+ADR-0038's hint rendering), not a typing-time warning. The fuller
+"per-column report" (every column → its generator) was considered and
+**deferred** (see Alternatives / Out of scope).
+
+### D14 — Foreign keys (SD1; fork on empty-parent, user-chosen: "friendly error")
+
+- **Each FK** is filled by sampling **uniformly** from the **existing
+ rows** of the parent table's referenced column(s). Duplicates are
+ expected and correct (many children per parent). For a **compound
+ FK**, the referenced **tuple is sampled jointly** (a whole existing
+ parent key), never per-column independently — independent sampling
+ could fabricate a `(a, b)` pair that exists in no parent row and
+ would fail FK enforcement (DA refinement).
+- **Empty parent** → seed **refuses with a friendly error** naming the
+ parent and the FK column ("seed `users` first — `orders.user_id`
+ references it"). Safe, predictable, teaches FK dependency order.
+ Recursive parent auto-seed is **deferred** to a future `--recursive`
+ opt-in (Out of scope).
+- **Junction / compound-PK tables** (SD1's explicit case): sample
+ **distinct combinations** of the parent PK tuples to satisfy the
+ compound PK's uniqueness; if `count` exceeds the number of available
+ distinct combinations, **cap** at the maximum and note it in the
+ outcome.
+- **Self-referential FK** (`manager_id → id`): if nullable, leave NULL
+ or point at an earlier row in the same batch; if `NOT NULL` on an
+ otherwise-empty table, friendly error. Documented edge case.
+- **Nullable FKs** are **always filled** in v1 (predictable);
+ occasional-NULL injection is deferred.
+
+### D15 — Undo: one snapshot per seed (DA finding; ADR-0006)
+
+Seed is a mutation, so it must participate in undo. The draft omitted
+this; the DA found the codebase already has the right primitive —
+`BeginBatch` / `EndBatch` (`db.rs`), used by `replay` so a multi-write
+run collapses to **one** boundary snapshot. `do_seed` wraps its
+generated writes in `begin_batch` / `end_batch`, so **`seed users 20`
+is a single undo step**, not 20 — matching ADR-0006 Amendment 1's
+batch model. Column-fill's bulk UPDATE is likewise one step. (`import`
+remains the only data-affecting op outside undo, per ADR-0015 §11;
+seed is firmly inside it.)
+
+### D16 — Replay: seed re-runs as a data write (fork, user-chosen)
+
+`replay` re-executes a recorded `seed` line as a **data-write
+command** — it is **not** in the app-lifecycle skip-set (see Command
+classification, above). Consequence, accepted by the user: a **bare**
+`seed users 20` regenerates **fresh, divergent** data on each replay;
+a `seed users 20 --seed 42` line (the determinism lever, D4)
+**reproduces** the original data. This keeps seed faithful to its
+nature as a data write and puts reproducibility exactly where the
+`--seed` flag already lives. (Seeded *data* is in any case durable
+independently of replay, via the ADR-0015 CSV store + `rebuild`;
+replay is the scripting re-run path, U4.) The DA confirmed the wiring
+trap: because seed is *not* an `AppCommand`, it is correctly absent
+from `is_app_lifecycle_entry_word` and replay dispatches it through
+the normal data path rather than aborting.
+
+### D17 — CHECK constraints: derive from simple `IN`, else friendly-fail (fork, user-chosen)
+
+A CHECK on a generically-filled column would otherwise fail the whole
+batch (DA finding — the block guard only covered `NOT NULL blob`).
+Two-tier handling, per the user:
+
+1. **Derive from simple `IN`-CHECKs.** When a column's CHECK is the
+ common enum-as-CHECK shape — `col IN ('a', 'b', …)` (the column's
+ own CHECK, single-column, literal list) — seed **parses out the
+ allowed values and uses them as the generator** (uniform choice).
+ The frequent `CHECK (status IN ('active','closed'))` case then
+ "just works" with no override needed.
+2. **Best-effort + friendly fail for the rest.** For CHECKs seed
+ cannot interpret (ranges, expressions, multi-column), it generates
+ best-effort; if a generated row violates the CHECK, the insert
+ fails through the existing **H1 friendly-error layer** (ADR-0019)
+ naming the constraint and pointing at `set`. Such CHECK-guarded
+ columns are also **pre-flagged in the advisory** (D13) alongside
+ enum-ish names, so the user is warned before hitting the failure.
+
+No new CHECK engine — tier 1 is a narrow literal-`IN` parse over the
+CHECK text already stored in metadata; tier 2 is the existing failure
+path.
+
+### D18 — Auto-show is capped for large seeds (DA finding)
+
+ADR-0014 auto-show renders "the affected rows" — fine for one insert,
+a wall for a 10 000-row seed. Seed's outcome shows a **capped
+preview** (proposed first **20** rows) with a `(showing 20 of N)`
+note, not the full set. The row **count** is always reported in full;
+only the rendered table is capped.
+
+## Grammar, AST, and cross-cutting wiring
+
+Per ADR-0024, `seed` is registered as a `CommandNode` so completion,
+hints, help, and usage flow from one definition. The wiring, as
+**explicit acceptance criteria** (a `/runda` pass must verify each —
+ADR-0045 showed "claimed verified" is not verified):
+
+- **AST + executor.** A dedicated command variant (`Seed { table,
+  target_column: Option<…>, count: Option<…>, overrides:
+  Vec<…>, rng_seed: Option<…> }`) and a dedicated
+ `do_seed` worker executor. `do_seed` **reuses shared helpers**
+ (value binding `impl_value_for`, autogen autofill, FK enrichment,
+ the multi-row parameterised-insert pattern of `plan_autogen_autofill`,
+ the UPDATE path for column-fill, per-command persistence, the
+ `begin_batch`/`end_batch` undo primitive of D15) as library
+ functions — it does **not** emit `Command::Insert`/`Command::Update`
+ (X5).
+- **Replay / undo classification (D15/D16).** `do_seed` brackets its
+ writes in one batch (one undo step). The `seed` entry word is
+ **deliberately absent** from `is_app_lifecycle_entry_word` and
+ completion's `empty_input_offers_app_command_entry_keywords` (the
+ `AppCommand` mirror) so replay re-runs it as a data write — an
+ explicit acceptance check, since the default for an unlisted
+ recognised command must be "replayed", not "abort".
+- **Completion sources:** table-name (existing tables); `.column` and
+ `set`-clause column slots (columns of the named table); the
+ generator-name vocabulary (D9) after `as`; `count` number; `set` /
+ `=` / `in` / `as` / `between` / `and` keywords; `--seed` flag.
+- **Syntax highlighting:** `seed` keyword; the generator-name
+ vocabulary highlighted as **`tok_function`** (reuse the existing
+  ADR-0022 Amendment 6 blue — no new theme colour).
+- **Hints:** ambient per-slot "what's next" and usage hints, both
+ modes.
+- **Help:** `help seed` topic (`help_id` + per-command block); the
+ general `help` list picks it up automatically via REGISTRY.
+- **Parse-error pedagogy (ADR-0042):** near-miss matrix rows for `seed`
+ (bare / missing-table / wrong-token / malformed `set`), both modes.
+- **Validity indicator (ADR-0027):** typing-time `[ERR]`/`[WRN]` for
+ unknown table, unknown column (in `.column` or `set`), unknown
+ generator name after `as`.
+- **No DSL→SQL teaching echo (ADR-0038).** `seed` is a utility/app
+ command, not a DSL form of a SQL statement, so the echo does not
+ apply. (A future "show the generated INSERTs" is out of scope —
+ it would dump `count` statements.)
+
+## Implementation phasing
+
+Design is whole; the **implementation** is phased into reviewable,
+test-first commits:
+
+1. **Core whole-row seed** *(done, Phase 1)* — grammar/AST/executor;
+ type-based generation + the `fake`-backed name heuristics
+ (D7/D8/D11); identifier uniqueness (D10) + constraint uniqueness; FK
+ sampling (joint tuples) + empty-parent error + junction
+ distinct-combos (D14); `--seed` determinism (D4); default count + cap
+ + zero-no-op (D6/D1); required-column block guard (D1); **undo batch
+ (D15)**; **replay-as-data-write classification (D16)**; **CHECK
+ derive / friendly-fail (D17)**; **capped auto-show (D18)**; the
+ enum/CHECK advisory in its **Phase-1 wording** (D12/D13); full
+ ambient wiring; both modes.
+2. **The `set` override clause** (D2) *(done, Phase 2)* — value / list /
+ generator / range, type-aware, with completion + highlight +
+ validity for the generator-name slot.
+3. **Column-fill mode** (`seed <table>.<column>`, D1 form 2) *(done,
+ Phase 2)* — the UPDATE path.
+
+Each phase is independently green before the next. (Phases 2 and 3
+landed together — they share the `set`-override executor machinery, so
+splitting them risked a state where `set` parsed but column-fill
+silently no-op'd.)
+
+## Testing (ADR-0008 tiers 1–3; test-first)
+
+- **Tier 1 (unit, deterministic via `--seed`):** generator selection
+ (name × type-gate matrix, including every false-positive guard of
+ D7); table-context disambiguation (D11); identifier uniqueness and
+ the FK-wins-over-`*_id` precedence (D10); bounded-date windows (D8);
+ the `product` generator shape; override resolution + precedence (D2);
+ the required-column block guard (D1); the count cap (D6). Exact-value
+ assertions are possible because `--seed` fixes the RNG.
+- **Tier 2 (insta snapshots):** the seeded data table render and the
+ enum advisory (D13) at representative sizes, light + dark.
+- **Tier 3 (integration, full event loop):** `seed users 20` end to
+ end (rows land in db + CSV + history, auto-show, persistence);
+ FK sampling against a populated parent (incl. a **compound FK** —
+ every child tuple exists in the parent); **empty-parent friendly
+ error**; **junction** seeding with distinct combinations and the
+ over-cap note; the `set` clause forms (quoted literals);
+ **column-fill** on existing rows (incl. refusal of PK/autogen targets,
+ empty-table no-op); reproducibility (`--seed 42` twice → identical data
+ from a fixed state); both modes. Plus the DA-driven cases:
+ **one-undo-step** (seed then a single `undo` removes all rows);
+ **replay** of a bare `seed` line (divergent) vs a `--seed` line
+ (reproduced); **`IN`-CHECK auto-derivation** ("just works") and a
+ **complex-CHECK friendly failure**; **capped auto-show** on a large
+ seed.
+
+"All green, no skips" is the only acceptable end state; the Phase-1
+baseline (2290 passing / 0 failing / 0 skipped / 1 ignored doctest) is
+the regression floor.
+
+## Out of scope / deferred (future SD2 work)
+
+- **Recursive parent auto-seed** (`--recursive`) — D14 errors instead.
+- **NULL injection** for nullable columns (teaching optional
+ relationships / `IS NULL`) — v1 always fills.
+- **Multi-locale** generation — English only (X2).
+- **User-defined custom generators** (true "override hooks" — register
+ a named generator) — the `set ... as <generator>` surface covers the
+ common need; custom generators are a later SD2 increment.
+- **Full per-column seed report** — D13 flags enum-ish only.
+- **Column-restricted insert** (`seed t (a, b)`) — rejected (D1).
+- **"Show the generated SQL"** teaching echo for seed.
+
+## Alternatives considered
+
+- **Hand-rolled generators only (no `fake`):** minimal dependency, but
+ synthetic-looking data (`text_a3f9`) — rejected on pedagogy
+ (pedagogy wins ties).
+- **Type-only generation (no name awareness):** simpler, but misses
+ the biggest UX win (a `users` table that reads like real people) —
+ rejected.
+- **Column-name-only `name` (no table context):** leaves
+ `products.name` → person names, requiring a manual override on every
+ product/company table — rejected for the `name`/`title` family
+ (D11).
+- **No override clause (heuristics + type only):** could not answer
+ "the heuristic guessed wrong, fix it" or enum columns — rejected;
+ the `set` clause (D2) is the answer to the user's Q3.
+- **Recursive auto-seed of empty parents:** powerful but magical and
+ can seed tables the user did not name — deferred behind a future
+ flag (D14).
+- **Always-random (no `--seed`):** simplest, but no reproducible
+ datasets and weaker tests — rejected (D4).
+- **Full per-column report by default:** a nice teaching artifact but
+ verbose on wide tables — deferred; flag-only advisory chosen (D13).
+- **Reuse `Command::Insert`/`do_insert` directly** from seed: tempting
+ for code reuse, but collapses command identity and violates X5 —
+ rejected in favour of a dedicated `do_seed` that calls shared
+ *helpers*.
+- **Skip seed on replay** (classify as app-lifecycle, D16): consistent
+ with A1's "app-level" label and avoids divergent data, but seed is a
+ data write and silently skipping it on a scripted re-run is
+ surprising — rejected; `--seed` is the determinism lever instead.
+- **Bare-word `set` list items** (`in (admin, …)`, D2): matched the
+ early mockups and reads cleaner, but bare words are column
+ references in the reused grammar (would error) and would force a
+ custom list form — rejected for quoted literals (grammar reuse +
+ DSL consistency).
+- **Pre-flight refuse any CHECK-bearing table** (D17): safest but
+ blocks seeding too many legitimate tables — rejected for the
+ derive-`IN`-else-friendly-fail tier.
+- **`set`-driven NULL / per-column report / recursive parent seed:**
+ deferred — see Out of scope.
diff --git a/docs/adr/README.md b/docs/adr/README.md
index 884d6ef..d02cb90 100644
--- a/docs/adr/README.md
+++ b/docs/adr/README.md
@@ -60,3 +60,4 @@ This directory contains the project's ADRs, recorded per
- [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from to [as ]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. 
Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships
- [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b`…`22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String` — **not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. 
**Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). the full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). **Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~40–50 cols as a `Clear` overlay** (right panels stay unchanging underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). 
Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears)
- [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54`→`2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. **Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). **Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). 
Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (3–5 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. 
OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle
+- [ADR-0048 — `seed` fake-data generation command](0048-seed-fake-data-generation.md) — **Accepted 2026-06-11; Phase 1 + Phase 2 implemented 2026-06-11** (Phase 1 commits `202e25a`→`fbd219b`; design settled with the user across an extended fork dialogue, hardened by a pre-build `/runda` pass (six blockers folded in), a post-implementation `/runda` pass (eight gaps closed — FK/shortid determinism so **D4 holds with no exceptions**, plus six untested ADR decisions), a Phase-2 pre-build `/runda` pass (which caught the no-date-literal-token reality → the D2 quoted-dates amendment), and a post-implementation `/runda` pass (which added a friendly error for a bounded override on a UNIQUE column — see D2); **2400 tests pass, clippy clean**). Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1**. **Phase 1 shipped:** whole-row `seed <table> [count] [--seed <n>]` with realistic name-aware generation (the `fake` crate + a type-gated heuristic catalogue, table-context name disambiguation, hand-rolled `product` generator, bounded dates), identifier + constraint uniqueness incl. junction distinct-combos, FK sampling from existing parent rows (empty-parent error), `IN`-CHECK derivation + complex-CHECK advisory, a required-column block guard, `--seed` reproducibility (serial/FK/shortid all deterministic), undo as one batch step, replay as a data write, a capped auto-show preview, the enum/CHECK advisory, and an O(N) single-transaction insert path. **Phase 2 shipped (2026-06-11):** the `set` override clause (D2 — fixed value / pick-list / `as <generator>` / `between` range, **quoted** dates per the D2 amendment, type-aware, override drops the column from the advisory) and the `<table>.<column>` column-fill form (D1 form 2 — an UPDATE over existing rows, refusing PK/autogen targets, empty-table no-op, FK/unique-respecting, one undo step), with the new `KNOWN_GENERATORS` vocabulary (D9), a range `Generator`, full completion/highlight (`HighlightClass::Function`)/validity (`IdentSource::Generators`)/help/pedagogy wiring, and the D13 advisory's Phase-2/3 wording. Further SD2 increments (custom generators, NULL injection, multi-locale, recursive auto-seed) out of scope. Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1** (the other being `hint`/**H2**). A dedicated `seed` command (own AST variant + `do_seed` executor, **both modes**) generating **realistic, name-aware** fake data. Two forms: **`seed <table> [count]`** (new rows, default **20**, capped) and **`seed <table>.<column>`** (fill a column on existing rows, an UPDATE). Generation adds the **`fake` crate** (v5, English) driven by a **type-gated, token-matched name-heuristic catalogue** (~30 patterns, documented false-positive guards), with **table-context** disambiguating the `name`/`title` family (`products.name`→product, `users.name`→person, `vendors.name`→company), a **hand-rolled `product` generator** (`fake` has no commerce module), **bounded dates** (`date`/`timestamp`/`dob`/`*_at` recognised, recent windows — never "all of history"), the **identifier family** (`id`/`code`/`ref`/`number`, non-FK/non-PK) → **unique sequential**, and **enum-ish names** (`role`/`status`/`type`/…) left generic + a **post-seed Hint advisory** pointing at `set … in (…)`. A **`set` override clause** — `= value` / `in (a,b,c)` / `as <generator>` / `between a and b` (numeric **and** date), reusing ADR-0026 operators — answers the heuristic-miss case. **`--seed <n>`** makes runs reproducible (and enables exact-value tests). **FK** columns sampled uniformly from existing parent rows (**empty parent → friendly error**, no recursion v1); **junction/compound-PK** tables seeded with **distinct combinations**, capped + noted (SD1). A **required-column block guard** refuses rather than NULL-violate a `NOT NULL` column it can't fill (e.g. `NOT NULL blob`). Full ambient wiring (completion incl. a new generator-name vocabulary highlighted as `tok_function`, hints, `help seed`, ADR-0042 near-miss matrix, ADR-0027 validity); **no DSL→SQL teaching echo** (seed is a utility command, not a SQL twin). Honours **X5** — `do_seed` reuses insert/update *mechanics as helpers*, not by emitting `Command::Insert`. Implementation phased: (1) core whole-row seed → (2) `set` overrides → (3) column-fill. Deferred (future SD2): recursive auto-seed, NULL injection, multi-locale, user-defined custom generators, full per-column report
diff --git a/docs/handoff/20260611-handoff-64.md b/docs/handoff/20260611-handoff-64.md
index e5dbbe2..cec3fb8 100644
--- a/docs/handoff/20260611-handoff-64.md
+++ b/docs/handoff/20260611-handoff-64.md
@@ -8,9 +8,8 @@ to end across three phases + a restyle).
## §1. State at handoff
-**Branch:** `main`. **HEAD `2d0f4b2`** plus an **uncommitted docs
-finalization** (ADR-0047 status → implemented, README index, this
-handoff — see §6). Push is the user's step.
+**Branch:** `main`. **HEAD `f0afec3`** — all work committed, nothing
+pending. Unpushed (push is the user's step; normal working state).
**Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1
ignored is the long-standing `friendly` doctest). **Clippy clean**
@@ -18,6 +17,7 @@ ignored is the long-standing `friendly` doctest). **Clippy clean**
**This session's commits:**
```
+f0afec3 docs: session handoff 64 + ADR-0047 implemented (#22/#24)
2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4)
241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4)
2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5)
@@ -26,8 +26,9 @@ e9eb1b1 docs: ADR-0047 — demonstration overlay layer for casts/teaching (#22)
638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24)
```
-**Issues closed:** **#24** (vi nav) and **#22** (demo overlays) — close
-#22 once the docs finalization commit lands.
+**Issues closed:** both **#24** (vi nav) and **#22** (demo overlays) are
+**closed on Gitea** with closing comments — verified via the filtered
+issue list. Nothing left open from this session's scope.
## §2. #24 — vi-style load-picker navigation (commit `638b4c9`)
@@ -107,13 +108,15 @@ existing `IndicatorDebounce` already takes. A future Tier-4 PTY harness
## §6. How to take over
+**Nothing is pending from this session** — both issues are closed, all
+docs landed (`f0afec3`), tree is green. The next session **returns to the
+open requirements backlog** (§7). Suggested start: run `/whatsnext`
+(it reads this handoff), or pick from §7 below.
+
1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`,
- `docs/adr/README.md`, and **ADR-0047** (fully landed).
-2. **Pending:** the docs finalization commit (ADR-0047 status →
- implemented; README index; this handoff). Commit as
- `docs: session handoff 64 + ADR-0047 implemented (#22/#24)` (the user
- confirms commit messages). Then close **#22** on Gitea.
-3. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
+ `docs/adr/README.md`. ADR-0047 is fully landed; revisit only for
+ demo-overlay follow-ups.
+2. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
`demo_badge_seq`, `demo_caption`, `demo_caption_capturing`,
`demo_caption_buffer`, `last_output_area`. Rendering:
`render_demo_overlays` / `render_badge_box` / `render_caption_box` /
diff --git a/docs/handoff/20260611-handoff-65.md b/docs/handoff/20260611-handoff-65.md
new file mode 100644
index 0000000..07bd865
--- /dev/null
+++ b/docs/handoff/20260611-handoff-65.md
@@ -0,0 +1,144 @@
+# Session handoff — 2026-06-11 (65)
+
+Sixty-fifth handover. Continues from handoff-64 (ADR-0047 demo
+overlays). This session designed and shipped **ADR-0048 — the `seed`
+fake-data generation command (SD1)**, Phase 1, end to end: an ADR with
+an extended fork dialogue + two `/runda` passes, then a phased
+test-first build.
+
+## §1. State at handoff
+
+**Branch:** `main`. **HEAD will be the doc-wrap-up commit** (see §6) —
+all seed work committed, nothing pending. Unpushed (push is the user's
+step; normal working state).
+
+**Tests: 2358 passing / 0 failing / 0 skipped / 1 ignored** (the
+long-standing `friendly` doctest). **Clippy clean** (nursery, all targets).
++68 over handoff-64's 2290.
+
+**`cargo sweep` run** at wrap-up: `target/` 1.6 G → 183 M.
+
+**This session's commits:**
+```
+202e25a feat(seed): fake-data generation library + fake dependency (P1.1)
+f1e9484 feat(seed): command plumbing + walking skeleton (P1.2)
+73493fa feat(seed): FK sampling, empty-parent error, block guard (P1.3a)
+9c13501 feat(seed): uniqueness, junction distinct-combos, IN-CHECK (P1.3b)
+0b3ab3c feat(seed): SeedResult outcome, capped preview, advisory, count cap (P1.3c)
+e6ff63d perf(seed): single-transaction multi-row insert path (P1.3d)
+fbd219b feat(seed): --seed flag, ambient wiring, and /runda hardening (P1.4 + DA)
+```
+(plus the earlier `4d0ae77` multi-tab-scope withdrawal and `0af7f56`
+ADR-0048 doc, and the wrap-up doc commit.)
+
+## §2. What `seed` does (Phase 1 — read ADR-0048)
+
+`seed <table> [count] [--seed <n>]` — populate a table with realistic
+fake data. **Available in both modes** (A1).
+
+- **Realistic, name-aware generation:** the **`fake` crate** (v5,
+ English) driven by a **type-gated heuristic catalogue**
+ (`src/seed/heuristics.rs`) — `email`→email, `first_name`→first name, `price`→
+ currency, etc., each only firing when the column *type* is
+ compatible. **Table-context** disambiguates `name`/`title`
+ (`products.name`→a hand-rolled **product** name, `users.name`→person,
+ `vendors.name`→company). **Bounded dates** (`dob`/`created_at`/
+ `date`/`timestamp` → recent windows, never "all of history", anchored
+ to a fixed reference epoch for reproducibility). Type-based fallback
+ otherwise.
+- **Uniqueness (D10):** the user-fillable PK, compound UNIQUE
+ constraints, single-column UNIQUE, and identifier-named columns
+ (`id`/`code`/…) stay distinct across the batch and vs existing rows;
+ **junction tables** get **distinct FK combinations** (capped at the
+ available product, reported). Identifier ints get a monotonic
+ sequence.
+- **FK (D14):** every FK column samples an existing parent row (compound
+ FK reads one consistent parent row); **empty parent → friendly
+ error**.
+- **`IN`-CHECK (D17):** a simple `col IN ('a','b')` CHECK becomes the
+ value source (enum-as-CHECK just works); complex CHECKs are flagged in
+ the advisory and best-effort generated (a violation rolls the batch
+ back).
+- **Reproducibility (D4):** `--seed <n>` → identical data on the same DB
+ state. **Holds with no exceptions** — serial (rowid/MAX+1), FK
+ (`ORDER BY`), **shortid (seeded RNG)**, all generators.
+- **Output:** the seeded-row count, a **capped preview** (first 20
+ rows), and a **Hint-styled advisory** naming enum-ish / underivable-
+ CHECK columns filled generically. Count cap 10 000; `seed t 0` no-op.
+- **Safety:** one **undo** step (snapshot wraps the whole seed);
+ **replay** re-runs it as a data write; the insert path is a single
+ transaction (O(N), atomic, commit-db-last preserved).
+
+## §3. Where the code lives
+
+- **`src/seed/`** — the pure generation library (no DB): `mod.rs`
+ (`ColumnSpec`, `Generator`, `SeedRng`, `make_rng`), `heuristics.rs`
+ (`choose_generator` + the catalogue + `is_enum_ish`), `generators.rs`
+ (`generate_value` + the `product` generator + bounded dates),
+ `check.rs` (`parse_in_check_values`). ~40 Tier-1 tests, deterministic.
+- **`src/db.rs`** — `do_seed` (+ `SeedColPlan`,
+ `sample_parent_key_tuples`, `seed_value_list_key`, `seed_max_int`, `SeedResult`,
+ `DEFAULT_SEED_COUNT`/`MAX_SEED_COUNT`/`SEED_PREVIEW_CAP`), the new
+ **`insert_one_row`** core extracted from `do_insert` (shared, no
+ tx/persist — so seed runs N rows in one tx), and the `Request::Seed` /
+ `Database::seed` / worker wiring.
+- **`src/dsl/grammar/data.rs`** — `SEED` `CommandNode`, `build_seed`,
+ the `--seed` flag grammar (`Seq[Flag("seed"), NumberLit]`, the first
+ DSL flag with a value). `Command::Seed` in `command.rs`.
+- **Runtime/render** — `CommandOutcome::Seed`,
+ `AppEvent::DslSeedSucceeded`, `App::handle_dsl_seed_success`. Catalog keys
+ `ok.rows_seeded` / `seed.capped` / `seed.advisory_generic` /
+ `help.data.seed` / `parse.usage.seed`.
+- **Tests** — `tests/it/seed.rs` (25 integration tests),
+ `tests/typing_surface/mod.rs` (`seed_completion_and_validity`),
+ `tests/it/parse_error_pedagogy.rs` (bare-`seed` near-miss row),
+ `src/app.rs` (two render tests), `src/dsl/shortid.rs`
+ (`generate_with_rng`).
+
+## §4. Process notes (the two `/runda` passes)
+
+- **Pre-build `/runda`** (on the ADR) found six blockers — undo
+ integration (D15), replay semantics (D16), `set`-value quoting (D2),
+ CHECK handling (D17), an advisory phase-ordering bug (D13), auto-show
+ flooding (D18) — all folded into ADR-0048 before any code; the three
+ genuine forks re-escalated and user-resolved.
+- **Post-implementation `/runda`** (on the whole implementation) found
+ **eight gaps**, all closed: FK-sampling determinism (→ `ORDER BY`),
+ **shortid not reproducible** (→ seeded RNG; fixed rather than just documented — the
+ user chose the fix), and six **untested ADR decisions** (D5 advanced
+ mode, D15 undo, D16 replay, D17 complex-CHECK advisory, atomic
+ rollback, zero-count) — tests added for each.
+
+## §5. Phase 2 (deferred — designed in ADR-0048, NOT built)
+
+These are the only seed pieces left; both have full designs in
+ADR-0048:
+
+1. **The `set` override clause (D2)** — `seed t 20 set role in
+ ('a','b'), status = 'x', work_addr as email, price between 10 and
+ 100`. Value / pick-from-list / explicit-generator / range, **quoted
+ literals** (grammar-consistent). This is the SD2 "override hooks"
+ core. The `ColumnSpec.check_in_values` → `PickFrom` plumbing and the
+ `Generator` vocabulary already exist; this adds the grammar + a `set`
+ clause that overrides the per-column plan.
+2. **Column-fill (`seed <table>.<column>`, D1 form 2)** — fill one
+ column across *existing* rows (an UPDATE). Refuses PK/autogen targets;
+ empty-table no-op.
+
+`requirements.md`: **SD1 `[x]`**, **SD2 `[/]`** (core done; the two
+above open), **A1 14/15** (only `hint`/**H2** unregistered).
+
+## §6. How to take over
+
+1. Read handoffs 63 → 64 → 65, `CLAUDE.md`, `docs/requirements.md`,
+ `docs/adr/0048-seed-fake-data-generation.md` (the whole thing —
+ D1–D18 + the as-built status block).
+2. **Seed is feature-complete for Phase 1; nothing pending.** Next
+ options (user's call): seed **Phase 2** (`set` clause + column-fill);
+ **H2 `hint`** (closes A1) — own ADR; **TT5 CI**; or the larger
+ **V4 journal** / **tutorial** ADRs.
+3. Two minor, user-deferred observations (non-blocking): the uniqueness
+ retry cap (`MAX_ATTEMPTS=200`) can cap a *medium* unique domain
+ slightly below its true size (junction/small domains are exact);
+ `literal_to_value` doesn't type-check an IN-CHECK literal vs a numeric
+ column (a malformed `int IN ('a')` CHECK fails cleanly at bind).
diff --git a/docs/handoff/20260611-handoff-66.md b/docs/handoff/20260611-handoff-66.md
new file mode 100644
index 0000000..70f8562
--- /dev/null
+++ b/docs/handoff/20260611-handoff-66.md
@@ -0,0 +1,145 @@
+# Session handoff — 2026-06-11 (66)
+
+Sixty-sixth handover. Continues from handoff-65 (ADR-0048 `seed`
+Phase 1). This session built **ADR-0048 Phase 2** end to end: the
+**`set` override clause** (D2) and the **`<table>.<column>`
+column-fill** form (D1 form 2) — the two surfaces Phase 1 deliberately
+deferred. Designed-then-DA-vetted (a `/runda` pass that caught a real
+ADR-vs-grammar conflict), then built test-first.
+
+## §1. State at handoff
+
+**Branch:** `main`. All Phase-2 work is in the working tree;
+**commits are pending the user's approval** (see §6). Unpushed is the
+normal working state.
+
+**Tests: 2400 passing / 0 failing / 0 skipped / 1 ignored** (the
+long-standing `friendly` doctest). **Clippy clean** (nursery, all
+targets). +42 over handoff-65's 2358.
+
+## §2. What landed (read ADR-0048 — Status + D1/D2/D9/D13)
+
+`seed <table>[.<column>] [count] [set <overrides>] [--seed <n>]`.
+
+- **`set` override clause (D2):** four forms, comma-separated —
+ `status = 'active'` (fixed), `role in ('a','b')` (pick-list),
+ `work_addr as email` (named generator), `price between 10 and 100`
+ (range; numeric **and quoted dates**). Type-aware; an override
+ **drops its column from the generic-fill advisory** (D13). Value
+ slots reuse `update`'s typed `current_column_value` (quoting
+ enforced structurally — a bare word is rejected).
+- **Column-fill (D1 form 2):** `seed users.email [set …]` fills one
+ column across **existing** rows (an UPDATE). Refuses PK / autogen
+ (`serial`/`shortid`/`blob`) targets; **empty table → friendly
+ no-op**; FK target samples the parent; UNIQUE/identifier target gets
+ collision-free values; **one undo step**; `set` may only adjust the
+ filled column; a row count is refused.
+- **Named-generator vocabulary (D9):** `src/seed/vocabulary.rs` —
+ `KNOWN_GENERATORS` + `generator_for_name` + `is_known_generator_prefix`,
+ the single source of truth for completion, validity, and the executor.
+- **Range generator:** `Generator::Range { low, high }` in
+ `src/seed/generators.rs`, interpreted per destination type;
+ `range_bounds_reason` validates compatibility before generation.
+- **Ambient wiring:** completion (generator names after `as`, the
+ `set <column>` and `.col` column slots, the `set` keyword); highlight
+ (new `HighlightClass::Function` → existing `tok_function`); validity
+ (new `IdentSource::Generators` — unknown generator flagged `[ERR]`;
+ unknown column in `set`/`.col` flagged via the existing Columns
+ path); help (`help.data.seed`); parse-error pedagogy near-miss rows;
+ the D13 advisory's **Phase-2/3 wording** (points at `set` and the
+ column-fill repair). Both modes (D5).
+
+## §3. The ADR amendment (a real DA find)
+
+The pre-build `/runda` pass found that **ADR-0048 D2's "dates stay
+unquoted" was impossible** — this DSL has **no date-literal token**
+(`Value` is `Number`/`Text`; dates are quoted strings validated by
+`bind_date`). Escalated to the user, who chose **quoted dates +
+amend the ADR** (the grammar-consistent option). D2 now carries a
+dated amendment; the range form uses `between '2023-01-01' and
+'2024-12-31'`. This was the only divergence from the ADR text; numbers
+remain unquoted.
+
+## §4. Where the code lives
+
+- **`src/dsl/command.rs`** — `Command::Seed` gains `target_column:
+ Option<String>` + `overrides: Vec<SeedOverride>`; new `SeedOverride`
+ / `SeedOverrideKind`.
+- **`src/dsl/grammar/data.rs`** — `SEED_SET_CLAUSE` + `SEED_DOT_COLUMN`
+ grammar; `SEED_GENERATOR` slot (`IdentSource::Generators`,
+ `HighlightClass::Function`); `build_seed` + the override fold
+ (`build_seed_overrides` / `parse_seed_override_tail`).
+- **`src/dsl/grammar/mod.rs`** — `IdentSource::Generators` +
+ `HighlightClass::Function`.
+- **`src/db.rs`** — `apply_seed_overrides` / `seed_override_plan` /
+ `seed_override_literal`; `do_seed_column_fill`; `do_seed` +
+ `Database::seed` + worker wiring threaded with the new params.
+- **`src/seed/`** — `vocabulary.rs` (new); `generators.rs` (range
+ generator + `range_bounds_reason`); `mod.rs` (`Generator::Range`).
+- **`src/completion.rs`** — generator candidates after `as`; generator
+ validity. **`src/input_render.rs`** — `"generator"` invalid-ident
+ kind. **`src/theme.rs`** — `Function → tok_function`.
+- **Catalog** — `help.data.seed`, `parse.usage.seed`,
+ `seed.advisory_generic` (Phase-2/3 wording) in `en-US.yaml`;
+ `keys.rs` placeholders updated.
+- **Tests** — `tests/it/seed.rs` (+~30: builder fold, executor
+ set/column-fill, undo, advanced mode), `src/seed/{vocabulary,
+ generators}.rs` (range + vocabulary units), `src/completion.rs`
+ (generator + column validity), `src/dsl/walker/highlight.rs`,
+ `tests/typing_surface/mod.rs` (completion slots),
+ `tests/it/parse_error_pedagogy.rs` (near-miss rows).
+
+## §5. Two implementation refinements vs. the ADR (both met the contract)
+
+- **Quoted dates** (the D2 amendment, §3).
+- **Value slots reuse `current_column_value`** (the `update … set`
+ typed slot) rather than the raw ADR-0026 expression operand — no
+ spurious column-ref match, typed narrowing, consistent with
+ `update`. The user-facing contract (quoted literals, type-aware) is
+ fully met.
+
+The `seed_take_value` / `seed_set_error` builder paths are
+drift-guards (the typed slots only ever match value literals, so a bare
+word is rejected at the grammar level) — they use the generic
+`parse.error_wrapper`, mirroring `expr::build_expr`.
+
+## §6. How to take over / next steps
+
+1. Read handoffs 64 → 65 → 66, `CLAUDE.md`, `docs/requirements.md`,
+ `docs/adr/0048-…md` (Status block + D1/D2/D9/D13 + the amendment).
+2. **Seed is feature-complete (SD1 + SD2).** `requirements.md`: **SD1
+ `[x]`, SD2 `[x]`**. The only open A1 gap is `hint`/**H2** (own ADR).
+3. **Commits pending approval.** Suggested split:
+ - `feat(seed): set override clause + column-fill (ADR-0048 Phase 2)`
+ — all `src/` + `tests/` changes.
+ - `docs: ADR-0048 Phase 2 implemented + handoff 66` — ADR / README /
+ requirements / this file.
+4. Next options (user's call): **H2 `hint`** (closes A1); **TT5 CI**;
+ the larger **V4 journal** / **tutorial** ADRs; or Tier-4 PTY (TT4).
+5. Consider a `cargo sweep` at this milestone (`target/` grows).
+
+## §7. Post-implementation `/runda` pass (done this session)
+
+A DA pass over the completed code found **no correctness bugs and no
+dropped requirements**; all D1–D18 acceptance criteria verified met,
+tests confirmed to catch regressions. One **design fork** was surfaced
+and **resolved by the user**:
+
+- **Bounded override × UNIQUE column** — a fixed value / too-short
+ pick-list on a single-column-UNIQUE target used to silently cap the
+ run (e.g. `seed users 100 set email = 'x'` → 1 row). Now a **friendly
+ error** up front (`seed_override_capacity_guard`, `src/db.rs`), for
+ both whole-row and column-fill; generators/ranges stay cap-based
+ (unbounded sources). ADR-0048 D2 documents it; two tests pin it.
+
+Remaining **non-blocking** edges (noted, not bugs):
+
+- Overriding an **FK column** with a literal: the override wins (D2); a
+ non-parent value fails safely through the FK-error layer.
+- **Column-fill of one column of a *compound* FK** samples that column
+ independently → an invalid tuple fails safely (UPDATE rejected,
+ rollback), never corrupts. Single-column FKs / non-FK columns are
+ exact.
+- The generator slot uses the **default candidate-ladder hint** (offers
+ the vocabulary), not a dedicated prose intro — discoverability is met
+ by completion; a prose intro is optional polish.
diff --git a/docs/handoff/20260612-handoff-67.md b/docs/handoff/20260612-handoff-67.md
new file mode 100644
index 0000000..65477e0
--- /dev/null
+++ b/docs/handoff/20260612-handoff-67.md
@@ -0,0 +1,119 @@
+# Session handoff — 2026-06-12 (67)
+
+Sixty-seventh handover. Continues directly from handoff-66 (ADR-0048
+`seed` Phase 2, committed). This was a **manual-testing pass**: the user
+exercised the app, found several rough edges, and we triaged each into
+*fix now* vs *file an issue*. Net result: **three bug fixes committed**
+and **three enhancement issues filed**.
+
+## §1. State at handoff
+
+**Branch:** `main`. Working tree **clean**; all work committed. Unpushed
+(push is the user's step).
+
+**Tests: 2407 passing / 0 failing / 0 skipped / 1 ignored** (the
+long-standing `friendly` doctest). **Clippy clean** (nursery, all
+targets). +7 over handoff-66's 2400.
+
+**Commits since handoff-65:**
+```
+f7155ce fix(input): thread the `:` one-shot escape into live SQL feedback
+4cacb82 fix(completion): don't flag a table alias used before its FROM clause
+c3e0103 fix(completion): flag-aware partial so a dash completes flags, not keywords
+30b2677 docs: ADR-0048 Phase 2 implemented + handoff 66
+a12facc feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
+```
+(`a12facc`/`30b2677` are the Phase-2 work documented in handoff-66.)
+
+## §2. Bug fixes this session (all committed, all tested)
+
+1. **`c3e0103` — flag completion ate the dash.** Typing a flag at a
+ flag position (`add 1:n relationship … -`) offered the `on` keyword
+ and, on accept, produced `-on` / `---create-fk`: the partial-token
+ walk stopped at `-`, so the dash was outside the replaced range.
+ Fix: flag-aware partial detection (a dash-prefixed token at a word
+ boundary is a flag-in-progress, **gated on a flag being expected** so
+ `where x = -5` stays a number) + a unified flag matcher
+ (`trim_start_matches('-')`). Affected **all** flags. 4 tests + 2
+ partial-flag snapshots updated (they'd captured the latent bug).
+
+2. **`4cacb82` — table alias flagged as an unknown column.** In a
+ SELECT, the projection (`sum(ol.count*…)`) can reference an alias
+ whose `FROM … OrderLines ol` sits *after* the cursor. The candidate
+ engine recovers that via the §10.6 full-input lookahead (ADR-0032),
+ but `invalid_ident_at_cursor` only walked text *before* the cursor —
+ so `ol` matched no scope and got a red "ERR" overlay on an otherwise
+ valid query. Fix: give the validity check the same full-input
+ lookahead and bail when the partial prefix-matches a binding's alias
+ or table. 1 test.
+
+3. **`f7155ce` — the `:` one-shot escape broke live SQL feedback.**
+ Submission strips the `:` (ADR-0003), but the *live* feedback kept it
+ in the buffer handed to the walker, which bailed at the `:`. Effect:
+ under `:`, Tab completed nothing and a valid query could flash `[ERR]`
+ — while the same line in full `mode advanced` worked. (The hint
+ already stripped it, hence "hint shows the name but Tab does
+ nothing".) Fix: one shared `App::feedback_view()` (the `:`-stripped
+ SQL + mapped cursor + stripped offset) routed through completion (with
+ a `replaced_range` offset shift), the validity verdict, and rendering
+ (new `render_input_runs_feedback` highlights/overlays the view shifted
+ by the offset; the `:` renders as plain text); the ambient hint was
+ consolidated onto it (removing the duplicate `strip_one_shot_prefix`).
+ 3 tests + the 9 existing colon tests still green.
+
+## §3. Investigated, **no code change** (working as designed)
+
+- **Comma-`FROM` implicit join** (`select … from A, B, C`) is
+ **deliberately rejected** — ADR-0032 §11 / OOS-3: *"comma-FROM teaches
+ habits we do not want to encourage; `CROSS JOIN` covers the same shape
+ explicitly."* The explicit equivalent (`CROSS JOIN … WHERE …`) works.
+- **`sum(…)` returning one row** with no `GROUP BY` is **correct SQL**
+ (the aggregate collapses the result to one row; SQLite/the playground
+ allow the non-aggregated columns where Postgres would error). The
+ user's query needed `group by o.id`. Verified (1 row).
+
+## §4. Open issues filed this session — **next session's candidates**
+
+All on `git.lazyeval.net/oli/rdbms-playground`, label `enhancement`:
+
+- **#26 — `seed <table>` hint omits the optional count.** A complete
+ command's optional positional *number* has no Tab candidate, so it's
+ invisible. `IntroProse` doesn't fit (it only fires for incomplete
+ required slots; the completing Seq match clears the hint). Needs a way
+ to advertise optional positional non-keyword args. *(I attempted +
+ reverted this during Phase 2; see the analysis in the issue.)*
+- **#27 — Bottom status line: keybindings-only, context- and
+ state-aware.** Per-nav-focus keybindings (Input vs sidebar), **include
+ transient states** (Tab-cycle, history) — user preference — and add
+ `mode advanced` to the empty-input hint. May warrant a small ADR.
+- **#28 — Reconsider relationship prose in `add column` (incidental DDL)
+ confirmations.** Currently by design (ADR-0044 §1 keeps prose, not
+ diagrams, for incidental DDL). **User preference: do NOT show the
+ `References:` / `Referenced by:` block** in the add-column
+ confirmation at all — focus on the change just made. This revisits a
+ decided area → land as a **new ADR** superseding the relevant part of
+ ADR-0016 §5 / ADR-0044 §1; confirm scope (just `add column`, or all
+ incidental DDL).
+
+## §5. Other open work (unchanged from handoff-66 §6)
+
+`seed` is **feature-complete** (`requirements.md` SD1 `[x]`, SD2 `[x]`).
+Remaining roadmap, user's call:
+
+- **H2 `hint`** — the last A1 gap (its own ADR).
+- **TT5 CI** — test infra exists; no CI workflow yet.
+- **TT4 PTY (Tier-4)** — ADR-0008 specifies it; not wired.
+- Larger: **V4 journal**, **tutorial/lesson system** (each needs an ADR).
+
+A possible quick follow-up: a friendlier "use an explicit `JOIN`"
+parse-error for comma-`FROM` (§3, first bullet) — not filed; mention if wanted.
+
+## §6. How to take over
+
+1. Read handoffs 65 → 66 → 67, `CLAUDE.md`, `docs/requirements.md`.
+2. `seed` Phase 2 is done (ADR-0048 Status block is current). The
+ manual-testing fixes (§2) are committed and green.
+3. Pick from §4 (filed issues #26/#27/#28) or §5 (roadmap). #28 is a
+ decision/ADR; #27 is UX (maybe ADR); #26 is a hint-system enhancement.
+4. Consider a `cargo sweep` at this milestone (`target/` grows across
+ sessions).
diff --git a/docs/requirements.md b/docs/requirements.md
index 68fa1fe..2222f11 100644
--- a/docs/requirements.md
+++ b/docs/requirements.md
@@ -88,12 +88,16 @@ since ADR-0027.)
because relationships are cross-table rather than per-table, they
get their own sibling panel stacked below the tables list, not
nested items within it — user-confirmed 2026-06-10.)*
-- [/] **S3** Output panel renders a visualization of the
- currently selected item and supports multiple tabs.
- *(Partial, verified 2026-06-07: single-element structure
- visualisation renders (`output_render.rs:82-180`); **multiple
- tabs are not implemented** — the output is one line buffer, no
- tab abstraction. Same multi-tab gap as V2.)*
+- [x] **S3** Output panel renders a visualization of the
+ currently selected item.
+ *(Satisfied: single-element structure visualisation renders
+ (`output_render.rs:82-180`) — select a table, see its columns /
+ types / keys. **Multi-tab clause withdrawn 2026-06-11** (user
+ decision): the original wording promised "and supports multiple
+ tabs", but the output model is settling on the single scrollable
+ **V4 journal** rather than switchable tabs, so the tab clause is
+ dropped from tracked scope. A future return to tabbed output would
+ be a fresh requirement, not this one. Same withdrawal as V2.)*
- [x] **S4** Hint area below the input field, showing hints about
the current input or last error.
*(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` /
@@ -242,13 +246,12 @@ since ADR-0027.)
available in both modes: `save`, `save as`, `load`, `new`,
`rebuild`, `export`, `import`, `seed`, `replay`, `undo`,
`redo`, `mode`, `help`, `hint`, `quit`.
- *(Partial, verified 2026-06-07: 13 of 15 implemented and
- available in both modes — `quit`/`q`, `mode simple|advanced`,
- `help`, `save`, `save as`, `load`, `new`, `rebuild`, `export`,
- `import`, `replay`, `undo`, `redo` (REGISTRY in
- `grammar/app.rs:249-333`). **Missing: `seed`** (tracked as SD1)
- **and `hint`** (tracked as H2) — neither is registered. A1
- closes when SD1 + H2 land.)*
+ *(Partial: **14 of 15** implemented and available in both modes —
+ `quit`/`q`, `mode simple|advanced`, `help`, `save`, `save as`,
+ `load`, `new`, `rebuild`, `export`, `import`, `replay`, `undo`,
+ `redo`, and now **`seed`** (ADR-0048 / SD1, done 2026-06-11).
+ **Only `hint`** (tracked as H2) remains unregistered. A1 closes
+ when H2 lands.)*
## DSL data commands
@@ -469,15 +472,18 @@ since ADR-0027.)
"relationship-relevant" reach). The §3 last-resort helper line was
considered and rejected. Two `/runda` passes (design + implementation).
Selection-nav and the broader journal direction remain in V4.)*
-- [/] **V2** SQL query results render as a dynamic table view in
- the output pane, with multiple result tabs supported.
- *(Partial, verified 2026-06-07: the **table view** is done —
- `output_render.rs:38-72` `render_data_table` renders a
- box-drawing frame with aligned columns (numeric right, text
- left) and NULL/control-char sanitisation, for `show data` and
- after every write (ADR-0014). **Missing: multiple result tabs**
- — the output is a single `VecDeque` with no tab
- abstraction (same gap as S3). Multi-tab sits in V4 territory.)*
+- [x] **V2** SQL query results render as a dynamic table view in
+ the output pane.
+ *(Satisfied: the **table view** is done — `output_render.rs:38-72`
+ `render_data_table` renders a box-drawing frame with aligned
+ columns (numeric right, text left) and NULL/control-char
+ sanitisation, for `show data` and after every write (ADR-0014).
+ **Multi-tab clause withdrawn 2026-06-11** (user decision): the
+ original wording promised "with multiple result tabs supported";
+ retained multi-result output, if ever wanted, now belongs to the
+ single scrollable **V4 journal** direction rather than switchable
+ tabs, so the tab clause is dropped from tracked scope. A future
+ return would be a new requirement. Same withdrawal as S3.)*
- [~] **V3** Full ER-diagram export (whole-database graph, viewed
outside the TUI) — low priority; design and ADR pending.
- [~] **V4** Output panel as a *scrollable per-session log* with
@@ -492,7 +498,13 @@ since ADR-0027.)
*(Partial: PageUp / PageDown scrolling of the existing line
buffer is in, with new output snapping the view to the most
recent. The full V4 scope — smart structure rendering, log
- styling, Markdown export, scroll indicator — remains pending.)*
+ styling, Markdown export, scroll indicator — remains pending.
+ **As of 2026-06-11 this journal model is the sole tracked
+ direction for evolving the output pane:** the competing multi-tab
+ output alternative (the trailing clauses of S3 and V2) was
+ withdrawn from scope by user decision, so retained / multi-result
+ output, if pursued, is folded into this journal rather than into
+ switchable tabs.)*
- [x] **V5** `show []` family of commands for
redisplaying schema info on demand.
*(Done 2026-06-07: `show table ` + `show data
`
@@ -652,11 +664,39 @@ since ADR-0027.)
## Sample data / seeding
-- [ ] **SD1** `seed <table> [count]` generates plausible fake
+- [x] **SD1** `seed <table> [count]` generates plausible fake
data; junction tables are seeded with valid foreign-key
references drawn from existing parent rows.
-- [~] **SD2** Detailed seeding rules (per-type generators,
- locale, determinism, override hooks) — design and ADR pending.
+ *(Done 2026-06-11 via **ADR-0048** (commits `202e25a`→`fbd219b`).
+ Whole-row `seed <table> [count] [--seed <n>]` with realistic
+ name-aware generation (`fake` crate + a type-gated heuristic
+ catalogue, table-context name disambiguation, hand-rolled
+ `product` generator, bounded dates), identifier + constraint
+ uniqueness, **junction tables seeded with valid FK references
+ drawn from existing parent rows** (distinct combinations, capped;
+ empty-parent friendly error), `IN`-CHECK derivation, a
+ required-column block guard, undo as one step, replay as a data
+ write, a capped auto-show + enum/CHECK advisory, and an O(N)
+ single-transaction path. The `set` override clause and
+ `<table>.<column>` column-fill landed in SD2 Phase 2, below.)*
+- [x] **SD2** Detailed seeding rules (per-type generators,
+ locale, determinism, override hooks).
+ *(Done 2026-06-11 via **ADR-0048** (Phase 1 + Phase 2). Phase 1:
+ type-gated name-aware per-type generators with a `fake`-backed
+ catalogue + table-context disambiguation, **`--seed` determinism**
+ (serial/FK/shortid all reproducible — D4 holds with no
+ exceptions), English-only locale (X2). **Phase 2 (the "override
+ hooks" core):** the `set` override clause — fixed value /
+ pick-from-list / `as <generator>` / `between` range (numeric and
+ **quoted** dates, type-aware; an override drops the column from
+ the generic-fill advisory) — and the `<table>.<column>`
+ column-fill form (an UPDATE over existing rows, refusing
+ PK/autogen targets, empty-table no-op, FK/unique-respecting, one
+ undo step). Adds the `KNOWN_GENERATORS` vocabulary (D9), a range
+ `Generator`, and full completion / highlight / validity / help /
+ parse-error-pedagogy wiring. Deferred SD2 increments:
+ user-defined custom generators, NULL injection, multi-locale,
+ recursive parent auto-seed.)*
## Query analysis
diff --git a/src/app.rs b/src/app.rs
index 56fc7fc..2863382 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -646,6 +646,44 @@ impl App {
}
}
+ /// The input view the **live-feedback** walkers (completion, ambient
+ /// hint, validity verdict, highlight overlays) should see, plus the
+ /// byte offset stripped from the front and the cursor mapped into the
+ /// view.
+ ///
+ /// Under the `:` one-shot escape (ADR-0003) the buffer carries a
+ /// leading `:` (and an auto-inserted space) that is *not* advanced
+ /// SQL — submission already strips it before parsing, but the live
+ /// feedback did not, so the walker bailed at the `:` and resolved
+ /// nothing (no completion / hint, a spurious error overlay). This
+ /// returns the stripped SQL exactly as submission sees it, so the
+ /// feedback matches a real advanced-mode session. `offset` maps any
+ /// walker-returned byte position (completion `replaced_range`,
+ /// overlay spans) back to real-buffer coordinates.
+ ///
+ /// For every non-one-shot input this is the identity
+ /// `(&input, cursor, 0)`.
+ #[must_use]
+ pub fn feedback_view(&self) -> (&str, usize, usize) {
+ if matches!(self.effective_mode(), EffectiveMode::AdvancedOneShot) {
+ // The first non-whitespace char is the `:` (per
+ // `effective_mode`); strip up to and including it, then any
+ // following whitespace — mirroring submission's
+ // `trimmed[1..].trim()`.
+ let leading_ws = self.input.len() - self.input.trim_start().len();
+ let mut offset = leading_ws + 1; // past the `:`
+ while offset < self.input.len()
+ && self.input.as_bytes()[offset].is_ascii_whitespace()
+ {
+ offset += 1;
+ }
+ let view = &self.input[offset..];
+ let cursor = self.input_cursor.saturating_sub(offset).min(view.len());
+ return (view, cursor, offset);
+ }
+ (&self.input, self.input_cursor, 0)
+ }
+
/// The validity-indicator verdict for the current input
/// (ADR-0027 §3). `None` when the input would run clean.
///
@@ -667,11 +705,10 @@ impl App {
EffectiveMode::AdvancedPersistent
| EffectiveMode::AdvancedOneShot => Mode::Advanced,
};
- crate::dsl::walker::input_verdict_in_mode(
- &self.input,
- Some(&self.schema_cache),
- mode,
- )
+ // Strip the `:` one-shot prefix so the walker verdicts the SQL
+ // itself, not the escape marker (which it can't parse).
+ let (view, _cursor, _offset) = self.feedback_view();
+ crate::dsl::walker::input_verdict_in_mode(view, Some(&self.schema_cache), mode)
}
/// Process one event from the runtime, mutating state and
@@ -771,6 +808,10 @@ impl App {
self.handle_dsl_insert_success(&command, &result);
Vec::new()
}
+ AppEvent::DslSeedSucceeded { command, result } => {
+ self.handle_dsl_seed_success(&command, &result);
+ Vec::new()
+ }
AppEvent::DslUpdateSucceeded {
command,
result,
@@ -1395,13 +1436,7 @@ impl App {
}
fn start_or_complete_at(&mut self, multi_start_idx: usize) {
- let cursor = self.input_cursor.min(self.input.len());
- let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
- &self.input,
- cursor,
- &self.schema_cache,
- self.effective_mode().as_mode(),
- ) else {
+ let Some(comp) = self.completion_for_feedback() else {
return;
};
if comp.candidates.len() == 1 {
@@ -1413,13 +1448,7 @@ impl App {
}
fn start_or_complete_last(&mut self) {
- let cursor = self.input_cursor.min(self.input.len());
- let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
- &self.input,
- cursor,
- &self.schema_cache,
- self.effective_mode().as_mode(),
- ) else {
+ let Some(comp) = self.completion_for_feedback() else {
return;
};
if comp.candidates.len() == 1 {
@@ -1430,6 +1459,22 @@ impl App {
}
}
+ /// Completion at the cursor, computed against the `:`-stripped
+ /// feedback view (ADR-0003 one-shot) with its `replaced_range`
+ /// mapped back to real-buffer coordinates so `commit_*` edit the
+ /// right span. Identity for non-one-shot input (offset 0).
+ fn completion_for_feedback(&self) -> Option {
+ let (view, view_cursor, offset) = self.feedback_view();
+ let mut comp = crate::completion::candidates_at_cursor_in_mode(
+ view,
+ view_cursor.min(view.len()),
+ &self.schema_cache,
+ self.effective_mode().as_mode(),
+ )?;
+ comp.replaced_range = (comp.replaced_range.0 + offset, comp.replaced_range.1 + offset);
+ Some(comp)
+ }
+
/// Single-candidate commit: insert " " (with trailing
/// space) and DO NOT create a memo. The user can keep
/// typing or press Tab again to fresh-complete at the new
@@ -2072,6 +2117,39 @@ impl App {
}
}
+ /// Render a successful `seed` (ADR-0048): the ✓ echo, the seeded-row
+ /// count (with a cap note when the unique-value space ran out), the
+ /// capped preview table (D18), and a Hint-styled advisory naming
+ /// columns filled with generic text that look like fixed value sets
+ /// (D12/D13).
+ fn handle_dsl_seed_success(&mut self, command: &Command, result: &crate::db::SeedResult) {
+ self.note_ok_summary(command);
+ let mut summary = crate::t!(
+ "ok.rows_seeded",
+ count = result.produced,
+ table = result.table
+ );
+ if result.produced < result.requested {
+ summary.push(' ');
+ summary.push_str(&crate::t!("seed.capped", requested = result.requested));
+ }
+ self.note_system(summary);
+ for line in crate::output_render::render_data_table(&result.data) {
+ self.note_system(line);
+ }
+ if !result.advisory_columns.is_empty() {
+ // `column` (the first advised column) seeds the concrete
+ // repair examples (D13 Phase 2/3 wording); `columns` lists
+ // them all.
+ self.push_category_three_prose(crate::t!(
+ "seed.advisory_generic",
+ columns = result.advisory_columns.join(", "),
+ column = result.advisory_columns[0],
+ table = result.table
+ ));
+ }
+ }
+
fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) {
self.note_ok_summary(command);
self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected));
@@ -2390,6 +2468,9 @@ impl App {
// the executor), like the named DSL drop.
C::SqlDropIndex { .. } => (Operation::DropIndex, None, None),
C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None),
+ // Seed generates inserts; FK/constraint failures read as
+ // insert errors (ADR-0048).
+ C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None),
C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None),
C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None),
C::ShowData { name, .. } | C::ShowTable { name } => {
@@ -4936,6 +5017,86 @@ mod tests {
assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent);
}
+ /// Build a two-table cache (`Orders(id, customer_id)` +
+ /// `Customers(id, name)`) for the `:` one-shot SQL-feedback tests.
+ fn install_join_schema(app: &mut App) {
+ use crate::completion::TableColumn;
+ use crate::dsl::types::Type;
+ app.schema_cache.tables = vec!["Orders".into(), "Customers".into()];
+ app.schema_cache.table_columns.insert(
+ "Orders".into(),
+ vec![TableColumn::new("id", Type::Serial), TableColumn::new("customer_id", Type::Int)],
+ );
+ app.schema_cache.table_columns.insert(
+ "Customers".into(),
+ vec![TableColumn::new("id", Type::Serial), TableColumn::new("name", Type::Text)],
+ );
+ for t in app.schema_cache.tables.clone() {
+ for c in &app.schema_cache.table_columns[&t] {
+ app.schema_cache.columns.push(c.name.clone());
+ }
+ }
+ }
+
+ #[test]
+ fn colon_one_shot_gives_sql_completion_the_stripped_view() {
+ // Bug (manual testing): the `:` one-shot escape (ADR-0003) left
+ // the leading `:` in the buffer passed to the live SQL feedback,
+ // so the walker bailed at `:` and Tab completed nothing — while
+ // the identical line in full `mode advanced` completed. Now the
+ // feedback view strips the `:`, so both behave the same.
+ let body = "select c.name from Orders o join Customers c on c.id=o.cu";
+
+ // Full advanced mode: completes `o.cu` → `o.customer_id`.
+ let mut adv = App::new();
+ adv.mode = Mode::Advanced;
+ install_join_schema(&mut adv);
+ type_str(&mut adv, body);
+ adv.update(key(KeyCode::Tab));
+ assert!(
+ adv.input.ends_with("o.customer_id "),
+ "full advanced should complete: {:?}",
+ adv.input
+ );
+
+ // `:` one-shot from simple mode: must complete the same way, and
+ // the `:` prefix must be preserved in the buffer.
+ let mut one = App::new();
+ one.mode = Mode::Simple;
+ install_join_schema(&mut one);
+ one.update(key(KeyCode::Char(':')));
+ type_str(&mut one, body);
+ assert_eq!(one.effective_mode(), EffectiveMode::AdvancedOneShot);
+ one.update(key(KeyCode::Tab));
+ assert!(
+ one.input.trim_start().starts_with(':'),
+ "the `:` prefix is kept: {:?}",
+ one.input
+ );
+ assert!(
+ one.input.ends_with("o.customer_id "),
+ "`:` one-shot must complete the SQL column too: {:?}",
+ one.input
+ );
+ }
+
+ #[test]
+ fn colon_one_shot_validity_is_clean_for_a_valid_query() {
+ // A *valid* `:`-prefixed query must not light the `[ERR]`
+ // indicator (the walker used to choke on the `:` and always
+ // report Error).
+ let mut app = App::new();
+ install_join_schema(&mut app);
+ app.update(key(KeyCode::Char(':')));
+ type_str(&mut app, "select name from Customers");
+ assert_eq!(
+ app.input_validity_verdict(),
+ None,
+ "a valid one-shot query should verdict clean, got {:?}",
+ app.input_validity_verdict(),
+ );
+ }
+
#[test]
fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() {
let mut app = App::new();
@@ -6223,6 +6384,80 @@ mod tests {
);
}
+ #[test]
+ fn seed_success_renders_count_preview_and_advisory() {
+ // ADR-0048: handle_dsl_seed_success renders the seeded-row count,
+ // the preview table, and the enum/CHECK advisory.
+ let mut app = App::new();
+ app.output
+ .push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple));
+ app.update(AppEvent::DslSeedSucceeded {
+ command: Command::Seed {
+ table: "users".to_string(),
+ target_column: None,
+ count: Some(20),
+ overrides: Vec::new(),
+ rng_seed: None,
+ },
+ result: crate::db::SeedResult {
+ table: "users".to_string(),
+ requested: 20,
+ produced: 20,
+ data: crate::db::DataResult {
+ table_name: "users".to_string(),
+ columns: vec!["name".to_string()],
+ column_types: vec![None],
+ rows: vec![vec![Some("Alice".to_string())]],
+ },
+ advisory_columns: vec!["status".to_string()],
+ },
+ });
+ let texts: Vec<String> = app.output.iter().map(|l| l.text.clone()).collect();
+ assert!(
+ texts.iter().any(|t| t.contains("20 row(s) seeded into users")),
+ "seeded-row count surfaced: {texts:?}",
+ );
+ assert!(
+ texts.iter().any(|t| t.contains("status") && t.contains("generic text")),
+ "the advisory names the enum-ish column: {texts:?}",
+ );
+ }
+
+ #[test]
+ fn seed_success_reports_a_cap() {
+ // produced < requested → the cap note appears next to the count.
+ let mut app = App::new();
+ app.output
+ .push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple));
+ app.update(AppEvent::DslSeedSucceeded {
+ command: Command::Seed {
+ table: "J".to_string(),
+ target_column: None,
+ count: Some(10),
+ overrides: Vec::new(),
+ rng_seed: None,
+ },
+ result: crate::db::SeedResult {
+ table: "J".to_string(),
+ requested: 10,
+ produced: 4,
+ data: crate::db::DataResult {
+ table_name: "J".to_string(),
+ columns: Vec::new(),
+ column_types: Vec::new(),
+ rows: Vec::new(),
+ },
+ advisory_columns: Vec::new(),
+ },
+ });
+ let texts: Vec<String> = app.output.iter().map(|l| l.text.clone()).collect();
+ assert!(
+ texts.iter().any(|t| t.contains("4 row(s) seeded into J")
+ && t.contains("of 10 requested")),
+ "the cap note surfaces requested vs produced: {texts:?}",
+ );
+ }
+
#[test]
fn sql_delete_returning_renders_cascade_and_result_table() {
// ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade
diff --git a/src/completion.rs b/src/completion.rs
index 5ca535a..38bf3bd 100644
--- a/src/completion.rs
+++ b/src/completion.rs
@@ -120,7 +120,13 @@ impl SchemaCache {
IdentSource::Columns => &self.columns,
IdentSource::Relationships => &self.relationships,
IdentSource::Indexes => &self.indexes,
- IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[],
+ // Curated / invented sources never come from the schema
+ // cache — `Generators` candidates are supplied separately
+ // from the `seed` vocabulary (ADR-0048 D9).
+ IdentSource::NewName
+ | IdentSource::Types
+ | IdentSource::Generators
+ | IdentSource::Free => &[],
}
}
@@ -327,6 +333,37 @@ pub fn candidates_at_cursor_with_in_mode(
break;
}
}
+
+ // Flag-aware extension. The plain walk above stops at `-`, so a
+ // flag the user is mid-typing (`-`, `--`, `--all`, `--create-fk`)
+ // leaves an *empty* partial sitting just after the dash(es) — which
+ // made the engine offer every keyword (a `-` prefix-matches nothing,
+ // so the empty-prefix path let `on` through) and, worse, replace an
+ // empty range so accepting produced `-on` / `---create-fk`. When a
+ // dash-prefixed token sits at a word boundary AND a flag is actually
+ // expected here, treat the whole dash-run-plus-body as the partial so
+ // it is matched and replaced wholesale. The "flag is expected" gate
+ // (one cheap probe on the pre-dash prefix) keeps a signed number /
+ // minus (`where x = -5`) from being mis-read as a flag.
+ {
+ let mut run = cursor;
+ while run > 0 {
+ let p = bytes[run - 1];
+ if p.is_ascii_alphanumeric() || p == b'_' || p == b'-' {
+ run -= 1;
+ } else {
+ break;
+ }
+ }
+ let word_boundary = run == 0 || bytes[run - 1].is_ascii_whitespace();
+ if run < cursor && bytes[run] == b'-' && word_boundary && run < start {
+ let pre = crate::dsl::walker::completion_probe_in_mode(&input[..run], cache, mode);
+ if pre.expected.iter().any(|e| matches!(e, Expectation::Flag(_))) {
+ start = run;
+ }
+ }
+ }
+
let partial_prefix = input[start..cursor].to_string();
let leading = &input[..start];
@@ -623,29 +660,19 @@ pub fn candidates_at_cursor_with_in_mode(
// Source 1.55: flag candidates (`--name`). Surfaced as a
// distinct CandidateKind so the hint panel can colour them
// with `tok_flag` (matching how they'll appear after
- // insertion). The standard prefix matcher walks back over
- // alphanumeric + underscore, which does NOT cross `-`, so
- // when the user types `--all` the partial is `all` — match
- // the flag's body against that. Otherwise match the full
- // `--name` against the partial (which may be empty or start
- // with `--`).
+ // insertion). The flag-aware partial detection above captures any
+ // leading dash-run, so the partial is one of: empty, all-dashes
+ // (`-` / `--`), or `[-]+body`. Stripping the leading dashes and
+ // matching the remainder against the flag *body* handles all of
+ // them uniformly (empty / all-dashes → match every flag).
+ let flag_needle = partial_prefix.trim_start_matches('-').to_lowercase();
let flags: Vec = expected
.iter()
.filter_map(|e| match e {
Expectation::Flag(name) => Some(*name),
_ => None,
})
- .filter(|body| {
- if partial_prefix.starts_with("--") {
- format!("--{body}")
- .to_lowercase()
- .starts_with(&lowered_prefix)
- } else if partial_prefix.is_empty() {
- true
- } else {
- body.to_lowercase().starts_with(&lowered_prefix)
- }
- })
+ .filter(|body| body.to_lowercase().starts_with(&flag_needle))
.map(|body| format!("--{body}"))
.collect();
@@ -709,6 +736,22 @@ pub fn candidates_at_cursor_with_in_mode(
} else {
Vec::new()
};
+ // Source 1.9: fake-data generator names (ADR-0048 D9). At the
+ // `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
+ // curated vocabulary is offered so a learner can discover `email` /
+ // `product` / … by Tab. Same `Function` kind / `tok_function` colour
+ // as SQL functions (no new theme colour — ADR-0048 §Grammar).
+ let has_generator_slot = expected
+ .iter()
+ .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
+ if has_generator_slot {
+ functions.extend(
+ crate::seed::KNOWN_GENERATORS
+ .iter()
+ .filter(|g| matches_prefix(g))
+ .map(|g| (*g).to_string()),
+ );
+ }
// Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation.
@@ -1200,6 +1243,45 @@ pub fn invalid_ident_at_cursor_in_mode(
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None;
}
+ // A bare ident at a SQL expression slot may be a **table alias / name**
+ // the user is mid-typing as a qualifier (`ol` in `sum(ol.count)`). The
+ // defining FROM clause can sit *after* the cursor — the projection
+ // references it — so the leading-only walk has an empty from-scope and
+ // would wrongly flag the alias as an unknown column. Recover the scope
+ // from the FULL input (mirrors the §10.6 edit-an-existing-query
+ // lookahead the candidate engine uses for column narrowing) and bail
+ // when the partial prefix-matches a binding's alias or table name.
+ if has_sql_expr_slot {
+ let full = crate::dsl::walker::completion_probe_in_mode(input, cache, mode);
+ let lowered = partial.to_lowercase();
+ let matches_qualifier = full.from_scope.iter().any(|b| {
+ b.alias
+ .as_deref()
+ .is_some_and(|a| a.to_lowercase().starts_with(&lowered))
+ || b.table.to_lowercase().starts_with(&lowered)
+ });
+ if matches_qualifier {
+ return None;
+ }
+ }
+ // ADR-0048 D9: the `seed … set <col> as <generator>` slot is a curated
+ // vocabulary (`IdentSource::Generators`), not a schema source, so the
+ // schema-column check below would never see it. A partial that
+ // prefix-matches a known generator is an in-progress name; anything
+ // else is an unknown generator → flag it `[ERR]` while typing.
+ let has_generator_slot = expected
+ .iter()
+ .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
+ if has_generator_slot {
+ if crate::seed::is_known_generator_prefix(partial) {
+ return None;
+ }
+ return Some(InvalidIdent {
+ range: (start, cursor),
+ found: partial.to_string(),
+ source: IdentSource::Generators,
+ });
+ }
// Find every schema-listable source in the expected list.
let sources: Vec = expected
.iter()
@@ -1488,6 +1570,71 @@ mod tests {
);
}
+ #[test]
+ fn single_dash_offers_flags_not_keywords_and_replaces_the_dash() {
+ // Bug (manual testing): `add 1:n relationship … -` (one dash)
+ // offered the `on` keyword *and* `--create-fk`, and accepting
+ // produced `-on` / `---create-fk` because the lone `-` was not
+ // part of the replaced range. A dash at a flag position is a
+ // flag-in-progress: offer flags, exclude keywords, replace the
+ // dash on accept.
+ let input = "add 1:n relationship from X.a to Y.b -"; // cursor goes at the end, right after the lone dash
+ let c = candidates_at_cursor(input, input.len(), &SchemaCache::default())
+ .expect("a `-` at a flag position offers candidates");
+ let texts: Vec<&str> = c.candidates.iter().map(|x| x.text.as_str()).collect();
+ assert!(texts.contains(&"--create-fk"), "should offer --create-fk: {texts:?}");
+ assert!(!texts.contains(&"on"), "must NOT offer `on` after a dash: {texts:?}");
+ assert_eq!(
+ c.replaced_range,
+ (input.len() - 1, input.len()), // exactly the one trailing `-` byte
+ "the `-` must be inside the replaced range so accept yields `--create-fk`",
+ );
+ }
+
+ #[test]
+ fn double_dash_replaces_both_dashes_on_accept() {
+ let input = "delete from T --"; // a bare `--`: a flag with no body typed yet
+ let c = candidates_at_cursor_in_mode(
+ input,
+ input.len(), // cursor at the end of the input
+ &SchemaCache::default(),
+ Mode::Simple,
+ )
+ .expect("`--` offers the flag");
+ assert!(c.candidates.iter().any(|x| x.text == "--all-rows"));
+ assert_eq!(
+ c.replaced_range,
+ (input.len() - 2, input.len()), // the two trailing dash bytes
+ "both dashes are replaced so accept yields `--all-rows`, not `----all-rows`",
+ );
+ }
+
+ #[test]
+ fn dash_at_a_value_position_is_not_treated_as_a_flag() {
+ // `show data T where x = -5` — the `-` is a sign, not a flag.
+ // No flag is expected here, so the dash must not be swallowed
+ // into a flag partial: the partial stays `5` (the original
+ // value-operand behaviour), and no `--…` candidate appears.
+ let mut s = SchemaCache::default();
+ s.tables.push("T".into());
+ s.columns.push("x".into());
+ let input = "show data T where x = -5";
+ if let Some(c) = // NOTE(review): a `None` result skips every assertion below, so the test then passes vacuously
+ candidates_at_cursor_in_mode(input, input.len(), &s, Mode::Simple)
+ {
+ assert!(
+ !c.candidates.iter().any(|x| x.text.starts_with("--")),
+ "no flags at a value position: {:?}",
+ c.candidates,
+ );
+ assert_eq!(
+ c.replaced_range,
+ (input.len() - 1, input.len()), // just the final `5` byte
+ "only the `5` is the partial; the `-` (sign) is not captured",
+ );
+ }
+ }
+
#[test]
fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() {
// The same optional-flag class: `drop column … [--cascade]`.
@@ -2606,6 +2753,70 @@ mod tests {
);
}
+ #[test]
+ fn invalid_ident_does_not_flag_a_table_alias_used_before_its_from_clause() {
+ // Manual-testing bug: in `select … sum(ol.count*…) … from … OrderLines ol …`
+ // the projection references alias `ol` whose FROM binding sits
+ // *after* the cursor. The leading-only walk had an empty from-scope
+ // and wrongly flagged `ol` as an unknown column (a red "ERR" overlay
+ // on an otherwise-valid query). The full-input lookahead must
+ // recover the scope (ADR-0032 §10.6) so `ol` is not flagged.
+ use crate::dsl::types::Type;
+ let mut s = SchemaCache::default();
+ s.tables.push("OrderLines".into());
+ s.columns.push("count".into());
+ s.table_columns
+ .insert("OrderLines".into(), vec![TableColumn::new("count", Type::Int)]);
+ let input = "select sum(ol.count) from OrderLines ol"; // the alias `ol` is bound only at the very end
+ let cursor = input.find("ol.count").unwrap() + 2; // right after `ol`
+ assert!(
+ invalid_ident_at_cursor_in_mode(input, cursor, &s, Mode::Advanced).is_none(),
+ "a table alias used before its FROM clause must not be flagged as a bad column",
+ );
+ }
+
+ #[test]
+ fn invalid_ident_fires_for_unknown_generator_after_as() {
+ // ADR-0048 D9: an unknown name at the `set
as ` slot is
+ // flagged `[ERR]` while typing.
+ let cache = two_table_schema();
+ let input = "seed a set name as bogus";
+ let inv = invalid_ident_at_cursor(input, input.len(), &cache)
+ .expect("unknown generator must flag");
+ assert_eq!(inv.found, "bogus");
+ assert_eq!(inv.source, IdentSource::Generators);
+ }
+
+ #[test]
+ fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
+ // ADR-0048: an unknown column at the `set
` slot and the
+ // `
.
` column-fill slot is flagged like any other
+ // column slot (both are `IdentSource::Columns`).
+ let cache = two_table_schema(); // table `a`; columns id, name
+ let set_in = invalid_ident_at_cursor("seed a set xyz", 14, &cache)
+ .expect("unknown column in `set` must flag");
+ assert_eq!(set_in.found, "xyz");
+ assert_eq!(set_in.source, IdentSource::Columns);
+
+ let fill = invalid_ident_at_cursor("seed a.xyz", 10, &cache)
+ .expect("unknown column in column-fill must flag");
+ assert_eq!(fill.source, IdentSource::Columns);
+ }
+
+ #[test]
+ fn invalid_ident_does_not_fire_for_generator_prefix() {
+ // A prefix of a known generator is an in-progress name, not a typo.
+ let cache = two_table_schema();
+ assert!(
+ invalid_ident_at_cursor("seed a set name as ema", 22, &cache).is_none(), // 22 = input length (cursor at end)
+ "`ema` prefixes `email` — must not flag",
+ );
+ assert!(
+ invalid_ident_at_cursor("seed a set name as email", 24, &cache).is_none(), // 24 = input length (cursor at end)
+ "`email` is a known generator — must not flag",
+ );
+ }
+
fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type;
let mut s = SchemaCache::default();
diff --git a/src/db.rs b/src/db.rs
index 562b8d8..19e4d07 100644
--- a/src/db.rs
+++ b/src/db.rs
@@ -33,7 +33,8 @@ use tracing::{debug, info, warn};
use crate::dsl::action::ReferentialAction;
use crate::dsl::command::{
ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector,
- Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey,
+ Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind,
+ SqlForeignKey,
};
use crate::dsl::ColumnSpec;
use crate::dsl::shortid;
@@ -287,6 +288,23 @@ pub struct InsertResult {
pub data: DataResult,
}
+/// Outcome of a successful `seed` (ADR-0048).
+///
+/// `produced` is below `requested` when the unique-value space ran out
+/// (D14 cap). `data` is a **capped preview** of the seeded rows (D18,
+/// not the whole batch). `advisory_columns` names columns that were
+/// filled with generic text but look like fixed value sets — enum-ish
+/// names or un-derivable CHECKs (D12/D13) — so the render can nudge the
+/// user toward choosing those values deliberately.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SeedResult {
+    pub table: String,
+    pub requested: u64,
+    pub produced: u64,
+    pub data: DataResult,
+    pub advisory_columns: Vec<String>,
+}
+
/// Outcome of a successful `add column …`.
///
/// Carries the post-add structure (used for the auto-show that
@@ -702,6 +720,17 @@ enum Request {
source: Option,
reply: oneshot::Sender>,
},
+    /// Populate a table with generated fake data (ADR-0048). One undo
+    /// snapshot wraps the whole seed via `snapshot_then`.
+    Seed {
+        table: String,
+        target_column: Option<String>,
+        count: Option<u64>,
+        overrides: Vec<SeedOverride>,
+        rng_seed: Option<u64>,
+        source: Option<String>,
+        reply: oneshot::Sender<Result<SeedResult, DbError>>,
+    },
Update {
table: String,
assignments: Vec<(String, Value)>,
@@ -1491,6 +1520,30 @@ impl Database {
recv.await.map_err(|_| DbError::WorkerGone)?
}
+    /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
+    pub async fn seed(
+        &self,
+        table: String,
+        target_column: Option<String>,
+        count: Option<u64>,
+        overrides: Vec<SeedOverride>,
+        rng_seed: Option<u64>,
+        source: Option<String>,
+    ) -> Result<SeedResult, DbError> {
+        let (reply, recv) = oneshot::channel(); // the worker answers on `reply`
+        self.send(Request::Seed {
+            table,
+            target_column,
+            count,
+            overrides,
+            rng_seed,
+            source,
+            reply,
+        })
+        .await?;
+        recv.await.map_err(|_| DbError::WorkerGone)? // a dropped reply sender surfaces as `WorkerGone`
+    }
+
pub async fn update(
&self,
table: String,
@@ -2646,6 +2699,28 @@ fn handle_request(
&values,
));
}
+ Request::Seed {
+ table,
+ target_column,
+ count,
+ overrides,
+ rng_seed,
+ source,
+ reply,
+ } => {
+ // One snapshot wraps the whole seed (ADR-0048 D15 — one undo
+ // step), exactly like a single insert.
+ snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_seed(
+ conn,
+ persistence,
+ source.as_deref(),
+ &table,
+ target_column.as_deref(),
+ count,
+ &overrides,
+ rng_seed,
+ ));
+ }
Request::Update {
table,
assignments,
@@ -2874,7 +2949,10 @@ fn do_list_names_for(
}
Ok(out)
}
- IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()),
+ IdentSource::NewName
+ | IdentSource::Types
+ | IdentSource::Generators
+ | IdentSource::Free => Ok(Vec::new()),
}
}
@@ -8636,34 +8714,918 @@ fn count_rows(conn: &Connection, table: &str) -> Result {
.map_err(DbError::from_rusqlite)
}
-fn do_insert(
+/// Default row count when `seed <table>` omits the count (ADR-0048 D6).
+const DEFAULT_SEED_COUNT: u64 = 20;
+
+/// Upper bound on a single `seed` (ADR-0048 D6) — a typo like
+/// `seed t 1000000` is refused rather than left to hang the app.
+const MAX_SEED_COUNT: u64 = 10_000;
+
+/// Cap on rows shown in the post-seed auto-show preview (ADR-0048 D18).
+/// The full count is always reported; only the rendered table is capped.
+const SEED_PREVIEW_CAP: usize = 20;
+
+/// How a single column's value is produced for each seeded row.
+enum SeedColPlan {
+ /// Generated from the seed library: `generator` is chosen once per
+ /// column and `generate_value(generator, ty, rng)` runs per row.
+ Generated {
+ generator: crate::seed::Generator,
+ ty: Type,
+ },
+ /// A foreign-key child column: sampled from an existing parent row
+ /// (ADR-0048 D14). `fk_idx` selects the FK; `pos` selects this
+ /// column's slot within the parent key tuple (so a compound FK's
+ /// child columns all read from the *same* sampled parent row).
+ ForeignKey { fk_idx: usize, pos: usize },
+ /// A `shortid` column: a base58 id from seed's *seeded* RNG so it
+ /// reproduces under `--seed` (ADR-0048 D4). Always forced — a
+ /// `shortid` column needs an id, never a name-heuristic value.
+ ShortId,
+}
+
+/// Collision key for a positional list of seeded values, used to keep
+/// uniqueness groups (PK tuple, UNIQUE columns) distinct (ADR-0048 D10).
+/// `\u{1}` terminates every field; `\u{0}` marks NULL; bools are `T`/`F`.
+fn seed_value_list_key(values: &[Value]) -> String {
+ let mut key = String::new();
+ for v in values {
+ match v {
+ Value::Number(s) | Value::Text(s) => key.push_str(s), // same text → same key, whether Number or Text
+ Value::Bool(b) => key.push(if *b { 'T' } else { 'F' }),
+ Value::Null => key.push('\u{0}'),
+ }
+ key.push('\u{1}'); // appended after every value, including the last
+ }
+ key
+}
+
+/// `COALESCE(MAX(col), 0)` — the base for sequencing identifier-int
+/// columns (ADR-0048 D10) so generated ids continue past existing rows.
+fn seed_max_int(conn: &Connection, table: &str, column: &str) -> Result<i64, DbError> {
+    let sql = format!(
+        "SELECT COALESCE(MAX(\"{}\"), 0) FROM \"{}\"",
+        column.replace('"', "\"\""), // double any embedded quote so the identifier stays quoted
+        table.replace('"', "\"\"")
+    );
+    conn.query_row(&sql, [], |r| r.get::<_, i64>(0))
+        .map_err(DbError::from_rusqlite)
+}
+
+/// Sample existing parent-key tuples for FK generation (ADR-0048 D14).
+///
+/// Returns one `Value` tuple per distinct parent row in
+/// `parent_columns` order, so a compound FK's children can be filled
+/// from one consistent parent row. Empty when the parent has no rows
+/// (the caller turns that into the friendly "seed the parent first"
+/// error).
+fn sample_parent_key_tuples(
+    conn: &Connection,
+    parent_table: &str,
+    parent_columns: &[String],
+) -> Result<Vec<Vec<Value>>, DbError> {
+    let cols = parent_columns
+        .iter()
+        .map(|c| format!("\"{}\"", c.replace('"', "\"\"")))
+        .collect::<Vec<_>>()
+        .join(", ");
+    // `ORDER BY` the key columns so the sampled order is deterministic
+    // (ADR-0048 D4): `--seed` reproducibility must not depend on
+    // SQLite's unspecified `DISTINCT` row order.
+    let sql = format!(
+        "SELECT DISTINCT {cols} FROM \"{}\" ORDER BY {cols}",
+        parent_table.replace('"', "\"\"")
+    );
+    let n = parent_columns.len();
+    let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
+    let tuples = stmt
+        .query_map([], |row| {
+            let mut tuple = Vec::with_capacity(n);
+            for i in 0..n {
+                let v = match row.get_ref(i)? {
+                    rusqlite::types::ValueRef::Null => Value::Null,
+                    rusqlite::types::ValueRef::Integer(x) => Value::Number(x.to_string()),
+                    rusqlite::types::ValueRef::Real(x) => Value::Number(x.to_string()),
+                    rusqlite::types::ValueRef::Text(t) => {
+                        Value::Text(String::from_utf8_lossy(t).into_owned())
+                    }
+                    // FK keys are never blobs in this app; treat as NULL.
+                    rusqlite::types::ValueRef::Blob(_) => Value::Null,
+                };
+                tuple.push(v);
+            }
+            Ok(tuple)
+        })
+        .map_err(DbError::from_rusqlite)?
+        .collect::<Result<Vec<_>, _>>()
+        .map_err(DbError::from_rusqlite)?;
+    Ok(tuples)
+}
+
+/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
+///
+/// Generates whole rows and inserts them in one transaction, reusing the
+/// per-value validation, autogen autofill, FK-error enrichment and
+/// persistence machinery via [`insert_one_row`]. The whole seed is a
+/// single undo step (the worker wraps the call in one `snapshot_then`)
+/// and writes exactly one `history.log` line.
+///
+/// Foreign-key columns are filled by sampling existing parent rows
+/// (D14); a compound FK reads all its child columns from one sampled
+/// parent row. An empty parent is refused with a friendly error. A
+/// `NOT NULL blob` column (which seed cannot generate) is refused by
+/// the block guard (D1); a nullable blob is omitted (→ NULL).
+///
+/// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to
+/// [`do_seed_column_fill`] (fill one column across existing rows, D1
+/// form 2). `overrides` carries the `set <col> …` clause (D2): per-column
+/// pins that replace the heuristic generator and drop the column from the
+/// generic-fill advisory (D13).
+#[allow(clippy::too_many_arguments)]
+fn do_seed(
     conn: &Connection,
     persistence: Option<&Persistence>,
     source: Option<&str>,
     table: &str,
-    user_columns: Option<&[String]>,
-    user_values: &[Value],
-) -> Result<InsertResult, DbError> {
-    debug!(table = %table, "insert");
+    target_column: Option<&str>,
+    count: Option<u64>,
+    overrides: &[SeedOverride],
+    rng_seed: Option<u64>,
+) -> Result<SeedResult, DbError> {
+    use crate::seed;
+    use rand::RngExt;
+
     let canonical_table = require_canonical_table(conn, table)?;
     let table = canonical_table.as_str();
+
+    // Column-fill (D1 form 2) is a distinct UPDATE path.
+    if let Some(col) = target_column {
+        return do_seed_column_fill(
+            conn, persistence, source, table, col, count, overrides, rng_seed,
+        );
+    }
+
+    let n = count.unwrap_or(DEFAULT_SEED_COUNT);
+    debug!(table = %table, count = n, "seed");
+    if n > MAX_SEED_COUNT {
+        return Err(DbError::Unsupported(format!(
+            "cannot seed {n} rows at once: the maximum is {MAX_SEED_COUNT}. \
+             Seed in smaller batches."
+        )));
+    }
+
     let schema = read_schema(conn, table)?;
-    // Resolve which columns the user is providing values for.
-    let user_cols: Vec<String> = match user_columns {
-        Some(cols) => cols.to_vec(),
-        None => {
-            // Short form: every non-auto-generated column in
-            // schema declaration order. Serial and shortid both
-            // get auto-filled below.
+    // Pre-sample each FK's parent key tuples (D14); refuse if a parent
+    // is empty (no valid reference can be fabricated).
+    let mut fk_samples: Vec<Vec<Vec<Value>>> = Vec::with_capacity(schema.foreign_keys.len());
+    for fk in &schema.foreign_keys {
+        let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &fk.parent_columns)?;
+        if tuples.is_empty() {
+            return Err(DbError::Unsupported(format!(
+                "cannot seed `{table}`: parent table `{}` (referenced by `{}`) has no rows. \
+                 Seed or insert into `{}` first.",
+                fk.parent_table,
+                fk.child_columns.join(", "),
+                fk.parent_table,
+            )));
+        }
+        fk_samples.push(tuples);
+    }
+    // child column → (fk index, position within the FK's column list).
+    let mut fk_child_pos: std::collections::HashMap<&str, (usize, usize)> =
+        std::collections::HashMap::new();
+    for (fk_idx, fk) in schema.foreign_keys.iter().enumerate() {
+        for (pos, child) in fk.child_columns.iter().enumerate() {
+            fk_child_pos.insert(child.as_str(), (fk_idx, pos));
+        }
+    }
+
+    // Build the per-column generation plan, skipping autogen and
+    // un-generatable columns. `advisory_columns` collects columns
+    // filled with generic text that look like fixed value sets (D12/D13).
+    let mut col_names: Vec<String> = Vec::new();
+    let mut plans: Vec<SeedColPlan> = Vec::new();
+    let mut advisory_columns: Vec<String> = Vec::new();
+    for c in &schema.columns {
+        let ty = c.user_type.unwrap_or(Type::Text);
+        // serial auto-fills deterministically in `do_insert` (rowid /
+        // MAX+1) — omit it. shortid is handled below from the seeded RNG.
+        if matches!(ty, Type::Serial) {
+            continue;
+        }
+        // blob has no DSL value path: refuse if required (D1), else omit.
+        if matches!(ty, Type::Blob) {
+            if c.notnull {
+                return Err(DbError::Unsupported(format!(
+                    "cannot seed `{table}`: column `{}` is `NOT NULL` but has type `blob`, \
+                     which seed cannot generate. Add the rows another way or make it nullable.",
+                    c.name,
+                )));
+            }
+            continue;
+        }
+        col_names.push(c.name.clone());
+        if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) {
+            plans.push(SeedColPlan::ForeignKey { fk_idx, pos });
+        } else if matches!(ty, Type::ShortId) {
+            // Always the shortid generator (never a name heuristic — a
+            // shortid column needs a base58 id, not e.g. an email).
+            plans.push(SeedColPlan::ShortId);
+        } else {
+            // A simple `col IN ('a','b')` CHECK becomes the value source
+            // (D17) so the enum-as-CHECK pattern just works.
+            let check_in_values = c
+                .check
+                .as_deref()
+                .and_then(|chk| seed::parse_in_check_values(chk, &c.name));
+            let spec = seed::ColumnSpec {
+                name: c.name.clone(),
+                ty,
+                not_null: c.notnull,
+                primary_key: c.primary_key,
+                unique: c.unique,
+                is_foreign_key: false,
+                check_in_values,
+            };
+            let generator = seed::choose_generator(table, &spec);
+            // Flag columns that fell through to generic text but look
+            // like a fixed value set (enum-ish name, or a CHECK we
+            // could not derive values from) — D12/D13.
+            if matches!(generator, crate::seed::Generator::Generic)
+                && (seed::is_enum_ish(&c.name)
+                    || (c.check.is_some() && spec.check_in_values.is_none()))
+            {
+                advisory_columns.push(c.name.clone());
+            }
+            plans.push(SeedColPlan::Generated { generator, ty });
+        }
+    }
+
+    // Apply the `set <col> …` overrides (D2): each replaces the named
+    // column's plan with the pinned generator and removes it from the
+    // generic-fill advisory (the user chose its values deliberately,
+    // D13). An override that names a non-fillable column is a friendly
+    // error; a bounded value source (fixed / pick-list) that can't supply
+    // enough distinct values for a single-column-UNIQUE target is refused
+    // up front rather than silently capped (DA finding). FK / type binding
+    // still apply — a value that violates a constraint surfaces through the
+    // existing FK-error guard.
+    apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?;
+
+    // Uniqueness groups (ADR-0048 D10): value tuples that must stay
+    // distinct across the batch and against existing rows — the
+    // user-fillable PK (so junction distinct-combos fall out of this),
+    // each compound UNIQUE constraint, and each single-column UNIQUE or
+    // identifier-named column. Each group is a list of indices into
+    // `col_names` / `plans`.
+    let col_index: std::collections::HashMap<&str, usize> = col_names
+        .iter()
+        .enumerate()
+        .map(|(i, name)| (name.as_str(), i))
+        .collect();
+    let project_group = |cols: &[String]| -> Vec<usize> {
+        cols.iter()
+            .filter_map(|c| col_index.get(c.as_str()).copied())
+            .collect()
+    };
+    let mut unique_groups: Vec<Vec<usize>> = Vec::new();
+    let pk_group = project_group(&schema.primary_key);
+    if !pk_group.is_empty() {
+        unique_groups.push(pk_group);
+    }
+    for uc in &schema.unique_constraints {
+        let g = project_group(uc);
+        if !g.is_empty() {
+            unique_groups.push(g);
+        }
+    }
+    for (i, name) in col_names.iter().enumerate() {
+        let unique_col = schema
+            .columns
+            .iter()
+            .find(|c| &c.name == name)
+            .is_some_and(|c| c.unique);
+        let is_identifier = matches!(
+            &plans[i],
+            SeedColPlan::Generated {
+                generator: crate::seed::Generator::IdentitySequential,
+                ..
+            }
+        );
+        if unique_col || is_identifier {
+            unique_groups.push(vec![i]);
+        }
+    }
+
+    // Sequence base for identifier-int columns (D10): start past the
+    // current MAX so generated ids continue cleanly.
+    let mut seq_base: std::collections::HashMap<usize, i64> = std::collections::HashMap::new();
+    for (i, plan) in plans.iter().enumerate() {
+        if let SeedColPlan::Generated { generator, ty } = plan
+            && matches!(generator, crate::seed::Generator::IdentitySequential)
+            && matches!(ty, Type::Int)
+        {
+            seq_base.insert(i, seed_max_int(conn, table, &col_names[i])?);
+        }
+    }
+
+    // Pre-load each group's existing tuples so generation never
+    // collides with rows already present.
+    let mut used: Vec<std::collections::HashSet<String>> =
+        vec![std::collections::HashSet::new(); unique_groups.len()];
+    for (gi, group) in unique_groups.iter().enumerate() {
+        let cols: Vec<String> = group.iter().map(|&i| col_names[i].clone()).collect();
+        for tuple in sample_parent_key_tuples(conn, table, &cols)? {
+            used[gi].insert(seed_value_list_key(&tuple));
+        }
+    }
+
+    // Retry cap per row: when the unique space is exhausted (e.g. a
+    // junction requested more rows than there are parent combinations),
+    // stop and cap rather than spin (D14).
+    const MAX_ATTEMPTS: u32 = 200;
+
+    let mut rng = seed::make_rng(rng_seed);
+    let mut preview_rowids: Vec<i64> = Vec::new();
+    let mut accepted: u64 = 0;
+    let mut capped = false;
+
+    // All rows insert in a single transaction; persistence (the CSV and
+    // the one history line) is written once, before the single commit —
+    // preserving ADR-0015 §6 commit-db-last while staying O(N) instead
+    // of the O(N^2) of per-row CSV rewrites. A mid-batch failure rolls
+    // the whole seed back (atomic).
+    let tx = conn
+        .unchecked_transaction()
+        .map_err(DbError::from_rusqlite)?;
+
+    while accepted < n {
+        let mut attempt = 0u32;
+        let rowid = loop {
+            // One sampled parent row per FK for this attempt, so a
+            // compound FK's children stay consistent.
+            let fk_choice: Vec<usize> = fk_samples
+                .iter()
+                .map(|tuples| rng.random_range(0..tuples.len()))
+                .collect();
+            let values: Vec<Value> = plans
+                .iter()
+                .enumerate()
+                .map(|(i, plan)| match plan {
+                    SeedColPlan::ForeignKey { fk_idx, pos } => {
+                        fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone()
+                    }
+                    // Seeded base58 id → reproducible under `--seed` (D4).
+                    SeedColPlan::ShortId => {
+                        Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
+                    }
+                    SeedColPlan::Generated { generator, ty }
+                        if matches!(generator, crate::seed::Generator::IdentitySequential)
+                            && matches!(ty, Type::Int) =>
+                    {
+                        // Monotonic past existing rows → inherently unique.
+                        Value::Number((seq_base[&i] + accepted as i64 + 1).to_string())
+                    }
+                    SeedColPlan::Generated { generator, ty } => {
+                        seed::generate_value(generator, *ty, &mut rng)
+                    }
+                })
+                .collect();
+
+            let keys: Vec<String> = unique_groups
+                .iter()
+                .map(|group| {
+                    let projected: Vec<Value> =
+                        group.iter().map(|&i| values[i].clone()).collect();
+                    seed_value_list_key(&projected)
+                })
+                .collect();
+            if keys.iter().enumerate().any(|(gi, k)| used[gi].contains(k)) {
+                attempt += 1;
+                if attempt >= MAX_ATTEMPTS {
+                    capped = true;
+                    break None;
+                }
+                continue;
+            }
+            for (gi, k) in keys.into_iter().enumerate() {
+                used[gi].insert(k);
+            }
+            let (_rows, rowid) =
+                insert_one_row(conn, table, &schema, Some(&col_names), &values)?;
+            break Some(rowid);
+        };
+        match rowid {
+            Some(rowid) => {
+                // Keep the first `SEED_PREVIEW_CAP` rowids for the
+                // capped auto-show (D18).
+                if preview_rowids.len() < SEED_PREVIEW_CAP {
+                    preview_rowids.push(rowid);
+                }
+                accepted += 1;
+            }
+            None => break,
+        }
+    }
+
+    if capped {
+        warn!(
+            table = %table,
+            requested = n,
+            produced = accepted,
+            "seed capped: ran out of distinct unique-value combinations before the requested count"
+        );
+    }
+
+    // Persist once (CSV + the single history line), then commit (db-last).
+    let changes = Changes {
+        schema_dirty: false,
+        rewritten_tables: vec![table.to_string()],
+        ..Changes::default()
+    };
+    finalize_persistence(conn, persistence, source, &changes)?;
+    tx.commit().map_err(DbError::from_rusqlite)?;
+
+    let data = if preview_rowids.is_empty() {
+        DataResult {
+            table_name: table.to_string(),
+            columns: Vec::new(),
+            column_types: Vec::new(),
+            rows: Vec::new(),
+        }
+    } else {
+        query_rows_by_rowid(conn, table, &preview_rowids)?
+    };
+
+    Ok(SeedResult {
+        table: table.to_string(),
+        requested: n,
+        produced: accepted,
+        data,
+        advisory_columns,
+    })
+}
+
+/// Apply the `set <col> …` overrides (ADR-0048 D2) to the per-column
+/// generation plan. Each override replaces the named column's plan and
+/// drops it from the generic-fill advisory (D13 — the user chose those
+/// values). An override naming a column that is not in the fillable set
+/// (unknown, or an auto-generated `serial`) is a friendly error.
+///
+/// `col_names` and `plans` are indexed in parallel: the position of the
+/// override's column in `col_names` selects the slot of `plans` that is
+/// replaced. Column names are matched ASCII-case-insensitively.
+///
+/// # Errors
+///
+/// Returns `DbError::Unsupported` when the column is not in
+/// `col_names`, and propagates any error from
+/// `seed_override_capacity_guard` or `seed_override_plan`.
+fn apply_seed_overrides(
+    schema: &ReadSchema,
+    overrides: &[SeedOverride],
+    row_count: u64,
+    col_names: &[String],
+    plans: &mut [SeedColPlan],
+    advisory_columns: &mut Vec<String>,
+) -> Result<(), DbError> {
+    for ov in overrides {
+        let Some(idx) = col_names
+            .iter()
+            .position(|c| c.eq_ignore_ascii_case(&ov.column))
+        else {
+            return Err(DbError::Unsupported(format!(
+                "cannot apply `set {col} …`: `{col}` is not a fillable column of this \
+                 table (it is unknown, or an auto-generated column).",
+                col = ov.column,
+            )));
+        };
+        // A column with no recorded user type is planned as text.
+        let ty = schema
+            .columns
+            .iter()
+            .find(|c| c.name.eq_ignore_ascii_case(&ov.column))
+            .and_then(|c| c.user_type)
+            .unwrap_or(Type::Text);
+        // Check capacity before touching `plans`, so a refused override
+        // leaves this column's slot unchanged.
+        seed_override_capacity_guard(schema, &ov.column, &ov.kind, row_count)?;
+        plans[idx] = seed_override_plan(&ov.kind, ty, &ov.column)?;
+        advisory_columns.retain(|c| !c.eq_ignore_ascii_case(&ov.column));
+    }
+    Ok(())
+}
+
+/// Up-front capacity check for a `set` override on a uniquely
+/// constrained column.
+///
+/// A **bounded** override — a fixed value (one distinct value) or a
+/// pick-list (its de-duplicated literals) — is rejected with a friendly
+/// error when it offers fewer distinct values than `row_count` and
+/// `column` is single-column UNIQUE or the sole primary-key column.
+/// Without this, the uniqueness machinery would silently cap the run.
+/// Generators and ranges are treated as effectively unbounded and pass
+/// straight through; if one does exhaust, the distinct-combination cap
+/// (D14) still applies.
+///
+/// A pick-list literal that cannot be extracted (e.g. `null`) surfaces
+/// its own error from `seed_override_literal`.
+fn seed_override_capacity_guard(
+    schema: &ReadSchema,
+    column: &str,
+    kind: &SeedOverrideKind,
+    row_count: u64,
+) -> Result<(), DbError> {
+    let distinct: usize = match kind {
+        // Unbounded-enough sources — nothing to verify here.
+        SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()),
+        SeedOverrideKind::Fixed(_) => 1,
+        SeedOverrideKind::PickList(values) => values
+            .iter()
+            .map(|v| seed_override_literal(v, column))
+            .collect::<Result<std::collections::HashSet<_>, _>>()?
+            .len(),
+    };
+
+    let needed = row_count.max(1);
+    if distinct as u64 >= needed {
+        return Ok(());
+    }
+
+    // Only single-column uniqueness forces a cap: a compound UNIQUE or
+    // compound PK can still be satisfied by varying the other columns.
+    let declared_unique = schema
+        .columns
+        .iter()
+        .find(|c| c.name.eq_ignore_ascii_case(column))
+        .is_some_and(|c| c.unique);
+    let sole_primary_key =
+        schema.primary_key.len() == 1 && schema.primary_key[0].eq_ignore_ascii_case(column);
+    if !(declared_unique || sole_primary_key) {
+        return Ok(());
+    }
+
+    Err(DbError::Unsupported(format!(
+        "cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \
+         value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \
+         of at least {row_count} values."
+    )))
+}
+
+/// Turn one `set` override into the `SeedColPlan` that produces its
+/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the
+/// literal(s); `Generator` resolves the curated name (unknown → friendly
+/// error); `Range` validates its bounds against the column type *before*
+/// generation (an incompatible bound → friendly error).
+///
+/// `ty` is the destination column's type and is carried into the
+/// returned plan; `column` is used for literal extraction and for error
+/// messages.
+///
+/// # Errors
+///
+/// Returns `DbError::Unsupported` for an unknown generator name or for
+/// range bounds that `crate::seed::range_bounds_reason` rejects, and
+/// propagates any error from `seed_override_literal`.
+fn seed_override_plan(
+    kind: &SeedOverrideKind,
+    ty: Type,
+    column: &str,
+) -> Result<SeedColPlan, DbError> {
+    use crate::seed::Generator;
+    let generator = match kind {
+        // A constant is a one-element pick-list.
+        SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]),
+        SeedOverrideKind::PickList(vs) => {
+            let lits = vs
+                .iter()
+                .map(|v| seed_override_literal(v, column))
+                .collect::<Result<Vec<_>, _>>()?;
+            Generator::PickFrom(lits)
+        }
+        SeedOverrideKind::Generator(name) => {
+            crate::seed::generator_for_name(name).ok_or_else(|| {
+                DbError::Unsupported(format!(
+                    "unknown generator `{name}` in `set {column} as {name}`. \
+                     Known generators: {}.",
+                    crate::seed::KNOWN_GENERATORS.join(", "),
+                ))
+            })?
+        }
+        SeedOverrideKind::Range { low, high } => {
+            let lo = seed_override_literal(low, column)?;
+            let hi = seed_override_literal(high, column)?;
+            // Reject bounds the column type cannot take before any row
+            // is generated.
+            if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) {
+                return Err(DbError::Unsupported(format!(
+                    "cannot apply `set {column} between …`: {reason}."
+                )));
+            }
+            Generator::Range { low: lo, high: hi }
+        }
+    };
+    Ok(SeedColPlan::Generated { generator, ty })
+}
+
+/// Extract the literal string an override value contributes to a
+/// `PickFrom` / `Range` (re-typed per column by `generate_value`). A
+/// `null` override is refused — seed always fills a value (NULL
+/// injection is out of scope, ADR-0048 Out-of-scope).
+///
+/// Numbers and text yield their stored string unchanged; booleans yield
+/// `"true"` / `"false"`. `column` is used only in the error message.
+///
+/// # Errors
+///
+/// Returns `DbError::Unsupported` when `value` is `Value::Null`.
+fn seed_override_literal(value: &Value, column: &str) -> Result<String, DbError> {
+    match value {
+        Value::Number(s) | Value::Text(s) => Ok(s.clone()),
+        Value::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()),
+        Value::Null => Err(DbError::Unsupported(format!(
+            "`set {column} = null` is not supported — seed always fills a value."
+        ))),
+    }
+}
+
+/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's
+/// **existing** rows (an UPDATE), the natural follow-up to `add column`.
+///
+/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets;
+/// an empty table is a friendly no-op. The `set` clause may only adjust
+/// the column being filled (the rest of the per-column heuristics do not
+/// apply — there is exactly one column). A UNIQUE / identifier target
+/// gets collision-free values (generated distinct from *every* existing
+/// value in the column, so no row-by-row UPDATE can transiently collide);
+/// an FK target samples an existing parent key (D14). The whole fill is
+/// one transaction → one undo step (D15), persisted once (commit-db-last).
+///
+/// `count` must be `None` (a row count is refused for this form) and
+/// every entry of `overrides` must name `column`. `rng_seed` is handed
+/// to `seed::make_rng`. On success `requested` and `produced` both equal
+/// the number of rows updated, and `data` previews the first
+/// `SEED_PREVIEW_CAP` rows in rowid order.
+///
+/// NOTE(review): the type arguments on `count`, `rng_seed` and the
+/// return type (and on several locals below) appear to have been lost
+/// in extraction — confirm against the real source file.
+#[allow(clippy::too_many_arguments)]
+fn do_seed_column_fill(
+ conn: &Connection,
+ persistence: Option<&Persistence>,
+ source: Option<&str>,
+ table: &str,
+ column: &str,
+ count: Option,
+ overrides: &[SeedOverride],
+ rng_seed: Option,
+) -> Result {
+ use crate::seed;
+ use rand::RngExt;
+
+ debug!(table = %table, column = %column, "seed column-fill");
+
+ // A row count is meaningless when filling existing rows (D1 form 2).
+ if count.is_some() {
+ return Err(DbError::Unsupported(format!(
+ "`seed {table}.{column}` fills existing rows, so it takes no row count \
+ (drop the number)."
+ )));
+ }
+
+ // Resolve the target case-insensitively; `canonical_col` (the
+ // schema's spelling) is what messages and SQL use from here on.
+ let schema = read_schema(conn, table)?;
+ let col = schema
+ .columns
+ .iter()
+ .find(|c| c.name.eq_ignore_ascii_case(column))
+ .ok_or_else(|| {
+ DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column."))
+ })?;
+ let canonical_col = col.name.clone();
+ // A column with no recorded user type is treated as text.
+ let ty = col.user_type.unwrap_or(Type::Text);
+
+ // Refuse identity / auto-generated / un-generatable targets (D1).
+ if col.primary_key {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: it is part of the primary key — \
+ you don't fill an identity column."
+ )));
+ }
+ if matches!(ty, Type::Serial | Type::ShortId) {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \
+ values automatically.",
+ ty.keyword(),
+ )));
+ }
+ if matches!(ty, Type::Blob) {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values."
+ )));
+ }
+
+ // The `set` clause may only adjust the filled column (user decision).
+ for ov in overrides {
+ if !ov.column.eq_ignore_ascii_case(&canonical_col) {
+ return Err(DbError::Unsupported(format!(
+ "in `seed {table}.{canonical_col}`, `set` can only adjust \
+ `{canonical_col}` (the column being filled), not `{}`.",
+ ov.column,
+ )));
+ }
+ }
+
+ // Existing rowids in a deterministic order (D4 reproducibility).
+ let rowids: Vec = {
+ let sql = format!(
+ "SELECT rowid FROM \"{}\" ORDER BY rowid",
+ table.replace('"', "\"\"")
+ );
+ let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?;
+ stmt.query_map([], |r| r.get::<_, i64>(0))
+ .map_err(DbError::from_rusqlite)?
+ .collect::, _>>()
+ .map_err(DbError::from_rusqlite)?
+ };
+
+ // Empty table → friendly no-op (D1).
+ if rowids.is_empty() {
+ return Ok(SeedResult {
+ table: table.to_string(),
+ requested: 0,
+ produced: 0,
+ data: DataResult {
+ table_name: table.to_string(),
+ columns: Vec::new(),
+ column_types: Vec::new(),
+ rows: Vec::new(),
+ },
+ advisory_columns: Vec::new(),
+ });
+ }
+
+ // FK target → sample an existing parent key column (D14).
+ let fk_sample: Option> = {
+ let fk = schema.foreign_keys.iter().find(|fk| {
+ fk.child_columns
+ .iter()
+ .any(|c| c.eq_ignore_ascii_case(&canonical_col))
+ });
+ match fk {
+ Some(fk) => {
+ // Single-column position within the FK (column-fill targets
+ // one column; a compound FK filled one column at a time is
+ // unusual but we sample that column's parent values).
+ let pos = fk
+ .child_columns
+ .iter()
+ .position(|c| c.eq_ignore_ascii_case(&canonical_col))
+ .unwrap_or(0);
+ let parent_col = fk.parent_columns.get(pos).cloned().unwrap_or_default();
+ let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?;
+ if tuples.is_empty() {
+ return Err(DbError::Unsupported(format!(
+ "cannot fill `{table}.{canonical_col}`: parent table `{}` has no \
+ rows to reference. Seed or insert into `{}` first.",
+ fk.parent_table, fk.parent_table,
+ )));
+ }
+ // One column was requested, so keep each tuple's first value.
+ Some(tuples.into_iter().map(|mut t| t.remove(0)).collect())
+ }
+ None => None,
+ }
+ };
+
+ // The value source: an override (if present) else the heuristic.
+ let mut advisory_columns: Vec = Vec::new();
+ let plan: SeedColPlan = if let Some(ov) = overrides
+ .iter()
+ .find(|o| o.column.eq_ignore_ascii_case(&canonical_col))
+ {
+ // Same capacity guard as whole-row: a bounded override that can't
+ // give enough distinct values for a UNIQUE column across the
+ // existing rows is refused up front, not silently capped.
+ seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?;
+ seed_override_plan(&ov.kind, ty, &canonical_col)?
+ } else if fk_sample.is_some() {
+ // `fk_idx` / `pos` are placeholders: the generation loop below
+ // matches `ForeignKey { .. }` and reads only `fk_sample`.
+ SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 }
+ } else if matches!(ty, Type::ShortId) {
+ SeedColPlan::ShortId // unreachable (refused above), kept for totality
+ } else {
+ let check_in_values = col
+ .check
+ .as_deref()
+ .and_then(|chk| seed::parse_in_check_values(chk, &canonical_col));
+ let spec = seed::ColumnSpec {
+ name: canonical_col.clone(),
+ ty,
+ not_null: col.notnull,
+ primary_key: col.primary_key,
+ unique: col.unique,
+ is_foreign_key: false,
+ check_in_values,
+ };
+ let generator = seed::choose_generator(table, &spec);
+ // Advise when the generic filler was chosen for a column that
+ // looks enum-like by name, or that has a CHECK from which no
+ // `IN` value list could be parsed.
+ if matches!(generator, crate::seed::Generator::Generic)
+ && (seed::is_enum_ish(&canonical_col)
+ || (col.check.is_some() && spec.check_in_values.is_none()))
+ {
+ advisory_columns.push(canonical_col.clone());
+ }
+ SeedColPlan::Generated { generator, ty }
+ };
+
+ // Collision-free generation for UNIQUE / identifier targets: seed the
+ // used-set with EVERY existing value of the column so a generated
+ // value never matches a not-yet-updated row (no transient UNIQUE
+ // violation) nor a value already assigned this batch (ADR-0048 D10).
+ let enforce_unique = col.unique
+ || matches!(
+ &plan,
+ SeedColPlan::Generated {
+ generator: crate::seed::Generator::IdentitySequential,
+ ..
+ }
+ );
+ let mut used: std::collections::HashSet = std::collections::HashSet::new();
+ if enforce_unique {
+ // The parent-key sampler is pointed at the target table itself
+ // here, to read the column's current values.
+ for tuple in
+ sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))?
+ {
+ used.insert(seed_value_list_key(&tuple));
+ }
+ }
+ // Sequential integer identifiers continue from the value returned
+ // by `seed_max_int` for this column.
+ let seq_base = if matches!(
+ &plan,
+ SeedColPlan::Generated {
+ generator: crate::seed::Generator::IdentitySequential,
+ ..
+ }
+ ) && matches!(ty, Type::Int)
+ {
+ Some(seed_max_int(conn, table, &canonical_col)?)
+ } else {
+ None
+ };
+
+ // Per-row cap on regeneration attempts when a candidate collides.
+ const MAX_ATTEMPTS: u32 = 200;
+ let mut rng = seed::make_rng(rng_seed);
+ let tx = conn
+ .unchecked_transaction()
+ .map_err(DbError::from_rusqlite)?;
+
+ let update_sql = format!(
+ "UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2",
+ table.replace('"', "\"\""),
+ canonical_col.replace('"', "\"\""),
+ );
+ let mut produced: u64 = 0;
+ for (offset, rowid) in rowids.iter().enumerate() {
+ let mut attempt = 0u32;
+ let value = loop {
+ let v = match &plan {
+ SeedColPlan::ForeignKey { .. } => {
+ let samples = fk_sample.as_ref().expect("fk plan implies samples");
+ samples[rng.random_range(0..samples.len())].clone()
+ }
+ SeedColPlan::ShortId => {
+ Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng))
+ }
+ SeedColPlan::Generated { generator, ty }
+ if matches!(generator, crate::seed::Generator::IdentitySequential)
+ && matches!(ty, Type::Int) =>
+ {
+ // NOTE(review): `produced` only advances after a
+ // successful UPDATE, so a retry of this arm yields the
+ // same candidate until MAX_ATTEMPTS is reached.
+ Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string())
+ }
+ SeedColPlan::Generated { generator, ty } => {
+ seed::generate_value(generator, *ty, &mut rng)
+ }
+ };
+ if enforce_unique {
+ let key = seed_value_list_key(std::slice::from_ref(&v));
+ if used.contains(&key) {
+ attempt += 1;
+ if attempt >= MAX_ATTEMPTS {
+ break v; // give up on distinctness; DB may reject
+ }
+ continue;
+ }
+ used.insert(key);
+ }
+ break v;
+ };
+ let bound = impl_value_for(&schema, &canonical_col, &value)?;
+ let params: Vec =
+ vec![bound_to_sqlite_value(&bound), rusqlite::types::Value::Integer(*rowid)];
+ execute_with_fk_enrichment(conn, table, &update_sql, ¶ms)?;
+ produced += 1;
+ // `offset` from `enumerate()` is otherwise unused; discard it
+ // explicitly.
+ let _ = offset;
+ }
+
+ // Persist first, then commit the database transaction last.
+ let changes = Changes {
+ schema_dirty: false,
+ rewritten_tables: vec![table.to_string()],
+ ..Changes::default()
+ };
+ finalize_persistence(conn, persistence, source, &changes)?;
+ tx.commit().map_err(DbError::from_rusqlite)?;
+
+ // Preview the first capped rows (D18).
+ let preview: Vec = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect();
+ let data = query_rows_by_rowid(conn, table, &preview)?;
+
+ Ok(SeedResult {
+ table: table.to_string(),
+ requested: produced,
+ produced,
+ data,
+ advisory_columns,
+ })
+}
+
+/// Build and execute a single-row `INSERT` — column resolution, value
+/// binding, `serial`/`shortid` autofill, and the FK-enriched execute —
+/// returning `(rows_affected, new rowid)`.
+///
+/// It does **not** manage the transaction or persistence: the caller
+/// owns those. This lets `do_insert` run one row in its own
+/// transaction while `do_seed` runs N rows in a single transaction and
+/// persists once (preserving ADR-0015 §6 commit-db-last while staying
+/// O(N)). **The caller must hold an open transaction.** `table` must
+/// already be canonical and `schema` already read.
+fn insert_one_row(
+ conn: &Connection,
+ table: &str,
+ schema: &ReadSchema,
+ user_columns: Option<&[String]>,
+ user_values: &[Value],
+) -> Result<(usize, i64), DbError> {
+ // Resolve which columns the user is providing values for. The short
+ // form (None) is every non-auto-generated column in schema
+ // declaration order; serial and shortid get auto-filled below.
+ let user_cols: Vec = user_columns.map_or_else(
+ || {
schema
.columns
.iter()
.filter(|c| !matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId)))
.map(|c| c.name.clone())
.collect()
- }
- };
+ },
+ <[String]>::to_vec,
+ );
if user_cols.len() != user_values.len() {
return Err(DbError::InvalidValue(format!(
@@ -8676,7 +9638,7 @@ fn do_insert(
let mut bindings: Vec<(String, Bound)> = Vec::with_capacity(user_cols.len());
for (col_name, value) in user_cols.iter().zip(user_values.iter()) {
- let bound = impl_value_for(&schema, col_name, value)?;
+ let bound = impl_value_for(schema, col_name, value)?;
bindings.push((col_name.clone(), bound));
}
@@ -8747,11 +9709,28 @@ fn do_insert(
debug!(sql = %sql, "insert");
let params: Vec =
bindings.iter().map(|(_, b)| bound_to_sqlite_value(b)).collect();
+ let rows_affected = execute_with_fk_enrichment(conn, table, &sql, ¶ms)?;
+ let new_rowid = conn.last_insert_rowid();
+ Ok((rows_affected, new_rowid))
+}
+
+fn do_insert(
+ conn: &Connection,
+ persistence: Option<&Persistence>,
+ source: Option<&str>,
+ table: &str,
+ user_columns: Option<&[String]>,
+ user_values: &[Value],
+) -> Result {
+ debug!(table = %table, "insert");
+ let canonical_table = require_canonical_table(conn, table)?;
+ let table = canonical_table.as_str();
+ let schema = read_schema(conn, table)?;
let tx = conn
.unchecked_transaction()
.map_err(DbError::from_rusqlite)?;
- let rows_affected = execute_with_fk_enrichment(conn, table, &sql, ¶ms)?;
- let new_rowid = conn.last_insert_rowid();
+ let (rows_affected, new_rowid) =
+ insert_one_row(conn, table, &schema, user_columns, user_values)?;
let data = query_rows_by_rowid(conn, table, &[new_rowid])?;
let changes = Changes {
schema_dirty: false,
@@ -8760,10 +9739,7 @@ fn do_insert(
};
finalize_persistence(conn, persistence, source, &changes)?;
tx.commit().map_err(DbError::from_rusqlite)?;
- Ok(InsertResult {
- rows_affected,
- data,
- })
+ Ok(InsertResult { rows_affected, data })
}
/// Build the parameterised `UPDATE … SET … WHERE …` statement.
diff --git a/src/dsl/command.rs b/src/dsl/command.rs
index 68046e4..99304a3 100644
--- a/src/dsl/command.rs
+++ b/src/dsl/command.rs
@@ -402,6 +402,25 @@ pub enum Command {
filter: Option,
limit: Option,
},
+ /// Populate a table with generated fake data (ADR-0048, SD1/SD2).
+ /// `count` defaults to 20 when omitted; `rng_seed` (from the
+ /// `--seed <n>` flag) makes generation reproducible.
+ ///
+ /// Phase 2 surfaces (ADR-0048 D1/D2):
+ /// - `target_column` is `Some` for the **column-fill** form
+ /// `seed <table>.<column>` — fill one column across the table's
+ /// *existing* rows (an UPDATE), rather than generating new rows.
+ /// - `overrides` carries the `set <col> …` clause: per-column pins
+ /// that take precedence over the heuristic generator (D2).
+ ///
+ /// NOTE(review): the type arguments on the fields below appear to
+ /// have been lost in extraction — confirm against the real source.
+ Seed {
+ table: String,
+ /// `Some(col)` → column-fill mode (UPDATE existing rows);
+ /// `None` → whole-row generation (INSERT new rows).
+ target_column: Option,
+ count: Option,
+ overrides: Vec,
+ rng_seed: Option,
+ },
/// Replay a sequence of DSL commands from a file. Each line
/// is parsed and dispatched through the same pipeline as
/// interactive input. Blank lines and lines whose first
@@ -637,6 +656,38 @@ impl RowFilter {
}
}
+/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
+///
+/// The user can pin a column's generated values to a constant, a
+/// pick-list, an explicit named generator, or a range — overriding the
+/// per-column heuristic the executor would otherwise pick. `column` is
+/// the user-typed column name (validated against the table at execution,
+/// like every other column slot).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SeedOverride {
+ pub column: String,
+ pub kind: SeedOverrideKind,
+}
+
+/// The four `set` override forms (ADR-0048 D2).
+///
+/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
+/// dates are quoted text per the D2 amendment); the `Generator` name is
+/// a raw string validated at execution because `src/dsl` cannot depend
+/// on `src/seed` (the curated vocabulary lives there).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SeedOverrideKind {
+    /// `set status = 'pending'` — every row gets the constant.
+    Fixed(Value),
+    /// `set role in ('admin', 'editor')` — uniform pick from the list.
+    PickList(Vec<Value>),
+    /// `set work_addr as email` — force the named generator (D9).
+    Generator(String),
+    /// `set price between 10 and 100` — uniform in `[low, high]`;
+    /// numeric or (quoted) date bounds per the destination column type.
+    Range { low: Value, high: Value },
+}
+
/// A complex WHERE expression (ADR-0026 §4).
///
/// Built by `grammar::expr::build_expr` from the flat
@@ -949,6 +1000,7 @@ impl Command {
} => "show index",
Self::ShowList { kind, .. } => kind.command_name(),
Self::Insert { .. } => "insert into",
+ Self::Seed { .. } => "seed",
Self::Update { .. } => "update",
Self::Delete { .. } => "delete from",
Self::ShowData { .. } => "show data",
@@ -997,6 +1049,7 @@ impl Command {
| Self::AddConstraint { table, .. }
| Self::DropConstraint { table, .. }
| Self::Insert { table, .. }
+ | Self::Seed { table, .. }
| Self::Update { table, .. }
| Self::Delete { table, .. } => table,
// For relationships we focus on the parent (1-side):
diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs
index b6a8a34..b111075 100644
--- a/src/dsl/grammar/data.rs
+++ b/src/dsl/grammar/data.rs
@@ -24,7 +24,9 @@
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.
-use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
+use crate::dsl::command::{
+ Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
+};
use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{
@@ -425,6 +427,152 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
];
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
+// =================================================================
+// seed — `seed <table>[.<column>] [<count>] [set <overrides>] [--seed <n>]`
+// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
+// column-fill)
+// =================================================================
+
+/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
+/// non-negative integer).
+const SEED_COUNT: Node = Node::NumberLit {
+ validator: Some(LIMIT_VALIDATOR),
+};
+/// `--seed <n>` — a reproducible-generation flag carrying a numeric
+/// seed (ADR-0048 D4). The only flag in the DSL that takes a value;
+/// `build_seed` reads the number immediately after the flag.
+const SEED_FLAG_NODES: &[Node] = &[
+ Node::Flag("seed"),
+ // The seed value is checked by the same validator as the row count.
+ Node::NumberLit {
+ validator: Some(LIMIT_VALIDATOR),
+ },
+];
+const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
+
+// --- column-fill target: the optional `.<column>` (ADR-0048 D1
+// form 2) ----------------------------------------------------
+//
+// `seed users.email …` fills one column across existing rows. The
+// table ident stops at `.` (idents are alnum/underscore), so an
+// `Optional(Seq['.', column])` after the table cleanly discriminates:
+// when the next token is not `.`, the `Punct('.')` first-child
+// NoMatches and `walk_optional` skips it; once `.` commits, a missing
+// column propagates as the user mid-typing `seed users.` (driver
+// `walk_optional` semantics). The column resolves against
+// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
+const SEED_TARGET_COLUMN: Node = Node::Ident {
+ source: IdentSource::Columns,
+ role: "seed_target_column",
+ validator: None,
+ highlight_override: None,
+ writes_table: false,
+ writes_column: false,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+// `.` followed by the target column, wrapped in `Optional` so the
+// whole-row form (no `.column`) still matches.
+const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
+const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
+
+// --- the `set <col> <override> [, …]` clause (ADR-0048 D2) --------
+//
+// Each override pins one column's generation. The column slot
+// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
+// same `current_column_value` dispatch `update … set` uses) narrow to
+// the column's type — so list/range/fixed values get the column's
+// typed slot (quoted text, unquoted number, quoted date) and a
+// type-mismatched literal is flagged. The four tails each start with a
+// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
+// discriminates cleanly (no Optional-first branch).
+
+/// The `set <col>` column slot. Distinct role from `update`'s
+/// `update_set_column` and the expression `expr_column`.
+const SEED_SET_COLUMN: Node = Node::Ident {
+ source: IdentSource::Columns,
+ role: "seed_set_column",
+ validator: None,
+ highlight_override: None,
+ writes_table: false,
+ writes_column: true,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+
+/// `as <generator>` — the curated generator-name vocabulary (D9),
+/// highlighted in the `tok_function` colour. The slot is structural
+/// (any identifier matches); the name is validated at execution and
+/// flagged live by the validity indicator.
+const SEED_GENERATOR: Node = Node::Ident {
+ source: IdentSource::Generators,
+ role: "seed_generator",
+ validator: None,
+ highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
+ writes_table: false,
+ writes_column: false,
+ writes_user_listed_column: false,
+ writes_table_alias: false,
+ writes_cte_name: false,
+ writes_projection_alias: false,
+};
+
+/// `= <value>` — a fixed constant for every row.
+const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
+/// `in (<value> [, <value>]* )` — uniform pick from the list.
+const SEED_OV_IN_VALUES: Node = Node::Repeated {
+ inner: &PER_COLUMN_VALUE,
+ separator: Some(&Node::Punct(',')),
+ min: 1,
+};
+const SEED_OV_IN_NODES: &[Node] = &[
+ Node::Word(Word::keyword("in")),
+ Node::Punct('('),
+ SEED_OV_IN_VALUES,
+ Node::Punct(')'),
+];
+/// `between <low> and <high>` — uniform in the (typed) range.
+const SEED_OV_BETWEEN_NODES: &[Node] = &[
+ Node::Word(Word::keyword("between")),
+ PER_COLUMN_VALUE,
+ Node::Word(Word::keyword("and")),
+ PER_COLUMN_VALUE,
+];
+/// `as <generator>` — force a named generator.
+const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
+
+// The four override tails, in the order `=` / `in` / `between` / `as`.
+const SEED_OV_TAIL_CHOICES: &[Node] = &[
+ Node::Seq(SEED_OV_FIXED_NODES),
+ Node::Seq(SEED_OV_IN_NODES),
+ Node::Seq(SEED_OV_BETWEEN_NODES),
+ Node::Seq(SEED_OV_AS_NODES),
+];
+const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
+
+// One override: the column slot followed by exactly one of the tails.
+const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
+const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
+// One or more overrides, comma-separated.
+const SEED_OVERRIDES: Node = Node::Repeated {
+ inner: &SEED_OVERRIDE,
+ separator: Some(&Node::Punct(',')),
+ min: 1,
+};
+// The full clause: the `set` keyword, then the override list.
+const SEED_SET_CLAUSE_NODES: &[Node] =
+ &[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
+const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
+
+// The `seed` command shape after the entry word: table, optional
+// `.column`, optional count, optional `set` clause, optional `--seed`.
+const SEED_NODES: &[Node] = &[
+ // `writes_table` so the `.column` target, the `set <col>=…`
+ // clause's column slots, and the typed value slots all resolve
+ // against this table.
+ TABLE_NAME_WRITES,
+ SEED_DOT_COLUMN,
+ Node::Optional(&SEED_COUNT),
+ Node::Optional(&SEED_SET_CLAUSE),
+ Node::Optional(&SEED_FLAG),
+];
+const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
+
const UPDATE_NODES: &[Node] = &[
TABLE_NAME_WRITES,
Node::Word(Word::keyword("set")),
@@ -708,6 +856,195 @@ fn build_show_limit(path: &MatchedPath) -> Result