diff --git a/Cargo.lock b/Cargo.lock index 78a8c41..c1fc106 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -419,6 +419,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "diff" version = "0.1.13" @@ -518,6 +524,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "fake" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453" +dependencies = [ + "deunicode", + "either", + "rand 0.10.1", +] + [[package]] name = "fallible-iterator" version = "0.3.0" @@ -1527,6 +1544,7 @@ dependencies = [ "crossterm", "csv", "directories", + "fake", "futures-util", "gethostname", "insta", diff --git a/Cargo.toml b/Cargo.toml index f3b74d9..10c5fd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,14 @@ chrono = { version = "0.4.44", default-features = false, features = ["clock"] } crossterm = { version = "0.29.0", features = ["event-stream"] } csv = "1.4.0" directories = "6.0.0" +# Realistic fake-data generators for the `seed` command (ADR-0048): +# names, emails, addresses, companies, lorem, etc. Default features +# only — the basic fakers need no flags; date/datetime values are +# generated in-house (rand + the existing `chrono`) for the bounded +# windows ADR-0048 D8 requires, so `fake`'s `chrono` feature is +# deliberately omitted. No commerce/product module exists, so the +# `product` generator is hand-rolled (D9). 
+fake = "5" futures-util = "0.3.32" gethostname = "1.1.0" rand = "0.10.1" diff --git a/docs/adr/0048-seed-fake-data-generation.md b/docs/adr/0048-seed-fake-data-generation.md new file mode 100644 index 0000000..9dfd1ed --- /dev/null +++ b/docs/adr/0048-seed-fake-data-generation.md @@ -0,0 +1,677 @@ +# ADR-0048: `seed` — fake-data generation command (SD1, opens SD2) + +## Status + +**Accepted (2026-06-11); Phase 1 + Phase 2 implemented (2026-06-11).** Design +settled with the user across an extended fork dialogue (every decision +below was escalated and user-chosen), then hardened by a pre-build +`/runda` Devil's-Advocate pass that found six blockers — undo +integration (D15), replay semantics (D16), `set` value quoting (D2), +CHECK-constraint handling (D17), a phase-ordering bug in the advisory +(D13), and auto-show flooding (D18) — plus refinements (state-relative +reproducibility, compound-FK tuple sampling, column-fill constraint +rules, the `fake` dependency scan), all folded in. + +**Phase 1 shipped** test-first across commits `202e25a` (generation +library + `fake` dependency) → `f1e9484` (command skeleton) → +`73493fa` (FK sampling) → `9c13501` (uniqueness / junction / IN-CHECK) +→ `0b3ab3c` (`SeedResult` / preview / advisory / count cap) → +`e6ff63d` (single-transaction O(N) path) → `fbd219b` (`--seed` flag, +ambient wiring, and a whole-implementation `/runda` pass). The +post-implementation `/runda` found eight gaps — FK-sampling +determinism (now `ORDER BY`), shortid reproducibility (now from the +seeded RNG, so **D4 holds with no exceptions**), and six untested +ADR decisions (D5/D15/D16/D17 + atomicity + zero-count), all closed. +**2358 tests pass / 0 fail / 0 skip; clippy clean.** + +**Implemented in Phase 1:** the whole-row `seed [count] +[--seed ]` form and every D1–D18 decision *except* the two +Phase-2 surfaces. 
+ +**Phase 2 implemented (2026-06-11):** both remaining surfaces — the +**`set` override clause** (D2: fixed value / pick-list / named +generator / range, quoted literals, type-aware) and the +**`<table>.<column>` column-fill** form (D1 form 2: an UPDATE over +existing rows, refusing PK/autogen targets, empty-table no-op, one undo +step). The named-generator vocabulary (D9) lives in `src/seed` +(`KNOWN_GENERATORS` / `generator_for_name`); a new range `Generator` +(`src/seed/generators.rs`) backs `between`; the override clause is +folded from the flat matched path (`build_seed_overrides`, +`src/dsl/grammar/data.rs`) and applied to the per-column plan +(`apply_seed_overrides`, `src/db.rs`), with column-fill in +`do_seed_column_fill`. Full ambient wiring: completion (the generator +vocabulary after `as`, the `set`/`.col` column slots), highlighting +(`HighlightClass::Function` → `tok_function`, the generator slot), the +validity indicator (`IdentSource::Generators` — an unknown name flagged +`[ERR]`), help, and parse-error pedagogy rows. The D13 advisory now +carries its Phase-2/3 wording (points at `set` and the column-fill +repair). A post-implementation `/runda` pass then added one +user-chosen refinement: a **bounded override on a UNIQUE column** (a +fixed value / too-short pick-list) is now a **friendly error** rather +than a silent uniqueness cap (see D2). **2400 tests pass / 0 fail / 0 +skip; clippy clean.** Two +implementation refinements vs. this ADR's wording, both of which meet the +user-facing contract: dates in the range form are **quoted** (the D2 +amendment, below — no date-literal token exists); and the `set` value +slots reuse `update`'s typed `current_column_value` (no spurious +column-ref match) rather than the raw expression operand. + +Further SD2 increments (custom user generators, NULL injection, +multi-locale, recursive parent auto-seed) remain out of scope (see Out +of scope). + +Closes `requirements.md` **SD1** and delivers the core of **SD2** +(per-type generators, determinism, the `fake`-backed catalogue). It +also closes one of the two remaining gaps in **A1** ("all canonical +app-level commands") — `seed`; the other, `hint` (**H2**), is +separate. 
+ +Builds on: ADR-0014 (data operations, the `Value`/`Bound` value model, +the auto-show pattern, FK-error enrichment), ADR-0005/0011 (the type +vocabulary and `Type::fk_target_type()`), ADR-0012/0013 (the column / +relationship metadata tables, the rebuild-table primitive — *read* by +seed for schema introspection), ADR-0024 (the unified grammar tree / +`CommandNode` registration that gives completion, hints, help-id, +usage-id for free), ADR-0022 (ambient typing assistance — the +`KNOWN_SQL_FUNCTIONS` curated-vocabulary pattern that the +generator-name list mirrors), ADR-0026 (the `in (...)` / `between ... +and ...` expression grammar the override clause reuses), ADR-0027 (the +validity-indicator diagnostics model), and ADR-0038 (the +`OutputStyleClass::Hint` styled output used for the post-seed +advisory). Honours ADR-0003 (both modes, no sigil), ADR-0009 (DSL +conventions — keyword grammar, `--` flags for opt-in choices, one +sigil only), ADR-0002 (no engine name in user-facing strings), and +ADR-0015 (per-command write-through persistence). + +## Context + +`seed
[count]` is the last unbuilt **data-authoring** command +in the requirements. The pedagogical value is high: a learner who has +just modelled a schema wants rows to query against *now*, without +hand-typing dozens of `insert`s. A teacher wants a one-liner that +fills a demo database with believable data. SD1 commits to "plausible +fake data; junction tables seeded with valid foreign-key references +drawn from existing parent rows." SD2 deferred the *how* — "per-type +generators, locale, determinism, override hooks" — explicitly pending +this ADR. + +The design conversation widened the scope deliberately, with the user +confirming each step: + +- **Realism matters more than minimalism** for a teaching tool. Random + `text_a3f9` values teach nothing; `Alice Martinez` / + `alice.m@example.com` make queries feel real. → adopt a faker + library and make generation **name-aware**. +- **The column *name* is the strongest signal** for what a value should + look like, but it is **ambiguous** without the **table** for the + `name`/`title` family (`products.name` ≠ `users.name`). +- **Heuristics will miss**, so a **manual override** surface is + required, not optional — this is SD2's "override hooks", brought + forward. +- **Identifiers and enums** are special: `id`-ish columns want + uniqueness; `status`-ish columns have no sensible generic value and + should be *flagged*, not guessed. + +The novel work is the **generation layer**. Everything downstream — +type validation, autogen autofill (`serial`/`shortid`), FK +enforcement, per-command persistence, the auto-show outcome — is +reused from the existing insert/update machinery as **shared helper +functions**, per the X5 architecture preference (unique commands, with +mechanics shared as library functions — *not* by emitting +`Command::Insert` to borrow `do_insert`). 
+ +## Decision + +Add a dedicated **`seed`** command (its own AST variant and its own +`do_seed` worker executor) available in **both modes**, with the +surface and behaviour below. Generation is realistic, name- and +table-aware, type-gated, with a manual override clause and a +reproducibility flag. + +**Command classification (important, set by the replay decision +D16).** Although `requirements.md` A1 lists `seed` among the +"app-level commands" (meaning: part of the canonical command surface, +no sigil, both modes), `seed` is architecturally a **data-authoring +command** — a sibling of `insert`/`update`/`delete`, **not** an +app-lifecycle `AppCommand`. It is therefore **not** added to +`is_app_lifecycle_entry_word` / completion's +`empty_input_offers_app_command_entry_keywords` (those mirror the +`AppCommand` set and must match — `seed` belongs in neither): `replay` +re-runs it as a data write (D16). + +### D1 — Command surface (fork, user-chosen: "whole-row + column-fill") + +Two forms: + +1. **Whole-row generation** — `seed <table> [count]` + Generates `count` new rows (an INSERT path). `count` **defaults to + 20** (D6) when omitted. Every user-fillable column is filled per the + generation rules (D7–D12); `serial`/`shortid` autogen columns are + left to the existing autofill helpers. + +2. **Column-fill on existing rows** — `seed <table>.<column>` + Fills `<column>` across the table's **existing** rows (an UPDATE + path) — the natural follow-up to `add column`. Combined with the + `set` clause (D2) this is also the precise repair for a single + mis-guessed column: `seed users.work_addr set work_addr as email`. + Column-fill **refuses** PK columns and autogen (`serial`/`shortid`) + columns (a friendly error — you don't "fill" an identity column), + and **respects** the same UNIQUE / FK / required rules as whole-row + generation (a UNIQUE target gets collision-free values; an FK + target samples from the parent, D14). On an **empty** table it is a + friendly no-op ("no rows to fill"). + +**Zero / over-cap counts.** `seed <table>
0` is a friendly no-op; +`count` over the maximum (D6) is a friendly error. + +The column-restricted-*insert* form (`seed t (a, b)` — new rows, only +some columns filled) was considered and **rejected** as marginal and +constraint-fragile (see Alternatives). + +**Required-column block guard (user requirement).** If seed cannot +produce a value for a `NOT NULL` column — the only real case is a +`NOT NULL blob` column, which has no DSL value path — it **refuses the +whole operation with a friendly error** naming the column, rather than +attempting a NULL insert that would violate the constraint. The check +is a pre-flight over the resolved per-column plan, before any write. + +### D2 — Manual override: the `set` clause (fork, user-chosen: "value + list + generator + range") + +An optional, comma-separated `set` clause overrides generation per +column. Four forms, all reusing existing grammar vocabulary so there +is nothing new to learn: + +| Form | Example | Meaning | +|---|---|---| +| Fixed value | `set status = 'pending'` | every row gets the constant | +| Pick-from-list | `set role in ('admin', 'editor', 'viewer')` | uniform random choice from the list | +| Explicit generator | `set work_addr as email` | force a named generator (D9) | +| Range | `set price between 10 and 100` | uniform in range; **also dates** — `set signup between '2023-01-01' and '2024-12-31'` | + +Multiple clauses combine: `seed users 20 set role in ('admin', +'user'), status = 'active', signup between '2023-01-01' and +'2024-12-31'`. + +**Override × UNIQUE capacity (post-implementation `/runda`, user-chosen: +"friendly error").** A *bounded* override — a fixed value, or a +pick-list — on a **single-column-UNIQUE** target (a `UNIQUE` column or a +single-column PK) that offers fewer **distinct** values than the row +count cannot fill the run; rather than let the D10 uniqueness machinery +silently cap it (e.g. 
`seed users 100 set email = 'x'` → 1 row), seed +**refuses up front** with a friendly error pointing at the fixes (use a +generator, or a longer list). Generators and ranges are treated as +effectively unbounded sources — if one genuinely exhausts, the D14 +distinct-combination cap still applies. Compound uniqueness is exempt +(the *other* key columns can still vary). + +**Quoting (fork, user-chosen: "quoted, grammar-consistent").** Text +values and list items are **quoted string literals** (`'admin'`), +exactly as everywhere else in the DSL — only **numbers** stay +unquoted. **Amendment (2026-06-11, Phase 2 build):** the original +wording said "numbers *and dates* stay unquoted", but this DSL has +**no date-literal token** — `Value` is `Number`/`Text` only, and a +date is a **quoted string** validated by `bind_date` (`'2023-01-01'`) +everywhere else (insert / update / `where`). An unquoted `2023-01-01` +lexes as `2023`,`-`,`01`,… and cannot parse. So **dates in the range +form are quoted** (`between '2023-01-01' and '2024-12-31'`) — which is +in fact *more* faithful to this decision's own "quoted, +grammar-consistent" principle. Numbers remain unquoted (`NumberLit`). +This reuses the ADR-0026 expression grammar **unchanged**: +the DA pass confirmed that the `in (...)` form's operands are typed +value slots, so a *bare* `admin` would parse as a **column reference** +(→ "unknown column"), not a string. Quoting is therefore not a style +preference but a correctness requirement of grammar reuse. The range +form is **type-aware**: numeric bounds for numeric columns, date +bounds for date/datetime columns; a type-incompatible bound is a +friendly error. `=`, `in (...)`, and `between ... and ...` are the +ADR-0026 expression operators; `set` is the ADR-0014 UPDATE keyword; +`as` is borrowed from the SQL alias slot. The `as ` operand +is a bare name from the curated generator vocabulary (D9), not a +value. The override takes precedence over every heuristic. 
+ +### D3 — Generation library: `fake` crate + hand-rolled gaps (fork, user-chosen: "name-aware + realistic") + +Add the **`fake`** crate (v5.x at time of writing; English locale for +v1 per X2) for realistic values: names, emails, usernames, addresses, +companies, phone numbers, lorem text, dates. Generation is driven by a +per-column **generator** chosen by the heuristics (D7) or the override +(D2), falling back to **type-based** generation (D8). + +**Implementation-time verifications (resolved 2026-06-11 when the +dependency was added):** + +- **`rand` de-duplication — clean.** `fake` 5.1.0 depends on + `rand = "0.10"`, the **same major** as the project's `rand 0.10.1`, + so `cargo tree -e normal` resolves a **single** `rand 0.10.1` (no + runtime duplication; the `rand 0.8.6` visible to `cargo tree -i + rand` is only `fake`'s own dev-dependency, never compiled for us). + Consequence for D4: one seeded `rand 0.10` `StdRng` can drive + **both** `fake`'s `fake_with_rng` and the hand-rolled generators — + determinism is single-RNG, single-version, and shares `shortid.rs`'s + `rand` version. +- **`fake` module inventory / features — confirmed.** Default features + (`["either"]`) cover the core string fakers used here + (Name/Internet/Address/Company/Lorem/PhoneNumber); `fake`'s `chrono` + feature is **deliberately omitted** (dates generated in-house for + D8's bounded windows). No commerce/product module exists → `product` + is hand-rolled (D9). (The exact faker call sites are pinned when the + generation library is built.) +- **Security (new-dependency posture) — clean.** The `fake` tree (296 + packages total) scanned clean by **all three** mandated scanners: + `osv-scanner` (no issues), `grype` (no vulnerabilities), `trivy fs + --scanners vuln` (0). No findings to document or accept. + +### D4 — Determinism: `--seed ` (fork, user-chosen: "optional flag") + +Generation is **random by default**. 
The optional `--seed <n>` flag +makes a run **reproducible**: **same database state + same `--seed` → +identical data**. The "database state" qualifier matters (DA +refinement) — FK sampling (D14), identifier sequencing (D10), and +UNIQUE collision-avoidance all *read existing rows*, so reproducibility +is relative to the data already present, not absolute. Value: teachers +hand out one dataset; demos are stable; and the feature's own tests +can assert **exact** output (against a known starting state). +Implemented with a seedable RNG threaded through every generator (no +`thread_rng` on the seeded path). `--` flag per ADR-0009 (opt-in +choice). Naming note: the flag `--seed` and the command `seed` share a +word but never collide grammatically (`seed users 20 --seed 42` parses +unambiguously). This flag is also the determinism lever for **replay** +(D16): a recorded `seed … --seed N` line reproduces on replay; a bare +`seed …` line regenerates fresh data. + +### D5 — Both modes (A1) + +`seed` is a canonical app-level command, available in **simple and +advanced** mode, no sigil — like `save`/`load`/`export`/`replay`. + +### D6 — Default count: 20; bounded maximum + +Omitted `count` → **20** rows: enough to make `where`, `group by`, +`order by`, and `limit` meaningful without flooding the output pane. +A **maximum** is enforced (proposed 10 000) to prevent a typo +(`seed t 1000000`) from hanging the app or bloating the project; over +the cap → friendly error stating the limit. + +### D7 — Name-aware heuristics, type-gated (the catalogue) + +A column's **name** selects a generator, but a name rule only fires +when the column's **type** is compatible (a column named `email` typed +`int` does **not** get a string — it falls through to type-based int). +Matching is **case-insensitive**, **token-based** (split on `_`, +camelCase, kebab), **most-specific-first**, with documented +false-positive guards. 
The catalogue (representative; full table lives +with the implementation): + +| Column name (tokens) | Generator | Type gate | +|---|---|---| +| `first_name`/`fname` · `last_name`/`surname`/`lname` | first / last name | text | +| `name`/`full_name` · `title` | **table-context** name (D11) | text | +| `email`/`*_email` | email | text | +| `username`/`login`/`handle` | username | text | +| `password`/`pwd` | password | text | +| `phone`/`mobile`/`cell`/`tel` | phone number | text | +| `city`/`town` · `country` · `state`/`province` | address parts | text | +| `street`/`address`/`addr` · `zip`/`postcode`/`postal` | address parts | text | +| `company`/`employer`/`org` · `job`/`position`/`profession` | company / job | text | +| `description`/`bio`/`notes`/`summary`/`comment` | sentence / paragraph | text | +| `url`/`website`/`homepage` · `color`/`colour` | URL / hex colour | text | +| `price`/`amount`/`cost`/`salary`/`balance`/`total` | currency-range number | numeric | +| `age` · `quantity`/`qty`/`stock`/`count` | 18–80 · small int | numeric | +| `date`/`*_date` | date, recent ~3 yr window | date | +| `dob`/`birthday` | date, adult window (18–80 yr ago) | date | +| `timestamp`/`datetime` · `created_at`/`updated_at`/`*_at` | datetime, recent window (`updated_at` ≥ `created_at`) | datetime | +| `is_*`/`has_*`/`active`/`enabled` | boolean | bool | +| **identifier family** (D10) | unique sequential | int/text | +| **enum-ish family** (D12) | generic text + flag | (text) | + +**False-positive guards (documented):** `username`/`filename`/ +`table_name`/`*_name` handled before the bare `name` rule so they do +**not** resolve to person-name; the bare `name`/`title` rule requires a +standalone token or a recognised `*_name` suffix. 
+ +### D8 — Type-based fallback + +When no name rule matches (or to satisfy a name rule's type gate), +generate by **type**: `text`→realistic words/short phrase, `int`→ +bounded random, `real`→random double, `decimal`→formatted number, +`bool`→random, `date`/`datetime`→**bounded recent** value (never "any +point in all of history" — per the user's date concern), `serial`/ +`shortid`→omitted (autogen helpers fill them), `blob`→unsupported +(nullable→NULL; `NOT NULL`→D1 block guard). + +### D9 — Named generators + the `product` generator + +The generators addressable via `set ... as <generator>` (D2) and +chosen by D7 form a **curated, named vocabulary** — `name`, +`first_name`, `last_name`, `email`, `username`, `phone`, `city`, +`country`, `street`, `zip`, `company`, `job`, `sentence`, `paragraph`, +`url`, `color`, `price`, `age`, `date`, `datetime`, `bool`, `product`, +… — the single source of truth shared by the executor, the completion +source, and the highlighter (mirroring `KNOWN_SQL_FUNCTIONS`, +ADR-0022 Amendment 6). + +**`product`** is **hand-rolled** (the `fake` crate has no +commerce/product module — D3): `{adjective} {material} {noun}` from +three small baked-in word lists (~20 each) → "Sleek Bamboo Keyboard", +"Vintage Leather Backpack". Seedable through the D4 RNG. Always +addressable as `set <column> as product`, and auto-selected by D11 for +the `name`/`title` family in product-ish tables. + +### D10 — Identifier family → unique by name (fork, user-chosen: "unique sequential") + +A column in the identifier family — `id`, `*_id` **that is not an FK**, +`code`, `sku`, `ref`/`reference`, `number`/`no`, `barcode` — that is +**not** a serial/shortid autogen column and **not** the PK is treated +as an identifier and gets **unique** values: **int → sequential** +(`MAX(col)+1` ascending, reads like real ids, never collides); +**text → unique short code** (generate-with-retry). 
Precedence: +**FK detection wins** over this rule (an FK `user_id` *should* have +duplicates — many children per parent), so `*_id` only triggers +uniqueness when the column is not a foreign key. + +**Constraint-driven uniqueness is independent and mandatory:** any +column with a `UNIQUE` constraint (or a user-fillable single-column +PK) gets guaranteed-unique generation regardless of name — a +correctness requirement, not a heuristic. Generation for such columns +uses retry/sequence to guarantee no collision within the batch and +against existing rows. + +### D11 — Table-context disambiguation for `name`/`title` (fork, user-chosen: "table-context-aware") + +For the `name`/`title` family **only**, the heuristic also reads the +**table** name token: + +- `product`/`item`/`goods`/`merchandise`/`catalog`/`inventory` → + `product` generator (D9) +- `company`/`companies`/`vendor`/`supplier`/`manufacturer`/`brand` → + company name +- `user`/`customer`/`person`/`people`/`employee`/`member`/`contact`/ + `author`/`student` → person name +- unrecognised table → generic word + +This resolves the real ambiguity (`products.name` → "Sleek Bamboo +Keyboard"; `users.name` → "Alice Martinez"; `vendors.name` → "Globex +Corp"). It is a deliberately **scoped** use of table context — the only +place the table name influences generation. + +### D12 — Enum-ish names → generic + post-seed advisory (fork, user-chosen: "flag enum-ish only") + +Enum-ish names — `role`, `status`, `type`, `state`, `kind`, +`category`, `level`, `tier`, `stage`, `priority`, `gender` — have **no +sensible generic generator**, so they are **not guessed**: they fall +through to generic text (they must still be filled — a `NOT NULL` +status cannot be left empty). Seed then emits a **post-seed advisory** +(D13) naming them and pointing at the `set ... in (...)` override. 
+ +### D13 — Reporting: post-seed advisory (fork, user-chosen: "flag enum-ish only") + +After a successful seed, in addition to the normal auto-show outcome +(row count + the affected rows, per ADR-0014), seed appends a +**`OutputStyleClass::Hint`** advisory **only** when one or more +enum-ish columns (D12) — **or columns guarded by a CHECK that seed +could not derive values from** (D17) — were filled generically. + +The wording is **phase-aware** (DA finding: the advisory must not name +features that ship later). In **Phase 1** (no `set` clause yet) it +names the columns and explains they were filled generically. From +**Phase 2/3** it points at the concrete repair: + +``` +# Phase 1 wording: +✓ Seeded 20 rows into users + ℹ status, role were filled with generic text — they look like + fixed value sets you may want to choose deliberately. + +# Phase 2/3 wording (set clause + column-fill exist): +✓ Seeded 20 rows into users + ℹ status, role filled generically. Fix existing rows with + seed users.status set status in ('active','inactive'), + or pass set … on the next seed. +``` + +Note the repair for **already-seeded rows** is the **column-fill** +form (`seed users.status set …`), not "re-seed" (which would add more +rows) — DA correction. This is a **result-time** note (cheap, reusing +ADR-0038's hint rendering), not a typing-time warning. The fuller +"per-column report" (every column → its generator) was considered and +**deferred** (see Alternatives / Out of scope). + +### D14 — Foreign keys (SD1; fork on empty-parent, user-chosen: "friendly error") + +- **Each FK** is filled by sampling **uniformly** from the **existing + rows** of the parent table's referenced column(s). Duplicates are + expected and correct (many children per parent). 
For a **compound + FK**, the referenced **tuple is sampled jointly** (a whole existing + parent key), never per-column independently — independent sampling + could fabricate a `(a, b)` pair that exists in no parent row and + would fail FK enforcement (DA refinement). +- **Empty parent** → seed **refuses with a friendly error** naming the + parent and the FK column ("seed `users` first — `orders.user_id` + references it"). Safe, predictable, teaches FK dependency order. + Recursive parent auto-seed is **deferred** to a future `--recursive` + opt-in (Out of scope). +- **Junction / compound-PK tables** (SD1's explicit case): sample + **distinct combinations** of the parent PK tuples to satisfy the + compound PK's uniqueness; if `count` exceeds the number of available + distinct combinations, **cap** at the maximum and note it in the + outcome. +- **Self-referential FK** (`manager_id → id`): if nullable, leave NULL + or point at an earlier row in the same batch; if `NOT NULL` on an + otherwise-empty table, friendly error. Documented edge case. +- **Nullable FKs** are **always filled** in v1 (predictable); + occasional-NULL injection is deferred. + +### D15 — Undo: one snapshot per seed (DA finding; ADR-0006) + +Seed is a mutation, so it must participate in undo. The draft omitted +this; the DA found the codebase already has the right primitive — +`BeginBatch` / `EndBatch` (`db.rs`), used by `replay` so a multi-write +run collapses to **one** boundary snapshot. `do_seed` wraps its +generated writes in `begin_batch` / `end_batch`, so **`seed users 20` +is a single undo step**, not 20 — matching ADR-0006 Amendment 1's +batch model. Column-fill's bulk UPDATE is likewise one step. (`import` +remains the only data-affecting op outside undo, per ADR-0015 §11; +seed is firmly inside it.) 
+ +### D16 — Replay: seed re-runs as a data write (fork, user-chosen) + +`replay` re-executes a recorded `seed` line as a **data-write +command** — it is **not** in the app-lifecycle skip-set (see Command +classification, above). Consequence, accepted by the user: a **bare** +`seed users 20` regenerates **fresh, divergent** data on each replay; +a `seed users 20 --seed 42` line (the determinism lever, D4) +**reproduces** the original data. This keeps seed faithful to its +nature as a data write and puts reproducibility exactly where the +`--seed` flag already lives. (Seeded *data* is in any case durable +independently of replay, via the ADR-0015 CSV store + `rebuild`; +replay is the scripting re-run path, U4.) The DA confirmed the wiring +trap: because seed is *not* an `AppCommand`, it is correctly absent +from `is_app_lifecycle_entry_word` and replay dispatches it through +the normal data path rather than aborting. + +### D17 — CHECK constraints: derive from simple `IN`, else friendly-fail (fork, user-chosen) + +A CHECK on a generically-filled column would otherwise fail the whole +batch (DA finding — the block guard only covered `NOT NULL blob`). +Two-tier handling, per the user: + +1. **Derive from simple `IN`-CHECKs.** When a column's CHECK is the + common enum-as-CHECK shape — `col IN ('a', 'b', …)` (the column's + own CHECK, single-column, literal list) — seed **parses out the + allowed values and uses them as the generator** (uniform choice). + The frequent `CHECK (status IN ('active','closed'))` case then + "just works" with no override needed. +2. **Best-effort + friendly fail for the rest.** For CHECKs seed + cannot interpret (ranges, expressions, multi-column), it generates + best-effort; if a generated row violates the CHECK, the insert + fails through the existing **H1 friendly-error layer** (ADR-0019) + naming the constraint and pointing at `set`. 
Such CHECK-guarded + columns are also **pre-flagged in the advisory** (D13) alongside + enum-ish names, so the user is warned before hitting the failure. + +No new CHECK engine — tier 1 is a narrow literal-`IN` parse over the +CHECK text already stored in metadata; tier 2 is the existing failure +path. + +### D18 — Auto-show is capped for large seeds (DA finding) + +ADR-0014 auto-show renders "the affected rows" — fine for one insert, +a wall for a 10 000-row seed. Seed's outcome shows a **capped +preview** (proposed first **20** rows) with a `(showing 20 of N)` +note, not the full set. The row **count** is always reported in full; +only the rendered table is capped. + +## Grammar, AST, and cross-cutting wiring + +Per ADR-0024, `seed` is registered as a `CommandNode` so completion, +hints, help, and usage flow from one definition. The wiring, as +**explicit acceptance criteria** (a `/runda` pass must verify each — +ADR-0045 showed "claimed verified" is not verified): + +- **AST + executor.** A dedicated command variant (`Seed { table, + target_column: Option, count: Option, overrides: + Vec, rng_seed: Option }`) and a dedicated + `do_seed` worker executor. `do_seed` **reuses shared helpers** + (value binding `impl_value_for`, autogen autofill, FK enrichment, + the multi-row parameterised-insert pattern of `plan_autogen_autofill`, + the UPDATE path for column-fill, per-command persistence, the + `begin_batch`/`end_batch` undo primitive of D15) as library + functions — it does **not** emit `Command::Insert`/`Command::Update` + (X5). +- **Replay / undo classification (D15/D16).** `do_seed` brackets its + writes in one batch (one undo step). 
The `seed` entry word is + **deliberately absent** from `is_app_lifecycle_entry_word` and + completion's `empty_input_offers_app_command_entry_keywords` (the + `AppCommand` mirror) so replay re-runs it as a data write — an + explicit acceptance check, since the default for an unlisted + recognised command must be "replayed", not "abort". +- **Completion sources:** table-name (existing tables); `.column` and + `set`-clause column slots (columns of the named table); the + generator-name vocabulary (D9) after `as`; `count` number; `set` / + `=` / `in` / `as` / `between` / `and` keywords; `--seed` flag. +- **Syntax highlighting:** `seed` keyword; the generator-name + vocabulary highlighted as **`tok_function`** (reuse the existing + ADR-0022 Amendment 6 blue — no new theme colour). +- **Hints:** ambient per-slot "what's next" and usage hints, both + modes. +- **Help:** `help seed` topic (`help_id` + per-command block); the + general `help` list picks it up automatically via REGISTRY. +- **Parse-error pedagogy (ADR-0042):** near-miss matrix rows for `seed` + (bare / missing-table / wrong-token / malformed `set`), both modes. +- **Validity indicator (ADR-0027):** typing-time `[ERR]`/`[WRN]` for + unknown table, unknown column (in `.column` or `set`), unknown + generator name after `as`. +- **No DSL→SQL teaching echo (ADR-0038).** `seed` is a utility/app + command, not a DSL form of a SQL statement, so the echo does not + apply. (A future "show the generated INSERTs" is out of scope — + it would dump `count` statements.) + +## Implementation phasing + +Design is whole; the **implementation** is phased into reviewable, +test-first commits: + +1. 
**Core whole-row seed** *(done, Phase 1)* — grammar/AST/executor; + type-based generation + the `fake`-backed name heuristics + (D7/D8/D11); identifier uniqueness (D10) + constraint uniqueness; FK + sampling (joint tuples) + empty-parent error + junction + distinct-combos (D14); `--seed` determinism (D4); default count + cap + + zero-no-op (D6/D1); required-column block guard (D1); **undo batch + (D15)**; **replay-as-data-write classification (D16)**; **CHECK + derive / friendly-fail (D17)**; **capped auto-show (D18)**; the + enum/CHECK advisory in its **Phase-1 wording** (D12/D13); full + ambient wiring; both modes. +2. **The `set` override clause** (D2) *(done, Phase 2)* — value / list / + generator / range, type-aware, with completion + highlight + + validity for the generator-name slot. +3. **Column-fill mode** (`seed
.`, D1 form 2) *(done, + Phase 2)* — the UPDATE path. + +Each phase is independently green before the next. (Phases 2 and 3 +landed together — they share the `set`-override executor machinery, so +splitting them risked a state where `set` parsed but column-fill +silently no-op'd.) + +## Testing (ADR-0008 tiers 1–3; test-first) + +- **Tier 1 (unit, deterministic via `--seed`):** generator selection + (name × type-gate matrix, including every false-positive guard of + D7); table-context disambiguation (D11); identifier uniqueness and + the FK-wins-over-`*_id` precedence (D10); bounded-date windows (D8); + the `product` generator shape; override resolution + precedence (D2); + the required-column block guard (D1); the count cap (D6). Exact-value + assertions are possible because `--seed` fixes the RNG. +- **Tier 2 (insta snapshots):** the seeded data table render and the + enum advisory (D13) at representative sizes, light + dark. +- **Tier 3 (integration, full event loop):** `seed users 20` end to + end (rows land in db + CSV + history, auto-show, persistence); + FK sampling against a populated parent (incl. a **compound FK** — + every child tuple exists in the parent); **empty-parent friendly + error**; **junction** seeding with distinct combinations and the + over-cap note; the `set` clause forms (quoted literals); **column- + fill** on existing rows (incl. refusal of PK/autogen targets, empty- + table no-op); reproducibility (`--seed 42` twice → identical data + from a fixed state); both modes. Plus the DA-driven cases: + **one-undo-step** (seed then a single `undo` removes all rows); + **replay** of a bare `seed` line (divergent) vs a `--seed` line + (reproduced); **`IN`-CHECK auto-derivation** ("just works") and a + **complex-CHECK friendly failure**; **capped auto-show** on a large + seed. + +"All green, no skips" is the only acceptable end state; the Phase-1 +baseline (2290 passing / 0 failing / 0 skipped / 1 ignored doctest) is +the regression floor. 
+ +## Out of scope / deferred (future SD2 work) + +- **Recursive parent auto-seed** (`--recursive`) — D14 errors instead. +- **NULL injection** for nullable columns (teaching optional + relationships / `IS NULL`) — v1 always fills. +- **Multi-locale** generation — English only (X2). +- **User-defined custom generators** (true "override hooks" — register + a named generator) — the `set ... as <generator>` surface covers the + common need; custom generators are a later SD2 increment. +- **Full per-column seed report** — D13 flags enum-ish only. +- **Column-restricted insert** (`seed t (a, b)`) — rejected (D1). +- **"Show the generated SQL"** teaching echo for seed. + +## Alternatives considered + +- **Hand-rolled generators only (no `fake`):** minimal dependency, but + synthetic-looking data (`text_a3f9`) — rejected on pedagogy + (pedagogy wins ties). +- **Type-only generation (no name awareness):** simpler, but misses + the biggest UX win (a `users` table that reads like real people) — + rejected. +- **Column-name-only `name` (no table context):** leaves + `products.name` → person names, requiring a manual override on every + product/company table — rejected for the `name`/`title` family + (D11). +- **No override clause (heuristics + type only):** could not answer + "the heuristic guessed wrong, fix it" or enum columns — rejected; + the `set` clause (D2) is the answer to the user's Q3. +- **Recursive auto-seed of empty parents:** powerful but magical and + can seed tables the user did not name — deferred behind a future + flag (D14). +- **Always-random (no `--seed`):** simplest, but no reproducible + datasets and weaker tests — rejected (D4). +- **Full per-column report by default:** a nice teaching artifact but + verbose on wide tables — deferred; flag-only advisory chosen (D13). 
+- **Reuse `Command::Insert`/`do_insert` directly** from seed: tempting + for code reuse, but collapses command identity and violates X5 — + rejected in favour of a dedicated `do_seed` that calls shared + *helpers*. +- **Skip seed on replay** (classify as app-lifecycle, D16): consistent + with A1's "app-level" label and avoids divergent data, but seed is a + data write and silently skipping it on a scripted re-run is + surprising — rejected; `--seed` is the determinism lever instead. +- **Bare-word `set` list items** (`in (admin, …)`, D2): matched the + early mockups and reads cleaner, but bare words are column + references in the reused grammar (would error) and would force a + custom list form — rejected for quoted literals (grammar reuse + + DSL consistency). +- **Pre-flight refuse any CHECK-bearing table** (D17): safest but + blocks seeding too many legitimate tables — rejected for the + derive-`IN`-else-friendly-fail tier. +- **`set`-driven NULL / per-column report / recursive parent seed:** + deferred — see Out of scope. diff --git a/docs/adr/README.md b/docs/adr/README.md index 884d6ef..d02cb90 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -60,3 +60,4 @@ This directory contains the project's ADRs, recorded per - [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). 
Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from to [as ]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. 
OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships - [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b`…`22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String` — **not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. 
**Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). the full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). **Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~40–50 cols as a `Clear` overlay** (right panels stay unchanging underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). 
Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears) - [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54`→`2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. 
**Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). **Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (3–5 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. 
Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). 
Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle +- [ADR-0048 — `seed` fake-data generation command](0048-seed-fake-data-generation.md) — **Accepted 2026-06-11; Phase 1 + Phase 2 implemented 2026-06-11** (Phase 1 commits `202e25a`→`fbd219b`; design settled with the user across an extended fork dialogue, hardened by a pre-build `/runda` pass (six blockers folded in), a post-implementation `/runda` pass (eight gaps closed — FK/shortid determinism so **D4 holds with no exceptions**, plus six untested ADR decisions), a Phase-2 pre-build `/runda` pass (which caught the no-date-literal-token reality → the D2 quoted-dates amendment), and a Phase-2 post-implementation `/runda` pass (which added a friendly error for a bounded override on a UNIQUE column — see D2); **2400 tests pass, clippy clean**). Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1**. **Phase 1 shipped:** whole-row `seed
[count] [--seed ]` with realistic name-aware generation (the `fake` crate + a type-gated heuristic catalogue, table-context name disambiguation, hand-rolled `product` generator, bounded dates), identifier + constraint uniqueness incl. junction distinct-combos, FK sampling from existing parent rows (empty-parent error), `IN`-CHECK derivation + complex-CHECK advisory, a required-column block guard, `--seed` reproducibility (serial/FK/shortid all deterministic), undo as one batch step, replay as a data write, a capped auto-show preview, the enum/CHECK advisory, and an O(N) single-transaction insert path. **Phase 2 shipped (2026-06-11):** the `set` override clause (D2 — fixed value / pick-list / `as ` / `between` range, **quoted** dates per the D2 amendment, type-aware, override drops the column from the advisory) and the `
.` column-fill form (D1 form 2 — an UPDATE over existing rows, refusing PK/autogen targets, empty-table no-op, FK/unique-respecting, one undo step), with the new `KNOWN_GENERATORS` vocabulary (D9), a range `Generator`, full completion/highlight (`HighlightClass::Function`)/validity (`IdentSource::Generators`)/help/pedagogy wiring, and the D13 advisory's Phase-2/3 wording. Further SD2 increments (custom generators, NULL injection, multi-locale, recursive auto-seed) out of scope. Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1** (the other being `hint`/**H2**). A dedicated `seed` command (own AST variant + `do_seed` executor, **both modes**) generating **realistic, name-aware** fake data. Two forms: **`seed
[count]`** (new rows, default **20**, capped) and **`seed
.`** (fill a column on existing rows, an UPDATE). Generation adds the **`fake` crate** (v5, English) driven by a **type-gated, token-matched name-heuristic catalogue** (~30 patterns, documented false-positive guards), with **table-context** disambiguating the `name`/`title` family (`products.name`→product, `users.name`→person, `vendors.name`→company), a **hand-rolled `product` generator** (`fake` has no commerce module), **bounded dates** (`date`/`timestamp`/`dob`/`*_at` recognised, recent windows — never "all of history"), the **identifier family** (`id`/`code`/`ref`/`number`, non-FK/non-PK) → **unique sequential**, and **enum-ish names** (`role`/`status`/`type`/…) left generic + a **post-seed Hint advisory** pointing at `set … in (…)`. A **`set` override clause** — `= value` / `in (a,b,c)` / `as ` / `between a and b` (numeric **and** date), reusing ADR-0026 operators — answers the heuristic-miss case. **`--seed `** makes runs reproducible (and enables exact-value tests). **FK** columns sampled uniformly from existing parent rows (**empty parent → friendly error**, no recursion v1); **junction/compound-PK** tables seeded with **distinct combinations**, capped + noted (SD1). A **required-column block guard** refuses rather than NULL-violate a `NOT NULL` column it can't fill (e.g. `NOT NULL blob`). Full ambient wiring (completion incl. a new generator-name vocabulary highlighted as `tok_function`, hints, `help seed`, ADR-0042 near-miss matrix, ADR-0027 validity); **no DSL→SQL teaching echo** (seed is a utility command, not a SQL twin). Honours **X5** — `do_seed` reuses insert/update *mechanics as helpers*, not by emitting `Command::Insert`. Implementation phased: (1) core whole-row seed → (2) `set` overrides → (3) column-fill. 
Deferred (future SD2): recursive auto-seed, NULL injection, multi-locale, user-defined custom generators, full per-column report diff --git a/docs/handoff/20260611-handoff-64.md b/docs/handoff/20260611-handoff-64.md index e5dbbe2..cec3fb8 100644 --- a/docs/handoff/20260611-handoff-64.md +++ b/docs/handoff/20260611-handoff-64.md @@ -8,9 +8,8 @@ to end across three phases + a restyle). ## §1. State at handoff -**Branch:** `main`. **HEAD `2d0f4b2`** plus an **uncommitted docs -finalization** (ADR-0047 status → implemented, README index, this -handoff — see §6). Push is the user's step. +**Branch:** `main`. **HEAD `f0afec3`** — all work committed, nothing +pending. Unpushed (push is the user's step; normal working state). **Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1 ignored is the long-standing `friendly` doctest). **Clippy clean** @@ -18,6 +17,7 @@ ignored is the long-standing `friendly` doctest). **Clippy clean** **This session's commits:** ``` +f0afec3 docs: session handoff 64 + ADR-0047 implemented (#22/#24) 2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4) 241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4) 2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5) @@ -26,8 +26,9 @@ e9eb1b1 docs: ADR-0047 — demonstration overlay layer for casts/teaching (#22) 638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24) ``` -**Issues closed:** **#24** (vi nav) and **#22** (demo overlays) — close -#22 once the docs finalization commit lands. +**Issues closed:** both **#24** (vi nav) and **#22** (demo overlays) are +**closed on Gitea** with closing comments — verified via the filtered +issue list. Nothing left open from this session's scope. ## §2. #24 — vi-style load-picker navigation (commit `638b4c9`) @@ -107,13 +108,15 @@ existing `IndicatorDebounce` already takes. A future Tier-4 PTY harness ## §6. 
How to take over +**Nothing is pending from this session** — both issues are closed, all +docs landed (`f0afec3`), tree is green. The next session **returns to the +open requirements backlog** (§7). Suggested start: run `/whatsnext` +(it reads this handoff), or pick from §7 below. + 1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`, - `docs/adr/README.md`, and **ADR-0047** (fully landed). -2. **Pending:** the docs finalization commit (ADR-0047 status → - implemented; README index; this handoff). Commit as - `docs: session handoff 64 + ADR-0047 implemented (#22/#24)` (the user - confirms commit messages). Then close **#22** on Gitea. -3. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`, + `docs/adr/README.md`. ADR-0047 is fully landed; revisit only for + demo-overlay follow-ups. +2. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`, `demo_badge_seq`, `demo_caption`, `demo_caption_capturing`, `demo_caption_buffer`, `last_output_area`. Rendering: `render_demo_overlays` / `render_badge_box` / `render_caption_box` / diff --git a/docs/handoff/20260611-handoff-65.md b/docs/handoff/20260611-handoff-65.md new file mode 100644 index 0000000..07bd865 --- /dev/null +++ b/docs/handoff/20260611-handoff-65.md @@ -0,0 +1,144 @@ +# Session handoff — 2026-06-11 (65) + +Sixty-fifth handover. Continues from handoff-64 (ADR-0047 demo +overlays). This session designed and shipped **ADR-0048 — the `seed` +fake-data generation command (SD1)**, Phase 1, end to end: an ADR with +an extended fork dialogue + two `/runda` passes, then a phased +test-first build. + +## §1. State at handoff + +**Branch:** `main`. **HEAD will be the doc-wrap-up commit** (see §6) — +all seed work committed, nothing pending. Unpushed (push is the user's +step; normal working state). + +**Tests: 2358 passing / 0 failing / 0 skipped / 1 ignored** (the long +-standing `friendly` doctest). **Clippy clean** (nursery, all targets). ++68 over handoff-64's 2290. 
+ +**`cargo sweep` run** at wrap-up: `target/` 1.6 G → 183 M. + +**This session's commits:** +``` +202e25a feat(seed): fake-data generation library + fake dependency (P1.1) +f1e9484 feat(seed): command plumbing + walking skeleton (P1.2) +73493fa feat(seed): FK sampling, empty-parent error, block guard (P1.3a) +9c13501 feat(seed): uniqueness, junction distinct-combos, IN-CHECK (P1.3b) +0b3ab3c feat(seed): SeedResult outcome, capped preview, advisory, count cap (P1.3c) +e6ff63d perf(seed): single-transaction multi-row insert path (P1.3d) +fbd219b feat(seed): --seed flag, ambient wiring, and /runda hardening (P1.4 + DA) +``` +(plus the earlier `4d0ae77` multi-tab-scope withdrawal and `0af7f56` +ADR-0048 doc, and the wrap-up doc commit.) + +## §2. What `seed` does (Phase 1 — read ADR-0048) + +`seed
[count] [--seed ]` — populate a table with realistic +fake data. **Available in both modes** (A1). + +- **Realistic, name-aware generation:** the **`fake` crate** (v5, + English) driven by a **type-gated heuristic catalogue** (`src/seed/ + heuristics.rs`) — `email`→email, `first_name`→first name, `price`→ + currency, etc., each only firing when the column *type* is + compatible. **Table-context** disambiguates `name`/`title` + (`products.name`→a hand-rolled **product** name, `users.name`→person, + `vendors.name`→company). **Bounded dates** (`dob`/`created_at`/ + `date`/`timestamp` → recent windows, never "all of history", anchored + to a fixed reference epoch for reproducibility). Type-based fallback + otherwise. +- **Uniqueness (D10):** the user-fillable PK, compound UNIQUE + constraints, single-column UNIQUE, and identifier-named columns + (`id`/`code`/…) stay distinct across the batch and vs existing rows; + **junction tables** get **distinct FK combinations** (capped at the + available product, reported). Identifier ints get a monotonic + sequence. +- **FK (D14):** every FK column samples an existing parent row (compound + FK reads one consistent parent row); **empty parent → friendly + error**. +- **`IN`-CHECK (D17):** a simple `col IN ('a','b')` CHECK becomes the + value source (enum-as-CHECK just works); complex CHECKs are flagged in + the advisory and best-effort generated (a violation rolls the batch + back). +- **Reproducibility (D4):** `--seed ` → identical data on the same DB + state. **Holds with no exceptions** — serial (rowid/MAX+1), FK + (`ORDER BY`), **shortid (seeded RNG)**, all generators. +- **Output:** the seeded-row count, a **capped preview** (first 20 + rows), and a **Hint-styled advisory** naming enum-ish / underivable- + CHECK columns filled generically. Count cap 10 000; `seed t 0` no-op. 
+- **Safety:** one **undo** step (snapshot wraps the whole seed); + **replay** re-runs it as a data write; the insert path is a single + transaction (O(N), atomic, commit-db-last preserved). + +## §3. Where the code lives + +- **`src/seed/`** — the pure generation library (no DB): `mod.rs` + (`ColumnSpec`, `Generator`, `SeedRng`, `make_rng`), `heuristics.rs` + (`choose_generator` + the catalogue + `is_enum_ish`), `generators.rs` + (`generate_value` + the `product` generator + bounded dates), + `check.rs` (`parse_in_check_values`). ~40 Tier-1 tests, deterministic. +- **`src/db.rs`** — `do_seed` (+ `SeedColPlan`, `sample_parent_key_ + tuples`, `seed_value_list_key`, `seed_max_int`, `SeedResult`, + `DEFAULT_SEED_COUNT`/`MAX_SEED_COUNT`/`SEED_PREVIEW_CAP`), the new + **`insert_one_row`** core extracted from `do_insert` (shared, no + tx/persist — so seed runs N rows in one tx), and the `Request::Seed` / + `Database::seed` / worker wiring. +- **`src/dsl/grammar/data.rs`** — `SEED` `CommandNode`, `build_seed`, + the `--seed` flag grammar (`Seq[Flag("seed"), NumberLit]`, the first + DSL flag with a value). `Command::Seed` in `command.rs`. +- **Runtime/render** — `CommandOutcome::Seed`, `AppEvent:: + DslSeedSucceeded`, `App::handle_dsl_seed_success`. Catalog keys + `ok.rows_seeded` / `seed.capped` / `seed.advisory_generic` / + `help.data.seed` / `parse.usage.seed`. +- **Tests** — `tests/it/seed.rs` (25 integration tests), + `tests/typing_surface/mod.rs` (`seed_completion_and_validity`), + `tests/it/parse_error_pedagogy.rs` (bare-`seed` near-miss row), + `src/app.rs` (two render tests), `src/dsl/shortid.rs` + (`generate_with_rng`). + +## §4. 
Process notes (the two `/runda` passes) + +- **Pre-build `/runda`** (on the ADR) found six blockers — undo + integration (D15), replay semantics (D16), `set`-value quoting (D2), + CHECK handling (D17), an advisory phase-ordering bug (D13), auto-show + flooding (D18) — all folded into ADR-0048 before any code; the three + genuine forks re-escalated and user-resolved. +- **Post-implementation `/runda`** (on the whole implementation) found + **eight gaps**, all closed: FK-sampling determinism (→ `ORDER BY`), + **shortid not reproducible** (→ seeded RNG, fixed not documented — the + user chose the fix), and six **untested ADR decisions** (D5 advanced + mode, D15 undo, D16 replay, D17 complex-CHECK advisory, atomic + rollback, zero-count) — tests added for each. + +## §5. Phase 2 (deferred — designed in ADR-0048, NOT built) + +These are the only seed pieces left; both have full designs in +ADR-0048: + +1. **The `set` override clause (D2)** — `seed t 20 set role in + ('a','b'), status = 'x', work_addr as email, price between 10 and + 100`. Value / pick-from-list / explicit-generator / range, **quoted + literals** (grammar-consistent). This is the SD2 "override hooks" + core. The `ColumnSpec.check_in_values` → `PickFrom` plumbing and the + `Generator` vocabulary already exist; this adds the grammar + a `set` + clause that overrides the per-column plan. +2. **Column-fill (`seed
.`, D1 form 2)** — fill one + column across *existing* rows (an UPDATE). Refuses PK/autogen targets; + empty-table no-op. + +`requirements.md`: **SD1 `[x]`**, **SD2 `[/]`** (core done; the two +above open), **A1 14/15** (only `hint`/**H2** unregistered). + +## §6. How to take over + +1. Read handoffs 63 → 64 → 65, `CLAUDE.md`, `docs/requirements.md`, + `docs/adr/0048-seed-fake-data-generation.md` (the whole thing — D1 + –D18 + the as-built status block). +2. **Seed is feature-complete for Phase 1; nothing pending.** Next + options (user's call): seed **Phase 2** (`set` clause + column-fill); + **H2 `hint`** (closes A1) — own ADR; **TT5 CI**; or the larger + **V4 journal** / **tutorial** ADRs. +3. Two minor, user-deferred observations (non-blocking): the uniqueness + retry cap (`MAX_ATTEMPTS=200`) can cap a *medium* unique domain + slightly below its true size (junction/small domains are exact); + `literal_to_value` doesn't type-check an IN-CHECK literal vs a numeric + column (a malformed `int IN ('a')` CHECK fails cleanly at bind). diff --git a/docs/handoff/20260611-handoff-66.md b/docs/handoff/20260611-handoff-66.md new file mode 100644 index 0000000..70f8562 --- /dev/null +++ b/docs/handoff/20260611-handoff-66.md @@ -0,0 +1,145 @@ +# Session handoff — 2026-06-11 (66) + +Sixty-sixth handover. Continues from handoff-65 (ADR-0048 `seed` +Phase 1). This session built **ADR-0048 Phase 2** end to end: the +**`set` override clause** (D2) and the **`
.` +column-fill** form (D1 form 2) — the two surfaces Phase 1 deliberately +deferred. Designed-then-DA-vetted (a `/runda` pass that caught a real +ADR-vs-grammar conflict), then built test-first. + +## §1. State at handoff + +**Branch:** `main`. All Phase-2 work is in the working tree; +**commits are pending the user's approval** (see §6). Unpushed is the +normal working state. + +**Tests: 2400 passing / 0 failing / 0 skipped / 1 ignored** (the +long-standing `friendly` doctest). **Clippy clean** (nursery, all +targets). +42 over handoff-65's 2358. + +## §2. What landed (read ADR-0048 — Status + D1/D2/D9/D13) + +`seed [.] [count] [set ] [--seed ]`. + +- **`set` override clause (D2):** four forms, comma-separated — + `status = 'active'` (fixed), `role in ('a','b')` (pick-list), + `work_addr as email` (named generator), `price between 10 and 100` + (range; numeric **and quoted dates**). Type-aware; an override + **drops its column from the generic-fill advisory** (D13). Value + slots reuse `update`'s typed `current_column_value` (quoting + enforced structurally — a bare word is rejected). +- **Column-fill (D1 form 2):** `seed users.email [set …]` fills one + column across **existing** rows (an UPDATE). Refuses PK / autogen + (`serial`/`shortid`/`blob`) targets; **empty table → friendly + no-op**; FK target samples the parent; UNIQUE/identifier target gets + collision-free values; **one undo step**; `set` may only adjust the + filled column; a row count is refused. +- **Named-generator vocabulary (D9):** `src/seed/vocabulary.rs` — + `KNOWN_GENERATORS` + `generator_for_name` + `is_known_generator_prefix`, + the single source of truth for completion, validity, and the executor. +- **Range generator:** `Generator::Range { low, high }` in + `src/seed/generators.rs`, interpreted per destination type; + `range_bounds_reason` validates compatibility before generation. 
+- **Ambient wiring:** completion (generator names after `as`, the + `set ` and `.col` column slots, the `set` keyword); highlight + (new `HighlightClass::Function` → existing `tok_function`); validity + (new `IdentSource::Generators` — unknown generator flagged `[ERR]`; + unknown column in `set`/`.col` flagged via the existing Columns + path); help (`help.data.seed`); parse-error pedagogy near-miss rows; + the D13 advisory's **Phase-2/3 wording** (points at `set` and the + column-fill repair). Both modes (D5). + +## §3. The ADR amendment (a real DA find) + +The pre-build `/runda` pass found that **ADR-0048 D2's "dates stay +unquoted" was impossible** — this DSL has **no date-literal token** +(`Value` is `Number`/`Text`; dates are quoted strings validated by +`bind_date`). Escalated to the user, who chose **quoted dates + +amend the ADR** (the grammar-consistent option). D2 now carries a +dated amendment; the range form uses `between '2023-01-01' and +'2024-12-31'`. This was the only divergence from the ADR text; numbers +remain unquoted. + +## §4. Where the code lives + +- **`src/dsl/command.rs`** — `Command::Seed` gains `target_column: + Option` + `overrides: Vec`; new `SeedOverride` + / `SeedOverrideKind`. +- **`src/dsl/grammar/data.rs`** — `SEED_SET_CLAUSE` + `SEED_DOT_COLUMN` + grammar; `SEED_GENERATOR` slot (`IdentSource::Generators`, + `HighlightClass::Function`); `build_seed` + the override fold + (`build_seed_overrides` / `parse_seed_override_tail`). +- **`src/dsl/grammar/mod.rs`** — `IdentSource::Generators` + + `HighlightClass::Function`. +- **`src/db.rs`** — `apply_seed_overrides` / `seed_override_plan` / + `seed_override_literal`; `do_seed_column_fill`; `do_seed` + + `Database::seed` + worker wiring threaded with the new params. +- **`src/seed/`** — `vocabulary.rs` (new); `generators.rs` (range + generator + `range_bounds_reason`); `mod.rs` (`Generator::Range`). +- **`src/completion.rs`** — generator candidates after `as`; generator + validity. 
**`src/input_render.rs`** — `"generator"` invalid-ident + kind. **`src/theme.rs`** — `Function → tok_function`. +- **Catalog** — `help.data.seed`, `parse.usage.seed`, + `seed.advisory_generic` (Phase-2/3 wording) in `en-US.yaml`; + `keys.rs` placeholders updated. +- **Tests** — `tests/it/seed.rs` (+~30: builder fold, executor + set/column-fill, undo, advanced mode), `src/seed/{vocabulary, + generators}.rs` (range + vocabulary units), `src/completion.rs` + (generator + column validity), `src/dsl/walker/highlight.rs`, + `tests/typing_surface/mod.rs` (completion slots), + `tests/it/parse_error_pedagogy.rs` (near-miss rows). + +## §5. Two implementation refinements vs. the ADR (both met the contract) + +- **Quoted dates** (the D2 amendment, §3). +- **Value slots reuse `current_column_value`** (the `update … set` + typed slot) rather than the raw ADR-0026 expression operand — no + spurious column-ref match, typed narrowing, consistent with + `update`. The user-facing contract (quoted literals, type-aware) is + fully met. + +The `seed_take_value` / `seed_set_error` builder paths are +drift-guards (the typed slots only ever match value literals, so a bare +word is rejected at the grammar level) — they use the generic +`parse.error_wrapper`, mirroring `expr::build_expr`. + +## §6. How to take over / next steps + +1. Read handoffs 64 → 65 → 66, `CLAUDE.md`, `docs/requirements.md`, + `docs/adr/0048-…md` (Status block + D1/D2/D9/D13 + the amendment). +2. **Seed is feature-complete (SD1 + SD2).** `requirements.md`: **SD1 + `[x]`, SD2 `[x]`**. The only open A1 gap is `hint`/**H2** (own ADR). +3. **Commits pending approval.** Suggested split: + - `feat(seed): set override clause + column-fill (ADR-0048 Phase 2)` + — all `src/` + `tests/` changes. + - `docs: ADR-0048 Phase 2 implemented + handoff 66` — ADR / README / + requirements / this file. +4. 
Next options (user's call): **H2 `hint`** (closes A1); **TT5 CI**; + the larger **V4 journal** / **tutorial** ADRs; or Tier-4 PTY (TT4). +5. Consider a `cargo sweep` at this milestone (`target/` grows). + +## §7. Post-implementation `/runda` pass (done this session) + +A DA pass over the completed code found **no correctness bugs and no +dropped requirements**; all D1–D18 acceptance criteria verified met, +tests confirmed to catch regressions. One **design fork** was surfaced +and **resolved by the user**: + +- **Bounded override × UNIQUE column** — a fixed value / too-short + pick-list on a single-column-UNIQUE target used to silently cap the + run (e.g. `seed users 100 set email = 'x'` → 1 row). Now a **friendly + error** up front (`seed_override_capacity_guard`, `src/db.rs`), for + both whole-row and column-fill; generators/ranges stay cap-based + (unbounded sources). ADR-0048 D2 documents it; two tests pin it. + +Remaining **non-blocking** edges (noted, not bugs): + +- Overriding an **FK column** with a literal: the override wins (D2); a + non-parent value fails safely through the FK-error layer. +- **Column-fill of one column of a *compound* FK** samples that column + independently → an invalid tuple fails safely (UPDATE rejected, + rollback), never corrupts. Single-column FKs / non-FK columns are + exact. +- The generator slot uses the **default candidate-ladder hint** (offers + the vocabulary), not a dedicated prose intro — discoverability is met + by completion; a prose intro is optional polish. diff --git a/docs/handoff/20260612-handoff-67.md b/docs/handoff/20260612-handoff-67.md new file mode 100644 index 0000000..65477e0 --- /dev/null +++ b/docs/handoff/20260612-handoff-67.md @@ -0,0 +1,119 @@ +# Session handoff — 2026-06-12 (67) + +Sixty-seventh handover. Continues directly from handoff-66 (ADR-0048 +`seed` Phase 2, committed). 
This was a **manual-testing pass**: the user +exercised the app, found several rough edges, and we triaged each into +*fix now* vs *file an issue*. Net result: **three bug fixes committed** +and **three enhancement issues filed**. + +## §1. State at handoff + +**Branch:** `main`. Working tree **clean**; all work committed. Unpushed +(push is the user's step). + +**Tests: 2407 passing / 0 failing / 0 skipped / 1 ignored** (the +long-standing `friendly` doctest). **Clippy clean** (nursery, all +targets). +7 over handoff-66's 2400. + +**Commits since handoff-65:** +``` +f7155ce fix(input): thread the `:` one-shot escape into live SQL feedback +4cacb82 fix(completion): don't flag a table alias used before its FROM clause +c3e0103 fix(completion): flag-aware partial so a dash completes flags, not keywords +30b2677 docs: ADR-0048 Phase 2 implemented + handoff 66 +a12facc feat(seed): set override clause + column-fill (ADR-0048 Phase 2) +``` +(`a12facc`/`30b2677` are the Phase-2 work documented in handoff-66.) + +## §2. Bug fixes this session (all committed, all tested) + +1. **`c3e0103` — flag completion ate the dash.** Typing a flag at a + flag position (`add 1:n relationship … -`) offered the `on` keyword + and, on accept, produced `-on` / `---create-fk`: the partial-token + walk stopped at `-`, so the dash was outside the replaced range. + Fix: flag-aware partial detection (a dash-prefixed token at a word + boundary is a flag-in-progress, **gated on a flag being expected** so + `where x = -5` stays a number) + a unified flag matcher + (`trim_start_matches('-')`). Affected **all** flags. 4 tests + 2 + partial-flag snapshots updated (they'd captured the latent bug). + +2. **`4cacb82` — table alias flagged as an unknown column.** In a + SELECT, the projection (`sum(ol.count*…)`) can reference an alias + whose `FROM … OrderLines ol` sits *after* the cursor. 
The candidate + engine recovers that via the §10.6 full-input lookahead (ADR-0032), + but `invalid_ident_at_cursor` only walked text *before* the cursor — + so `ol` matched no scope and got a red "ERR" overlay on an otherwise + valid query. Fix: give the validity check the same full-input + lookahead and bail when the partial prefix-matches a binding's alias + or table. 1 test. + +3. **`f7155ce` — the `:` one-shot escape broke live SQL feedback.** + Submission strips the `:` (ADR-0003), but the *live* feedback kept it + in the buffer handed to the walker, which bailed at the `:`. Effect: + under `:`, Tab completed nothing and a valid query could flash `[ERR]` + — while the same line in full `mode advanced` worked. (The hint + already stripped it, hence "hint shows the name but Tab does + nothing".) Fix: one shared `App::feedback_view()` (the `:`-stripped + SQL + mapped cursor + stripped offset) routed through completion (with + a `replaced_range` offset shift), the validity verdict, and rendering + (new `render_input_runs_feedback` highlights/overlays the view shifted + by the offset; the `:` renders as plain text); the ambient hint was + consolidated onto it (removing the duplicate `strip_one_shot_prefix`). + 3 tests + the 9 existing colon tests still green. + +## §3. Investigated, **no code change** (working as designed) + +- **Comma-`FROM` implicit join** (`select … from A, B, C`) is + **deliberately rejected** — ADR-0032 §11 / OOS-3: *"comma-FROM teaches + habits we do not want to encourage; `CROSS JOIN` covers the same shape + explicitly."* The explicit equivalent (`CROSS JOIN … WHERE …`) works. +- **`sum(…)` returning one row** with no `GROUP BY` is **correct SQL** + (the aggregate collapses the result to one row; SQLite/the playground + allow the non-aggregated columns where Postgres would error). The + user's query needed `group by o.id`. Verified (1 row). + +## §4. 
Open issues filed this session — **next session's candidates** + +All on `git.lazyeval.net/oli/rdbms-playground`, label `enhancement`: + +- **#26 — `seed <table>
` hint omits the optional count.** A complete + command's optional positional *number* has no Tab candidate, so it's + invisible. `IntroProse` doesn't fit (it only fires for incomplete + required slots; the completing Seq match clears the hint). Needs a way + to advertise optional positional non-keyword args. *(I attempted + + reverted this during Phase 2; see the analysis in the issue.)* +- **#27 — Bottom status line: keybindings-only, context- and + state-aware.** Per-nav-focus keybindings (Input vs sidebar), **include + transient states** (Tab-cycle, history) — user preference — and add + `mode advanced` to the empty-input hint. May warrant a small ADR. +- **#28 — Reconsider relationship prose in `add column` (incidental DDL) + confirmations.** Currently by design (ADR-0044 §1 keeps prose, not + diagrams, for incidental DDL). **User preference: do NOT show the + `References:` / `Referenced by:` block** in the add-column + confirmation at all — focus on the change just made. This revisits a + decided area → land as a **new ADR** superseding the relevant part of + ADR-0016 §5 / ADR-0044 §1; confirm scope (just `add column`, or all + incidental DDL). + +## §5. Other open work (unchanged from handoff-66 §6) + +`seed` is **feature-complete** (`requirements.md` SD1 `[x]`, SD2 `[x]`). +Remaining roadmap, user's call: + +- **H2 `hint`** — the last A1 gap (its own ADR). +- **TT5 CI** — test infra exists; no CI workflow yet. +- **TT4 PTY (Tier-4)** — ADR-0008 specifies it; not wired. +- Larger: **V4 journal**, **tutorial/lesson system** (each needs an ADR). + +A possible quick follow-up: a friendlier "use an explicit `JOIN`" +parse-error for comma-`FROM` (point 1) — not filed; mention if wanted. + +## §6. How to take over + +1. Read handoffs 65 → 66 → 67, `CLAUDE.md`, `docs/requirements.md`. +2. `seed` Phase 2 is done (ADR-0048 Status block is current). The + manual-testing fixes (§2) are committed and green. +3. Pick from §4 (filed issues #26/#27/#28) or §5 (roadmap). 
#28 is a + decision/ADR; #27 is UX (maybe ADR); #26 is a hint-system enhancement. +4. Consider a `cargo sweep` at this milestone (`target/` grows across + sessions). diff --git a/docs/requirements.md b/docs/requirements.md index 68fa1fe..2222f11 100644 --- a/docs/requirements.md +++ b/docs/requirements.md @@ -88,12 +88,16 @@ since ADR-0027.) because relationships are cross-table rather than per-table, they get their own sibling panel stacked below the tables list, not nested items within it — user-confirmed 2026-06-10.)* -- [/] **S3** Output panel renders a visualization of the - currently selected item and supports multiple tabs. - *(Partial, verified 2026-06-07: single-element structure - visualisation renders (`output_render.rs:82-180`); **multiple - tabs are not implemented** — the output is one line buffer, no - tab abstraction. Same multi-tab gap as V2.)* +- [x] **S3** Output panel renders a visualization of the + currently selected item. + *(Satisfied: single-element structure visualisation renders + (`output_render.rs:82-180`) — select a table, see its columns / + types / keys. **Multi-tab clause withdrawn 2026-06-11** (user + decision): the original wording promised "and supports multiple + tabs", but the output model is settling on the single scrollable + **V4 journal** rather than switchable tabs, so the tab clause is + dropped from tracked scope. A future return to tabbed output would + be a fresh requirement, not this one. Same withdrawal as V2.)* - [x] **S4** Hint area below the input field, showing hints about the current input or last error. *(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` / @@ -242,13 +246,12 @@ since ADR-0027.) available in both modes: `save`, `save as`, `load`, `new`, `rebuild`, `export`, `import`, `seed`, `replay`, `undo`, `redo`, `mode`, `help`, `hint`, `quit`. 
- *(Partial, verified 2026-06-07: 13 of 15 implemented and - available in both modes — `quit`/`q`, `mode simple|advanced`, - `help`, `save`, `save as`, `load`, `new`, `rebuild`, `export`, - `import`, `replay`, `undo`, `redo` (REGISTRY in - `grammar/app.rs:249-333`). **Missing: `seed`** (tracked as SD1) - **and `hint`** (tracked as H2) — neither is registered. A1 - closes when SD1 + H2 land.)* + *(Partial: **14 of 15** implemented and available in both modes — + `quit`/`q`, `mode simple|advanced`, `help`, `save`, `save as`, + `load`, `new`, `rebuild`, `export`, `import`, `replay`, `undo`, + `redo`, and now **`seed`** (ADR-0048 / SD1, done 2026-06-11). + **Only `hint`** (tracked as H2) remains unregistered. A1 closes + when H2 lands.)* ## DSL data commands @@ -469,15 +472,18 @@ since ADR-0027.) "relationship-relevant" reach). The §3 last-resort helper line was considered and rejected. Two `/runda` passes (design + implementation). Selection-nav and the broader journal direction remain in V4.)* -- [/] **V2** SQL query results render as a dynamic table view in - the output pane, with multiple result tabs supported. - *(Partial, verified 2026-06-07: the **table view** is done — - `output_render.rs:38-72` `render_data_table` renders a - box-drawing frame with aligned columns (numeric right, text - left) and NULL/control-char sanitisation, for `show data` and - after every write (ADR-0014). **Missing: multiple result tabs** - — the output is a single `VecDeque` with no tab - abstraction (same gap as S3). Multi-tab sits in V4 territory.)* +- [x] **V2** SQL query results render as a dynamic table view in + the output pane. + *(Satisfied: the **table view** is done — `output_render.rs:38-72` + `render_data_table` renders a box-drawing frame with aligned + columns (numeric right, text left) and NULL/control-char + sanitisation, for `show data` and after every write (ADR-0014). 
+ **Multi-tab clause withdrawn 2026-06-11** (user decision): the + original wording promised "with multiple result tabs supported"; + retained multi-result output, if ever wanted, now belongs to the + single scrollable **V4 journal** direction rather than switchable + tabs, so the tab clause is dropped from tracked scope. A future + return would be a new requirement. Same withdrawal as S3.)* - [~] **V3** Full ER-diagram export (whole-database graph, viewed outside the TUI) — low priority; design and ADR pending. - [~] **V4** Output panel as a *scrollable per-session log* with @@ -492,7 +498,13 @@ since ADR-0027.) *(Partial: PageUp / PageDown scrolling of the existing line buffer is in, with new output snapping the view to the most recent. The full V4 scope — smart structure rendering, log - styling, Markdown export, scroll indicator — remains pending.)* + styling, Markdown export, scroll indicator — remains pending. + **As of 2026-06-11 this journal model is the sole tracked + direction for evolving the output pane:** the competing multi-tab + output alternative (the trailing clauses of S3 and V2) was + withdrawn from scope by user decision, so retained / multi-result + output, if pursued, is folded into this journal rather than into + switchable tabs.)* - [x] **V5** `show []` family of commands for redisplaying schema info on demand. *(Done 2026-06-07: `show table ` + `show data
<table>` @@ -652,11 +664,39 @@ since ADR-0027.) ## Sample data / seeding -- [ ] **SD1** `seed <table>
[count]` generates plausible fake +- [x] **SD1** `seed <table>
[count]` generates plausible fake data; junction tables are seeded with valid foreign-key references drawn from existing parent rows. -- [~] **SD2** Detailed seeding rules (per-type generators, - locale, determinism, override hooks) — design and ADR pending. + *(Done 2026-06-11 via **ADR-0048** (commits `202e25a`→`fbd219b`). + Whole-row `seed
[count] [--seed ]` with realistic + name-aware generation (`fake` crate + a type-gated heuristic + catalogue, table-context name disambiguation, hand-rolled + `product` generator, bounded dates), identifier + constraint + uniqueness, **junction tables seeded with valid FK references + drawn from existing parent rows** (distinct combinations, capped; + empty-parent friendly error), `IN`-CHECK derivation, a + required-column block guard, undo as one step, replay as a data + write, a capped auto-show + enum/CHECK advisory, and an O(N) + single-transaction path. The `set` override clause and + `
.` column-fill landed in SD2 Phase 2, below.)* +- [x] **SD2** Detailed seeding rules (per-type generators, + locale, determinism, override hooks). + *(Done 2026-06-11 via **ADR-0048** (Phase 1 + Phase 2). Phase 1: + type-gated name-aware per-type generators with a `fake`-backed + catalogue + table-context disambiguation, **`--seed` determinism** + (serial/FK/shortid all reproducible — D4 holds with no + exceptions), English-only locale (X2). **Phase 2 (the "override + hooks" core):** the `set` override clause — fixed value / + pick-from-list / `as ` / `between` range (numeric and + **quoted** dates, type-aware; an override drops the column from + the generic-fill advisory) — and the `
.` + column-fill form (an UPDATE over existing rows, refusing + PK/autogen targets, empty-table no-op, FK/unique-respecting, one + undo step). Adds the `KNOWN_GENERATORS` vocabulary (D9), a range + `Generator`, and full completion / highlight / validity / help / + parse-error-pedagogy wiring. Deferred SD2 increments: + user-defined custom generators, NULL injection, multi-locale, + recursive parent auto-seed.)* ## Query analysis diff --git a/src/app.rs b/src/app.rs index 56fc7fc..2863382 100644 --- a/src/app.rs +++ b/src/app.rs @@ -646,6 +646,44 @@ impl App { } } + /// The input view the **live-feedback** walkers (completion, ambient + /// hint, validity verdict, highlight overlays) should see, plus the + /// byte offset stripped from the front and the cursor mapped into the + /// view. + /// + /// Under the `:` one-shot escape (ADR-0003) the buffer carries a + /// leading `:` (and an auto-inserted space) that is *not* advanced + /// SQL — submission already strips it before parsing, but the live + /// feedback did not, so the walker bailed at the `:` and resolved + /// nothing (no completion / hint, a spurious error overlay). This + /// returns the stripped SQL exactly as submission sees it, so the + /// feedback matches a real advanced-mode session. `offset` maps any + /// walker-returned byte position (completion `replaced_range`, + /// overlay spans) back to real-buffer coordinates. + /// + /// For every non-one-shot input this is the identity + /// `(&input, cursor, 0)`. + #[must_use] + pub fn feedback_view(&self) -> (&str, usize, usize) { + if matches!(self.effective_mode(), EffectiveMode::AdvancedOneShot) { + // The first non-whitespace char is the `:` (per + // `effective_mode`); strip up to and including it, then any + // following whitespace — mirroring submission's + // `trimmed[1..].trim()`. 
+ let leading_ws = self.input.len() - self.input.trim_start().len(); + let mut offset = leading_ws + 1; // past the `:` + while offset < self.input.len() + && self.input.as_bytes()[offset].is_ascii_whitespace() + { + offset += 1; + } + let view = &self.input[offset..]; + let cursor = self.input_cursor.saturating_sub(offset).min(view.len()); + return (view, cursor, offset); + } + (&self.input, self.input_cursor, 0) + } + /// The validity-indicator verdict for the current input /// (ADR-0027 §3). `None` when the input would run clean. /// @@ -667,11 +705,10 @@ impl App { EffectiveMode::AdvancedPersistent | EffectiveMode::AdvancedOneShot => Mode::Advanced, }; - crate::dsl::walker::input_verdict_in_mode( - &self.input, - Some(&self.schema_cache), - mode, - ) + // Strip the `:` one-shot prefix so the walker verdicts the SQL + // itself, not the escape marker (which it can't parse). + let (view, _cursor, _offset) = self.feedback_view(); + crate::dsl::walker::input_verdict_in_mode(view, Some(&self.schema_cache), mode) } /// Process one event from the runtime, mutating state and @@ -771,6 +808,10 @@ impl App { self.handle_dsl_insert_success(&command, &result); Vec::new() } + AppEvent::DslSeedSucceeded { command, result } => { + self.handle_dsl_seed_success(&command, &result); + Vec::new() + } AppEvent::DslUpdateSucceeded { command, result, @@ -1395,13 +1436,7 @@ impl App { } fn start_or_complete_at(&mut self, multi_start_idx: usize) { - let cursor = self.input_cursor.min(self.input.len()); - let Some(comp) = crate::completion::candidates_at_cursor_in_mode( - &self.input, - cursor, - &self.schema_cache, - self.effective_mode().as_mode(), - ) else { + let Some(comp) = self.completion_for_feedback() else { return; }; if comp.candidates.len() == 1 { @@ -1413,13 +1448,7 @@ impl App { } fn start_or_complete_last(&mut self) { - let cursor = self.input_cursor.min(self.input.len()); - let Some(comp) = crate::completion::candidates_at_cursor_in_mode( - &self.input, - cursor, - 
&self.schema_cache, - self.effective_mode().as_mode(), - ) else { + let Some(comp) = self.completion_for_feedback() else { return; }; if comp.candidates.len() == 1 { @@ -1430,6 +1459,22 @@ impl App { } } + /// Completion at the cursor, computed against the `:`-stripped + /// feedback view (ADR-0003 one-shot) with its `replaced_range` + /// mapped back to real-buffer coordinates so `commit_*` edit the + /// right span. Identity for non-one-shot input (offset 0). + fn completion_for_feedback(&self) -> Option { + let (view, view_cursor, offset) = self.feedback_view(); + let mut comp = crate::completion::candidates_at_cursor_in_mode( + view, + view_cursor.min(view.len()), + &self.schema_cache, + self.effective_mode().as_mode(), + )?; + comp.replaced_range = (comp.replaced_range.0 + offset, comp.replaced_range.1 + offset); + Some(comp) + } + /// Single-candidate commit: insert " " (with trailing /// space) and DO NOT create a memo. The user can keep /// typing or press Tab again to fresh-complete at the new @@ -2072,6 +2117,39 @@ impl App { } } + /// Render a successful `seed` (ADR-0048): the ✓ echo, the seeded-row + /// count (with a cap note when the unique-value space ran out), the + /// capped preview table (D18), and a Hint-styled advisory naming + /// columns filled with generic text that look like fixed value sets + /// (D12/D13). 
+ fn handle_dsl_seed_success(&mut self, command: &Command, result: &crate::db::SeedResult) { + self.note_ok_summary(command); + let mut summary = crate::t!( + "ok.rows_seeded", + count = result.produced, + table = result.table + ); + if result.produced < result.requested { + summary.push(' '); + summary.push_str(&crate::t!("seed.capped", requested = result.requested)); + } + self.note_system(summary); + for line in crate::output_render::render_data_table(&result.data) { + self.note_system(line); + } + if !result.advisory_columns.is_empty() { + // `column` (the first advised column) seeds the concrete + // repair examples (D13 Phase 2/3 wording); `columns` lists + // them all. + self.push_category_three_prose(crate::t!( + "seed.advisory_generic", + columns = result.advisory_columns.join(", "), + column = result.advisory_columns[0], + table = result.table + )); + } + } + fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) { self.note_ok_summary(command); self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected)); @@ -2390,6 +2468,9 @@ impl App { // the executor), like the named DSL drop. C::SqlDropIndex { .. } => (Operation::DropIndex, None, None), C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None), + // Seed generates inserts; FK/constraint failures read as + // insert errors (ADR-0048). + C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None), C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None), C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None), C::ShowData { name, .. } | C::ShowTable { name } => { @@ -4936,6 +5017,86 @@ mod tests { assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent); } + /// Build a two-table cache (`Orders(id, customer_id)` + + /// `Customers(id, name)`) for the `:` one-shot SQL-feedback tests. 
+ fn install_join_schema(app: &mut App) { + use crate::completion::TableColumn; + use crate::dsl::types::Type; + app.schema_cache.tables = vec!["Orders".into(), "Customers".into()]; + app.schema_cache.table_columns.insert( + "Orders".into(), + vec![TableColumn::new("id", Type::Serial), TableColumn::new("customer_id", Type::Int)], + ); + app.schema_cache.table_columns.insert( + "Customers".into(), + vec![TableColumn::new("id", Type::Serial), TableColumn::new("name", Type::Text)], + ); + for t in app.schema_cache.tables.clone() { + for c in &app.schema_cache.table_columns[&t] { + app.schema_cache.columns.push(c.name.clone()); + } + } + } + + #[test] + fn colon_one_shot_gives_sql_completion_the_stripped_view() { + // Bug (manual testing): the `:` one-shot escape (ADR-0003) left + // the leading `:` in the buffer passed to the live SQL feedback, + // so the walker bailed at `:` and Tab completed nothing — while + // the identical line in full `mode advanced` completed. Now the + // feedback view strips the `:`, so both behave the same. + let body = "select c.name from Orders o join Customers c on c.id=o.cu"; + + // Full advanced mode: completes `o.cu` → `o.customer_id`. + let mut adv = App::new(); + adv.mode = Mode::Advanced; + install_join_schema(&mut adv); + type_str(&mut adv, body); + adv.update(key(KeyCode::Tab)); + assert!( + adv.input.ends_with("o.customer_id "), + "full advanced should complete: {:?}", + adv.input + ); + + // `:` one-shot from simple mode: must complete the same way, and + // the `:` prefix must be preserved in the buffer. 
+ let mut one = App::new(); + one.mode = Mode::Simple; + install_join_schema(&mut one); + one.update(key(KeyCode::Char(':'))); + type_str(&mut one, body); + assert_eq!(one.effective_mode(), EffectiveMode::AdvancedOneShot); + one.update(key(KeyCode::Tab)); + assert!( + one.input.trim_start().starts_with(':'), + "the `:` prefix is kept: {:?}", + one.input + ); + assert!( + one.input.ends_with("o.customer_id "), + "`:` one-shot must complete the SQL column too: {:?}", + one.input + ); + } + + #[test] + fn colon_one_shot_validity_is_clean_for_a_valid_query() { + // A *valid* `:`-prefixed query must not light the `[ERR]` + // indicator (the walker used to choke on the `:` and always + // report Error). + let mut app = App::new(); + install_join_schema(&mut app); + app.update(key(KeyCode::Char(':'))); + type_str(&mut app, "select name from Customers"); + assert_eq!( + app.input_validity_verdict(), + None, + "a valid one-shot query should verdict clean, got {:?}", + app.input_validity_verdict(), + ); + } + #[test] fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() { let mut app = App::new(); @@ -6223,6 +6384,80 @@ mod tests { ); } + #[test] + fn seed_success_renders_count_preview_and_advisory() { + // ADR-0048: handle_dsl_seed_success renders the seeded-row count, + // the preview table, and the enum/CHECK advisory. 
+ let mut app = App::new(); + app.output + .push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple)); + app.update(AppEvent::DslSeedSucceeded { + command: Command::Seed { + table: "users".to_string(), + target_column: None, + count: Some(20), + overrides: Vec::new(), + rng_seed: None, + }, + result: crate::db::SeedResult { + table: "users".to_string(), + requested: 20, + produced: 20, + data: crate::db::DataResult { + table_name: "users".to_string(), + columns: vec!["name".to_string()], + column_types: vec![None], + rows: vec![vec![Some("Alice".to_string())]], + }, + advisory_columns: vec!["status".to_string()], + }, + }); + let texts: Vec = app.output.iter().map(|l| l.text.clone()).collect(); + assert!( + texts.iter().any(|t| t.contains("20 row(s) seeded into users")), + "seeded-row count surfaced: {texts:?}", + ); + assert!( + texts.iter().any(|t| t.contains("status") && t.contains("generic text")), + "the advisory names the enum-ish column: {texts:?}", + ); + } + + #[test] + fn seed_success_reports_a_cap() { + // produced < requested → the cap note appears next to the count. 
+ let mut app = App::new(); + app.output + .push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple)); + app.update(AppEvent::DslSeedSucceeded { + command: Command::Seed { + table: "J".to_string(), + target_column: None, + count: Some(10), + overrides: Vec::new(), + rng_seed: None, + }, + result: crate::db::SeedResult { + table: "J".to_string(), + requested: 10, + produced: 4, + data: crate::db::DataResult { + table_name: "J".to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + }, + advisory_columns: Vec::new(), + }, + }); + let texts: Vec = app.output.iter().map(|l| l.text.clone()).collect(); + assert!( + texts.iter().any(|t| t.contains("4 row(s) seeded into J") + && t.contains("of 10 requested")), + "the cap note surfaces requested vs produced: {texts:?}", + ); + } + #[test] fn sql_delete_returning_renders_cascade_and_result_table() { // ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade diff --git a/src/completion.rs b/src/completion.rs index 5ca535a..38bf3bd 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -120,7 +120,13 @@ impl SchemaCache { IdentSource::Columns => &self.columns, IdentSource::Relationships => &self.relationships, IdentSource::Indexes => &self.indexes, - IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], + // Curated / invented sources never come from the schema + // cache — `Generators` candidates are supplied separately + // from the `seed` vocabulary (ADR-0048 D9). + IdentSource::NewName + | IdentSource::Types + | IdentSource::Generators + | IdentSource::Free => &[], } } @@ -327,6 +333,37 @@ pub fn candidates_at_cursor_with_in_mode( break; } } + + // Flag-aware extension. 
The plain walk above stops at `-`, so a + // flag the user is mid-typing (`-`, `--`, `--all`, `--create-fk`) + // leaves an *empty* partial sitting just after the dash(es) — which + // made the engine offer every keyword (a `-` prefix-matches nothing, + // so the empty-prefix path let `on` through) and, worse, replace an + // empty range so accepting produced `-on` / `---create-fk`. When a + // dash-prefixed token sits at a word boundary AND a flag is actually + // expected here, treat the whole dash-run-plus-body as the partial so + // it is matched and replaced wholesale. The "flag is expected" gate + // (one cheap probe on the pre-dash prefix) keeps a signed number / + // minus (`where x = -5`) from being mis-read as a flag. + { + let mut run = cursor; + while run > 0 { + let p = bytes[run - 1]; + if p.is_ascii_alphanumeric() || p == b'_' || p == b'-' { + run -= 1; + } else { + break; + } + } + let word_boundary = run == 0 || bytes[run - 1].is_ascii_whitespace(); + if run < cursor && bytes[run] == b'-' && word_boundary && run < start { + let pre = crate::dsl::walker::completion_probe_in_mode(&input[..run], cache, mode); + if pre.expected.iter().any(|e| matches!(e, Expectation::Flag(_))) { + start = run; + } + } + } + let partial_prefix = input[start..cursor].to_string(); let leading = &input[..start]; @@ -623,29 +660,19 @@ pub fn candidates_at_cursor_with_in_mode( // Source 1.55: flag candidates (`--name`). Surfaced as a // distinct CandidateKind so the hint panel can colour them // with `tok_flag` (matching how they'll appear after - // insertion). The standard prefix matcher walks back over - // alphanumeric + underscore, which does NOT cross `-`, so - // when the user types `--all` the partial is `all` — match - // the flag's body against that. Otherwise match the full - // `--name` against the partial (which may be empty or start - // with `--`). + // insertion). 
The flag-aware partial detection above captures any + // leading dash-run, so the partial is one of: empty, all-dashes + // (`-` / `--`), or `[-]+body`. Stripping the leading dashes and + // matching the remainder against the flag *body* handles all of + // them uniformly (empty / all-dashes → match every flag). + let flag_needle = partial_prefix.trim_start_matches('-').to_lowercase(); let flags: Vec = expected .iter() .filter_map(|e| match e { Expectation::Flag(name) => Some(*name), _ => None, }) - .filter(|body| { - if partial_prefix.starts_with("--") { - format!("--{body}") - .to_lowercase() - .starts_with(&lowered_prefix) - } else if partial_prefix.is_empty() { - true - } else { - body.to_lowercase().starts_with(&lowered_prefix) - } - }) + .filter(|body| body.to_lowercase().starts_with(&flag_needle)) .map(|body| format!("--{body}")) .collect(); @@ -709,6 +736,22 @@ pub fn candidates_at_cursor_with_in_mode( } else { Vec::new() }; + // Source 1.9: fake-data generator names (ADR-0048 D9). At the + // `seed … set as ⟨here⟩` slot (`IdentSource::Generators`) the + // curated vocabulary is offered so a learner can discover `email` / + // `product` / … by Tab. Same `Function` kind / `tok_function` colour + // as SQL functions (no new theme colour — ADR-0048 §Grammar). + let has_generator_slot = expected + .iter() + .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. })); + if has_generator_slot { + functions.extend( + crate::seed::KNOWN_GENERATORS + .iter() + .filter(|g| matches_prefix(g)) + .map(|g| (*g).to_string()), + ); + } // Source 2: schema identifiers — accumulated across every // matching schema-listable `Ident { source }` expectation. 
@@ -1200,6 +1243,45 @@ pub fn invalid_ident_at_cursor_in_mode( if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) { return None; } + // A bare ident at a SQL expression slot may be a **table alias / name** + // the user is mid-typing as a qualifier (`ol` in `sum(ol.count)`). The + // defining FROM clause can sit *after* the cursor — the projection + // references it — so the leading-only walk has an empty from-scope and + // would wrongly flag the alias as an unknown column. Recover the scope + // from the FULL input (mirrors the §10.6 edit-an-existing-query + // lookahead the candidate engine uses for column narrowing) and bail + // when the partial prefix-matches a binding's alias or table name. + if has_sql_expr_slot { + let full = crate::dsl::walker::completion_probe_in_mode(input, cache, mode); + let lowered = partial.to_lowercase(); + let matches_qualifier = full.from_scope.iter().any(|b| { + b.alias + .as_deref() + .is_some_and(|a| a.to_lowercase().starts_with(&lowered)) + || b.table.to_lowercase().starts_with(&lowered) + }); + if matches_qualifier { + return None; + } + } + // ADR-0048 D9: the `seed … set as ` slot is a curated + // vocabulary (`IdentSource::Generators`), not a schema source, so the + // schema-column check below would never see it. A partial that + // prefix-matches a known generator is an in-progress name; anything + // else is an unknown generator → flag it `[ERR]` while typing. + let has_generator_slot = expected + .iter() + .any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. })); + if has_generator_slot { + if crate::seed::is_known_generator_prefix(partial) { + return None; + } + return Some(InvalidIdent { + range: (start, cursor), + found: partial.to_string(), + source: IdentSource::Generators, + }); + } // Find every schema-listable source in the expected list. 
let sources: Vec = expected .iter() @@ -1488,6 +1570,71 @@ mod tests { ); } + #[test] + fn single_dash_offers_flags_not_keywords_and_replaces_the_dash() { + // Bug (manual testing): `add 1:n relationship … -` (one dash) + // offered the `on` keyword *and* `--create-fk`, and accepting + // produced `-on` / `---create-fk` because the lone `-` was not + // part of the replaced range. A dash at a flag position is a + // flag-in-progress: offer flags, exclude keywords, replace the + // dash on accept. + let input = "add 1:n relationship from X.a to Y.b -"; + let c = candidates_at_cursor(input, input.len(), &SchemaCache::default()) + .expect("a `-` at a flag position offers candidates"); + let texts: Vec<&str> = c.candidates.iter().map(|x| x.text.as_str()).collect(); + assert!(texts.contains(&"--create-fk"), "should offer --create-fk: {texts:?}"); + assert!(!texts.contains(&"on"), "must NOT offer `on` after a dash: {texts:?}"); + assert_eq!( + c.replaced_range, + (input.len() - 1, input.len()), + "the `-` must be inside the replaced range so accept yields `--create-fk`", + ); + } + + #[test] + fn double_dash_replaces_both_dashes_on_accept() { + let input = "delete from T --"; + let c = candidates_at_cursor_in_mode( + input, + input.len(), + &SchemaCache::default(), + Mode::Simple, + ) + .expect("`--` offers the flag"); + assert!(c.candidates.iter().any(|x| x.text == "--all-rows")); + assert_eq!( + c.replaced_range, + (input.len() - 2, input.len()), + "both dashes are replaced so accept yields `--all-rows`, not `----all-rows`", + ); + } + + #[test] + fn dash_at_a_value_position_is_not_treated_as_a_flag() { + // `show data T where x = -5` — the `-` is a sign, not a flag. + // No flag is expected here, so the dash must not be swallowed + // into a flag partial: the partial stays `5` (the original + // value-operand behaviour), and no `--…` candidate appears. 
+ let mut s = SchemaCache::default(); + s.tables.push("T".into()); + s.columns.push("x".into()); + let input = "show data T where x = -5"; + if let Some(c) = + candidates_at_cursor_in_mode(input, input.len(), &s, Mode::Simple) + { + assert!( + !c.candidates.iter().any(|x| x.text.starts_with("--")), + "no flags at a value position: {:?}", + c.candidates, + ); + assert_eq!( + c.replaced_range, + (input.len() - 1, input.len()), + "only the `5` is the partial; the `-` (sign) is not captured", + ); + } + } + #[test] fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() { // The same optional-flag class: `drop column … [--cascade]`. @@ -2606,6 +2753,70 @@ mod tests { ); } + #[test] + fn invalid_ident_does_not_flag_a_table_alias_used_before_its_from_clause() { + // Manual-testing bug: in `select … sum(ol.count*…) … from … OrderLines ol …` + // the projection references alias `ol` whose FROM binding sits + // *after* the cursor. The leading-only walk had an empty from-scope + // and wrongly flagged `ol` as an unknown column (a red "ERR" overlay + // on an otherwise-valid query). The full-input lookahead must + // recover the scope (ADR-0032 §10.6) so `ol` is not flagged. + use crate::dsl::types::Type; + let mut s = SchemaCache::default(); + s.tables.push("OrderLines".into()); + s.columns.push("count".into()); + s.table_columns + .insert("OrderLines".into(), vec![TableColumn::new("count", Type::Int)]); + let input = "select sum(ol.count) from OrderLines ol"; + let cursor = input.find("ol.count").unwrap() + 2; // right after `ol` + assert!( + invalid_ident_at_cursor_in_mode(input, cursor, &s, Mode::Advanced).is_none(), + "a table alias used before its FROM clause must not be flagged as a bad column", + ); + } + + #[test] + fn invalid_ident_fires_for_unknown_generator_after_as() { + // ADR-0048 D9: an unknown name at the `set as ` slot is + // flagged `[ERR]` while typing. 
+ let cache = two_table_schema(); + let input = "seed a set name as bogus"; + let inv = invalid_ident_at_cursor(input, input.len(), &cache) + .expect("unknown generator must flag"); + assert_eq!(inv.found, "bogus"); + assert_eq!(inv.source, IdentSource::Generators); + } + + #[test] + fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() { + // ADR-0048: an unknown column at the `set ` slot and the + // `
.` column-fill slot is flagged like any other + // column slot (both are `IdentSource::Columns`). + let cache = two_table_schema(); // table `a`; columns id, name + let set_in = invalid_ident_at_cursor("seed a set xyz", 14, &cache) + .expect("unknown column in `set` must flag"); + assert_eq!(set_in.found, "xyz"); + assert_eq!(set_in.source, IdentSource::Columns); + + let fill = invalid_ident_at_cursor("seed a.xyz", 10, &cache) + .expect("unknown column in column-fill must flag"); + assert_eq!(fill.source, IdentSource::Columns); + } + + #[test] + fn invalid_ident_does_not_fire_for_generator_prefix() { + // A prefix of a known generator is an in-progress name, not a typo. + let cache = two_table_schema(); + assert!( + invalid_ident_at_cursor("seed a set name as ema", 22, &cache).is_none(), + "`ema` prefixes `email` — must not flag", + ); + assert!( + invalid_ident_at_cursor("seed a set name as email", 24, &cache).is_none(), + "`email` is a known generator — must not flag", + ); + } + fn two_table_schema() -> SchemaCache { use crate::dsl::types::Type; let mut s = SchemaCache::default(); diff --git a/src/db.rs b/src/db.rs index 562b8d8..19e4d07 100644 --- a/src/db.rs +++ b/src/db.rs @@ -33,7 +33,8 @@ use tracing::{debug, info, warn}; use crate::dsl::action::ReferentialAction; use crate::dsl::command::{ ChangeColumnMode, Command, CompareOp, Constraint, ConstraintKind, Expr, IndexSelector, - Operand, Predicate, RelationshipSelector, RowFilter, SqlForeignKey, + Operand, Predicate, RelationshipSelector, RowFilter, SeedOverride, SeedOverrideKind, + SqlForeignKey, }; use crate::dsl::ColumnSpec; use crate::dsl::shortid; @@ -287,6 +288,23 @@ pub struct InsertResult { pub data: DataResult, } +/// Outcome of a successful `seed` (ADR-0048). +/// +/// `produced` is below `requested` when the unique-value space ran out +/// (D14 cap). `data` is a **capped preview** of the seeded rows (D18, +/// not the whole batch). 
`advisory_columns` names columns that were +/// filled with generic text but look like fixed value sets — enum-ish +/// names or un-derivable CHECKs (D12/D13) — so the render can nudge the +/// user toward choosing those values deliberately. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SeedResult { + pub table: String, + pub requested: u64, + pub produced: u64, + pub data: DataResult, + pub advisory_columns: Vec, +} + /// Outcome of a successful `add column …`. /// /// Carries the post-add structure (used for the auto-show that @@ -702,6 +720,17 @@ enum Request { source: Option, reply: oneshot::Sender>, }, + /// Populate a table with generated fake data (ADR-0048). One undo + /// snapshot wraps the whole seed via `snapshot_then`. + Seed { + table: String, + target_column: Option, + count: Option, + overrides: Vec, + rng_seed: Option, + source: Option, + reply: oneshot::Sender>, + }, Update { table: String, assignments: Vec<(String, Value)>, @@ -1491,6 +1520,30 @@ impl Database { recv.await.map_err(|_| DbError::WorkerGone)? } + /// Populate a table with generated fake data (ADR-0048, SD1/SD2). + pub async fn seed( + &self, + table: String, + target_column: Option, + count: Option, + overrides: Vec, + rng_seed: Option, + source: Option, + ) -> Result { + let (reply, recv) = oneshot::channel(); + self.send(Request::Seed { + table, + target_column, + count, + overrides, + rng_seed, + source, + reply, + }) + .await?; + recv.await.map_err(|_| DbError::WorkerGone)? + } + pub async fn update( &self, table: String, @@ -2646,6 +2699,28 @@ fn handle_request( &values, )); } + Request::Seed { + table, + target_column, + count, + overrides, + rng_seed, + source, + reply, + } => { + // One snapshot wraps the whole seed (ADR-0048 D15 — one undo + // step), exactly like a single insert. 
+ snapshot_then(snap, batch, conn, source.as_deref(), reply, || do_seed( + conn, + persistence, + source.as_deref(), + &table, + target_column.as_deref(), + count, + &overrides, + rng_seed, + )); + } Request::Update { table, assignments, @@ -2874,7 +2949,10 @@ fn do_list_names_for( } Ok(out) } - IdentSource::NewName | IdentSource::Types | IdentSource::Free => Ok(Vec::new()), + IdentSource::NewName + | IdentSource::Types + | IdentSource::Generators + | IdentSource::Free => Ok(Vec::new()), } } @@ -8636,34 +8714,918 @@ fn count_rows(conn: &Connection, table: &str) -> Result { .map_err(DbError::from_rusqlite) } -fn do_insert( +/// Default row count when `seed ` omits the count (ADR-0048 D6). +const DEFAULT_SEED_COUNT: u64 = 20; + +/// Upper bound on a single `seed` (ADR-0048 D6) — a typo like +/// `seed t 1000000` is refused rather than left to hang the app. +const MAX_SEED_COUNT: u64 = 10_000; + +/// Cap on rows shown in the post-seed auto-show preview (ADR-0048 D18). +/// The full count is always reported; only the rendered table is capped. +const SEED_PREVIEW_CAP: usize = 20; + +/// How a single column's value is produced for each seeded row. +enum SeedColPlan { + /// Generated from the seed library (the generator is chosen once; + /// `generate_value` runs per row). + Generated { + generator: crate::seed::Generator, + ty: Type, + }, + /// A foreign-key child column: sampled from an existing parent row + /// (ADR-0048 D14). `fk_idx` selects the FK; `pos` selects this + /// column's slot within the parent key tuple (so a compound FK's + /// child columns all read from the *same* sampled parent row). + ForeignKey { fk_idx: usize, pos: usize }, + /// A `shortid` column: a base58 id from seed's *seeded* RNG so it + /// reproduces under `--seed` (ADR-0048 D4). Always forced — a + /// `shortid` column needs an id, never a name-heuristic value. 
+ ShortId, +} + +/// Collision key for a positional list of seeded values, used to keep +/// uniqueness groups (PK tuple, UNIQUE columns) distinct (ADR-0048 D10). +/// `\u{1}` separates fields; `\u{0}` marks NULL. +fn seed_value_list_key(values: &[Value]) -> String { + let mut key = String::new(); + for v in values { + match v { + Value::Number(s) | Value::Text(s) => key.push_str(s), + Value::Bool(b) => key.push(if *b { 'T' } else { 'F' }), + Value::Null => key.push('\u{0}'), + } + key.push('\u{1}'); + } + key +} + +/// `COALESCE(MAX(col), 0)` — the base for sequencing identifier-int +/// columns (ADR-0048 D10) so generated ids continue past existing rows. +fn seed_max_int(conn: &Connection, table: &str, column: &str) -> Result { + let sql = format!( + "SELECT COALESCE(MAX(\"{}\"), 0) FROM \"{}\"", + column.replace('"', "\"\""), + table.replace('"', "\"\"") + ); + conn.query_row(&sql, [], |r| r.get::<_, i64>(0)) + .map_err(DbError::from_rusqlite) +} + +/// Sample existing parent-key tuples for FK generation (ADR-0048 D14). +/// +/// Returns one `Value` tuple per distinct parent row in +/// `parent_columns` order, so a compound FK's children can be filled +/// from one consistent parent row. Empty when the parent has no rows +/// (the caller turns that into the friendly "seed the parent first" +/// error). +fn sample_parent_key_tuples( + conn: &Connection, + parent_table: &str, + parent_columns: &[String], +) -> Result>, DbError> { + let cols = parent_columns + .iter() + .map(|c| format!("\"{}\"", c.replace('"', "\"\""))) + .collect::>() + .join(", "); + // `ORDER BY` the key columns so the sampled order is deterministic + // (ADR-0048 D4): `--seed` reproducibility must not depend on + // SQLite's unspecified `DISTINCT` row order. 
+ let sql = format!( + "SELECT DISTINCT {cols} FROM \"{}\" ORDER BY {cols}", + parent_table.replace('"', "\"\"") + ); + let n = parent_columns.len(); + let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?; + let tuples = stmt + .query_map([], |row| { + let mut tuple = Vec::with_capacity(n); + for i in 0..n { + let v = match row.get_ref(i)? { + rusqlite::types::ValueRef::Null => Value::Null, + rusqlite::types::ValueRef::Integer(x) => Value::Number(x.to_string()), + rusqlite::types::ValueRef::Real(x) => Value::Number(x.to_string()), + rusqlite::types::ValueRef::Text(t) => { + Value::Text(String::from_utf8_lossy(t).into_owned()) + } + // FK keys are never blobs in this app; treat as NULL. + rusqlite::types::ValueRef::Blob(_) => Value::Null, + }; + tuple.push(v); + } + Ok(tuple) + }) + .map_err(DbError::from_rusqlite)? + .collect::, _>>() + .map_err(DbError::from_rusqlite)?; + Ok(tuples) +} + +/// Populate a table with generated fake data (ADR-0048, SD1/SD2). +/// +/// Generates whole rows and inserts them in one transaction, reusing the +/// per-value validation, autogen autofill, FK-error enrichment and +/// persistence machinery via [`insert_one_row`]. The whole seed is a +/// single undo step (the worker wraps the call in one `snapshot_then`) +/// and writes exactly one `history.log` line. +/// +/// Foreign-key columns are filled by sampling existing parent rows +/// (D14); a compound FK reads all its child columns from one sampled +/// parent row. An empty parent is refused with a friendly error. A +/// `NOT NULL blob` column (which seed cannot generate) is refused by +/// the block guard (D1); a nullable blob is omitted (→ NULL). +/// +/// **Phase 2 (SD2):** when `target_column` is `Some`, this delegates to +/// [`do_seed_column_fill`] (fill one column across existing rows, D1 +/// form 2). 
`overrides` carries the `set …` clause (D2): per-column +/// pins that replace the heuristic generator and drop the column from the +/// generic-fill advisory (D13). +#[allow(clippy::too_many_arguments)] +fn do_seed( conn: &Connection, persistence: Option<&Persistence>, source: Option<&str>, table: &str, - user_columns: Option<&[String]>, - user_values: &[Value], -) -> Result { - debug!(table = %table, "insert"); + target_column: Option<&str>, + count: Option, + overrides: &[SeedOverride], + rng_seed: Option, +) -> Result { + use crate::seed; + use rand::RngExt; + let canonical_table = require_canonical_table(conn, table)?; let table = canonical_table.as_str(); + + // Column-fill (D1 form 2) is a distinct UPDATE path. + if let Some(col) = target_column { + return do_seed_column_fill( + conn, persistence, source, table, col, count, overrides, rng_seed, + ); + } + + let n = count.unwrap_or(DEFAULT_SEED_COUNT); + debug!(table = %table, count = n, "seed"); + if n > MAX_SEED_COUNT { + return Err(DbError::Unsupported(format!( + "cannot seed {n} rows at once: the maximum is {MAX_SEED_COUNT}. \ + Seed in smaller batches." + ))); + } + let schema = read_schema(conn, table)?; - // Resolve which columns the user is providing values for. - let user_cols: Vec = match user_columns { - Some(cols) => cols.to_vec(), - None => { - // Short form: every non-auto-generated column in - // schema declaration order. Serial and shortid both - // get auto-filled below. + // Pre-sample each FK's parent key tuples (D14); refuse if a parent + // is empty (no valid reference can be fabricated). + let mut fk_samples: Vec>> = Vec::with_capacity(schema.foreign_keys.len()); + for fk in &schema.foreign_keys { + let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &fk.parent_columns)?; + if tuples.is_empty() { + return Err(DbError::Unsupported(format!( + "cannot seed `{table}`: parent table `{}` (referenced by `{}`) has no rows. 
\ + Seed or insert into `{}` first.", + fk.parent_table, + fk.child_columns.join(", "), + fk.parent_table, + ))); + } + fk_samples.push(tuples); + } + // child column → (fk index, position within the FK's column list). + let mut fk_child_pos: std::collections::HashMap<&str, (usize, usize)> = + std::collections::HashMap::new(); + for (fk_idx, fk) in schema.foreign_keys.iter().enumerate() { + for (pos, child) in fk.child_columns.iter().enumerate() { + fk_child_pos.insert(child.as_str(), (fk_idx, pos)); + } + } + + // Build the per-column generation plan, skipping autogen and + // un-generatable columns. `advisory_columns` collects columns + // filled with generic text that look like fixed value sets (D12/D13). + let mut col_names: Vec = Vec::new(); + let mut plans: Vec = Vec::new(); + let mut advisory_columns: Vec = Vec::new(); + for c in &schema.columns { + let ty = c.user_type.unwrap_or(Type::Text); + // serial auto-fills deterministically in `do_insert` (rowid / + // MAX+1) — omit it. shortid is handled below from the seeded RNG. + if matches!(ty, Type::Serial) { + continue; + } + // blob has no DSL value path: refuse if required (D1), else omit. + if matches!(ty, Type::Blob) { + if c.notnull { + return Err(DbError::Unsupported(format!( + "cannot seed `{table}`: column `{}` is `NOT NULL` but has type `blob`, \ + which seed cannot generate. Add the rows another way or make it nullable.", + c.name, + ))); + } + continue; + } + col_names.push(c.name.clone()); + if let Some(&(fk_idx, pos)) = fk_child_pos.get(c.name.as_str()) { + plans.push(SeedColPlan::ForeignKey { fk_idx, pos }); + } else if matches!(ty, Type::ShortId) { + // Always the shortid generator (never a name heuristic — a + // shortid column needs a base58 id, not e.g. an email). + plans.push(SeedColPlan::ShortId); + } else { + // A simple `col IN ('a','b')` CHECK becomes the value source + // (D17) so the enum-as-CHECK pattern just works. 
+ let check_in_values = c + .check + .as_deref() + .and_then(|chk| seed::parse_in_check_values(chk, &c.name)); + let spec = seed::ColumnSpec { + name: c.name.clone(), + ty, + not_null: c.notnull, + primary_key: c.primary_key, + unique: c.unique, + is_foreign_key: false, + check_in_values, + }; + let generator = seed::choose_generator(table, &spec); + // Flag columns that fell through to generic text but look + // like a fixed value set (enum-ish name, or a CHECK we + // could not derive values from) — D12/D13. + if matches!(generator, crate::seed::Generator::Generic) + && (seed::is_enum_ish(&c.name) + || (c.check.is_some() && spec.check_in_values.is_none())) + { + advisory_columns.push(c.name.clone()); + } + plans.push(SeedColPlan::Generated { generator, ty }); + } + } + + // Apply the `set …` overrides (D2): each replaces the named + // column's plan with the pinned generator and removes it from the + // generic-fill advisory (the user chose its values deliberately, + // D13). An override that names a non-fillable column is a friendly + // error; a bounded value source (fixed / pick-list) that can't supply + // enough distinct values for a single-column-UNIQUE target is refused + // up front rather than silently capped (DA finding). FK / type binding + // still apply — a value that violates a constraint surfaces through the + // existing FK-error guard. + apply_seed_overrides(&schema, overrides, n, &col_names, &mut plans, &mut advisory_columns)?; + + // Uniqueness groups (ADR-0048 D10): value tuples that must stay + // distinct across the batch and against existing rows — the + // user-fillable PK (so junction distinct-combos fall out of this), + // each compound UNIQUE constraint, and each single-column UNIQUE or + // identifier-named column. Each group is a list of indices into + // `col_names` / `plans`. 
+ let col_index: std::collections::HashMap<&str, usize> = col_names + .iter() + .enumerate() + .map(|(i, name)| (name.as_str(), i)) + .collect(); + let project_group = |cols: &[String]| -> Vec { + cols.iter() + .filter_map(|c| col_index.get(c.as_str()).copied()) + .collect() + }; + let mut unique_groups: Vec> = Vec::new(); + let pk_group = project_group(&schema.primary_key); + if !pk_group.is_empty() { + unique_groups.push(pk_group); + } + for uc in &schema.unique_constraints { + let g = project_group(uc); + if !g.is_empty() { + unique_groups.push(g); + } + } + for (i, name) in col_names.iter().enumerate() { + let unique_col = schema + .columns + .iter() + .find(|c| &c.name == name) + .is_some_and(|c| c.unique); + let is_identifier = matches!( + &plans[i], + SeedColPlan::Generated { + generator: crate::seed::Generator::IdentitySequential, + .. + } + ); + if unique_col || is_identifier { + unique_groups.push(vec![i]); + } + } + + // Sequence base for identifier-int columns (D10): start past the + // current MAX so generated ids continue cleanly. + let mut seq_base: std::collections::HashMap = std::collections::HashMap::new(); + for (i, plan) in plans.iter().enumerate() { + if let SeedColPlan::Generated { generator, ty } = plan + && matches!(generator, crate::seed::Generator::IdentitySequential) + && matches!(ty, Type::Int) + { + seq_base.insert(i, seed_max_int(conn, table, &col_names[i])?); + } + } + + // Pre-load each group's existing tuples so generation never + // collides with rows already present. + let mut used: Vec> = + vec![std::collections::HashSet::new(); unique_groups.len()]; + for (gi, group) in unique_groups.iter().enumerate() { + let cols: Vec = group.iter().map(|&i| col_names[i].clone()).collect(); + for tuple in sample_parent_key_tuples(conn, table, &cols)? { + used[gi].insert(seed_value_list_key(&tuple)); + } + } + + // Retry cap per row: when the unique space is exhausted (e.g. 
a + // junction requested more rows than there are parent combinations), + // stop and cap rather than spin (D14). + const MAX_ATTEMPTS: u32 = 200; + + let mut rng = seed::make_rng(rng_seed); + let mut preview_rowids: Vec = Vec::new(); + let mut accepted: u64 = 0; + let mut capped = false; + + // All rows insert in a single transaction; persistence (the CSV and + // the one history line) is written once, before the single commit — + // preserving ADR-0015 §6 commit-db-last while staying O(N) instead + // of the O(N^2) of per-row CSV rewrites. A mid-batch failure rolls + // the whole seed back (atomic). + let tx = conn + .unchecked_transaction() + .map_err(DbError::from_rusqlite)?; + + while accepted < n { + let mut attempt = 0u32; + let rowid = loop { + // One sampled parent row per FK for this attempt, so a + // compound FK's children stay consistent. + let fk_choice: Vec = fk_samples + .iter() + .map(|tuples| rng.random_range(0..tuples.len())) + .collect(); + let values: Vec = plans + .iter() + .enumerate() + .map(|(i, plan)| match plan { + SeedColPlan::ForeignKey { fk_idx, pos } => { + fk_samples[*fk_idx][fk_choice[*fk_idx]][*pos].clone() + } + // Seeded base58 id → reproducible under `--seed` (D4). + SeedColPlan::ShortId => { + Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng)) + } + SeedColPlan::Generated { generator, ty } + if matches!(generator, crate::seed::Generator::IdentitySequential) + && matches!(ty, Type::Int) => + { + // Monotonic past existing rows → inherently unique. 
+ Value::Number((seq_base[&i] + accepted as i64 + 1).to_string()) + } + SeedColPlan::Generated { generator, ty } => { + seed::generate_value(generator, *ty, &mut rng) + } + }) + .collect(); + + let keys: Vec = unique_groups + .iter() + .map(|group| { + let projected: Vec = + group.iter().map(|&i| values[i].clone()).collect(); + seed_value_list_key(&projected) + }) + .collect(); + if keys.iter().enumerate().any(|(gi, k)| used[gi].contains(k)) { + attempt += 1; + if attempt >= MAX_ATTEMPTS { + capped = true; + break None; + } + continue; + } + for (gi, k) in keys.into_iter().enumerate() { + used[gi].insert(k); + } + let (_rows, rowid) = + insert_one_row(conn, table, &schema, Some(&col_names), &values)?; + break Some(rowid); + }; + match rowid { + Some(rowid) => { + // Keep the first `SEED_PREVIEW_CAP` rowids for the + // capped auto-show (D18). + if preview_rowids.len() < SEED_PREVIEW_CAP { + preview_rowids.push(rowid); + } + accepted += 1; + } + None => break, + } + } + + if capped { + warn!( + table = %table, + requested = n, + produced = accepted, + "seed capped: ran out of distinct unique-value combinations before the requested count" + ); + } + + // Persist once (CSV + the single history line), then commit (db-last). + let changes = Changes { + schema_dirty: false, + rewritten_tables: vec![table.to_string()], + ..Changes::default() + }; + finalize_persistence(conn, persistence, source, &changes)?; + tx.commit().map_err(DbError::from_rusqlite)?; + + let data = if preview_rowids.is_empty() { + DataResult { + table_name: table.to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + } + } else { + query_rows_by_rowid(conn, table, &preview_rowids)? + }; + + Ok(SeedResult { + table: table.to_string(), + requested: n, + produced: accepted, + data, + advisory_columns, + }) +} + +/// Apply the `set …` overrides (ADR-0048 D2) to the per-column +/// generation plan. 
Each override replaces the named column's plan and +/// drops it from the generic-fill advisory (D13 — the user chose those +/// values). An override naming a column that is not in the fillable set +/// (unknown, or an auto-generated `serial`) is a friendly error. +fn apply_seed_overrides( + schema: &ReadSchema, + overrides: &[SeedOverride], + row_count: u64, + col_names: &[String], + plans: &mut [SeedColPlan], + advisory_columns: &mut Vec, +) -> Result<(), DbError> { + for ov in overrides { + let Some(idx) = col_names + .iter() + .position(|c| c.eq_ignore_ascii_case(&ov.column)) + else { + return Err(DbError::Unsupported(format!( + "cannot apply `set {col} …`: `{col}` is not a fillable column of this \ + table (it is unknown, or an auto-generated column).", + col = ov.column, + ))); + }; + let ty = schema + .columns + .iter() + .find(|c| c.name.eq_ignore_ascii_case(&ov.column)) + .and_then(|c| c.user_type) + .unwrap_or(Type::Text); + seed_override_capacity_guard(schema, &ov.column, &ov.kind, row_count)?; + plans[idx] = seed_override_plan(&ov.kind, ty, &ov.column)?; + advisory_columns.retain(|c| !c.eq_ignore_ascii_case(&ov.column)); + } + Ok(()) +} + +/// Refuse up front when a **bounded** override (a fixed value or a +/// pick-list) cannot supply enough *distinct* values to fill a +/// single-column-UNIQUE target across `row_count` rows — otherwise the +/// uniqueness machinery would silently cap the run to the achievable +/// count (DA finding; the ADR left this interaction open and the user +/// chose a friendly error). Generators and ranges are treated as +/// effectively unbounded sources here; if one does exhaust, the existing +/// distinct-combination cap (D14) still applies. 
+fn seed_override_capacity_guard( + schema: &ReadSchema, + column: &str, + kind: &SeedOverrideKind, + row_count: u64, +) -> Result<(), DbError> { + let distinct = match kind { + SeedOverrideKind::Fixed(_) => 1, + SeedOverrideKind::PickList(values) => { + let mut set = std::collections::HashSet::new(); + for v in values { + set.insert(seed_override_literal(v, column)?); + } + set.len() + } + // Unbounded-enough sources — leave to the cap if they exhaust. + SeedOverrideKind::Generator(_) | SeedOverrideKind::Range { .. } => return Ok(()), + }; + if distinct as u64 >= row_count.max(1) { + return Ok(()); + } + // Single-column uniqueness only: a compound UNIQUE / compound PK can + // still be satisfied by varying the *other* columns, so a pinned + // value there does not force a cap. + let single_unique = schema + .columns + .iter() + .find(|c| c.name.eq_ignore_ascii_case(column)) + .is_some_and(|c| c.unique) + || (schema.primary_key.len() == 1 + && schema.primary_key[0].eq_ignore_ascii_case(column)); + if single_unique { + return Err(DbError::Unsupported(format!( + "cannot fill {row_count} rows: `set {column} …` offers only {distinct} distinct \ + value(s), but `{column}` is UNIQUE. Use a generator (e.g. `as email`) or a list \ + of at least {row_count} values." + ))); + } + Ok(()) +} + +/// Turn one `set` override into the `SeedColPlan` that produces its +/// values (ADR-0048 D2). `Fixed`/`PickList` become a `PickFrom` over the +/// literal(s); `Generator` resolves the curated name (unknown → friendly +/// error); `Range` validates its bounds against the column type *before* +/// generation (an incompatible bound → friendly error). 
+fn seed_override_plan( + kind: &SeedOverrideKind, + ty: Type, + column: &str, +) -> Result { + use crate::seed::Generator; + let generator = match kind { + SeedOverrideKind::Fixed(v) => Generator::PickFrom(vec![seed_override_literal(v, column)?]), + SeedOverrideKind::PickList(vs) => { + let lits = vs + .iter() + .map(|v| seed_override_literal(v, column)) + .collect::, _>>()?; + Generator::PickFrom(lits) + } + SeedOverrideKind::Generator(name) => { + crate::seed::generator_for_name(name).ok_or_else(|| { + DbError::Unsupported(format!( + "unknown generator `{name}` in `set {column} as {name}`. \ + Known generators: {}.", + crate::seed::KNOWN_GENERATORS.join(", "), + )) + })? + } + SeedOverrideKind::Range { low, high } => { + let lo = seed_override_literal(low, column)?; + let hi = seed_override_literal(high, column)?; + if let Some(reason) = crate::seed::range_bounds_reason(ty, &lo, &hi) { + return Err(DbError::Unsupported(format!( + "cannot apply `set {column} between …`: {reason}." + ))); + } + Generator::Range { low: lo, high: hi } + } + }; + Ok(SeedColPlan::Generated { generator, ty }) +} + +/// Extract the literal string an override value contributes to a +/// `PickFrom` / `Range` (re-typed per column by `generate_value`). A +/// `null` override is refused — seed always fills a value (NULL +/// injection is out of scope, ADR-0048 Out-of-scope). +fn seed_override_literal(value: &Value, column: &str) -> Result { + match value { + Value::Number(s) | Value::Text(s) => Ok(s.clone()), + Value::Bool(b) => Ok(if *b { "true" } else { "false" }.to_string()), + Value::Null => Err(DbError::Unsupported(format!( + "`set {column} = null` is not supported — seed always fills a value." + ))), + } +} + +/// Column-fill (ADR-0048 D1 form 2): fill one column across the table's +/// **existing** rows (an UPDATE), the natural follow-up to `add column`. +/// +/// Refuses PK and auto-generated (`serial`/`shortid`/`blob`) targets; +/// an empty table is a friendly no-op. 
The `set` clause may only adjust +/// the column being filled (the rest of the per-column heuristics do not +/// apply — there is exactly one column). A UNIQUE / identifier target +/// gets collision-free values (generated distinct from *every* existing +/// value in the column, so no row-by-row UPDATE can transiently collide); +/// an FK target samples an existing parent key (D14). The whole fill is +/// one transaction → one undo step (D15), persisted once (commit-db-last). +#[allow(clippy::too_many_arguments)] +fn do_seed_column_fill( + conn: &Connection, + persistence: Option<&Persistence>, + source: Option<&str>, + table: &str, + column: &str, + count: Option, + overrides: &[SeedOverride], + rng_seed: Option, +) -> Result { + use crate::seed; + use rand::RngExt; + + debug!(table = %table, column = %column, "seed column-fill"); + + // A row count is meaningless when filling existing rows (D1 form 2). + if count.is_some() { + return Err(DbError::Unsupported(format!( + "`seed {table}.{column}` fills existing rows, so it takes no row count \ + (drop the number)." + ))); + } + + let schema = read_schema(conn, table)?; + let col = schema + .columns + .iter() + .find(|c| c.name.eq_ignore_ascii_case(column)) + .ok_or_else(|| { + DbError::Unsupported(format!("cannot fill `{table}.{column}`: no such column.")) + })?; + let canonical_col = col.name.clone(); + let ty = col.user_type.unwrap_or(Type::Text); + + // Refuse identity / auto-generated / un-generatable targets (D1). + if col.primary_key { + return Err(DbError::Unsupported(format!( + "cannot fill `{table}.{canonical_col}`: it is part of the primary key — \ + you don't fill an identity column." 
+ ))); + } + if matches!(ty, Type::Serial | Type::ShortId) { + return Err(DbError::Unsupported(format!( + "cannot fill `{table}.{canonical_col}`: `{}` columns generate their own \ + values automatically.", + ty.keyword(), + ))); + } + if matches!(ty, Type::Blob) { + return Err(DbError::Unsupported(format!( + "cannot fill `{table}.{canonical_col}`: seed cannot generate `blob` values." + ))); + } + + // The `set` clause may only adjust the filled column (user decision). + for ov in overrides { + if !ov.column.eq_ignore_ascii_case(&canonical_col) { + return Err(DbError::Unsupported(format!( + "in `seed {table}.{canonical_col}`, `set` can only adjust \ + `{canonical_col}` (the column being filled), not `{}`.", + ov.column, + ))); + } + } + + // Existing rowids in a deterministic order (D4 reproducibility). + let rowids: Vec = { + let sql = format!( + "SELECT rowid FROM \"{}\" ORDER BY rowid", + table.replace('"', "\"\"") + ); + let mut stmt = conn.prepare(&sql).map_err(DbError::from_rusqlite)?; + stmt.query_map([], |r| r.get::<_, i64>(0)) + .map_err(DbError::from_rusqlite)? + .collect::, _>>() + .map_err(DbError::from_rusqlite)? + }; + + // Empty table → friendly no-op (D1). + if rowids.is_empty() { + return Ok(SeedResult { + table: table.to_string(), + requested: 0, + produced: 0, + data: DataResult { + table_name: table.to_string(), + columns: Vec::new(), + column_types: Vec::new(), + rows: Vec::new(), + }, + advisory_columns: Vec::new(), + }); + } + + // FK target → sample an existing parent key column (D14). + let fk_sample: Option> = { + let fk = schema.foreign_keys.iter().find(|fk| { + fk.child_columns + .iter() + .any(|c| c.eq_ignore_ascii_case(&canonical_col)) + }); + match fk { + Some(fk) => { + // Single-column position within the FK (column-fill targets + // one column; a compound FK filled one column at a time is + // unusual but we sample that column's parent values). 
+ let pos = fk + .child_columns + .iter() + .position(|c| c.eq_ignore_ascii_case(&canonical_col)) + .unwrap_or(0); + let parent_col = fk.parent_columns.get(pos).cloned().unwrap_or_default(); + let tuples = sample_parent_key_tuples(conn, &fk.parent_table, &[parent_col])?; + if tuples.is_empty() { + return Err(DbError::Unsupported(format!( + "cannot fill `{table}.{canonical_col}`: parent table `{}` has no \ + rows to reference. Seed or insert into `{}` first.", + fk.parent_table, fk.parent_table, + ))); + } + Some(tuples.into_iter().map(|mut t| t.remove(0)).collect()) + } + None => None, + } + }; + + // The value source: an override (if present) else the heuristic. + let mut advisory_columns: Vec = Vec::new(); + let plan: SeedColPlan = if let Some(ov) = overrides + .iter() + .find(|o| o.column.eq_ignore_ascii_case(&canonical_col)) + { + // Same capacity guard as whole-row: a bounded override that can't + // give enough distinct values for a UNIQUE column across the + // existing rows is refused up front, not silently capped. + seed_override_capacity_guard(&schema, &canonical_col, &ov.kind, rowids.len() as u64)?; + seed_override_plan(&ov.kind, ty, &canonical_col)? 
+ } else if fk_sample.is_some() { + SeedColPlan::ForeignKey { fk_idx: 0, pos: 0 } + } else if matches!(ty, Type::ShortId) { + SeedColPlan::ShortId // unreachable (refused above), kept for totality + } else { + let check_in_values = col + .check + .as_deref() + .and_then(|chk| seed::parse_in_check_values(chk, &canonical_col)); + let spec = seed::ColumnSpec { + name: canonical_col.clone(), + ty, + not_null: col.notnull, + primary_key: col.primary_key, + unique: col.unique, + is_foreign_key: false, + check_in_values, + }; + let generator = seed::choose_generator(table, &spec); + if matches!(generator, crate::seed::Generator::Generic) + && (seed::is_enum_ish(&canonical_col) + || (col.check.is_some() && spec.check_in_values.is_none())) + { + advisory_columns.push(canonical_col.clone()); + } + SeedColPlan::Generated { generator, ty } + }; + + // Collision-free generation for UNIQUE / identifier targets: seed the + // used-set with EVERY existing value of the column so a generated + // value never matches a not-yet-updated row (no transient UNIQUE + // violation) nor a value already assigned this batch (ADR-0048 D10). + let enforce_unique = col.unique + || matches!( + &plan, + SeedColPlan::Generated { + generator: crate::seed::Generator::IdentitySequential, + .. + } + ); + let mut used: std::collections::HashSet = std::collections::HashSet::new(); + if enforce_unique { + for tuple in + sample_parent_key_tuples(conn, table, std::slice::from_ref(&canonical_col))? + { + used.insert(seed_value_list_key(&tuple)); + } + } + let seq_base = if matches!( + &plan, + SeedColPlan::Generated { + generator: crate::seed::Generator::IdentitySequential, + .. + } + ) && matches!(ty, Type::Int) + { + Some(seed_max_int(conn, table, &canonical_col)?) 
+ } else { + None + }; + + const MAX_ATTEMPTS: u32 = 200; + let mut rng = seed::make_rng(rng_seed); + let tx = conn + .unchecked_transaction() + .map_err(DbError::from_rusqlite)?; + + let update_sql = format!( + "UPDATE \"{}\" SET \"{}\" = ?1 WHERE rowid = ?2", + table.replace('"', "\"\""), + canonical_col.replace('"', "\"\""), + ); + let mut produced: u64 = 0; + for (offset, rowid) in rowids.iter().enumerate() { + let mut attempt = 0u32; + let value = loop { + let v = match &plan { + SeedColPlan::ForeignKey { .. } => { + let samples = fk_sample.as_ref().expect("fk plan implies samples"); + samples[rng.random_range(0..samples.len())].clone() + } + SeedColPlan::ShortId => { + Value::Text(crate::dsl::shortid::generate_with_rng(&mut rng)) + } + SeedColPlan::Generated { generator, ty } + if matches!(generator, crate::seed::Generator::IdentitySequential) + && matches!(ty, Type::Int) => + { + Value::Number((seq_base.unwrap_or(0) + produced as i64 + 1).to_string()) + } + SeedColPlan::Generated { generator, ty } => { + seed::generate_value(generator, *ty, &mut rng) + } + }; + if enforce_unique { + let key = seed_value_list_key(std::slice::from_ref(&v)); + if used.contains(&key) { + attempt += 1; + if attempt >= MAX_ATTEMPTS { + break v; // give up on distinctness; DB may reject + } + continue; + } + used.insert(key); + } + break v; + }; + let bound = impl_value_for(&schema, &canonical_col, &value)?; + let params: Vec = + vec![bound_to_sqlite_value(&bound), rusqlite::types::Value::Integer(*rowid)]; + execute_with_fk_enrichment(conn, table, &update_sql, ¶ms)?; + produced += 1; + let _ = offset; + } + + let changes = Changes { + schema_dirty: false, + rewritten_tables: vec![table.to_string()], + ..Changes::default() + }; + finalize_persistence(conn, persistence, source, &changes)?; + tx.commit().map_err(DbError::from_rusqlite)?; + + // Preview the first capped rows (D18). 
+ let preview: Vec = rowids.iter().take(SEED_PREVIEW_CAP).copied().collect(); + let data = query_rows_by_rowid(conn, table, &preview)?; + + Ok(SeedResult { + table: table.to_string(), + requested: produced, + produced, + data, + advisory_columns, + }) +} + +/// Build and execute a single-row `INSERT` — column resolution, value +/// binding, `serial`/`shortid` autofill, and the FK-enriched execute — +/// returning `(rows_affected, new rowid)`. +/// +/// It does **not** manage the transaction or persistence: the caller +/// owns those. This lets `do_insert` run one row in its own +/// transaction while `do_seed` runs N rows in a single transaction and +/// persists once (preserving ADR-0015 §6 commit-db-last while staying +/// O(N)). **The caller must hold an open transaction.** `table` must +/// already be canonical and `schema` already read. +fn insert_one_row( + conn: &Connection, + table: &str, + schema: &ReadSchema, + user_columns: Option<&[String]>, + user_values: &[Value], +) -> Result<(usize, i64), DbError> { + // Resolve which columns the user is providing values for. The short + // form (None) is every non-auto-generated column in schema + // declaration order; serial and shortid get auto-filled below. 
+ let user_cols: Vec = user_columns.map_or_else( + || { schema .columns .iter() .filter(|c| !matches!(c.user_type, Some(Type::Serial) | Some(Type::ShortId))) .map(|c| c.name.clone()) .collect() - } - }; + }, + <[String]>::to_vec, + ); if user_cols.len() != user_values.len() { return Err(DbError::InvalidValue(format!( @@ -8676,7 +9638,7 @@ fn do_insert( let mut bindings: Vec<(String, Bound)> = Vec::with_capacity(user_cols.len()); for (col_name, value) in user_cols.iter().zip(user_values.iter()) { - let bound = impl_value_for(&schema, col_name, value)?; + let bound = impl_value_for(schema, col_name, value)?; bindings.push((col_name.clone(), bound)); } @@ -8747,11 +9709,28 @@ fn do_insert( debug!(sql = %sql, "insert"); let params: Vec = bindings.iter().map(|(_, b)| bound_to_sqlite_value(b)).collect(); + let rows_affected = execute_with_fk_enrichment(conn, table, &sql, ¶ms)?; + let new_rowid = conn.last_insert_rowid(); + Ok((rows_affected, new_rowid)) +} + +fn do_insert( + conn: &Connection, + persistence: Option<&Persistence>, + source: Option<&str>, + table: &str, + user_columns: Option<&[String]>, + user_values: &[Value], +) -> Result { + debug!(table = %table, "insert"); + let canonical_table = require_canonical_table(conn, table)?; + let table = canonical_table.as_str(); + let schema = read_schema(conn, table)?; let tx = conn .unchecked_transaction() .map_err(DbError::from_rusqlite)?; - let rows_affected = execute_with_fk_enrichment(conn, table, &sql, ¶ms)?; - let new_rowid = conn.last_insert_rowid(); + let (rows_affected, new_rowid) = + insert_one_row(conn, table, &schema, user_columns, user_values)?; let data = query_rows_by_rowid(conn, table, &[new_rowid])?; let changes = Changes { schema_dirty: false, @@ -8760,10 +9739,7 @@ fn do_insert( }; finalize_persistence(conn, persistence, source, &changes)?; tx.commit().map_err(DbError::from_rusqlite)?; - Ok(InsertResult { - rows_affected, - data, - }) + Ok(InsertResult { rows_affected, data }) } /// Build the 
parameterised `UPDATE … SET … WHERE …` statement. diff --git a/src/dsl/command.rs b/src/dsl/command.rs index 68046e4..99304a3 100644 --- a/src/dsl/command.rs +++ b/src/dsl/command.rs @@ -402,6 +402,25 @@ pub enum Command { filter: Option, limit: Option, }, + /// Populate a table with generated fake data (ADR-0048, SD1/SD2). + /// `count` defaults to 20 when omitted; `rng_seed` (from the + /// `--seed ` flag) makes generation reproducible. + /// + /// Phase 2 surfaces (ADR-0048 D1/D2): + /// - `target_column` is `Some` for the **column-fill** form + /// `seed
.` — fill one column across the table's + /// *existing* rows (an UPDATE), rather than generating new rows. + /// - `overrides` carries the `set …` clause: per-column pins + /// that take precedence over the heuristic generator (D2). + Seed { + table: String, + /// `Some(col)` → column-fill mode (UPDATE existing rows); + /// `None` → whole-row generation (INSERT new rows). + target_column: Option, + count: Option, + overrides: Vec, + rng_seed: Option, + }, /// Replay a sequence of DSL commands from a file. Each line /// is parsed and dispatched through the same pipeline as /// interactive input. Blank lines and lines whose first @@ -637,6 +656,38 @@ impl RowFilter { } } +/// One `set …` override on a `seed` command (ADR-0048 D2, Phase 2). +/// +/// The user can pin a column's generated values to a constant, a +/// pick-list, an explicit named generator, or a range — overriding the +/// per-column heuristic the executor would otherwise pick. `column` is +/// the user-typed column name (validated against the table at execution, +/// like every other column slot). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SeedOverride { + pub column: String, + pub kind: SeedOverrideKind, +} + +/// The four `set` override forms (ADR-0048 D2). +/// +/// Values arrive as the DSL's `Value` (quoted text / unquoted number — +/// dates are quoted text per the D2 amendment); the `Generator` name is +/// a raw string validated at execution because `src/dsl` cannot depend +/// on `src/seed` (the curated vocabulary lives there). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SeedOverrideKind { + /// `set status = 'pending'` — every row gets the constant. + Fixed(Value), + /// `set role in ('admin', 'editor')` — uniform pick from the list. + PickList(Vec), + /// `set work_addr as email` — force the named generator (D9). + Generator(String), + /// `set price between 10 and 100` — uniform in `[low, high]`; + /// numeric or (quoted) date bounds per the destination column type. 
+ Range { low: Value, high: Value }, +} + /// A complex WHERE expression (ADR-0026 §4). /// /// Built by `grammar::expr::build_expr` from the flat @@ -949,6 +1000,7 @@ impl Command { } => "show index", Self::ShowList { kind, .. } => kind.command_name(), Self::Insert { .. } => "insert into", + Self::Seed { .. } => "seed", Self::Update { .. } => "update", Self::Delete { .. } => "delete from", Self::ShowData { .. } => "show data", @@ -997,6 +1049,7 @@ impl Command { | Self::AddConstraint { table, .. } | Self::DropConstraint { table, .. } | Self::Insert { table, .. } + | Self::Seed { table, .. } | Self::Update { table, .. } | Self::Delete { table, .. } => table, // For relationships we focus on the parent (1-side): diff --git a/src/dsl/grammar/data.rs b/src/dsl/grammar/data.rs index b6a8a34..b111075 100644 --- a/src/dsl/grammar/data.rs +++ b/src/dsl/grammar/data.rs @@ -24,7 +24,9 @@ //! later swap that capture for the same typed slots used here, adding //! live hints/highlighting. -use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind}; +use crate::dsl::command::{ + Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind, +}; use crate::dsl::grammar::{ CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr, shared::{ @@ -425,6 +427,152 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[ ]; const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); +// ================================================================= +// seed — `seed [.] [] [set ] [--seed ]` +// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause / +// column-fill) +// ================================================================= + +/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a +/// non-negative integer). +const SEED_COUNT: Node = Node::NumberLit { + validator: Some(LIMIT_VALIDATOR), +}; +/// `--seed ` — a reproducible-generation flag carrying a numeric +/// seed (ADR-0048 D4). 
The only flag in the DSL that takes a value; +/// `build_seed` reads the number immediately after the flag. +const SEED_FLAG_NODES: &[Node] = &[ + Node::Flag("seed"), + Node::NumberLit { + validator: Some(LIMIT_VALIDATOR), + }, +]; +const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES); + +// --- column-fill target: the optional `.` (ADR-0048 D1 +// form 2) ---------------------------------------------------- +// +// `seed users.email …` fills one column across existing rows. The +// table ident stops at `.` (idents are alnum/underscore), so an +// `Optional(Seq['.', column])` after the table cleanly discriminates: +// when the next token is not `.`, the `Punct('.')` first-child +// NoMatches and `walk_optional` skips it; once `.` commits, a missing +// column propagates as the user mid-typing `seed users.` (driver +// `walk_optional` semantics). The column resolves against +// `current_table_columns` (populated by `TABLE_NAME_WRITES`). +const SEED_TARGET_COLUMN: Node = Node::Ident { + source: IdentSource::Columns, + role: "seed_target_column", + validator: None, + highlight_override: None, + writes_table: false, + writes_column: false, + writes_user_listed_column: false, + writes_table_alias: false, + writes_cte_name: false, + writes_projection_alias: false, +}; +const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN]; +const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES)); + +// --- the `set [, …]` clause (ADR-0048 D2) -------- +// +// Each override pins one column's generation. The column slot +// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the +// same `current_column_value` dispatch `update … set` uses) narrow to +// the column's type — so list/range/fixed values get the column's +// typed slot (quoted text, unquoted number, quoted date) and a +// type-mismatched literal is flagged. 
The four tails each start with a +// distinct token (`=` / `in` / `between` / `as`), so the `Choice` +// discriminates cleanly (no Optional-first branch). + +/// The `set ` column slot. Distinct role from `update`'s +/// `update_set_column` and the expression `expr_column`. +const SEED_SET_COLUMN: Node = Node::Ident { + source: IdentSource::Columns, + role: "seed_set_column", + validator: None, + highlight_override: None, + writes_table: false, + writes_column: true, + writes_user_listed_column: false, + writes_table_alias: false, + writes_cte_name: false, + writes_projection_alias: false, +}; + +/// `as ` — the curated generator-name vocabulary (D9), +/// highlighted in the `tok_function` colour. The slot is structural +/// (any identifier matches); the name is validated at execution and +/// flagged live by the validity indicator. +const SEED_GENERATOR: Node = Node::Ident { + source: IdentSource::Generators, + role: "seed_generator", + validator: None, + highlight_override: Some(crate::dsl::grammar::HighlightClass::Function), + writes_table: false, + writes_column: false, + writes_user_listed_column: false, + writes_table_alias: false, + writes_cte_name: false, + writes_projection_alias: false, +}; + +/// `= ` — a fixed constant for every row. +const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE]; +/// `in ( [, ]* )` — uniform pick from the list. +const SEED_OV_IN_VALUES: Node = Node::Repeated { + inner: &PER_COLUMN_VALUE, + separator: Some(&Node::Punct(',')), + min: 1, +}; +const SEED_OV_IN_NODES: &[Node] = &[ + Node::Word(Word::keyword("in")), + Node::Punct('('), + SEED_OV_IN_VALUES, + Node::Punct(')'), +]; +/// `between and ` — uniform in the (typed) range. +const SEED_OV_BETWEEN_NODES: &[Node] = &[ + Node::Word(Word::keyword("between")), + PER_COLUMN_VALUE, + Node::Word(Word::keyword("and")), + PER_COLUMN_VALUE, +]; +/// `as ` — force a named generator. 
+const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR]; + +const SEED_OV_TAIL_CHOICES: &[Node] = &[ + Node::Seq(SEED_OV_FIXED_NODES), + Node::Seq(SEED_OV_IN_NODES), + Node::Seq(SEED_OV_BETWEEN_NODES), + Node::Seq(SEED_OV_AS_NODES), +]; +const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES); + +const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL]; +const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES); +const SEED_OVERRIDES: Node = Node::Repeated { + inner: &SEED_OVERRIDE, + separator: Some(&Node::Punct(',')), + min: 1, +}; +const SEED_SET_CLAUSE_NODES: &[Node] = + &[Node::Word(Word::keyword("set")), SEED_OVERRIDES]; +const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES); + +const SEED_NODES: &[Node] = &[ + // `writes_table` so the `.column` target, the `set =…` + // clause's column slots, and the typed value slots all resolve + // against this table. + TABLE_NAME_WRITES, + SEED_DOT_COLUMN, + Node::Optional(&SEED_COUNT), + Node::Optional(&SEED_SET_CLAUSE), + Node::Optional(&SEED_FLAG), +]; +const SEED_SHAPE: Node = Node::Seq(SEED_NODES); + const UPDATE_NODES: &[Node] = &[ TABLE_NAME_WRITES, Node::Word(Word::keyword("set")), @@ -708,6 +856,195 @@ fn build_show_limit(path: &MatchedPath) -> Result, ValidationError> }) } +/// Build a `seed [.] [] [set ] [--seed ]` +/// command (ADR-0048, SD1 + SD2 Phase 2). +/// +/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column` +/// ident, present only for the `seed .` form. +/// - The positional `count` is the `NumberLit` that precedes both the +/// `set` keyword and the `--seed` flag — bounding it that way keeps a +/// `set age between 18 and 80` value (also a `NumberLit`) from being +/// mistaken for the count. +/// - `--seed ` is the `NumberLit` right after the flag (D4). +/// - `overrides` (D2) is folded from the flat `set`-clause terminals. 
+fn build_seed(path: &MatchedPath, _source: &str) -> Result { + let table = require_ident(path, "table_name")?; + let target_column = ident_text(path, "seed_target_column").map(str::to_string); + + let flag_idx = path + .items + .iter() + .position(|i| matches!(&i.kind, MatchedKind::Flag("seed"))); + let set_idx = path + .items + .iter() + .position(|i| matches!(&i.kind, MatchedKind::Word("set"))); + + let rng_seed = flag_idx + .and_then(|fi| path.items.get(fi + 1)) + .filter(|i| matches!(i.kind, MatchedKind::NumberLit)) + .map(|i| parse_seed_u64(&i.text)) + .transpose()?; + + // The count is bounded to before the `set` clause and the flag, so a + // numeric value inside `set` (e.g. `between 18 and 80`) is never read + // as the count. + let count_boundary = [set_idx, flag_idx] + .into_iter() + .flatten() + .min() + .unwrap_or(path.items.len()); + let count = path + .items + .iter() + .enumerate() + .find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary) + .map(|(_, i)| parse_seed_u64(&i.text)) + .transpose()?; + + let overrides = build_seed_overrides(path, set_idx, flag_idx)?; + + Ok(Command::Seed { + table, + target_column, + count, + overrides, + rng_seed, + }) +} + +/// Fold the flat `set`-clause terminals into [`SeedOverride`]s +/// (ADR-0048 D2). The clause region runs from just after `Word("set")` +/// to the `--seed` flag (or the path end). Each override begins at a +/// `seed_set_column` ident; the token right after it selects the form +/// (`=` / `in` / `between` / `as`). Top-level comma separators between +/// overrides are skipped (the `in (...)` form consumes its own inner +/// commas up to `)`). 
+fn build_seed_overrides( + path: &MatchedPath, + set_idx: Option, + flag_idx: Option, +) -> Result, ValidationError> { + let Some(set_idx) = set_idx else { + return Ok(Vec::new()); + }; + let end = flag_idx.unwrap_or(path.items.len()); + let region = &path.items[set_idx + 1..end]; + + let mut overrides = Vec::new(); + let mut i = 0; + while i < region.len() { + // The next override starts at its column ident; skip the + // top-level comma separators (and any stray token) between them. + let MatchedKind::Ident { + role: "seed_set_column", + .. + } = ®ion[i].kind + else { + i += 1; + continue; + }; + let column = region[i].text.clone(); + i += 1; + let kind = parse_seed_override_tail(region, &mut i, &column)?; + overrides.push(SeedOverride { column, kind }); + } + Ok(overrides) +} + +/// Parse one override tail starting at `region[*i]` (just past the +/// column ident), advancing `*i` past the consumed tokens. +fn parse_seed_override_tail( + region: &[MatchedItem], + i: &mut usize, + column: &str, +) -> Result { + let head = region.get(*i).ok_or_else(|| seed_set_error(column))?; + match &head.kind { + MatchedKind::Punct('=') => { + *i += 1; + let value = seed_take_value(region, i, column)?; + Ok(SeedOverrideKind::Fixed(value)) + } + MatchedKind::Word("in") => { + *i += 1; // `in` + // `(` + if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) { + *i += 1; + } + let mut values = Vec::new(); + while let Some(item) = region.get(*i) { + match &item.kind { + MatchedKind::Punct(')') => { + *i += 1; + break; + } + MatchedKind::Punct(',') => { + *i += 1; + } + _ => values.push(seed_take_value(region, i, column)?), + } + } + Ok(SeedOverrideKind::PickList(values)) + } + MatchedKind::Word("between") => { + *i += 1; // `between` + let low = seed_take_value(region, i, column)?; + if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) { + *i += 1; + } + let high = seed_take_value(region, i, column)?; + Ok(SeedOverrideKind::Range { low, 
high }) + } + MatchedKind::Word("as") => { + *i += 1; // `as` + let gen_item = region + .get(*i) + .filter(|t| matches!(t.kind, MatchedKind::Ident { role: "seed_generator", .. })) + .ok_or_else(|| seed_set_error(column))?; + *i += 1; + Ok(SeedOverrideKind::Generator(gen_item.text.clone())) + } + _ => Err(seed_set_error(column)), + } +} + +/// Take one value literal at `region[*i]`, advancing past it. +/// +/// The grammar's typed value slots only ever match value literals (a +/// bare unquoted word fails to match the slot and is rejected *before* +/// this fold runs — D2's quoting requirement enforced structurally), so +/// a non-literal here can only mean a grammar/builder drift bug; the +/// `Err` is a drift guard (mirrors `expr::build_expr`). +fn seed_take_value( + region: &[MatchedItem], + i: &mut usize, + column: &str, +) -> Result { + let item = region.get(*i).ok_or_else(|| seed_set_error(column))?; + let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?; + *i += 1; + Ok(value) +} + +/// Drift-guard error for the `set`-clause fold (see `seed_take_value`). 
+fn seed_set_error(column: &str) -> ValidationError { + ValidationError { + message_key: "parse.error_wrapper", + args: vec![("detail", format!("malformed `set` clause for `{column}`"))], + } +} + +fn parse_seed_u64(text: &str) -> Result { + text.parse::().map_err(|_| ValidationError { + message_key: "parse.custom.bind_type_mismatch", + args: vec![ + ("found", text.to_string()), + ("expected", "non-negative integer".to_string()), + ], + }) +} + fn build_insert(path: &MatchedPath, _source: &str) -> Result { let table = require_ident(path, "table_name")?; @@ -1452,6 +1789,14 @@ pub static SHOW: CommandNode = CommandNode { "parse.usage.show_index", ],}; +pub static SEED: CommandNode = CommandNode { + entry: Word::keyword("seed"), + shape: SEED_SHAPE, + ast_builder: build_seed, + help_id: Some("data.seed"), + usage_ids: &["parse.usage.seed"], +}; + pub static INSERT: CommandNode = CommandNode { entry: Word::keyword("insert"), shape: INSERT_SHAPE, diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs index 30a5b3b..f06cf3f 100644 --- a/src/dsl/grammar/mod.rs +++ b/src/dsl/grammar/mod.rs @@ -57,6 +57,12 @@ pub enum HighlightClass { String, Punct, Flag, + /// A curated function-vocabulary name — the `seed … set as + /// ` generator names (ADR-0048 D2/§Grammar). Rendered in + /// the existing `tok_function` colour (ADR-0022 Amд6 blue — no new + /// theme colour), assigned via a generator slot's + /// `highlight_override`, not by byte shape. + Function, Error, } @@ -86,6 +92,14 @@ pub enum IdentSource { /// content validator on column-type slots; not user-listable /// from the schema. Types, + /// Closed, curated set of fake-data generator names (ADR-0048 + /// D9) — the `seed … set as ` slot. Like + /// `Types`, not user-listable from the schema; the vocabulary + /// lives in `src/seed` and the completion engine offers it. 
The + /// grammar slot is purely structural (matches any identifier); + /// an unknown name is flagged live (validity) and rejected at + /// execution. + Generators, /// Any identifier shape; used by synthetic catch-all branches /// (e.g., the unknown-value branch of `mode `). Free, @@ -117,6 +131,7 @@ impl IdentSource { Self::Relationships => "relationship name", Self::Indexes => "index name", Self::Types => "type", + Self::Generators => "generator name", } } @@ -134,6 +149,7 @@ impl IdentSource { "relationship name" => Some(Self::Relationships), "index name" => Some(Self::Indexes), "type" => Some(Self::Types), + "generator name" => Some(Self::Generators), _ => None, } } @@ -714,6 +730,7 @@ pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[ (&ddl::CREATE, CommandCategory::Simple), (&ddl::CREATE_M2N, CommandCategory::Simple), (&data::SHOW, CommandCategory::Simple), + (&data::SEED, CommandCategory::Simple), (&data::INSERT, CommandCategory::Simple), (&data::UPDATE, CommandCategory::Simple), (&data::DELETE, CommandCategory::Simple), diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 9e260c8..42ad61d 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String { IdentSource::Relationships => "relationship name".to_string(), IdentSource::Indexes => "index name".to_string(), IdentSource::Types => "type".to_string(), + IdentSource::Generators => "generator name".to_string(), IdentSource::NewName | IdentSource::Free => "identifier".to_string(), }, Expectation::Punct(c) => format!("`{c}`"), diff --git a/src/dsl/shortid.rs b/src/dsl/shortid.rs index 3638e92..b591bd7 100644 --- a/src/dsl/shortid.rs +++ b/src/dsl/shortid.rs @@ -18,17 +18,21 @@ const DEFAULT_LEN: usize = 10; pub const MIN_LEN: usize = 10; pub const MAX_LEN: usize = 12; -/// Generate a fresh shortid using thread-local RNG. +/// Generate a fresh shortid using the thread-local RNG. 
#[must_use] pub fn generate() -> String { - generate_len(DEFAULT_LEN) + generate_with_rng(&mut rand::rng()) } +/// Generate a shortid from a caller-supplied RNG. +/// +/// Lets `seed --seed ` produce **reproducible** shortid values +/// (ADR-0048 D4) by threading its seeded RNG through, while the default +/// [`generate`] keeps its thread-RNG behaviour for ordinary inserts. #[must_use] -fn generate_len(len: usize) -> String { - let mut rng = rand::rng(); - let mut out = String::with_capacity(len); - for _ in 0..len { +pub fn generate_with_rng(rng: &mut R) -> String { + let mut out = String::with_capacity(DEFAULT_LEN); + for _ in 0..DEFAULT_LEN { let idx = rng.random_range(0..ALPHABET.len()); out.push(ALPHABET[idx] as char); } diff --git a/src/dsl/walker/highlight.rs b/src/dsl/walker/highlight.rs index f2bd732..0a29ef4 100644 --- a/src/dsl/walker/highlight.rs +++ b/src/dsl/walker/highlight.rs @@ -240,6 +240,18 @@ mod tests { ); } + #[test] + fn seed_generator_name_highlighted_as_function() { + // ADR-0048 D9: the `set as ` generator name carries the + // `Function` highlight class (via the slot's `highlight_override`), + // rendered in the shared `tok_function` colour. + let runs = run("seed Members set role as email"); + assert!( + runs.iter().any(|(_, _, c)| *c == HighlightClass::Function), + "generator name `email` should be Function-highlighted: {runs:?}" + ); + } + #[test] fn unknown_command_word_classified_by_byte_shape() { // Walker doesn't engage; fallback classifies as Identifier. diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index d3cf55b..f32ffa1 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics( IdentSource::Relationships | IdentSource::Indexes | IdentSource::Types + // `Generators` (the `set … as ` slot, ADR-0048 D9) is a + // curated vocabulary; its unknown-name validity is handled by + // the completion-layer indicator, not this walker diagnostic. 
+ | IdentSource::Generators | IdentSource::Free => {} } } diff --git a/src/event.rs b/src/event.rs index 51b2be2..623f299 100644 --- a/src/event.rs +++ b/src/event.rs @@ -87,6 +87,10 @@ pub enum AppEvent { command: Command, result: InsertResult, }, + DslSeedSucceeded { + command: Command, + result: crate::db::SeedResult, + }, DslUpdateSucceeded { command: Command, result: UpdateResult, diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 389a22a..b26d01e 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -207,6 +207,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("help.ddl.rename", &[]), ("help.ddl.change", &[]), ("help.data.show", &[]), + ("help.data.seed", &[]), ("help.data.insert", &[]), ("help.data.update", &[]), ("help.data.delete", &[]), @@ -308,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("parse.usage.undo", &[]), ("parse.usage.save", &[]), ("parse.usage.select", &[]), + ("parse.usage.seed", &[]), ("parse.usage.show_data", &[]), ("parse.usage.show_table", &[]), ("parse.usage.show_tables", &[]), @@ -548,7 +550,10 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("ok.index_dropped_with_column", &["index"]), ("ok.rows_deleted", &["count"]), ("ok.rows_inserted", &["count"]), + ("ok.rows_seeded", &["count", "table"]), ("ok.rows_updated", &["count"]), + ("seed.capped", &["requested"]), + ("seed.advisory_generic", &["columns", "column", "table"]), // ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ---- ("client_side.auto_fill_add_serial", &["count"]), ("client_side.auto_fill_add_shortid", &["count"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 88778ee..8bfb9c5 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -333,6 +333,17 @@ help: show indexes — list all indexes show relationship — show one relationship's detail show index — show one index's detail + seed: |- + seed [] — fill a table with generated 
sample rows + (default 20). Existing rows are kept; + foreign keys draw from existing parent rows. + seed ... set = 'v' | in ('a','b') | as | between x and y + — pin how a column is generated: a fixed + value, a pick-list, a named generator + (email, name, product, ...), or a range. + seed .[set ...] — fill one column across the EXISTING rows + (the follow-up to `add column`). + seed ... --seed — reproducible: same data for the same n. insert: |- insert into [(cols)] [values] (vals) — add a row update: |- @@ -569,6 +580,7 @@ parse: change_column: |- change column [in] [table]
: () [--force-conversion | --dont-convert] + seed: "seed
[count] [set = ... | in (...) | as | between x and y] | seed
." show_data: "show data
" show_table: "show table
" show_tables: "show tables" @@ -978,6 +990,17 @@ db: # template couldn't provide. Re-introduce a key here if a non-English # locale lands.) +# Seed-command notes (ADR-0048): the cap note when the unique-value +# space is exhausted, and the advisory that flags columns filled with +# generic text that look like fixed value sets. +seed: + capped: "(of {requested} requested — ran out of distinct value combinations)" + # ADR-0048 D13 (Phase 2/3 wording): name the generically-filled + # enum-ish / CHECK columns and point at the concrete repairs — the + # `set` clause on a fresh seed, or the column-fill form for the rows + # just created. + advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`." + ok: # ADR-0040: the generic `[ok] ` summary line was # retired — a successful command's echo line now carries a ✓ @@ -985,6 +1008,7 @@ ok: # per-operation row-count footers below still convey real payload # and are unchanged. rows_inserted: " {count} row(s) inserted" + rows_seeded: " {count} row(s) seeded into {table}" rows_updated: " {count} row(s) updated" rows_deleted: " {count} row(s) deleted" # Shown beneath a `drop column --cascade` summary, once per diff --git a/src/input_render.rs b/src/input_render.rs index e0bac70..47efe00 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -84,16 +84,60 @@ pub fn render_input_runs_in_mode( cache: &crate::completion::SchemaCache, mode: Mode, ) -> Vec { - let mut runs = lex_to_runs_in_mode(input, theme, mode); + // Identity feedback view — highlight/overlay the whole input. + render_input_runs_feedback(input, cursor_byte, theme, cache, mode, input, cursor_byte, 0) +} + +/// [`render_input_runs_in_mode`] with a separate **feedback view** for +/// the walker-driven highlighting and overlays. 
+/// +/// Under the `:` one-shot escape (ADR-0003) the buffer carries a leading +/// `:` that is not advanced SQL; `view` is the stripped SQL (and +/// `view_cursor` the cursor within it) so the walker highlights and +/// diagnoses the SQL itself, while the `:` prefix renders as plain text. +/// `offset` is the byte length stripped from the front — base runs and +/// overlay positions are shifted by it back into `input` coordinates. +/// Callers without a one-shot escape pass `(input, cursor, 0)` (what +/// [`render_input_runs_in_mode`] does). +#[must_use] +#[allow(clippy::too_many_arguments)] +pub fn render_input_runs_feedback( + input: &str, + cursor_byte: usize, + theme: &Theme, + cache: &crate::completion::SchemaCache, + mode: Mode, + view: &str, + view_cursor: usize, + offset: usize, +) -> Vec { + // Base highlighting runs over the SQL view, shifted into buffer + // coordinates; the stripped prefix (the `:` + space) renders as + // plain foreground text. + let mut runs: Vec = if offset == 0 { + lex_to_runs_in_mode(input, theme, mode) + } else { + let mut r = vec![StyledRun { + byte_range: (0, offset), + style: ratatui::style::Style::default().fg(theme.fg), + }]; + r.extend(lex_to_runs_in_mode(view, theme, mode).into_iter().map(|run| { + StyledRun { + byte_range: (run.byte_range.0 + offset, run.byte_range.1 + offset), + ..run + } + })); + r + }; if let InputState::DefiniteErrorAt(pos) = - classify_parse_result(parse_command_with_schema_in_mode(input, cache, mode)) + classify_parse_result(parse_command_with_schema_in_mode(view, cache, mode)) { - overlay_error(&mut runs, pos, theme); + overlay_error(&mut runs, pos + offset, theme); } if let Some(inv) = - crate::completion::invalid_ident_at_cursor_in_mode(input, cursor_byte, cache, mode) + crate::completion::invalid_ident_at_cursor_in_mode(view, view_cursor, cache, mode) { - overlay_error(&mut runs, inv.range.0, theme); + overlay_error(&mut runs, inv.range.0 + offset, theme); } // Schema-aware diagnostics 
(ADR-0027 §2): unknown table / // column (ERROR), or a dubious comparison (WARNING), is @@ -101,12 +145,12 @@ pub fn render_input_runs_in_mode( // so a problem the user has typed past stays visible. The // mode-aware walk picks up the SQL-specific diagnostics from // ADR-0032 in advanced mode. - for diag in walker::input_diagnostics_in_mode(input, Some(cache), mode) { + for diag in walker::input_diagnostics_in_mode(view, Some(cache), mode) { let colour = match diag.severity { walker::Severity::Error => theme.tok_error, walker::Severity::Warning => theme.warning, }; - overlay_span(&mut runs, diag.span, colour); + overlay_span(&mut runs, (diag.span.0 + offset, diag.span.1 + offset), colour); } inject_cursor(&mut runs, input, cursor_byte, theme); runs @@ -817,6 +861,9 @@ fn ambient_hint_core_in_mode( crate::dsl::grammar::IdentSource::Tables => "table", crate::dsl::grammar::IdentSource::Columns => "column", crate::dsl::grammar::IdentSource::Relationships => "relationship", + // The `seed … set as ` curated vocabulary + // (ADR-0048 D9) flags an unknown name here. + crate::dsl::grammar::IdentSource::Generators => "generator", // `NewName`, `Types`, `Free` are filtered out by // `invalid_ident_at_cursor` (it only fires for // known-set sources via `completes_from_schema`), so @@ -1105,6 +1152,50 @@ mod tests { assert!(reversed(&runs[0])); } + #[test] + fn one_shot_colon_highlights_the_sql_and_overlays_no_error() { + // ADR-0003 `:` one-shot: the SQL after the `:` must highlight and + // diagnose like real advanced mode — the `:` prefix renders as + // plain text and a valid query carries no error overlay (the old + // path let the walker choke on the `:` and mark it red). 
+ use crate::completion::{SchemaCache, TableColumn}; + use crate::dsl::types::Type; + let theme = dark(); + let mut cache = SchemaCache::default(); + cache.tables.push("Customers".into()); + cache.columns.push("name".into()); + cache + .table_columns + .insert("Customers".into(), vec![TableColumn::new("name", Type::Text)]); + let input = ": select name from Customers"; + let view = "select name from Customers"; + let offset = 2; // ": " + let runs = render_input_runs_feedback( + input, + input.len(), + &theme, + &cache, + Mode::Advanced, + view, + view.len(), + offset, + ); + assert!( + runs.iter().all(|r| r.style.fg != Some(theme.tok_error)), + "a valid one-shot query must carry no error overlay: {runs:?}", + ); + assert!( + runs.iter() + .any(|r| r.byte_range.0 == offset && r.style.fg == Some(theme.tok_keyword)), + "the `select` keyword (past the `: ` prefix) is keyword-coloured: {runs:?}", + ); + assert_eq!( + runs.first().unwrap().byte_range.0, + 0, + "the `:` prefix is rendered from byte 0", + ); + } + #[test] fn keyword_token_takes_keyword_colour() { let theme = dark(); diff --git a/src/lib.rs b/src/lib.rs index 0b21d7a..aa39b75 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ pub mod output_render; pub mod persistence; pub mod project; pub mod runtime; +pub mod seed; pub mod theme; pub mod type_change; pub mod ui; diff --git a/src/runtime.rs b/src/runtime.rs index ba9c056..df5b8fb 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1492,6 +1492,10 @@ fn spawn_dsl_dispatch( command: command.clone(), result, }, + Ok(CommandOutcome::Seed(result)) => AppEvent::DslSeedSucceeded { + command: command.clone(), + result, + }, Ok(CommandOutcome::Update(result)) => AppEvent::DslUpdateSucceeded { command: command.clone(), result, @@ -2364,6 +2368,7 @@ enum CommandOutcome { ShowRelationship(Option>), QueryPlan(QueryPlan), Insert(InsertResult), + Seed(crate::db::SeedResult), Update(UpdateResult), Delete(DeleteResult), ChangeColumn(ChangeColumnTypeResult), @@ 
-2911,6 +2916,17 @@ async fn execute_command_typed( .insert(table, columns, values, src) .await .map(CommandOutcome::Insert), + // ADR-0048 (SD1/SD2 Phase 2). + Command::Seed { + table, + target_column, + count, + overrides, + rng_seed, + } => database + .seed(table, target_column, count, overrides, rng_seed, src) + .await + .map(CommandOutcome::Seed), Command::Update { table, assignments, diff --git a/src/seed/check.rs b/src/seed/check.rs new file mode 100644 index 0000000..1294eb1 --- /dev/null +++ b/src/seed/check.rs @@ -0,0 +1,193 @@ +//! Parse a simple ` IN ('a', 'b', …)` CHECK into its allowed +//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds +//! from the permitted values instead of generic text. Anything more +//! complex (ranges, expressions, multi-column, non-literal items) +//! returns `None`; the executor then best-effort generates and lets a +//! violation surface through the friendly-error layer. + +/// Extract the string-literal values of a ` IN ( … )` CHECK. +/// +/// Case-insensitive on the `IN` keyword and the column name; tolerates a +/// quoted column (`"status"`). Every list item must be a single-quoted +/// string literal (`''` is an embedded quote). Returns `None` for any +/// other shape. +#[must_use] +pub fn parse_in_check_values(check: &str, column: &str) -> Option> { + let (in_idx, paren_open) = find_in_paren(check)?; + if !lhs_is_column(check[..in_idx].trim(), column) { + return None; + } + let values = extract_quoted_list(&check[paren_open..])?; + if values.is_empty() { None } else { Some(values) } +} + +const fn is_ident_byte(b: u8) -> bool { + b.is_ascii_alphanumeric() || b == b'_' +} + +/// Find the `IN` keyword (as a word, outside string literals) that is +/// followed by `(`. Returns `(byte index of `IN`, byte index of `(`)`. 
+fn find_in_paren(check: &str) -> Option<(usize, usize)> { + let bytes = check.as_bytes(); + let mut i = 0; + let mut in_quote = false; + while i < bytes.len() { + let b = bytes[i]; + if in_quote { + if b == b'\'' { + in_quote = false; + } + i += 1; + continue; + } + if b == b'\'' { + in_quote = true; + i += 1; + continue; + } + let is_in = (b == b'i' || b == b'I') + && bytes.get(i + 1).is_some_and(|n| *n == b'n' || *n == b'N'); + if is_in { + let before_ok = i == 0 || !is_ident_byte(bytes[i - 1]); + let after = i + 2; + let after_ok = bytes.get(after).is_none_or(|n| !is_ident_byte(*n)); + if before_ok && after_ok { + let mut k = after; + while bytes.get(k).is_some_and(u8::is_ascii_whitespace) { + k += 1; + } + if bytes.get(k) == Some(&b'(') { + return Some((i, k)); + } + } + } + i += 1; + } + None +} + +fn lhs_is_column(lhs: &str, column: &str) -> bool { + let t = lhs.trim(); + let stripped = t + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .unwrap_or(t); + stripped.eq_ignore_ascii_case(column) +} + +/// Parse `( 'a', 'b', … )` from a string starting at `(` into the +/// unescaped literals. `None` if any item is not a pure quoted literal. +fn extract_quoted_list(s: &str) -> Option> { + let mut chars = s.chars().peekable(); + if chars.next()? != '(' { + return None; + } + let mut values = Vec::new(); + loop { + while chars.peek().is_some_and(|c| c.is_whitespace()) { + chars.next(); + } + match chars.peek()? { + ')' => { + chars.next(); + break; + } + '\'' => { + let v = read_quoted(&mut chars)?; + values.push(v); + while chars.peek().is_some_and(|c| c.is_whitespace()) { + chars.next(); + } + match chars.next()? { + ',' => {} + ')' => break, + _ => return None, + } + } + _ => return None, + } + } + Some(values) +} + +/// Read a single-quoted string literal (cursor at the opening `'`), +/// unescaping `''` to `'`. +fn read_quoted(chars: &mut std::iter::Peekable) -> Option { + if chars.next()? 
!= '\'' { + return None; + } + let mut out = String::new(); + loop { + match chars.next()? { + '\'' => { + if chars.peek() == Some(&'\'') { + chars.next(); + out.push('\''); + } else { + return Some(out); + } + } + c => out.push(c), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn parses_a_simple_in_check() { + assert_eq!( + parse_in_check_values("status IN ('active', 'closed')", "status"), + Some(vec!["active".to_string(), "closed".to_string()]) + ); + } + + #[test] + fn tolerates_a_quoted_column_and_lowercase_in() { + assert_eq!( + parse_in_check_values("\"status\" in ('a','b','c')", "status"), + Some(vec!["a".into(), "b".into(), "c".into()]) + ); + } + + #[test] + fn unescapes_embedded_quotes() { + assert_eq!( + parse_in_check_values("note IN ('it''s', 'ok')", "note"), + Some(vec!["it's".into(), "ok".into()]) + ); + } + + #[test] + fn handles_commas_and_parens_inside_literals() { + assert_eq!( + parse_in_check_values("label IN ('a, b', 'c)d')", "label"), + Some(vec!["a, b".into(), "c)d".into()]) + ); + } + + #[test] + fn rejects_non_literal_lists() { + assert_eq!(parse_in_check_values("n IN (1, 2, 3)", "n"), None); + } + + #[test] + fn rejects_non_in_checks() { + assert_eq!(parse_in_check_values("age >= 0", "age"), None); + assert_eq!(parse_in_check_values("length(name) > 0", "name"), None); + } + + #[test] + fn rejects_when_lhs_is_a_different_column() { + assert_eq!(parse_in_check_values("status IN ('a')", "role"), None); + } + + #[test] + fn does_not_trip_on_in_inside_a_word_or_literal() { + // `min` contains "in" but is not the IN operator. + assert_eq!(parse_in_check_values("min(x) > 0", "x"), None); + } +} diff --git a/src/seed/generators.rs b/src/seed/generators.rs new file mode 100644 index 0000000..a5d6a99 --- /dev/null +++ b/src/seed/generators.rs @@ -0,0 +1,584 @@ +//! Value production: turn a [`Generator`] + a seeded RNG into a +//! [`Value`] (ADR-0048 D8/D9). 
Realistic generators come from the +//! `fake` crate (English locale); `product` is hand-rolled (D9, no +//! commerce module exists); dates are generated against a **fixed +//! reference epoch** so a `--seed` run is fully reproducible without +//! depending on the wall clock (D8 bounded windows). +//! +//! The stateful markers ([`Generator::IdentitySequential`], +//! [`Generator::ForeignKeySample`]) are resolved by the executor with +//! database context; if one reaches here un-intercepted it falls back +//! to type-based generation rather than panicking. + +use chrono::{Datelike, NaiveDate}; +use fake::Fake; +use rand::RngExt; + +use crate::dsl::types::Type; +use crate::dsl::value::Value; +use crate::seed::{Generator, SeedRng}; + +/// Fixed anchor for bounded date/datetime windows. Using a constant +/// (rather than `now()`) keeps `--seed` output reproducible across days +/// and makes tests deterministic. It advances with releases. +const REF_YEAR: i32 = 2025; +const REF_MONTH: u32 = 6; +const REF_DAY: u32 = 1; + +/// `~3 years` window for "recent" dates, in days. +const RECENT_WINDOW_DAYS: i64 = 3 * 365; +/// Adult birth window (≈18–80 years ago), in days. +const ADULT_MIN_DAYS: i64 = 18 * 365; +const ADULT_MAX_DAYS: i64 = 80 * 365; + +/// Produce one value for `generator` against destination type `ty`. 
+#[must_use] +pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Value { + use fake::faker::address::en as addr; + use fake::faker::company::en as company; + use fake::faker::internet::en as net; + use fake::faker::job::en as job; + use fake::faker::lorem::en as lorem; + use fake::faker::name::en as name; + use fake::faker::phone_number::en as phone; + + match generator { + Generator::FirstName => Value::Text(name::FirstName().fake_with_rng(rng)), + Generator::LastName => Value::Text(name::LastName().fake_with_rng(rng)), + Generator::FullName => Value::Text(name::Name().fake_with_rng(rng)), + Generator::Email => Value::Text(net::FreeEmail().fake_with_rng(rng)), + Generator::Username => Value::Text(net::Username().fake_with_rng(rng)), + Generator::Password => Value::Text(net::Password(8..16).fake_with_rng(rng)), + Generator::Phone => Value::Text(phone::PhoneNumber().fake_with_rng(rng)), + Generator::City => Value::Text(addr::CityName().fake_with_rng(rng)), + Generator::Country => Value::Text(addr::CountryName().fake_with_rng(rng)), + Generator::StateName => Value::Text(addr::StateName().fake_with_rng(rng)), + Generator::Street => Value::Text(addr::StreetName().fake_with_rng(rng)), + Generator::ZipCode => Value::Text(addr::ZipCode().fake_with_rng(rng)), + Generator::Company => Value::Text(company::CompanyName().fake_with_rng(rng)), + Generator::JobTitle => Value::Text(job::Title().fake_with_rng(rng)), + Generator::ProductName => Value::Text(product_name(rng)), + Generator::Sentence => Value::Text(lorem::Sentence(5..12).fake_with_rng(rng)), + Generator::Paragraph => Value::Text(lorem::Paragraph(2..4).fake_with_rng(rng)), + Generator::Url => { + let word: String = lorem::Word().fake_with_rng(rng); + let suffix: String = net::DomainSuffix().fake_with_rng(rng); + Value::Text(format!("https://{word}.{suffix}")) + } + // Hand-rolled — `fake`'s color module is feature-gated (it pulls + // an extra crate); a hex colour is trivial from the RNG. 
+ Generator::HexColor => Value::Text(format!("#{:06X}", rng.random_range(0..0x0100_0000))), + Generator::CurrencyAmount => currency_amount(ty, rng), + Generator::Age => Value::Number(rng.random_range(18..=80).to_string()), + Generator::SmallInt => Value::Number(rng.random_range(1..=100).to_string()), + Generator::DateRecent => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))), + Generator::DateAdult => { + Value::Text(format_date(random_past_date(rng, ADULT_MIN_DAYS, ADULT_MAX_DAYS))) + } + Generator::DateTimeRecent => Value::Text(random_recent_datetime(rng)), + Generator::Boolean => Value::Bool(rng.random_range(0..2) == 1), + Generator::PickFrom(values) if !values.is_empty() => { + let chosen: &String = pick(rng, values); + literal_to_value(chosen, ty) + } + // The `set between low and high` override (D2). Bounds are + // interpreted per the destination type; the executor has already + // validated they parse, so a defensive parse failure here falls + // back to type-based generation rather than producing junk. + Generator::Range { low, high } => range_value(low, high, ty, rng), + // Un-intercepted markers + an empty pick list → type-based. + Generator::PickFrom(_) + | Generator::IdentitySequential + | Generator::ForeignKeySample + | Generator::Generic => generic_for_type(ty, rng), + } +} + +/// Uniform value in `[low, high]` for the `between` override (D2). +/// +/// Bounds are interpreted by destination type. Returns the type-based +/// fallback for a bound that does not parse or a type that has no range +/// meaning — the executor pre-validates, so this is defensive only. 
+fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value { + match ty { + Type::Int | Type::Serial => parse_int_range(low, high) + .map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string())) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::Real | Type::Decimal => parse_real_range(low, high) + .map(|(lo, hi)| { + let v = rng.random::().mul_add(hi - lo, lo); + Value::Number(format!("{v:.2}")) + }) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::Date => parse_date_range(low, high) + .map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi)))) + .unwrap_or_else(|| generic_for_type(ty, rng)), + Type::DateTime => parse_datetime_range(low, high) + .map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi))) + .unwrap_or_else(|| generic_for_type(ty, rng)), + // text / bool / blob / shortid have no range meaning. + _ => generic_for_type(ty, rng), + } +} + +/// Validate that `low`/`high` parse as bounds for `ty`. +/// +/// The `between` override (D2) is checked by the executor *before* +/// generation. Returns a short human reason on failure (the executor +/// wraps it in a friendly error naming the column), `None` when valid. +#[must_use] +pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option { + let ok = match ty { + Type::Int | Type::Serial => parse_int_range(low, high).is_some(), + Type::Real | Type::Decimal => parse_real_range(low, high).is_some(), + Type::Date => parse_date_range(low, high).is_some(), + Type::DateTime => parse_datetime_range(low, high).is_some(), + // text / bool / blob / shortid have no range meaning. + Type::Text | Type::Bool | Type::Blob | Type::ShortId => false, + }; + if ok { + return None; + } + Some(match ty { + Type::Int | Type::Serial => "expected two whole numbers, e.g. `between 1 and 100`".to_string(), + Type::Real | Type::Decimal => "expected two numbers, e.g. `between 1.0 and 9.99`".to_string(), + Type::Date => "expected two quoted dates, e.g. 
`between '2023-01-01' and '2024-12-31'`".to_string(), + Type::DateTime => { + "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`" + .to_string() + } + Type::Text | Type::Bool | Type::Blob | Type::ShortId => { + "a `between` range only applies to numeric and date/datetime columns".to_string() + } + }) +} + +/// Parse and order an integer range; `None` if either bound is not an +/// integer. +fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> { + let lo: i64 = low.trim().parse().ok()?; + let hi: i64 = high.trim().parse().ok()?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> { + let lo: f64 = low.trim().parse().ok()?; + let hi: f64 = high.trim().parse().ok()?; + if !lo.is_finite() || !hi.is_finite() { + return None; + } + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> { + let lo = NaiveDate::parse_from_str(low.trim(), "%Y-%m-%d").ok()?; + let hi = NaiveDate::parse_from_str(high.trim(), "%Y-%m-%d").ok()?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +/// Accept both the `T`-separated and space-separated datetime spellings +/// the app validates (`bind_datetime` / `validate_datetime`). +fn parse_one_datetime(s: &str) -> Option { + let t = s.trim(); + chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%dT%H:%M:%S") + .or_else(|_| chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%d %H:%M:%S")) + .ok() +} + +fn parse_datetime_range( + low: &str, + high: &str, +) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> { + let lo = parse_one_datetime(low)?; + let hi = parse_one_datetime(high)?; + Some(if lo <= hi { (lo, hi) } else { (hi, lo) }) +} + +/// Uniform date in `[lo, hi]` (inclusive). 
+fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate { + let lo_ce = lo.num_days_from_ce(); + let hi_ce = hi.num_days_from_ce(); + let day = rng.random_range(lo_ce..=hi_ce); + NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo) +} + +/// Uniform datetime in `[lo, hi]`, rendered `YYYY-MM-DDTHH:MM:SS`. +fn random_datetime_between( + rng: &mut SeedRng, + lo: chrono::NaiveDateTime, + hi: chrono::NaiveDateTime, +) -> String { + let lo_s = lo.and_utc().timestamp(); + let hi_s = hi.and_utc().timestamp(); + let secs = if lo_s <= hi_s { + rng.random_range(lo_s..=hi_s) + } else { + rng.random_range(hi_s..=lo_s) + }; + let dt = chrono::DateTime::from_timestamp(secs, 0) + .map_or(lo, |d| d.naive_utc()); + dt.format("%Y-%m-%dT%H:%M:%S").to_string() +} + +/// Type-based fallback generation (D8). Never produces NULL for a +/// generatable type; `blob`/`serial`/`shortid` are handled by the +/// executor (autogen / block guard) and yield NULL here only as a +/// last resort. 
+fn generic_for_type(ty: Type, rng: &mut SeedRng) -> Value { + use fake::faker::lorem::en as lorem; + match ty { + Type::Text => { + let words: Vec = lorem::Words(2..4).fake_with_rng(rng); + Value::Text(words.join(" ")) + } + Type::ShortId => Value::Text(crate::dsl::shortid::generate_with_rng(rng)), + Type::Int => Value::Number(rng.random_range(1..=10_000).to_string()), + Type::Serial => Value::Number(rng.random_range(1..=10_000).to_string()), + Type::Real => { + let n: f64 = rng.random_range(0..100_000) as f64 / 100.0; + Value::Number(format!("{n:.2}")) + } + Type::Decimal => { + let dollars = rng.random_range(0..10_000); + let cents = rng.random_range(0..100); + Value::Number(format!("{dollars}.{cents:02}")) + } + Type::Bool => Value::Bool(rng.random_range(0..2) == 1), + Type::Date => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))), + Type::DateTime => Value::Text(random_recent_datetime(rng)), + Type::Blob => Value::Null, + } +} + +/// Wrap a fixed-list literal as the right `Value` shape for `ty` (used +/// by `PickFrom` — enum / `IN`-CHECK values). +fn literal_to_value(s: &str, ty: Type) -> Value { + match ty { + Type::Int | Type::Serial | Type::Real | Type::Decimal => Value::Number(s.to_string()), + Type::Bool => Value::Bool(matches!(s.to_ascii_lowercase().as_str(), "true" | "1")), + _ => Value::Text(s.to_string()), + } +} + +/// A money-shaped amount: whole for `int`/`serial`, two-decimal for the +/// fractional numeric types. +fn currency_amount(ty: Type, rng: &mut SeedRng) -> Value { + match ty { + Type::Real | Type::Decimal => { + let dollars = rng.random_range(1..=1_000); + let cents = rng.random_range(0..100); + Value::Number(format!("{dollars}.{cents:02}")) + } + // int / serial / anything else numeric → whole amount. 
+ _ => Value::Number(rng.random_range(1..=1_000).to_string()), + } +} + +// — the hand-rolled `product` generator (D9) — + +const PRODUCT_ADJECTIVES: &[&str] = &[ + "Sleek", "Rustic", "Ergonomic", "Handcrafted", "Refined", "Modern", + "Vintage", "Compact", "Premium", "Lightweight", "Durable", "Elegant", + "Sturdy", "Smooth", "Gorgeous", "Intelligent", "Practical", "Awesome", + "Incredible", "Recycled", +]; +const PRODUCT_MATERIALS: &[&str] = &[ + "Wooden", "Copper", "Granite", "Cotton", "Steel", "Leather", "Bamboo", + "Plastic", "Ceramic", "Glass", "Concrete", "Rubber", "Bronze", "Marble", + "Linen", "Silk", "Aluminum", "Wool", "Gold", "Carbon", +]; +const PRODUCT_NOUNS: &[&str] = &[ + "Chair", "Lamp", "Table", "Bottle", "Backpack", "Keyboard", "Mug", + "Shoes", "Jacket", "Watch", "Wallet", "Bench", "Hat", "Gloves", + "Towel", "Ball", "Bike", "Knife", "Pillow", "Blanket", +]; + +fn product_name(rng: &mut SeedRng) -> String { + format!( + "{} {} {}", + pick(rng, PRODUCT_ADJECTIVES), + pick(rng, PRODUCT_MATERIALS), + pick(rng, PRODUCT_NOUNS), + ) +} + +// — bounded dates (D8) — + +const fn reference_date() -> NaiveDate { + match NaiveDate::from_ymd_opt(REF_YEAR, REF_MONTH, REF_DAY) { + Some(d) => d, + None => panic!("reference date constants must be valid"), + } +} + +/// A date between `min_days_ago` and `max_days_ago` before the +/// reference epoch (inclusive). +fn random_past_date(rng: &mut SeedRng, min_days_ago: i64, max_days_ago: i64) -> NaiveDate { + let days_ago = rng.random_range(min_days_ago..=max_days_ago); + let ce = reference_date().num_days_from_ce(); + let target = ce - i32::try_from(days_ago).unwrap_or(0); + NaiveDate::from_num_days_from_ce_opt(target).unwrap_or_else(reference_date) +} + +fn format_date(date: NaiveDate) -> String { + date.format("%Y-%m-%d").to_string() +} + +/// A recent datetime: a recent date plus a random time-of-day, rendered +/// as `YYYY-MM-DDTHH:MM:SS`. 
+fn random_recent_datetime(rng: &mut SeedRng) -> String { + let date = random_past_date(rng, 0, RECENT_WINDOW_DAYS); + let h = rng.random_range(0..24); + let m = rng.random_range(0..60); + let s = rng.random_range(0..60); + format!("{}T{h:02}:{m:02}:{s:02}", format_date(date)) +} + +/// Pick a uniformly random element from a non-empty slice. +fn pick<'a, T>(rng: &mut SeedRng, items: &'a [T]) -> &'a T { + &items[rng.random_range(0..items.len())] +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::seed::make_rng; + use pretty_assertions::assert_eq; + + fn gen_once(generator: &Generator, ty: Type, seed: u64) -> Value { + let mut rng = make_rng(Some(seed)); + generate_value(generator, ty, &mut rng) + } + + #[test] + fn generation_is_deterministic_for_a_fixed_seed() { + for generator in [ + Generator::FullName, + Generator::Email, + Generator::ProductName, + Generator::DateRecent, + Generator::CurrencyAmount, + ] { + let a = gen_once(&generator, Type::Text, 7); + let b = gen_once(&generator, Type::Text, 7); + assert_eq!(a, b, "{generator:?} must reproduce for a fixed seed"); + } + } + + #[test] + fn text_generators_produce_nonempty_text() { + for generator in [ + Generator::FirstName, + Generator::LastName, + Generator::FullName, + Generator::Email, + Generator::Username, + Generator::Company, + Generator::City, + Generator::ProductName, + ] { + let v = gen_once(&generator, Type::Text, 3); + match v { + Value::Text(s) => assert!(!s.trim().is_empty(), "{generator:?} produced empty text"), + other => panic!("{generator:?} produced non-text {other:?}"), + } + } + } + + #[test] + fn email_looks_like_an_email() { + let v = gen_once(&Generator::Email, Type::Text, 11); + let Value::Text(s) = v else { panic!("not text") }; + assert!(s.contains('@'), "email should contain @: {s}"); + } + + #[test] + fn product_name_is_three_capitalised_words() { + let v = gen_once(&Generator::ProductName, Type::Text, 99); + let Value::Text(s) = v else { panic!("not text") }; + let 
words: Vec<&str> = s.split(' ').collect(); + assert_eq!(words.len(), 3, "product name should be 3 words: {s}"); + for w in words { + assert!(w.chars().next().unwrap().is_ascii_uppercase(), "word `{w}` not capitalised"); + } + } + + #[test] + fn recent_dates_fall_within_the_bounded_window() { + let mut rng = make_rng(Some(1)); + let earliest = reference_date() + .checked_sub_days(chrono::Days::new(RECENT_WINDOW_DAYS as u64)) + .unwrap(); + let latest = reference_date(); + for _ in 0..200 { + let v = generate_value(&Generator::DateRecent, Type::Date, &mut rng); + let Value::Text(s) = v else { panic!("date not text") }; + let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date"); + assert!(d >= earliest && d <= latest, "date {d} outside recent window"); + } + } + + #[test] + fn dob_dates_fall_within_the_adult_window() { + let mut rng = make_rng(Some(2)); + let earliest = reference_date() + .checked_sub_days(chrono::Days::new(ADULT_MAX_DAYS as u64)) + .unwrap(); + let latest = reference_date() + .checked_sub_days(chrono::Days::new(ADULT_MIN_DAYS as u64)) + .unwrap(); + for _ in 0..200 { + let v = generate_value(&Generator::DateAdult, Type::Date, &mut rng); + let Value::Text(s) = v else { panic!("date not text") }; + let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date"); + assert!(d >= earliest && d <= latest, "dob {d} outside adult window"); + } + } + + #[test] + fn datetime_is_iso_shaped() { + let v = gen_once(&Generator::DateTimeRecent, Type::DateTime, 5); + let Value::Text(s) = v else { panic!("not text") }; + assert!(s.contains('T'), "datetime needs a T separator: {s}"); + // Parses as a naive datetime. 
+ chrono::NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S") + .unwrap_or_else(|e| panic!("invalid datetime {s}: {e}")); + } + + #[test] + fn currency_is_whole_for_int_and_fractional_for_decimal() { + let Value::Number(int_amt) = gen_once(&Generator::CurrencyAmount, Type::Int, 4) else { + panic!("not a number") + }; + assert!(!int_amt.contains('.'), "int currency should be whole: {int_amt}"); + let Value::Number(dec_amt) = gen_once(&Generator::CurrencyAmount, Type::Decimal, 4) else { + panic!("not a number") + }; + assert!(dec_amt.contains('.'), "decimal currency should have cents: {dec_amt}"); + } + + #[test] + fn age_is_in_human_range() { + let mut rng = make_rng(Some(8)); + for _ in 0..100 { + let Value::Number(a) = generate_value(&Generator::Age, Type::Int, &mut rng) else { + panic!("age not a number") + }; + let n: i64 = a.parse().unwrap(); + assert!((18..=80).contains(&n), "age {n} out of range"); + } + } + + #[test] + fn pick_from_chooses_a_listed_value() { + let generator = Generator::PickFrom(vec!["active".into(), "closed".into()]); + let mut rng = make_rng(Some(6)); + for _ in 0..50 { + let Value::Text(s) = generate_value(&generator, Type::Text, &mut rng) else { + panic!("not text") + }; + assert!(matches!(s.as_str(), "active" | "closed"), "unexpected pick {s}"); + } + } + + #[test] + fn pick_from_wraps_numeric_values_as_numbers() { + let generator = Generator::PickFrom(vec!["1".into(), "2".into(), "3".into()]); + let mut rng = make_rng(Some(6)); + let v = generate_value(&generator, Type::Int, &mut rng); + assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}"); + } + + #[test] + fn int_range_stays_within_inclusive_bounds() { + let g = Generator::Range { low: "10".into(), high: "20".into() }; + let mut rng = make_rng(Some(5)); + for _ in 0..200 { + let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else { + panic!("int range should be a number") + }; + let n: i64 = s.parse().unwrap(); + 
assert!((10..=20).contains(&n), "int {n} out of [10,20]"); + } + } + + #[test] + fn real_range_stays_within_bounds_and_has_cents() { + let g = Generator::Range { low: "1.0".into(), high: "9.0".into() }; + let mut rng = make_rng(Some(5)); + for _ in 0..200 { + let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else { + panic!("real range should be a number") + }; + let n: f64 = s.parse().unwrap(); + assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]"); + assert!(s.contains('.'), "real should be formatted with cents: {s}"); + } + } + + #[test] + fn date_range_stays_within_quoted_bounds() { + let g = Generator::Range { + low: "2023-01-01".into(), + high: "2023-12-31".into(), + }; + let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap(); + let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap(); + let mut rng = make_rng(Some(9)); + for _ in 0..200 { + let Value::Text(s) = generate_value(&g, Type::Date, &mut rng) else { + panic!("date range should be text") + }; + let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid date"); + assert!(d >= lo && d <= hi, "date {d} out of range"); + } + } + + #[test] + fn reversed_bounds_are_tolerated() { + let g = Generator::Range { low: "20".into(), high: "10".into() }; + let mut rng = make_rng(Some(1)); + let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else { + panic!("number") + }; + let n: i64 = s.parse().unwrap(); + assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}"); + } + + #[test] + fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() { + // Numeric / date / datetime accept; text / bool reject. 
+ assert!(range_bounds_reason(Type::Int, "1", "10").is_none()); + assert!(range_bounds_reason(Type::Real, "1.5", "9.9").is_none()); + assert!(range_bounds_reason(Type::Date, "2023-01-01", "2024-01-01").is_none()); + assert!(range_bounds_reason(Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00").is_none()); + // Non-numeric bound on a numeric column. + assert!(range_bounds_reason(Type::Int, "abc", "10").is_some()); + // A range on a text column is meaningless. + assert!(range_bounds_reason(Type::Text, "a", "z").is_some()); + assert!(range_bounds_reason(Type::Bool, "0", "1").is_some()); + } + + #[test] + fn markers_fall_back_to_type_based_generation() { + // An un-intercepted marker must not panic; it generates by type. + let v = gen_once(&Generator::IdentitySequential, Type::Text, 1); + assert!(matches!(v, Value::Text(_))); + let v = gen_once(&Generator::ForeignKeySample, Type::Int, 1); + assert!(matches!(v, Value::Number(_))); + } + + #[test] + fn generic_fallback_matches_each_type() { + let mut rng = make_rng(Some(0)); + assert!(matches!(generate_value(&Generator::Generic, Type::Text, &mut rng), Value::Text(_))); + assert!(matches!(generate_value(&Generator::Generic, Type::Int, &mut rng), Value::Number(_))); + assert!(matches!(generate_value(&Generator::Generic, Type::Bool, &mut rng), Value::Bool(_))); + assert!(matches!(generate_value(&Generator::Generic, Type::Blob, &mut rng), Value::Null)); + // shortid fallback is a valid base58 id. + let Value::Text(sid) = generate_value(&Generator::Generic, Type::ShortId, &mut rng) else { + panic!("shortid not text") + }; + assert!(crate::dsl::shortid::validate(&sid).is_ok(), "invalid shortid {sid}"); + } +} diff --git a/src/seed/heuristics.rs b/src/seed/heuristics.rs new file mode 100644 index 0000000..d62f78a --- /dev/null +++ b/src/seed/heuristics.rs @@ -0,0 +1,440 @@ +//! Generator selection: the name-aware, type-gated catalogue (ADR-0048 +//! D7), table-context disambiguation for `name`/`title` (D11), the +//! 
identifier-family rule (D10), and enum-ish detection (D12). +//! +//! Selection is **token-based**: a column name is split on `_`, `-` and +//! camelCase boundaries, lowercased, and matched against an +//! ordered, most-specific-first list. Each rule is **type-gated** — a +//! name match only fires when the column's type is compatible, so a +//! column called `email` typed `int` falls through to type-based +//! generation rather than producing a string. Documented false-positive +//! guards keep `username`/`filename` away from the bare person-name +//! rule. + +use tracing::trace; + +use crate::dsl::types::Type; +use crate::seed::{ColumnSpec, Generator}; + +/// Choose the generator for a column (ADR-0048 D7/D10/D11/D12). +/// +/// Precedence: foreign keys and `IN`-CHECK columns are resolved first +/// (the executor / a fixed list), then the ordered name catalogue, then +/// the type-based fallback. +#[must_use] +pub fn choose_generator(table: &str, col: &ColumnSpec) -> Generator { + let generator = choose_generator_inner(table, col); + trace!( + table = table, + column = %col.name, + ty = %col.ty, + chosen = ?generator, + "seed: chose generator for column" + ); + generator +} + +fn choose_generator_inner(table: &str, col: &ColumnSpec) -> Generator { + // FK columns are filled by sampling existing parent rows (D14) — + // the executor owns that; generation here would be wrong. + if col.is_foreign_key { + return Generator::ForeignKeySample; + } + // A simple `col IN (…)` CHECK becomes the value source (D17), so the + // common enum-as-CHECK pattern just works. + if let Some(values) = &col.check_in_values + && !values.is_empty() + { + return Generator::PickFrom(values.clone()); + } + + let toks = tokens(&col.name); + match_name_generator(table, &toks, col.ty).unwrap_or(Generator::Generic) +} + +/// Whether a column name looks like an enum / fixed-value set that has +/// no sensible generic generator (D12). 
Used by the executor to drive +/// the post-seed advisory; such columns still receive generic text. +#[must_use] +pub fn is_enum_ish(name: &str) -> bool { + const ENUM_TOKENS: &[&str] = &[ + "role", "status", "state", "type", "kind", "category", "level", + "tier", "stage", "priority", "gender", + ]; + let toks = tokens(name); + toks.iter().any(|t| ENUM_TOKENS.contains(&t.as_str())) +} + +/// The ordered, most-specific-first name catalogue. Returns `None` when +/// nothing matches (→ type-based fallback) or when a name matches but +/// its type gate fails. +fn match_name_generator(table: &str, toks: &[String], ty: Type) -> Option { + let text = type_is_text(ty); + let numeric = ty.is_numeric(); + + // — Person — + if text && (has_any(toks, &["fname", "firstname"]) || has_seq(toks, "first", "name")) { + return Some(Generator::FirstName); + } + if text + && (has_any(toks, &["lname", "lastname", "surname"]) || has_seq(toks, "last", "name")) + { + return Some(Generator::LastName); + } + if text && (has_any(toks, &["username", "login", "handle"]) || has_seq(toks, "user", "name")) { + return Some(Generator::Username); + } + if text && has_any(toks, &["email", "emails"]) { + return Some(Generator::Email); + } + if text && has_any(toks, &["password", "passwd", "pwd"]) { + return Some(Generator::Password); + } + if text && has_any(toks, &["phone", "mobile", "cell", "tel", "telephone"]) { + return Some(Generator::Phone); + } + + // — bare `name` / `title` → table-context (D11) — + // Guarded against the `*_name` false positives handled above (those + // returned already) plus structural names like `filename`/`table_name`. 
+ if text && has_any(toks, &["name", "title"]) && !is_name_false_positive(toks) { + return Some(name_by_table_context(table)); + } + + // — Address — + if text && has_any(toks, &["city", "town"]) { + return Some(Generator::City); + } + if text && has_token(toks, "country") { + return Some(Generator::Country); + } + // `province` / explicit `state_name`/`state_abbr` → a real state name. + // Bare `state` is left to enum-ish (it usually means status), so we + // require `province` or a `state` token paired with name/abbr. + if text && (has_token(toks, "province") || (has_token(toks, "state") && has_any(toks, &["name", "abbr"]))) { + return Some(Generator::StateName); + } + if text && has_any(toks, &["street", "address", "addr"]) { + return Some(Generator::Street); + } + if text && has_any(toks, &["zip", "zipcode", "postcode", "postal"]) { + return Some(Generator::ZipCode); + } + + // — Organisation / job — + if text && has_any(toks, &["company", "employer", "org", "organization", "organisation"]) { + return Some(Generator::Company); + } + if text && has_any(toks, &["job", "position", "profession", "occupation"]) { + return Some(Generator::JobTitle); + } + + // — Free text — + if text && has_any(toks, &["description", "bio", "notes", "note", "summary", "comment", "comments", "about"]) { + return Some(Generator::Sentence); + } + if text && has_any(toks, &["url", "website", "homepage", "link"]) { + return Some(Generator::Url); + } + if text && has_any(toks, &["color", "colour"]) { + return Some(Generator::HexColor); + } + + // — Numeric — + if numeric && has_any(toks, &["price", "amount", "cost", "salary", "balance", "total", "fee", "revenue"]) { + return Some(Generator::CurrencyAmount); + } + if numeric && has_token(toks, "age") { + return Some(Generator::Age); + } + if numeric && has_any(toks, &["quantity", "qty", "stock", "count"]) { + return Some(Generator::SmallInt); + } + + // — Temporal (bounded, D8) — + if matches!(ty, Type::Date) && has_any(toks, &["dob", 
"birthday", "birthdate"]) { + return Some(Generator::DateAdult); + } + if matches!(ty, Type::Date) && has_token(toks, "date") { + return Some(Generator::DateRecent); + } + if matches!(ty, Type::DateTime) && has_any(toks, &["timestamp", "datetime", "at"]) { + return Some(Generator::DateTimeRecent); + } + + // — Boolean — + if matches!(ty, Type::Bool) + && (toks.first().map(String::as_str) == Some("is") + || toks.first().map(String::as_str) == Some("has") + || has_any(toks, &["active", "enabled", "verified", "deleted"])) + { + return Some(Generator::Boolean); + } + + // — Identifier family (D10) — late so phone/email/etc. win first. + if matches!(ty, Type::Int | Type::Text) && is_identifier_name(toks) { + return Some(Generator::IdentitySequential); + } + + None +} + +/// Resolve a bare `name`/`title` column by the **table** it lives in +/// (D11): product-ish → a product name, company-ish → a company name, +/// person-ish → a person name, otherwise a generic person name. +fn name_by_table_context(table: &str) -> Generator { + let toks = tokens(table); + const PRODUCTY: &[&str] = &[ + "product", "products", "item", "items", "good", "goods", + "merchandise", "catalog", "catalogue", "inventory", "sku", "skus", + ]; + const COMPANYISH: &[&str] = &[ + "company", "companies", "vendor", "vendors", "supplier", + "suppliers", "manufacturer", "manufacturers", "brand", "brands", + "organization", "organisation", + ]; + const PERSONISH: &[&str] = &[ + "user", "users", "customer", "customers", "person", "people", + "employee", "employees", "member", "members", "contact", + "contacts", "author", "authors", "student", "students", + ]; + if has_any(&toks, PRODUCTY) { + Generator::ProductName + } else if has_any(&toks, COMPANYISH) { + Generator::Company + } else if has_any(&toks, PERSONISH) { + Generator::FullName + } else { + // Unknown table: a person name is the most generally useful + // default for a bare `name` column. 
/// Names containing `name`/`title` that are NOT person names. The
/// specific `first`/`last`/`user` cases are matched earlier and return
/// before this guard; this catches structural names.
///
/// True when the tokens contain both a structural qualifier (`file`,
/// `table`, `host`, …) and a `name`/`title` token, e.g. `table_name`.
fn is_name_false_positive(toks: &[String]) -> bool {
    const NON_PERSON: &[&str] = &[
        "file", "table", "host", "domain", "field", "class", "tag",
        "event", "path", "col", "column", "db", "schema", "index", "key",
        "page", "node", "type",
    ];
    has_any(toks, NON_PERSON) && has_any(toks, &["name", "title"])
}

/// Identifier-family names (D10): treated as unique identifiers. FK
/// columns never reach here (handled in [`choose_generator`]).
fn is_identifier_name(toks: &[String]) -> bool {
    const ID_TOKENS: &[&str] = &["id", "code", "sku", "ref", "reference", "barcode"];
    if has_any(toks, ID_TOKENS) {
        return true;
    }
    // `*_number` / `*_no` as an identifier, but only when qualified
    // (a bare `number`/`no` is too ambiguous, and `phone_number` already
    // matched the phone rule earlier).
    toks.len() >= 2 && has_any(toks, &["number", "no"])
}

// — token utilities —

/// Split a column/table name into lowercase tokens on `_`, `-`, spaces,
/// and camelCase boundaries.
///
/// `created_at` → `["created", "at"]`; `firstName` → `["first", "name"]`;
/// `DOB` → `["dob"]`. A run of capitals is never split (`HTTPStatus` →
/// `["httpstatus"]`) because a boundary is only recognised where an
/// uppercase letter follows a lowercase letter or digit. Empty segments
/// (leading, trailing or doubled separators) produce no token.
fn tokens(name: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut cur = String::new();
    let mut prev_was_lower_or_digit = false;
    for ch in name.chars() {
        if matches!(ch, '_' | '-' | ' ') {
            if !cur.is_empty() {
                out.push(std::mem::take(&mut cur));
            }
            prev_was_lower_or_digit = false;
            continue;
        }
        // camelCase boundary: an uppercase letter following a lowercase
        // letter or digit starts a new token.
        if ch.is_ascii_uppercase() && prev_was_lower_or_digit && !cur.is_empty() {
            out.push(std::mem::take(&mut cur));
        }
        cur.push(ch.to_ascii_lowercase());
        prev_was_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
    }
    if !cur.is_empty() {
        out.push(cur);
    }
    out
}

/// Whether the token list contains exactly `t`.
fn has_token(toks: &[String], t: &str) -> bool {
    toks.iter().any(|x| x == t)
}

/// Whether the token list contains at least one of `candidates`.
fn has_any(toks: &[String], candidates: &[&str]) -> bool {
    candidates.iter().any(|c| has_token(toks, c))
}

/// Whether `a` is immediately followed by `b` in the token list — for
/// matching split compound names like `first name` / `user name`.
fn has_seq(toks: &[String], a: &str, b: &str) -> bool {
    toks.windows(2).any(|w| w[0] == a && w[1] == b)
}
Type::Text), Generator::Username); + assert_eq!(choose("users", "phone", Type::Text), Generator::Phone); + assert_eq!(choose("accounts", "password", Type::Text), Generator::Password); + } + + #[test] + fn address_fields_map_correctly() { + assert_eq!(choose("a", "city", Type::Text), Generator::City); + assert_eq!(choose("a", "country", Type::Text), Generator::Country); + assert_eq!(choose("a", "street", Type::Text), Generator::Street); + assert_eq!(choose("a", "zip", Type::Text), Generator::ZipCode); + assert_eq!(choose("a", "postcode", Type::Text), Generator::ZipCode); + assert_eq!(choose("a", "province", Type::Text), Generator::StateName); + } + + #[test] + fn bare_name_uses_table_context() { + // D11 — the same column name resolves differently by table. + assert_eq!(choose("products", "name", Type::Text), Generator::ProductName); + assert_eq!(choose("items", "title", Type::Text), Generator::ProductName); + assert_eq!(choose("users", "name", Type::Text), Generator::FullName); + assert_eq!(choose("customers", "name", Type::Text), Generator::FullName); + assert_eq!(choose("vendors", "name", Type::Text), Generator::Company); + // Unknown table → person name default. + assert_eq!(choose("widgets", "name", Type::Text), Generator::FullName); + } + + #[test] + fn name_false_positives_do_not_become_person_names() { + // These must NOT resolve to a person/product name. + assert_ne!(choose("files", "filename", Type::Text), Generator::FullName); + assert_ne!(choose("meta", "table_name", Type::Text), Generator::FullName); + // They fall through to a generic / non-person generator. + assert_eq!(choose("files", "filename", Type::Text), Generator::Generic); + } + + #[test] + fn numeric_name_heuristics_are_type_gated() { + // `price` on a numeric column → currency; on text → falls through. 
+ assert_eq!(choose("p", "price", Type::Int), Generator::CurrencyAmount); + assert_eq!(choose("p", "price", Type::Decimal), Generator::CurrencyAmount); + assert_eq!(choose("p", "price", Type::Text), Generator::Generic); + assert_eq!(choose("u", "age", Type::Int), Generator::Age); + assert_eq!(choose("o", "quantity", Type::Int), Generator::SmallInt); + } + + #[test] + fn email_on_wrong_type_falls_through() { + // The type gate: an `email` int column does NOT get a string — + // it falls through to type-based generation. + assert_eq!(choose("u", "email", Type::Int), Generator::Generic); + } + + #[test] + fn temporal_fields_are_bounded_and_type_gated() { + assert_eq!(choose("u", "dob", Type::Date), Generator::DateAdult); + assert_eq!(choose("o", "order_date", Type::Date), Generator::DateRecent); + assert_eq!(choose("o", "created_at", Type::DateTime), Generator::DateTimeRecent); + assert_eq!(choose("o", "timestamp", Type::DateTime), Generator::DateTimeRecent); + // Wrong type → not a date generator. 
+ assert_eq!(choose("o", "order_date", Type::Int), Generator::Generic); + } + + #[test] + fn boolean_fields_map_to_boolean() { + assert_eq!(choose("u", "is_active", Type::Bool), Generator::Boolean); + assert_eq!(choose("u", "has_paid", Type::Bool), Generator::Boolean); + assert_eq!(choose("u", "enabled", Type::Bool), Generator::Boolean); + } + + #[test] + fn identifier_family_is_unique_sequential() { + assert_eq!(choose("t", "code", Type::Text), Generator::IdentitySequential); + assert_eq!(choose("t", "sku", Type::Text), Generator::IdentitySequential); + assert_eq!(choose("t", "order_number", Type::Int), Generator::IdentitySequential); + assert_eq!(choose("t", "external_id", Type::Int), Generator::IdentitySequential); + } + + #[test] + fn foreign_key_columns_defer_to_executor() { + let mut spec = ColumnSpec::plain("user_id", Type::Int); + spec.is_foreign_key = true; + assert_eq!(choose_generator("orders", &spec), Generator::ForeignKeySample); + } + + #[test] + fn check_in_values_become_pick_from() { + let mut spec = ColumnSpec::plain("status", Type::Text); + spec.check_in_values = Some(vec!["active".into(), "closed".into()]); + assert_eq!( + choose_generator("orders", &spec), + Generator::PickFrom(vec!["active".into(), "closed".into()]) + ); + } + + #[test] + fn enum_ish_names_are_detected_for_the_advisory() { + assert!(is_enum_ish("status")); + assert!(is_enum_ish("role")); + assert!(is_enum_ish("order_state")); + assert!(is_enum_ish("priority")); + assert!(!is_enum_ish("email")); + assert!(!is_enum_ish("first_name")); + } + + #[test] + fn enum_ish_columns_fall_through_to_generic() { + // No special generator — generic text + the advisory flags them. 
+ assert_eq!(choose("orders", "status", Type::Text), Generator::Generic); + assert_eq!(choose("users", "role", Type::Text), Generator::Generic); + } + + #[test] + fn unmatched_columns_use_type_based_fallback() { + assert_eq!(choose("t", "some_freeform_field", Type::Text), Generator::Generic); + } + + #[test] + fn tokenizer_splits_on_all_boundaries() { + assert_eq!(tokens("created_at"), vec!["created", "at"]); + assert_eq!(tokens("firstName"), vec!["first", "name"]); + assert_eq!(tokens("DOB"), vec!["dob"]); + assert_eq!(tokens("user-email"), vec!["user", "email"]); + assert_eq!(tokens("HTTPStatus"), vec!["httpstatus"]); + } +} diff --git a/src/seed/mod.rs b/src/seed/mod.rs new file mode 100644 index 0000000..1a4d424 --- /dev/null +++ b/src/seed/mod.rs @@ -0,0 +1,213 @@ +//! Pure fake-data generation library for the `seed` command (ADR-0048). +//! +//! This module is the **generation half** of `seed`: given a column's +//! shape (name, type, constraints), it chooses a *generator* and turns +//! a seeded RNG into plausible [`Value`]s. It is deliberately decoupled +//! from `db.rs` — it knows nothing about SQLite, the worker thread, or +//! persistence — so it stays pure and unit-testable, with exact-value +//! assertions made possible by the seedable RNG (ADR-0048 D4). +//! +//! The executor (`db.rs::do_seed`) adapts the real schema into +//! [`ColumnSpec`]s, calls [`choose_generator`] per column, and then +//! [`generate_value`] per row — except for the *stateful* markers +//! ([`Generator::IdentitySequential`], [`Generator::ForeignKeySample`]) +//! which need database context (existing rows, the running sequence) +//! and so are resolved by the executor, not here. +//! +//! Layout: +//! - this file — the public types ([`ColumnSpec`], [`Generator`], +//! [`SeedRng`]) and the RNG constructor. +//! - [`heuristics`] — [`choose_generator`] + the name-aware catalogue +//! (D7), table-context disambiguation (D11), identifier (D10) and +//! enum-ish (D12) detection. +//! 
- [`generators`] — [`generate_value`]: per-generator value +//! production, the hand-rolled `product` generator (D9) and the +//! bounded date windows (D8). + +mod check; +mod generators; +mod heuristics; +mod vocabulary; + +pub use check::parse_in_check_values; +pub use generators::{generate_value, range_bounds_reason}; +pub use heuristics::{choose_generator, is_enum_ish}; +pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS}; + +use rand::rngs::StdRng; +use rand::{RngExt, SeedableRng}; + +use crate::dsl::types::Type; + +/// The RNG that drives all seed generation. +/// +/// A single seeded `StdRng` feeds both `fake`'s `fake_with_rng` and the +/// hand-rolled generators, so a `--seed` value fully determines the +/// output (ADR-0048 D4). `rand 0.10`'s `StdRng` satisfies `fake`'s +/// `RngExt` bound (it re-exports `rand::RngExt`), so the same handle +/// works on both sides. +pub type SeedRng = StdRng; + +/// Build the seed RNG. +/// +/// With `Some(seed)` the stream is reproducible; with `None` it is +/// seeded from entropy (via the thread RNG) so each run differs. +/// Seeding `StdRng` from a single `u64` in both cases keeps +/// construction uniform and avoids `rand`'s churn-prone from-entropy +/// constructors. +#[must_use] +pub fn make_rng(seed: Option) -> SeedRng { + let seed = seed.unwrap_or_else(|| rand::rng().random::()); + StdRng::seed_from_u64(seed) +} + +/// A column described in just enough detail to choose and run a +/// generator. Built by the executor from the real schema; kept +/// independent of `db.rs`'s `ReadColumn` so this library stays pure. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ColumnSpec { + /// The column's name — the primary signal for generator choice. + pub name: String, + /// The user-facing playground type — gates every name heuristic. + pub ty: Type, + /// `NOT NULL` — the executor uses this for the block guard (D1); + /// generation always produces a value, so it is informational here. 
/// The chosen generation strategy for a column.
///
/// Most variants are *stateless* — [`generate_value`] turns them into a
/// [`Value`] from the RNG alone. Two are *stateful markers* that the
/// executor must intercept (they need database context):
/// [`Self::IdentitySequential`] (the running `MAX+offset` sequence,
/// D10) and [`Self::ForeignKeySample`] (draw from existing parent
/// rows, D14). For safety [`generate_value`] treats an un-intercepted
/// marker as [`Self::Generic`] rather than panicking.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Generator {
    // — Person —
    FirstName,
    LastName,
    /// A full person name (table-context default for `name`/`title`).
    FullName,
    Email,
    Username,
    Password,
    Phone,
    // — Address —
    City,
    Country,
    StateName,
    Street,
    ZipCode,
    // — Organisation / commerce —
    Company,
    JobTitle,
    /// Hand-rolled `{adjective} {material} {noun}` (D9) — `fake` has no
    /// commerce module.
    ProductName,
    // — Free text —
    Sentence,
    Paragraph,
    Url,
    HexColor,
    // — Numeric —
    /// A money-shaped amount (whole for `int`, two-decimal otherwise).
    CurrencyAmount,
    /// A plausible human age (18–80).
    Age,
    /// A small positive integer (quantities, counts).
    SmallInt,
    // — Temporal (bounded windows, D8) —
    /// A date within the last few years.
    DateRecent,
    /// A date in an adult birth window (≈18–80 years ago) — for `dob`.
    DateAdult,
    /// A datetime within the last few years.
    DateTimeRecent,
    // — Boolean —
    Boolean,
    // — Stateful markers (executor-resolved) —
    /// Unique sequential identifier (D10): the executor supplies
    /// `MAX(col)+offset`. Chosen for identifier-named non-FK columns.
    IdentitySequential,
    /// FK column (D14): the executor samples an existing parent key.
    ForeignKeySample,
    // — List / range (the `set` override clause, D2) —
    /// Uniform pick from a fixed list of raw literal strings — a simple
    /// `IN`-CHECK (D17), an enum, or a `set … in (…)` / single-value
    /// `=` override (D2).
    PickFrom(Vec<String>),
    /// Uniform value in `[low, high]` — the `set … between low and
    /// high` override (D2). Bounds are the raw literal strings; their
    /// interpretation (int / real / date / datetime) follows the
    /// destination column type at generation time. The executor
    /// validates type-compatibility *before* generation (a bound that
    /// does not parse for the column type is a friendly error), so
    /// [`generate_value`] only ever sees parseable bounds; a defensive
    /// parse failure falls back to type-based generation.
    Range { low: String, high: String },
    /// Type-based fallback (D8) when no name heuristic matches.
    Generic,
}
+ +use crate::seed::Generator; + +/// The curated generator names, lowercase and **sorted** (invariant +/// pinned by a test — completion relies on stable order and a +/// case-insensitive prefix match against these canonical spellings). +pub const KNOWN_GENERATORS: &[&str] = &[ + "age", + "bool", + "city", + "color", + "company", + "country", + "date", + "datetime", + "email", + "first_name", + "job", + "last_name", + "name", + "paragraph", + "password", + "phone", + "price", + "product", + "sentence", + "state", + "street", + "url", + "username", + "zip", +]; + +/// Map a generator name (case-insensitive) to its [`Generator`]. +/// +/// `None` for an unrecognised name — the executor turns that into a +/// friendly "unknown generator" error naming the curated set. A few +/// common spelling variants (`firstname`, `lastname`, `surname`, +/// `colour`, `full_name`) are accepted as aliases even though only the +/// canonical spelling is offered for completion. +#[must_use] +pub fn generator_for_name(name: &str) -> Option { + let n = name.to_ascii_lowercase(); + let g = match n.as_str() { + "name" | "full_name" => Generator::FullName, + "first_name" | "firstname" => Generator::FirstName, + "last_name" | "lastname" | "surname" => Generator::LastName, + "email" => Generator::Email, + "username" => Generator::Username, + "password" => Generator::Password, + "phone" => Generator::Phone, + "city" => Generator::City, + "country" => Generator::Country, + "state" => Generator::StateName, + "street" => Generator::Street, + "zip" => Generator::ZipCode, + "company" => Generator::Company, + "job" => Generator::JobTitle, + "product" => Generator::ProductName, + "sentence" => Generator::Sentence, + "paragraph" => Generator::Paragraph, + "url" => Generator::Url, + "color" | "colour" => Generator::HexColor, + "price" => Generator::CurrencyAmount, + "age" => Generator::Age, + "date" => Generator::DateRecent, + "datetime" => Generator::DateTimeRecent, + "bool" => Generator::Boolean, + _ => 
return None, + }; + Some(g) +} + +/// Whether `partial` is a case-insensitive prefix of at least one known +/// generator name. +/// +/// An empty `partial` matches every generator (it is a prefix of all) — +/// mirrors `is_known_function_prefix`. Used by the validity indicator to +/// avoid flagging a still-being-typed name. +#[must_use] +pub fn is_known_generator_prefix(partial: &str) -> bool { + let lowered = partial.to_ascii_lowercase(); + KNOWN_GENERATORS.iter().any(|g| g.starts_with(&lowered)) +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn known_generators_is_sorted_and_lowercase() { + let mut sorted = KNOWN_GENERATORS.to_vec(); + sorted.sort_unstable(); + assert_eq!(KNOWN_GENERATORS, sorted.as_slice(), "must be sorted"); + for g in KNOWN_GENERATORS { + assert_eq!(*g, g.to_ascii_lowercase(), "must be lowercase: {g}"); + } + } + + #[test] + fn every_listed_name_maps_to_a_generator() { + for g in KNOWN_GENERATORS { + assert!( + generator_for_name(g).is_some(), + "listed generator name `{g}` has no mapping" + ); + } + } + + #[test] + fn mapping_is_case_insensitive_and_has_aliases() { + assert_eq!(generator_for_name("EMAIL"), Some(Generator::Email)); + assert_eq!(generator_for_name("FirstName"), Some(Generator::FirstName)); + assert_eq!(generator_for_name("colour"), Some(Generator::HexColor)); + assert_eq!(generator_for_name("full_name"), Some(Generator::FullName)); + } + + #[test] + fn unknown_name_has_no_mapping() { + assert_eq!(generator_for_name("bogus"), None); + assert_eq!(generator_for_name(""), None); + } + + #[test] + fn prefix_check_matches_known_and_rejects_unknown() { + assert!(is_known_generator_prefix("ema")); + assert!(is_known_generator_prefix("EMA")); + assert!(is_known_generator_prefix("")); // empty is a prefix of all + assert!(!is_known_generator_prefix("zzz")); + } +} diff --git a/src/theme.rs b/src/theme.rs index ad2a424..8ffeabb 100644 --- a/src/theme.rs +++ b/src/theme.rs @@ -163,6 
+163,7 @@ impl Theme { HighlightClass::String => self.tok_string, HighlightClass::Punct => self.tok_punct, HighlightClass::Flag => self.tok_flag, + HighlightClass::Function => self.tok_function, HighlightClass::Error => self.tok_error, } } @@ -228,6 +229,7 @@ mod tests { assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string); assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct); assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag); + assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function); assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error); } diff --git a/src/ui.rs b/src/ui.rs index e4df268..409cf9e 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -1438,12 +1438,19 @@ fn render_input_one_row( let offset = input_scroll_offset(line_cols, cursor_col, tw, app.input_scroll_offset); app.input_scroll_offset = offset; - let runs = crate::input_render::render_input_runs_in_mode( + // Strip the `:` one-shot prefix for the SQL highlighting/overlays + // (ADR-0003); the `:` itself renders as plain text. Identity for + // non-one-shot input. + let (fb_view, fb_cursor, fb_off) = app.feedback_view(); + let runs = crate::input_render::render_input_runs_feedback( &app.input, cursor, theme, &app.schema_cache, mode_for_render, + fb_view, + fb_cursor, + fb_off, ); let spans = runs_to_spans(&app.input, &runs); @@ -1507,12 +1514,19 @@ fn render_input_two_rows( let offset = input_scroll_offset(line_cols, cursor_col, capacity, app.input_scroll_offset); app.input_scroll_offset = offset; - let runs = crate::input_render::render_input_runs_in_mode( + // Strip the `:` one-shot prefix for the SQL highlighting/overlays + // (ADR-0003); the `:` itself renders as plain text. Identity for + // non-one-shot input. 
+ let (fb_view, fb_cursor, fb_off) = app.feedback_view(); + let runs = crate::input_render::render_input_runs_feedback( &app.input, cursor, theme, &app.schema_cache, mode_for_render, + fb_view, + fb_cursor, + fb_off, ); let cells = expand_runs_to_cells(&app.input, &runs); let len = cells.len(); @@ -1621,23 +1635,6 @@ fn runs_to_spans<'a>( .collect() } -/// Strip a leading one-shot `:` sigil (and the whitespace after -/// it) from `input`, returning the advanced command slice and the -/// cursor remapped into it. Mirrors `App::submit`'s `:` handling -/// so the hint panel hints at the command, not the sigil -/// (ADR-0022 Amendment 1). Used only when the effective mode is -/// `AdvancedOneShot`, where `input` is guaranteed to start (after -/// any leading whitespace) with `:`. -fn strip_one_shot_prefix(input: &str, cursor: usize) -> (&str, usize) { - let lead_ws = input.len() - input.trim_start().len(); - let after_colon = lead_ws + 1; // skip the `:` - let ws_after = input[after_colon..].len() - input[after_colon..].trim_start().len(); - let prefix_len = (after_colon + ws_after).min(input.len()); - let effective = &input[prefix_len..]; - let effective_cursor = cursor.saturating_sub(prefix_len).min(effective.len()); - (effective, effective_cursor) -} - /// Resolve the Hint panel body into its rendered lines, pre-wrapped /// to the panel's inner width and clamped to `max_rows` with an /// ellipsis backstop (issue #12). `max_rows` is the geometry-fixed row @@ -1679,14 +1676,9 @@ fn resolve_hint_lines( // In one-shot advanced mode (`:` prefix in simple mode) the // raw input carries the `:` sigil, which is not part of the - // grammar. Strip it for the ambient computation so the hint - // reflects the advanced command — mirroring `App::submit`. - let (hint_input, hint_cursor) = match app.effective_mode() { - EffectiveMode::AdvancedOneShot => { - strip_one_shot_prefix(&app.input, app.input_cursor) - } - _ => (app.input.as_str(), app.input_cursor), - }; + // grammar. 
The shared feedback view strips it so the hint reflects + // the advanced command — mirroring `App::submit` (ADR-0003). + let (hint_input, hint_cursor, _off) = app.feedback_view(); let ambient = crate::input_render::ambient_hint_in_mode( hint_input, hint_cursor, diff --git a/tests/it/main.rs b/tests/it/main.rs index a6d300d..cbc6d4b 100644 --- a/tests/it/main.rs +++ b/tests/it/main.rs @@ -23,6 +23,7 @@ mod m2n; mod parse_error_pedagogy; mod project_lifecycle; mod replay_command; +mod seed; mod sql_alter_table; mod sql_create_index; mod sql_create_table; diff --git a/tests/it/parse_error_pedagogy.rs b/tests/it/parse_error_pedagogy.rs index 8cab3b5..919c07a 100644 --- a/tests/it/parse_error_pedagogy.rs +++ b/tests/it/parse_error_pedagogy.rs @@ -109,6 +109,14 @@ fn near_miss_matrix_simple_mode() { ("delete", &["after `delete`, expected `from`", "delete from
"]), ("delete from", &["after `delete from`, expected table name", "delete from
"]), ("delete from T", &["expected `where` or `--all-rows`", "delete from
"]), + ("seed", &["after `seed`, expected table name", "seed
[count]"]), + // Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill. + ("seed T set", &["after `seed T set`, expected column name", "seed
."]), + ( + "seed T set role", + &["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed
."], + ), + ("seed T.", &["after `seed T.`, expected column name", "seed
."]), ("replay", &["after `replay`, expected string literal or path", "replay "]), ("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]), // advanced-only entry word typed in simple mode → "this is SQL" rail @@ -539,3 +547,4 @@ fn caret_aligns_under_offending_token() { + diff --git a/tests/it/seed.rs b/tests/it/seed.rs new file mode 100644 index 0000000..a4a1bc2 --- /dev/null +++ b/tests/it/seed.rs @@ -0,0 +1,1277 @@ +//! Tier-3 integration tests for the `seed` command (ADR-0048, the +//! Phase-1 walking skeleton). Covers the parse path (grammar → AST), +//! the worker round-trip (rows generated + persisted to CSV), +//! reproducibility via a fixed `--seed`, and the single `history.log` +//! line for the whole command (ADR-0048 D15 / U3). + +use rdbms_playground::db::Database; +use rdbms_playground::dsl::{ColumnSpec, Command, ReferentialAction, Type, parse_command}; +use rdbms_playground::persistence::Persistence; +use rdbms_playground::project; + +fn rt() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("tokio rt") +} + +fn open_project_db() -> (project::Project, Database, tempfile::TempDir) { + let dir = tempfile::tempdir().expect("create tempdir"); + let project = + project::open_or_create(None, Some(dir.path())).expect("open or create project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence(project.db_path(), persistence) + .expect("open db with persistence"); + (project, db, dir) +} + +fn read_csv(project: &project::Project, table: &str) -> Option { + std::fs::read_to_string(project.path().join("data").join(format!("{table}.csv"))).ok() +} + +/// `People(id serial pk, name text, email text)` — `id` is autogen +/// (excluded from generation, so no PK collisions), `name`/`email` +/// are generated. 
+fn create_people(db: &Database, rt: &tokio::runtime::Runtime) { + rt.block_on(db.create_table( + "People".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ColumnSpec::new("email", Type::Text), + ], + vec!["id".to_string()], + None, + )) + .expect("create People"); +} + +/// Data rows in a CSV = non-empty lines minus the header. +fn data_row_count(csv: &str) -> usize { + csv.lines() + .filter(|l| !l.trim().is_empty()) + .count() + .saturating_sub(1) +} + +#[test] +fn seed_parses_with_and_without_count() { + match parse_command("seed People 5").expect("`seed People 5` parses") { + Command::Seed { + table, + target_column, + count, + overrides, + rng_seed, + } => { + assert_eq!(table, "People"); + assert_eq!(target_column, None); + assert_eq!(count, Some(5)); + assert!(overrides.is_empty()); + assert_eq!(rng_seed, None); + } + other => panic!("expected Command::Seed, got {other:?}"), + } + match parse_command("seed People").expect("`seed People` parses") { + Command::Seed { table, count, .. } => { + assert_eq!(table, "People"); + assert_eq!(count, None, "omitted count is None (executor defaults to 20)"); + } + other => panic!("expected Command::Seed, got {other:?}"), + } +} + +#[test] +fn seed_parses_the_reproducibility_flag() { + // `--seed ` after a count. + match parse_command("seed People 5 --seed 42").expect("count + --seed parses") { + Command::Seed { + table, + count, + rng_seed, + .. + } => { + assert_eq!(table, "People"); + assert_eq!(count, Some(5)); + assert_eq!(rng_seed, Some(42), "the value after --seed is the rng seed"); + } + other => panic!("expected Command::Seed, got {other:?}"), + } + // `--seed ` with no count — the only number is the seed value, + // not the count. + match parse_command("seed People --seed 7").expect("--seed without count parses") { + Command::Seed { + count, rng_seed, .. 
+ } => { + assert_eq!(count, None, "no positional count"); + assert_eq!(rng_seed, Some(7)); + } + other => panic!("expected Command::Seed, got {other:?}"), + } +} + +// — Phase 2 (SD2): set-clause + column-fill parse path (ADR-0048 D2/D1) — + +use rdbms_playground::dsl::command::{SeedOverride, SeedOverrideKind}; +use rdbms_playground::dsl::value::Value; + +/// Pull the `overrides` out of a parsed `seed` command (panics on a +/// non-seed command), for the builder-fold assertions below. +fn seed_overrides(input: &str) -> (Option, Vec) { + match parse_command(input).unwrap_or_else(|e| panic!("`{input}` should parse: {e:?}")) { + Command::Seed { + target_column, + overrides, + .. + } => (target_column, overrides), + other => panic!("expected Command::Seed, got {other:?}"), + } +} + +#[test] +fn seed_set_fixed_value_override_parses() { + let (_t, ov) = seed_overrides("seed users 5 set status = 'active'"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "status"); + assert_eq!(ov[0].kind, SeedOverrideKind::Fixed(Value::Text("active".into()))); +} + +#[test] +fn seed_set_pick_list_override_parses() { + let (_t, ov) = seed_overrides("seed users set role in ('admin', 'editor', 'viewer')"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "role"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::PickList(vec![ + Value::Text("admin".into()), + Value::Text("editor".into()), + Value::Text("viewer".into()), + ]) + ); +} + +#[test] +fn seed_set_generator_override_parses() { + let (_t, ov) = seed_overrides("seed users set work_addr as email"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "work_addr"); + assert_eq!(ov[0].kind, SeedOverrideKind::Generator("email".into())); +} + +#[test] +fn seed_set_numeric_range_override_parses() { + let (_t, ov) = seed_overrides("seed products set price between 10 and 100"); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].column, "price"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::Range { + low: Value::Number("10".into()), 
+ high: Value::Number("100".into()), + } + ); +} + +#[test] +fn seed_set_date_range_override_parses_with_quoted_dates() { + // ADR-0048 D2 amendment: dates in the range form are quoted strings. + let (_t, ov) = + seed_overrides("seed users set signup between '2023-01-01' and '2024-12-31'"); + assert_eq!( + ov[0].kind, + SeedOverrideKind::Range { + low: Value::Text("2023-01-01".into()), + high: Value::Text("2024-12-31".into()), + } + ); +} + +#[test] +fn seed_multiple_overrides_combine() { + let (_t, ov) = seed_overrides( + "seed users 20 set role in ('admin', 'user'), status = 'active', signup between '2023-01-01' and '2024-12-31'", + ); + assert_eq!(ov.len(), 3, "three comma-separated overrides: {ov:?}"); + assert_eq!(ov[0].column, "role"); + assert!(matches!(ov[0].kind, SeedOverrideKind::PickList(_))); + assert_eq!(ov[1].column, "status"); + assert!(matches!(ov[1].kind, SeedOverrideKind::Fixed(_))); + assert_eq!(ov[2].column, "signup"); + assert!(matches!(ov[2].kind, SeedOverrideKind::Range { .. })); +} + +#[test] +fn seed_count_is_not_confused_by_a_range_value() { + // No positional count, but `between 18 and 80` carries NumberLits — + // they must not be read as the count (bounded to before `set`). + match parse_command("seed users set age between 18 and 80").expect("parses") { + Command::Seed { count, overrides, .. } => { + assert_eq!(count, None, "the count is None, not 18"); + assert_eq!(overrides.len(), 1); + } + other => panic!("expected seed, got {other:?}"), + } +} + +#[test] +fn seed_set_combines_with_count_and_flag() { + match parse_command("seed users 30 set status = 'x' --seed 42").expect("parses") { + Command::Seed { + count, + overrides, + rng_seed, + .. 
+ } => { + assert_eq!(count, Some(30)); + assert_eq!(rng_seed, Some(42)); + assert_eq!(overrides.len(), 1); + } + other => panic!("expected seed, got {other:?}"), + } +} + +#[test] +fn seed_column_fill_target_parses() { + let (target, ov) = seed_overrides("seed users.work_addr"); + assert_eq!(target.as_deref(), Some("work_addr")); + assert!(ov.is_empty()); +} + +#[test] +fn seed_column_fill_with_set_parses() { + let (target, ov) = seed_overrides("seed users.work_addr set work_addr as email"); + assert_eq!(target.as_deref(), Some("work_addr")); + assert_eq!(ov.len(), 1); + assert_eq!(ov[0].kind, SeedOverrideKind::Generator("email".into())); +} + +#[test] +fn seed_bare_word_set_value_is_rejected() { + // A bare (unquoted) word is not a value — D2 requires quoting. The + // typed value slot rejects `active` at the grammar level (it is not a + // quoted string / number), so the command does not parse. + assert!( + parse_command("seed users set status = active").is_err(), + "a bare-word `set` value must be rejected (quoting required, D2)" + ); + // The quoted form parses. + assert!(parse_command("seed users set status = 'active'").is_ok()); +} + +#[test] +fn seed_populates_a_table_and_persists_rows() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + let result = rt + .block_on(db.seed("People".into(), None, Some(7), Vec::new(), Some(42), Some("seed People 7".into()))) + .expect("seed succeeds"); + assert_eq!(result.produced, 7); + + let csv = read_csv(&project, "People").expect("People CSV exists after seed"); + assert_eq!( + data_row_count(&csv), + 7, + "CSV should hold 7 generated rows:\n{csv}" + ); + // The generated `email` column produces address-shaped values. 
+ assert!(csv.contains('@'), "seeded emails should appear in the CSV:\n{csv}"); +} + +#[test] +fn seed_count_defaults_to_twenty() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + let result = rt + .block_on(db.seed("People".into(), None, None, Vec::new(), Some(1), Some("seed People".into()))) + .expect("seed succeeds"); + assert_eq!(result.produced, 20, "omitted count defaults to 20"); + let csv = read_csv(&project, "People").expect("People CSV exists"); + assert_eq!(data_row_count(&csv), 20); +} + +#[test] +fn seed_is_reproducible_with_a_fixed_seed() { + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + let rt = rt(); + create_people(&db1, &rt); + create_people(&db2, &rt); + + rt.block_on(db1.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into()))) + .expect("seed run 1"); + rt.block_on(db2.seed("People".into(), None, Some(4), Vec::new(), Some(123), Some("seed People 4".into()))) + .expect("seed run 2"); + + let csv1 = read_csv(&p1, "People").expect("csv 1"); + let csv2 = read_csv(&p2, "People").expect("csv 2"); + assert_eq!(csv1, csv2, "the same --seed must reproduce identical data"); +} + +#[test] +fn seed_writes_exactly_one_history_line() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + rt.block_on(db.seed("People".into(), None, Some(5), Vec::new(), Some(1), Some("seed People 5".into()))) + .expect("seed succeeds"); + + let history = std::fs::read_to_string(project.path().join("history.log")) + .expect("history.log exists"); + let seed_lines = history.lines().filter(|l| l.contains("seed People 5")).count(); + assert_eq!( + seed_lines, 1, + "a seed of 5 rows must write exactly one history line:\n{history}" + ); +} + +// — FK sampling, empty-parent error, block guard (ADR-0048 D14 / D1) — + +/// `Users(id serial pk, name text)` + `Orders(id serial pk, user_id +/// int, total decimal)` with 
`Orders.user_id -> Users.id`. +fn create_users_and_orders(db: &Database, rt: &tokio::runtime::Runtime, add_fk: bool) { + rt.block_on(async { + db.create_table( + "Users".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create Users"); + db.create_table( + "Orders".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("user_id", Type::Int), + ColumnSpec::new("total", Type::Decimal), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create Orders"); + if add_fk { + db.add_relationship( + None, + "Users".to_string(), + vec!["id".to_string()], + "Orders".to_string(), + vec!["user_id".to_string()], + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .expect("add Orders->Users FK"); + } + }); +} + +/// `user_id` is column index 1 of `Orders(id, user_id, total)`. +fn order_user_ids(csv: &str) -> Vec { + let mut lines = csv.lines().filter(|l| !l.trim().is_empty()); + lines.next(); // header + lines + .map(|l| l.split(',').nth(1).unwrap_or_default().to_string()) + .collect() +} + +#[test] +fn seed_fills_foreign_keys_from_existing_parents() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + + // 5 parents → serial ids 1..=5. 
+ rt.block_on(db.seed("Users".into(), None, Some(5), Vec::new(), Some(1), Some("seed Users 5".into()))) + .expect("seed Users"); + let res = rt + .block_on(db.seed("Orders".into(), None, Some(10), Vec::new(), Some(2), Some("seed Orders 10".into()))) + .expect("seed Orders"); + assert_eq!(res.produced, 10, "every child row must insert (valid FK)"); + + let csv = read_csv(&project, "Orders").expect("Orders CSV"); + let valid: std::collections::HashSet = (1..=5).map(|i| i.to_string()).collect(); + let user_ids = order_user_ids(&csv); + assert_eq!(user_ids.len(), 10); + for uid in &user_ids { + assert!( + valid.contains(uid), + "user_id `{uid}` does not reference an existing parent:\n{csv}" + ); + } +} + +#[test] +fn seed_refuses_when_a_parent_table_is_empty() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + + // Users is empty — no valid FK can be fabricated. + let err = rt + .block_on(db.seed("Orders".into(), None, Some(3), Vec::new(), Some(1), Some("seed Orders 3".into()))) + .expect_err("seed must refuse an empty parent"); + let msg = err.to_string(); + assert!(msg.contains("Users"), "error should name the empty parent: {msg}"); + let lower = msg.to_lowercase(); + assert!( + lower.contains("no rows") || lower.contains("first"), + "error should explain how to fix it: {msg}" + ); +} + +#[test] +fn seed_refuses_a_not_null_blob_column() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + let mut payload = ColumnSpec::new("payload", Type::Blob); + payload.not_null = true; + rt.block_on(db.create_table( + "Files".to_string(), + vec![ColumnSpec::new("id", Type::Serial), payload], + vec!["id".to_string()], + None, + )) + .expect("create Files"); + + let err = rt + .block_on(db.seed("Files".into(), None, Some(2), Vec::new(), Some(1), Some("seed Files 2".into()))) + .expect_err("seed must refuse a NOT NULL blob"); + let msg = err.to_string(); + assert!( + msg.contains("payload") && 
msg.to_lowercase().contains("blob"), + "error should name the un-generatable blob column: {msg}" + ); +} + +#[test] +fn seed_omits_a_nullable_blob_column() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "Files".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + // nullable blob → omitted (→ NULL), seed still succeeds. + ColumnSpec::new("payload", Type::Blob), + ], + vec!["id".to_string()], + None, + )) + .expect("create Files"); + + let res = rt + .block_on(db.seed("Files".into(), None, Some(3), Vec::new(), Some(1), Some("seed Files 3".into()))) + .expect("seed succeeds despite the nullable blob"); + assert_eq!(res.produced, 3); + let csv = read_csv(&project, "Files").expect("Files CSV"); + assert_eq!(data_row_count(&csv), 3); +} + +// — uniqueness, junction distinct-combos, IN-CHECK (D10 / D14 / D17) — + +/// The `n`th comma-separated field of each data row (the generated +/// values here never contain commas). 
+fn nth_column_values(csv: &str, n: usize) -> Vec { + csv.lines() + .filter(|l| !l.trim().is_empty()) + .skip(1) + .map(|l| l.split(',').nth(n).unwrap_or_default().trim().to_string()) + .collect() +} + +#[test] +fn seed_keeps_unique_columns_distinct() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + let mut label = ColumnSpec::new("label", Type::Text); + label.unique = true; + rt.block_on(db.create_table( + "Tags".to_string(), + vec![ColumnSpec::new("id", Type::Serial), label], + vec!["id".to_string()], + None, + )) + .expect("create Tags"); + + let res = rt + .block_on(db.seed("Tags".into(), None, Some(8), Vec::new(), Some(3), Some("seed Tags 8".into()))) + .expect("seed"); + assert_eq!(res.produced, 8); + + let csv = read_csv(&project, "Tags").expect("Tags CSV"); + let labels = nth_column_values(&csv, 1); + let distinct: std::collections::HashSet<&String> = labels.iter().collect(); + assert_eq!(distinct.len(), labels.len(), "UNIQUE column has duplicates:\n{csv}"); +} + +#[test] +fn seed_sequences_identifier_int_columns() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + // `code` is an identifier-named int (D10) but not a constraint — + // uniqueness comes from the identifier rule. 
+ rt.block_on(db.create_table( + "Items".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("code", Type::Int), + ColumnSpec::new("name", Type::Text), + ], + vec!["id".to_string()], + None, + )) + .expect("create Items"); + + let res = rt + .block_on(db.seed("Items".into(), None, Some(5), Vec::new(), Some(1), Some("seed Items 5".into()))) + .expect("seed"); + assert_eq!(res.produced, 5); + + let csv = read_csv(&project, "Items").expect("Items CSV"); + let codes: Vec = nth_column_values(&csv, 1) + .iter() + .map(|s| s.parse().expect("code is an int")) + .collect(); + let distinct: std::collections::HashSet = codes.iter().copied().collect(); + assert_eq!(distinct.len(), 5, "identifier ints must be unique: {codes:?}"); +} + +#[test] +fn seed_junction_produces_distinct_combinations_and_caps() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + rt.block_on(async { + // Two parents, 2 rows each → 2x2 = 4 possible (a, b) pairs. + for t in ["P1", "P2"] { + db.create_table( + t.to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ], + vec!["id".to_string()], + None, + ) + .await + .expect("create parent"); + db.seed(t.into(), None, Some(2), Vec::new(), Some(1), Some(format!("seed {t} 2"))) + .await + .expect("seed parent"); + } + // Junction with a compound PK over its two FK columns. 
+ db.create_table( + "J".to_string(), + vec![ColumnSpec::new("a", Type::Int), ColumnSpec::new("b", Type::Int)], + vec!["a".to_string(), "b".to_string()], + None, + ) + .await + .expect("create J"); + db.add_relationship( + None, + "P1".into(), + vec!["id".into()], + "J".into(), + vec!["a".into()], + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .expect("fk a"); + db.add_relationship( + None, + "P2".into(), + vec!["id".into()], + "J".into(), + vec!["b".into()], + ReferentialAction::NoAction, + ReferentialAction::NoAction, + false, + None, + ) + .await + .expect("fk b"); + + // Requesting 10 caps at the 4 available distinct combinations. + let res = db + .seed("J".into(), None, Some(10), Vec::new(), Some(7), Some("seed J 10".into())) + .await + .expect("seed J"); + assert_eq!(res.produced, 4, "junction caps at available combos"); + assert_eq!(res.requested, 10, "the requested count is reported for the cap note"); + }); + + let csv = read_csv(&project, "J").expect("J CSV"); + let pairs: Vec = csv + .lines() + .filter(|l| !l.trim().is_empty()) + .skip(1) + .map(str::to_string) + .collect(); + let distinct: std::collections::HashSet<&String> = pairs.iter().collect(); + assert_eq!(distinct.len(), pairs.len(), "junction rows must be distinct:\n{csv}"); +} + +#[test] +fn seed_draws_enum_values_from_an_in_check() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + let mut status = ColumnSpec::new("status", Type::Text); + status.check_sql = Some("status IN ('active', 'closed')".to_string()); + rt.block_on(db.create_table( + "Tickets".to_string(), + vec![ColumnSpec::new("id", Type::Serial), status], + vec!["id".to_string()], + None, + )) + .expect("create Tickets"); + + // Every generated status must satisfy the CHECK, so all rows insert. 
+ let res = rt + .block_on(db.seed("Tickets".into(), None, Some(12), Vec::new(), Some(2), Some("seed Tickets 12".into()))) + .expect("seed"); + assert_eq!(res.produced, 12, "all rows insert — values satisfy the CHECK"); + + let csv = read_csv(&project, "Tickets").expect("Tickets CSV"); + for v in nth_column_values(&csv, 1) { + assert!( + matches!(v.as_str(), "active" | "closed"), + "status `{v}` was not drawn from the IN check:\n{csv}" + ); + } + // The IN-check column is derived, not generic, so it is NOT flagged. + assert!( + res.advisory_columns.is_empty(), + "an IN-check column should not be flagged: {:?}", + res.advisory_columns + ); +} + +#[test] +fn seed_advises_on_enum_ish_columns() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + // `status` has no CHECK and no name heuristic → generic text, so it + // is flagged for the advisory (D12/D13). + rt.block_on(db.create_table( + "Tasks".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("status", Type::Text), + ], + vec!["id".to_string()], + None, + )) + .expect("create Tasks"); + + let res = rt + .block_on(db.seed("Tasks".into(), None, Some(3), Vec::new(), Some(1), Some("seed Tasks 3".into()))) + .expect("seed"); + assert!( + res.advisory_columns.contains(&"status".to_string()), + "enum-ish `status` should be flagged: {:?}", + res.advisory_columns + ); +} + +#[test] +fn seed_refuses_an_excessive_count() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + let err = rt + .block_on(db.seed("People".into(), None, Some(1_000_000), Vec::new(), Some(1), Some("seed People 1000000".into()))) + .expect_err("an excessive count must be refused"); + assert!( + err.to_string().to_lowercase().contains("maximum"), + "error should mention the maximum: {err}" + ); +} + +#[test] +fn seed_preview_is_capped_but_count_is_full() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + + let res = rt + 
.block_on(db.seed("People".into(), None, Some(25), Vec::new(), Some(1), Some("seed People 25".into()))) + .expect("seed"); + assert_eq!(res.produced, 25, "the full count is produced"); + assert_eq!(res.data.rows.len(), 20, "the preview is capped at 20 rows"); +} + +#[test] +fn seed_is_available_in_advanced_mode() { + use rdbms_playground::dsl::parser::parse_command_in_mode; + use rdbms_playground::mode::Mode; + // D5/A1: seed is a canonical command available in BOTH modes. + let r = parse_command_in_mode("seed People 5", Mode::Advanced); + assert!( + matches!(r, Ok(Command::Seed { .. })), + "seed must parse in advanced mode: {r:?}" + ); + // The Phase 2 surfaces (set clause + column-fill) also parse in + // advanced mode — same grammar, no mode gate. + assert!( + matches!( + parse_command_in_mode("seed People 5 set status = 'active'", Mode::Advanced), + Ok(Command::Seed { .. }) + ), + "set clause must parse in advanced mode" + ); + assert!( + matches!( + parse_command_in_mode("seed People.email set email as email", Mode::Advanced), + Ok(Command::Seed { + target_column: Some(_), + .. + }) + ), + "column-fill must parse in advanced mode" + ); +} + +// — DA-pass coverage: undo (D15), replay (D16), atomicity, zero count, +// complex-CHECK advisory (D17), FK reproducibility (D4) — + +#[test] +fn seed_is_one_undo_step() { + // Undo must be explicitly enabled on the Database. 
+ let dir = tempfile::tempdir().expect("tempdir"); + let project = project::open_or_create(None, Some(dir.path())).expect("project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence_and_undo(project.db_path(), persistence, true) + .expect("open db with undo"); + let rt = rt(); + create_people(&db, &rt); + rt.block_on(db.seed("People".into(), None, Some(6), Vec::new(), Some(1), Some("seed People 6".into()))) + .expect("seed"); + assert_eq!(data_row_count(&read_csv(&project, "People").unwrap()), 6); + + // One undo removes the whole seed batch (ADR-0048 D15). + rt.block_on(db.undo()).unwrap().expect("undo applied"); + let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "one undo must remove every seeded row in a single step"); +} + +#[test] +fn seed_column_fill_is_one_undo_step() { + // ADR-0048 D15: column-fill's bulk UPDATE is one undo step too. + let dir = tempfile::tempdir().expect("tempdir"); + let project = project::open_or_create(None, Some(dir.path())).expect("project"); + let persistence = Persistence::new(project.path().to_path_buf()); + let db = Database::open_with_persistence_and_undo(project.db_path(), persistence, true) + .expect("open db with undo"); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 5 --seed 1").expect("seed"); + // Fill `status` across all 5 rows with a constant, then undo once. 
+ run_seed(&db, &rt, "seed Members.status set status = 'flagged' --seed 2") + .expect("column-fill"); + let before = named_column_values(&read_csv(&project, "Members").unwrap(), "status"); + assert!(before.iter().all(|s| s == "flagged"), "all rows filled: {before:?}"); + + rt.block_on(db.undo()).unwrap().expect("undo applied"); + let after = named_column_values(&read_csv(&project, "Members").unwrap(), "status"); + assert!( + after.iter().all(|s| s != "flagged"), + "one undo reverts the whole column-fill in a single step: {after:?}" + ); + assert_eq!(after.len(), 5, "undo restores the original rows, not removes them"); +} + +#[test] +fn replay_reruns_a_seed_line_as_a_data_write() { + use rdbms_playground::runtime::run_replay; + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + std::fs::write(project.path().join("seed.script"), "seed People 5\n").expect("write script"); + + // D16: seed is a data-write — replay re-runs it (it is NOT in the + // app-lifecycle skip-list), so the rows appear. + let _events = rt.block_on(run_replay(&db, project.path(), "seed.script")); + assert_eq!( + data_row_count(&read_csv(&project, "People").unwrap()), + 5, + "replay must re-run the seed line" + ); +} + +#[test] +fn seed_rolls_back_atomically_on_a_constraint_failure() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + // A CHECK that generic text cannot satisfy → every generated row + // violates it, so the whole batch must roll back (P1.3d atomicity). 
+ let mut code = ColumnSpec::new("note", Type::Text); + code.check_sql = Some("length(note) > 100".to_string()); + rt.block_on(db.create_table( + "Bad".to_string(), + vec![ColumnSpec::new("id", Type::Serial), code], + vec!["id".to_string()], + None, + )) + .expect("create Bad"); + + let res = rt.block_on(db.seed("Bad".into(), None, Some(5), Vec::new(), Some(1), Some("seed Bad 5".into()))); + assert!(res.is_err(), "seed must fail when generated rows violate the CHECK"); + let rows = read_csv(&project, "Bad").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "a failed seed must leave the table unchanged (atomic)"); +} + +#[test] +fn seed_zero_is_a_no_op() { + let (project, db, _dir) = open_project_db(); + let rt = rt(); + create_people(&db, &rt); + let res = rt + .block_on(db.seed("People".into(), None, Some(0), Vec::new(), Some(1), Some("seed People 0".into()))) + .expect("seed 0 succeeds"); + assert_eq!(res.produced, 0); + let rows = read_csv(&project, "People").map_or(0, |c| data_row_count(&c)); + assert_eq!(rows, 0, "seed 0 inserts nothing"); +} + +#[test] +fn seed_advises_on_a_complex_check_column() { + let (_project, db, _dir) = open_project_db(); + let rt = rt(); + // A complex (non-IN) CHECK seed can't derive values from → the + // column is filled generically AND flagged (D17/D13). `length` keeps + // generic words valid so the seed still succeeds. 
+ let mut label = ColumnSpec::new("label", Type::Text); + label.check_sql = Some("length(label) >= 1".to_string()); + rt.block_on(db.create_table( + "Widgets".to_string(), + vec![ColumnSpec::new("id", Type::Serial), label], + vec!["id".to_string()], + None, + )) + .expect("create Widgets"); + + let res = rt + .block_on(db.seed("Widgets".into(), None, Some(3), Vec::new(), Some(1), Some("seed Widgets 3".into()))) + .expect("seed"); + assert!( + res.advisory_columns.contains(&"label".to_string()), + "a column with an underivable CHECK should be flagged: {:?}", + res.advisory_columns + ); +} + +#[test] +fn seed_foreign_keys_are_reproducible_with_a_fixed_seed() { + let rt = rt(); + let seed_one = |db: &Database| { + create_users_and_orders(db, &rt, true); + rt.block_on(db.seed("Users".into(), None, Some(4), Vec::new(), Some(1), Some("seed Users 4".into()))) + .expect("seed users"); + rt.block_on(db.seed("Orders".into(), None, Some(8), Vec::new(), Some(99), Some("seed Orders 8".into()))) + .expect("seed orders"); + }; + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + seed_one(&db1); + seed_one(&db2); + // With ORDER BY on the FK sample, the same --seed reproduces the + // sampled FK values (D4). 
+ assert_eq!( + read_csv(&p1, "Orders").unwrap(), + read_csv(&p2, "Orders").unwrap(), + "FK sampling must be reproducible with a fixed --seed" + ); +} + +#[test] +fn seed_shortid_columns_are_reproducible_with_a_fixed_seed() { + let rt = rt(); + let make = |db: &Database| { + rt.block_on(db.create_table( + "Contacts".to_string(), + vec![ + ColumnSpec::new("code", Type::ShortId), + ColumnSpec::new("name", Type::Text), + ], + vec!["code".to_string()], + None, + )) + .expect("create Contacts"); + rt.block_on(db.seed("Contacts".into(), None, Some(5), Vec::new(), Some(42), Some("seed Contacts 5".into()))) + .expect("seed"); + }; + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + make(&db1); + make(&db2); + + let csv1 = read_csv(&p1, "Contacts").unwrap(); + let csv2 = read_csv(&p2, "Contacts").unwrap(); + assert_eq!(csv1, csv2, "shortid values must reproduce under a fixed --seed"); + + // The shortid PK is populated with distinct 10-char base58 ids. + let codes = nth_column_values(&csv1, 0); + assert_eq!(codes.len(), 5); + let distinct: std::collections::HashSet<&String> = codes.iter().collect(); + assert_eq!(distinct.len(), 5, "shortid PK values must be distinct: {codes:?}"); + for code in &codes { + assert_eq!(code.len(), 10, "shortid should be 10 chars: {code}"); + } +} + +// ================================================================= +// Phase 2 (SD2) executor: set-clause overrides + column-fill, +// exercised full-stack (parse → worker) — ADR-0048 D2 / D1. +// ================================================================= + +/// Parse `input` as a `seed` command and run it through the worker — +/// the full stack minus UI render (grammar → builder → executor). 
+fn run_seed( + db: &Database, + rt: &tokio::runtime::Runtime, + input: &str, +) -> Result { + match parse_command(input).unwrap_or_else(|e| panic!("`{input}` should parse: {e:?}")) { + Command::Seed { + table, + target_column, + count, + overrides, + rng_seed, + } => rt.block_on(db.seed( + table, + target_column, + count, + overrides, + rng_seed, + Some(input.to_string()), + )), + other => panic!("expected a seed command, got {other:?}"), + } +} + +/// Values of the column named `col` (by header lookup) across the CSV's +/// data rows. +fn named_column_values(csv: &str, col: &str) -> Vec { + let header = csv.lines().next().unwrap_or_default(); + let idx = header + .split(',') + .position(|h| h.trim() == col) + .unwrap_or_else(|| panic!("column `{col}` not in header `{header}`")); + nth_column_values(csv, idx) +} + +/// `Members(id serial pk, name text, status text, role text, age int)`. +/// `status`/`role` are enum-ish names (advisory targets without an +/// override); `name`/`age` exercise the generator / range overrides. 
+fn create_members(db: &Database, rt: &tokio::runtime::Runtime) { + rt.block_on(db.create_table( + "Members".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + ColumnSpec::new("name", Type::Text), + ColumnSpec::new("status", Type::Text), + ColumnSpec::new("role", Type::Text), + ColumnSpec::new("age", Type::Int), + ], + vec!["id".to_string()], + None, + )) + .expect("create Members"); +} + +#[test] +fn seed_set_fixed_value_fills_every_row() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 6 set status = 'active' --seed 1").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let statuses = named_column_values(&csv, "status"); + assert_eq!(statuses.len(), 6); + assert!(statuses.iter().all(|s| s == "active"), "every status pinned: {statuses:?}"); +} + +#[test] +fn seed_set_pick_list_draws_only_from_the_list() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 20 set role in ('admin', 'user') --seed 2").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let roles = named_column_values(&csv, "role"); + assert!( + roles.iter().all(|r| r == "admin" || r == "user"), + "roles only from the list: {roles:?}" + ); +} + +#[test] +fn seed_set_as_generator_forces_the_shape() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // Force the `name` column (a person-name heuristic) to emails. 
+ run_seed(&db, &rt, "seed Members 5 set name as email --seed 3").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + let names = named_column_values(&csv, "name"); + assert!(names.iter().all(|n| n.contains('@')), "name forced to email shape: {names:?}"); +} + +#[test] +fn seed_set_numeric_range_stays_within_bounds() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 30 set age between 30 and 40 --seed 4").expect("seed"); + let csv = read_csv(&project, "Members").unwrap(); + for a in named_column_values(&csv, "age") { + let n: i64 = a.parse().unwrap_or_else(|_| panic!("age `{a}` not an int")); + assert!((30..=40).contains(&n), "age {n} out of [30,40]"); + } +} + +#[test] +fn seed_override_drops_the_column_from_the_advisory() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // Without an override, `status` (enum-ish) is flagged in the advisory. + let plain = run_seed(&db, &rt, "seed Members 3 --seed 5").expect("seed"); + assert!( + plain.advisory_columns.iter().any(|c| c == "status"), + "status should be advised without an override: {:?}", + plain.advisory_columns + ); + // With an override on status, it must not appear in the advisory. 
+ let overridden = + run_seed(&db, &rt, "seed Members 3 set status in ('a', 'b') --seed 5").expect("seed"); + assert!( + !overridden.advisory_columns.iter().any(|c| c == "status"), + "overridden status must drop from advisory: {:?}", + overridden.advisory_columns + ); +} + +#[test] +fn seed_unknown_generator_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + let err = run_seed(&db, &rt, "seed Members 3 set name as bogus").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("unknown generator") && msg.contains("bogus"), + "should name the unknown generator: {msg}" + ); +} + +#[test] +fn seed_incompatible_range_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // A numeric range on a text column (`name`) is rejected. + let err = run_seed(&db, &rt, "seed Members 3 set name between 1 and 10").unwrap_err(); + let msg = format!("{err}"); + assert!(msg.contains("between"), "range error should mention `between`: {msg}"); +} + +#[test] +fn seed_with_set_is_reproducible() { + let (p1, db1, _d1) = open_project_db(); + let (p2, db2, _d2) = open_project_db(); + let rt = rt(); + create_members(&db1, &rt); + create_members(&db2, &rt); + let cmd = "seed Members 10 set role in ('a', 'b', 'c'), age between 20 and 60 --seed 77"; + run_seed(&db1, &rt, cmd).expect("seed 1"); + run_seed(&db2, &rt, cmd).expect("seed 2"); + assert_eq!( + read_csv(&p1, "Members").unwrap(), + read_csv(&p2, "Members").unwrap(), + "the same --seed + set clause must reproduce identical data" + ); +} + +// — column-fill (ADR-0048 D1 form 2) — + +#[test] +fn seed_column_fill_updates_existing_rows_without_adding() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 5 --seed 1").expect("initial seed"); + let before = data_row_count(&read_csv(&project, "Members").unwrap()); + assert_eq!(before, 5); + + 
let res = run_seed(&db, &rt, "seed Members.status set status in ('x', 'y') --seed 2") + .expect("column-fill"); + assert_eq!(res.produced, 5, "column-fill touches the 5 existing rows"); + let csv = read_csv(&project, "Members").unwrap(); + assert_eq!(data_row_count(&csv), 5, "no new rows added"); + let statuses = named_column_values(&csv, "status"); + assert!( + statuses.iter().all(|s| s == "x" || s == "y"), + "every existing row's status refilled from the list: {statuses:?}" + ); +} + +#[test] +fn seed_column_fill_refuses_a_pk_target() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 3 --seed 1").expect("seed"); + let err = run_seed(&db, &rt, "seed Members.id").unwrap_err(); + assert!(format!("{err}").contains("primary key"), "PK target refused: {err}"); +} + +#[test] +fn seed_column_fill_empty_table_is_a_noop() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // No rows yet → friendly no-op, not an error. + let res = run_seed(&db, &rt, "seed Members.status set status in ('a', 'b')").expect("no-op"); + assert_eq!(res.produced, 0, "empty table → nothing filled"); +} + +#[test] +fn seed_column_fill_set_may_only_target_the_filled_column() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + run_seed(&db, &rt, "seed Members 3 --seed 1").expect("seed"); + let err = run_seed(&db, &rt, "seed Members.status set role = 'x'").unwrap_err(); + assert!( + format!("{err}").contains("can only adjust"), + "set targeting another column is refused: {err}" + ); +} + +#[test] +fn seed_column_fill_rejects_a_row_count() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + create_members(&db, &rt); + // `seed T.col 5` parses, but a count is meaningless for column-fill. 
+ let err = rt + .block_on(db.seed( + "Members".into(), + Some("status".into()), + Some(5), + Vec::new(), + Some(1), + Some("seed Members.status 5".into()), + )) + .unwrap_err(); + assert!(format!("{err}").contains("no row count"), "count refused: {err}"); +} + +#[test] +fn seed_column_fill_fk_target_samples_the_parent() { + let (project, db, _d) = open_project_db(); + let rt = rt(); + create_users_and_orders(&db, &rt, true); + run_seed(&db, &rt, "seed Users 4 --seed 1").expect("seed users"); + run_seed(&db, &rt, "seed Orders 8 --seed 2").expect("seed orders"); + // Re-fill the FK column across existing orders; every value must be a + // valid parent key (the UPDATE would fail FK enforcement otherwise). + let res = run_seed(&db, &rt, "seed Orders.user_id --seed 3").expect("column-fill FK"); + assert_eq!(res.produced, 8); + let csv = read_csv(&project, "Orders").unwrap(); + let user_ids = named_column_values(&csv, "user_id"); + assert!(user_ids.iter().all(|v| (1..=4).contains(&v.parse::().unwrap()))); +} + +#[test] +fn seed_fixed_override_on_unique_column_is_a_friendly_error() { + // DA finding (user-chosen: friendly error). A fixed value can't fill a + // UNIQUE column for more than one row — refuse up front rather than + // silently capping to 1. + let (_p, db, _d) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "U".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + { + let mut c = ColumnSpec::new("email", Type::Text); + c.unique = true; + c + }, + ], + vec!["id".to_string()], + None, + )) + .expect("create U"); + let err = run_seed(&db, &rt, "seed U 5 set email = 'x@y.com'").unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("UNIQUE") && msg.contains("distinct"), + "fixed value on a UNIQUE column should be a friendly capacity error: {msg}" + ); + // A short pick-list (< count) is likewise refused... 
+ let err2 = run_seed(&db, &rt, "seed U 5 set email in ('a@b.c', 'd@e.f')").unwrap_err(); + assert!(format!("{err2}").contains("distinct"), "short list refused: {err2}"); + // ...but a pick-list with enough distinct values succeeds. + let ok = run_seed( + &db, + &rt, + "seed U 3 set email in ('a@b.c', 'd@e.f', 'g@h.i') --seed 1", + ) + .expect("a list >= count fills cleanly"); + assert_eq!(ok.produced, 3); + // A generator is unbounded — also fine. + assert_eq!( + run_seed(&db, &rt, "seed U 4 set email as email --seed 2") + .expect("generator fills a unique column") + .produced, + 4 + ); +} + +#[test] +fn seed_column_fill_fixed_on_unique_column_is_a_friendly_error() { + let (_p, db, _d) = open_project_db(); + let rt = rt(); + rt.block_on(db.create_table( + "U".to_string(), + vec![ + ColumnSpec::new("id", Type::Serial), + { + let mut c = ColumnSpec::new("email", Type::Text); + c.unique = true; + c + }, + ], + vec!["id".to_string()], + None, + )) + .expect("create U"); + run_seed(&db, &rt, "seed U 4 set email as email --seed 1").expect("seed 4 rows"); + // Filling the UNIQUE column on 4 rows with one fixed value is refused. + let err = run_seed(&db, &rt, "seed U.email set email = 'same@x.com'").unwrap_err(); + assert!( + format!("{err}").contains("UNIQUE"), + "column-fill of a fixed value on a UNIQUE column should refuse: {err}" + ); +} diff --git a/tests/typing_surface/mod.rs b/tests/typing_surface/mod.rs index c2d4307..53bef3b 100644 --- a/tests/typing_surface/mod.rs +++ b/tests/typing_surface/mod.rs @@ -237,6 +237,7 @@ fn command_kind_label(cmd: &rdbms_playground::dsl::Command) -> String { ShowTable { .. } => "ShowTable".into(), ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()), Insert { .. } => "Insert".into(), + Seed { .. } => "Seed".into(), Update { .. } => "Update".into(), Delete { .. } => "Delete".into(), ShowData { .. 
} => "ShowData".into(), @@ -440,3 +441,68 @@ fn smoke_assess_parse_label_round_trips() { assert_eq!(a.parse_result.as_deref(), Ok("Insert")); assert!(matches!(a.state, InputState::Valid)); } + +/// `seed` (ADR-0048) gets the standard ambient surface for free from +/// grammar registration: table-name completion, the validity indicator +/// flagging an unknown table, and the `--seed` flag offered as a +/// candidate. +#[test] +fn seed_completion_and_validity() { + let schema = schema_serial_pk(); // Customers(id serial, Name, Email) + + // Completion: `seed ` offers existing table names. + let cands = completion_candidate_texts(&assess_at_end("seed ", &schema)); + assert!( + cands.iter().any(|c| c == "Customers"), + "`seed ` should complete table names, got {cands:?}" + ); + + // Validity (ADR-0027): a known table seeds clean; an unknown one is + // flagged (same table slot as update/delete/show data). + let ok = assess_at_end("seed Customers 5", &schema); + assert!(matches!(ok.state, InputState::Valid), "known table: {:?}", ok.state); + // seed's unknown-table behaviour must match its closest sibling + // `show data` (same table-only slot), whatever that is. + let seed_ghost = assess_at_end("seed Ghost 5", &schema).state; + let show_ghost = assess_at_end("show data Ghost", &schema).state; + assert_eq!( + std::mem::discriminant(&seed_ghost), + std::mem::discriminant(&show_ghost), + "seed should treat an unknown table like `show data`: seed={seed_ghost:?}, show={show_ghost:?}" + ); + + // The `--seed` reproducibility flag is offered after the count. + let flag_cands = completion_candidate_texts(&assess_at_end("seed Customers 5 ", &schema)); + assert!( + flag_cands.iter().any(|c| c.contains("seed")), + "`--seed` should be offered as a candidate, got {flag_cands:?}" + ); + + // Phase 2 (ADR-0048 D2): the `set` clause is offered after the count. 
+ assert!( + flag_cands.iter().any(|c| c == "set"), + "`set` should be offered after the count, got {flag_cands:?}" + ); + + // `set ` offers the active table's columns (narrowed to Customers). + let set_cands = completion_candidate_texts(&assess_at_end("seed Customers set ", &schema)); + assert!( + set_cands.iter().any(|c| c == "Name") && set_cands.iter().any(|c| c == "Email"), + "`set ` should complete this table's columns, got {set_cands:?}" + ); + + // `set as ` offers the curated generator vocabulary (D9). + let gen_cands = + completion_candidate_texts(&assess_at_end("seed Customers set Email as ", &schema)); + assert!( + gen_cands.iter().any(|c| c == "email") && gen_cands.iter().any(|c| c == "product"), + "`as ` should complete generator names, got {gen_cands:?}" + ); + + // Column-fill (D1 form 2): `seed Customers.` offers the columns. + let fill_cands = completion_candidate_texts(&assess_at_end("seed Customers.", &schema)); + assert!( + fill_cands.iter().any(|c| c == "Name"), + "`seed Customers.` should complete column names, got {fill_cands:?}" + ); +} diff --git a/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__delete_all_rows__delete_partial_flag_is_incomplete@partial_flag.snap b/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__delete_all_rows__delete_partial_flag_is_incomplete@partial_flag.snap index f0261f3..d5dfb13 100644 --- a/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__delete_all_rows__delete_partial_flag_is_incomplete@partial_flag.snap +++ b/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__delete_all_rows__delete_partial_flag_is_incomplete@partial_flag.snap @@ -24,10 +24,10 @@ Assessment { completion: Some( Completion { replaced_range: ( - 24, + 22, 27, ), - partial_prefix: "all", + partial_prefix: "--all", candidates: [ Candidate { text: "--all-rows", diff --git 
a/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__update_all_rows__update_partial_flag_name_is_incomplete@partial_flag.snap b/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__update_all_rows__update_partial_flag_name_is_incomplete@partial_flag.snap index e10cfab..7b43f80 100644 --- a/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__update_all_rows__update_partial_flag_name_is_incomplete@partial_flag.snap +++ b/tests/typing_surface/snapshots/typing_surface_matrix__typing_surface__update_all_rows__update_partial_flag_name_is_incomplete@partial_flag.snap @@ -24,10 +24,10 @@ Assessment { completion: Some( Completion { replaced_range: ( - 33, + 31, 36, ), - partial_prefix: "all", + partial_prefix: "--all", candidates: [ Candidate { text: "--all-rows",