Merge branch 'main' into website

This commit is contained in:
claude@clouddev1
2026-06-12 13:22:52 +00:00
38 changed files with 6222 additions and 142 deletions
Generated
+18
View File
@@ -419,6 +419,12 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "deunicode"
version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
[[package]]
name = "diff"
version = "0.1.13"
@@ -518,6 +524,17 @@ dependencies = [
"num-traits",
]
[[package]]
name = "fake"
version = "5.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453"
dependencies = [
"deunicode",
"either",
"rand 0.10.1",
]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
@@ -1527,6 +1544,7 @@ dependencies = [
"crossterm",
"csv",
"directories",
"fake",
"futures-util",
"gethostname",
"insta",
+8
View File
@@ -24,6 +24,14 @@ chrono = { version = "0.4.44", default-features = false, features = ["clock"] }
crossterm = { version = "0.29.0", features = ["event-stream"] }
csv = "1.4.0"
directories = "6.0.0"
# Realistic fake-data generators for the `seed` command (ADR-0048):
# names, emails, addresses, companies, lorem, etc. Default features
# only — the basic fakers need no flags; date/datetime values are
# generated in-house (rand + the existing `chrono`) for the bounded
# windows ADR-0048 D8 requires, so `fake`'s `chrono` feature is
# deliberately omitted. No commerce/product module exists, so the
# `product` generator is hand-rolled (D9).
fake = "5"
futures-util = "0.3.32"
gethostname = "1.1.0"
rand = "0.10.1"
+677
View File
@@ -0,0 +1,677 @@
# ADR-0048: `seed` — fake-data generation command (SD1, opens SD2)
## Status
**Accepted (2026-06-11); Phase 1 + Phase 2 implemented (2026-06-11).** Design
settled with the user across an extended fork dialogue (every decision
below was escalated and user-chosen), then hardened by a pre-build
`/runda` Devil's-Advocate pass that found six blockers — undo
integration (D15), replay semantics (D16), `set` value quoting (D2),
CHECK-constraint handling (D17), a phase-ordering bug in the advisory
(D13), and auto-show flooding (D18) — plus refinements (state-relative
reproducibility, compound-FK tuple sampling, column-fill constraint
rules, the `fake` dependency scan), all folded in.
**Phase 1 shipped** test-first across commits `202e25a` (generation
library + `fake` dependency) → `f1e9484` (command skeleton) →
`73493fa` (FK sampling) → `9c13501` (uniqueness / junction / IN-CHECK) →
`0b3ab3c` (`SeedResult` / preview / advisory / count cap) →
`e6ff63d` (single-transaction O(N) path) → `fbd219b` (`--seed` flag,
ambient wiring, and a whole-implementation `/runda` pass). The
post-implementation `/runda` found eight gaps — FK-sampling
determinism (now `ORDER BY`), shortid reproducibility (now from the
seeded RNG, so **D4 holds with no exceptions**), and six untested
ADR decisions (D5/D15/D16/D17 + atomicity + zero-count), all closed.
**2358 tests pass / 0 fail / 0 skip; clippy clean.**
**Implemented in Phase 1:** the whole-row `seed <table> [count]
[--seed <n>]` form and every D1–D18 decision *except* the two
Phase-2 surfaces.
**Phase 2 implemented (2026-06-11):** both remaining surfaces — the
**`set` override clause** (D2: fixed value / pick-list / named
generator / range, quoted literals, type-aware) and the
**`<table>.<column>` column-fill** form (D1 form 2: an UPDATE over
existing rows, refusing PK/autogen targets, empty-table no-op, one undo
step). The named-generator vocabulary (D9) lives in `src/seed`
(`KNOWN_GENERATORS` / `generator_for_name`); a new range `Generator`
(`src/seed/generators.rs`) backs `between`; the override clause is
folded from the flat matched path (`build_seed_overrides`,
`src/dsl/grammar/data.rs`) and applied to the per-column plan
(`apply_seed_overrides`, `src/db.rs`), with column-fill in
`do_seed_column_fill`. Full ambient wiring: completion (the generator
vocabulary after `as`, the `set`/`.col` column slots), highlighting
(`HighlightClass::Function` → `tok_function`, the generator slot), the
validity indicator (`IdentSource::Generators` — an unknown name flagged
`[ERR]`), help, and parse-error pedagogy rows. The D13 advisory now
carries its Phase-2/3 wording (points at `set` and the column-fill
repair). A post-implementation `/runda` pass then added one
user-chosen refinement: a **bounded override on a UNIQUE column** (a
fixed value / too-short pick-list) is now a **friendly error** rather
than a silent uniqueness cap (see D2). **2400 tests pass / 0 fail / 0
skip; clippy clean.** Two
implementation refinements vs. this ADR's wording, both meeting the
user-facing contract: dates in the range form are **quoted** (the D2
amendment, above — no date-literal token exists); and the `set` value
slots reuse `update`'s typed `current_column_value` (no spurious
column-ref match) rather than the raw expression operand.
Further SD2 increments (custom user generators, NULL injection,
multi-locale, recursive parent auto-seed) remain out of scope (see Out
of scope).
Closes `requirements.md` **SD1** and delivers the core of **SD2**
(per-type generators, determinism, the `fake`-backed catalogue). It
also closes one of the two remaining gaps in **A1** ("all canonical
app-level commands") — `seed`; the other, `hint` (**H2**), is
separate.
Builds on: ADR-0014 (data operations, the `Value`/`Bound` value model,
the auto-show pattern, FK-error enrichment), ADR-0005/0011 (the type
vocabulary and `Type::fk_target_type()`), ADR-0012/0013 (the column /
relationship metadata tables, the rebuild-table primitive — *read* by
seed for schema introspection), ADR-0024 (the unified grammar tree /
`CommandNode` registration that gives completion, hints, help-id,
usage-id for free), ADR-0022 (ambient typing assistance — the
`KNOWN_SQL_FUNCTIONS` curated-vocabulary pattern that the
generator-name list mirrors), ADR-0026 (the `in (...)` / `between ...
and ...` expression grammar the override clause reuses), ADR-0027 (the
validity-indicator diagnostics model), and ADR-0038 (the
`OutputStyleClass::Hint` styled output used for the post-seed
advisory). Honours ADR-0003 (both modes, no sigil), ADR-0009 (DSL
conventions — keyword grammar, `--` flags for opt-in choices, one
sigil only), ADR-0002 (no engine name in user-facing strings), and
ADR-0015 (per-command write-through persistence).
## Context
`seed <table> [count]` is the last unbuilt **data-authoring** command
in the requirements. The pedagogical value is high: a learner who has
just modelled a schema wants rows to query against *now*, without
hand-typing dozens of `insert`s. A teacher wants a one-liner that
fills a demo database with believable data. SD1 commits to "plausible
fake data; junction tables seeded with valid foreign-key references
drawn from existing parent rows." SD2 deferred the *how* — "per-type
generators, locale, determinism, override hooks" — explicitly pending
this ADR.
The design conversation widened the scope deliberately, with the user
confirming each step:
- **Realism matters more than minimalism** for a teaching tool. Random
`text_a3f9` values teach nothing; `Alice Martinez` /
`alice.m@example.com` make queries feel real. → adopt a faker
library and make generation **name-aware**.
- **The column *name* is the strongest signal** for what a value should
look like, but it is **ambiguous** without the **table** for the
`name`/`title` family (`products.name` ≠ `users.name`).
- **Heuristics will miss**, so a **manual override** surface is
required, not optional — this is SD2's "override hooks", brought
forward.
- **Identifiers and enums** are special: `id`-ish columns want
uniqueness; `status`-ish columns have no sensible generic value and
should be *flagged*, not guessed.
The novel work is the **generation layer**. Everything downstream —
type validation, autogen autofill (`serial`/`shortid`), FK
enforcement, per-command persistence, the auto-show outcome — is
reused from the existing insert/update machinery as **shared helper
functions**, per the X5 architecture preference (unique commands, with
mechanics shared as library functions — *not* by emitting
`Command::Insert` to borrow `do_insert`).
## Decision
Add a dedicated **`seed`** command (its own AST variant and its own
`do_seed` worker executor) available in **both modes**, with the
surface and behaviour below. Generation is realistic, name- and
table-aware, type-gated, with a manual override clause and a
reproducibility flag.
**Command classification (important, set by the replay decision
D16).** Although `requirements.md` A1 lists `seed` among the
"app-level commands" (meaning: part of the canonical command surface,
no sigil, both modes), `seed` is architecturally a **data-authoring
command** — a sibling of `insert`/`update`/`delete`, **not** an
app-lifecycle `AppCommand`. It is therefore **not** added to
`is_app_lifecycle_entry_word` / completion's
`empty_input_offers_app_command_entry_keywords` (those mirror the
`AppCommand` set and must match — `seed` belongs in neither): `replay`
re-runs it as a data write (D16).
### D1 — Command surface (fork, user-chosen: "whole-row + column-fill")
Two forms:
1. **Whole-row generation** — `seed <table> [count]`
Generates `count` new rows (an INSERT path). `count` **defaults to
20** (D6) when omitted. Every user-fillable column is filled per the
generation rules (D7–D12); `serial`/`shortid` autogen columns are
left to the existing autofill helpers.
2. **Column-fill on existing rows** — `seed <table>.<column>`
Fills `<column>` across the table's **existing** rows (an UPDATE
path) — the natural follow-up to `add column`. Combined with the
`set` clause (D2) this is also the precise repair for a single
mis-guessed column: `seed users.work_addr set work_addr as email`.
Column-fill **refuses** PK columns and autogen (`serial`/`shortid`)
columns (a friendly error — you don't "fill" an identity column),
and **respects** the same UNIQUE / FK / required rules as whole-row
generation (a UNIQUE target gets collision-free values; an FK
target samples from the parent, D14). On an **empty** table it is a
friendly no-op ("no rows to fill").
**Zero / over-cap counts.** `seed <table> 0` is a friendly no-op;
`count` over the maximum (D6) is a friendly error.
The column-restricted-*insert* form (`seed t (a, b)` — new rows, only
some columns filled) was considered and **rejected** as marginal and
constraint-fragile (see Alternatives).
**Required-column block guard (user requirement).** If seed cannot
produce a value for a `NOT NULL` column — the only real case is a
`NOT NULL blob` column, which has no DSL value path — it **refuses the
whole operation with a friendly error** naming the column, rather than
attempting a NULL insert that would violate the constraint. The check
is a pre-flight over the resolved per-column plan, before any write.
### D2 — Manual override: the `set` clause (fork, user-chosen: "value + list + generator + range")
An optional, comma-separated `set` clause overrides generation per
column. Four forms, all reusing existing grammar vocabulary so there
is nothing new to learn:
| Form | Example | Meaning |
|---|---|---|
| Fixed value | `set status = 'pending'` | every row gets the constant |
| Pick-from-list | `set role in ('admin', 'editor', 'viewer')` | uniform random choice from the list |
| Explicit generator | `set work_addr as email` | force a named generator (D9) |
| Range | `set price between 10 and 100` | uniform in range; **also dates** — `set signup between '2023-01-01' and '2024-12-31'` |
Multiple clauses combine: `seed users 20 set role in ('admin',
'user'), status = 'active', signup between '2023-01-01' and
'2024-12-31'`.
**Override × UNIQUE capacity (post-implementation `/runda`, user-chosen:
"friendly error").** A *bounded* override — a fixed value, or a
pick-list — on a **single-column-UNIQUE** target (a `UNIQUE` column or a
single-column PK) that offers fewer **distinct** values than the row
count cannot fill the run; rather than let the D10 uniqueness machinery
silently cap it (e.g. `seed users 100 set email = 'x'` → 1 row), seed
**refuses up front** with a friendly error pointing at the fixes (use a
generator, or a longer list). Generators and ranges are treated as
effectively unbounded sources — if one genuinely exhausts, the D14
distinct-combination cap still applies. Compound uniqueness is exempt
(the *other* key columns can still vary).
**Quoting (fork, user-chosen: "quoted, grammar-consistent").** Text
values and list items are **quoted string literals** (`'admin'`),
exactly as everywhere else in the DSL — only **numbers** stay
unquoted. **Amendment (2026-06-11, Phase 2 build):** the original
wording said "numbers *and dates* stay unquoted", but this DSL has
**no date-literal token** — `Value` is `Number`/`Text` only, and a
date is a **quoted string** validated by `bind_date` (`'2023-01-01'`)
everywhere else (insert / update / `where`). An unquoted `2023-01-01`
lexes as `2023`,`-`,`01`,… and cannot parse. So **dates in the range
form are quoted** (`between '2023-01-01' and '2024-12-31'`) — which is
in fact *more* faithful to this decision's own "quoted,
grammar-consistent" principle. Numbers remain unquoted (`NumberLit`).
This reuses the ADR-0026 expression grammar **unchanged**:
the DA pass confirmed that the `in (...)` form's operands are typed
value slots, so a *bare* `admin` would parse as a **column reference**
(→ "unknown column"), not a string. Quoting is therefore not a style
preference but a correctness requirement of grammar reuse. The range
form is **type-aware**: numeric bounds for numeric columns, date
bounds for date/datetime columns; a type-incompatible bound is a
friendly error. `=`, `in (...)`, and `between ... and ...` are the
ADR-0026 expression operators; `set` is the ADR-0014 UPDATE keyword;
`as` is borrowed from the SQL alias slot. The `as <generator>` operand
is a bare name from the curated generator vocabulary (D9), not a
value. The override takes precedence over every heuristic.
### D3 — Generation library: `fake` crate + hand-rolled gaps (fork, user-chosen: "name-aware + realistic")
Add the **`fake`** crate (v5.x at time of writing; English locale for
v1 per X2) for realistic values: names, emails, usernames, addresses,
companies, phone numbers, lorem text, dates. Generation is driven by a
per-column **generator** chosen by the heuristics (D7) or the override
(D2), falling back to **type-based** generation (D8).
**Implementation-time verifications (resolved 2026-06-11 when the
dependency was added):**
- **`rand` de-duplication — clean.** `fake` 5.1.0 depends on
`rand = "0.10"`, the **same major** as the project's `rand 0.10.1`,
so `cargo tree -e normal` resolves a **single** `rand 0.10.1` (no
runtime duplication; the `rand 0.8.6` visible to `cargo tree -i
rand` is only `fake`'s own dev-dependency, never compiled for us).
Consequence for D4: one seeded `rand 0.10` `StdRng` can drive
**both** `fake`'s `fake_with_rng` and the hand-rolled generators —
determinism is single-RNG, single-version, and shares `shortid.rs`'s
`rand` version.
- **`fake` module inventory / features — confirmed.** Default features
(`["either"]`) cover the core string fakers used here
(Name/Internet/Address/Company/Lorem/PhoneNumber); `fake`'s `chrono`
feature is **deliberately omitted** (dates generated in-house for
D8's bounded windows). No commerce/product module exists → `product`
is hand-rolled (D9). (The exact faker call sites are pinned when the
generation library is built.)
- **Security (new-dependency posture) — clean.** The `fake` tree (296
packages total) scanned clean by **all three** mandated scanners:
`osv-scanner` (no issues), `grype` (no vulnerabilities), `trivy fs
--scanners vuln` (0). No findings to document or accept.
### D4 — Determinism: `--seed <n>` (fork, user-chosen: "optional flag")
Generation is **random by default**. The optional `--seed <n>` flag
makes a run **reproducible**: **same database state + same `--seed` →
identical data**. The "database state" qualifier matters (DA
refinement) — FK sampling (D14), identifier sequencing (D10), and
UNIQUE collision-avoidance all *read existing rows*, so reproducibility
is relative to the data already present, not absolute. Value: teachers
hand out one dataset; demos are stable; and the feature's own tests
can assert **exact** output (against a known starting state).
Implemented with a seedable RNG threaded through every generator (no
`thread_rng` on the seeded path). `--` flag per ADR-0009 (opt-in
choice). Naming note: the flag `--seed` and the command `seed` share a
word but never collide grammatically (`seed users 20 --seed 42` parses
unambiguously). This flag is also the determinism lever for **replay**
(D16): a recorded `seed … --seed N` line reproduces on replay; a bare
`seed …` line regenerates fresh data.
### D5 — Both modes (A1)
`seed` is a canonical app-level command, available in **simple and
advanced** mode, no sigil — like `save`/`load`/`export`/`replay`.
### D6 — Default count: 20; bounded maximum
Omitted `count` → **20** rows: enough to make `where`, `group by`,
`order by`, and `limit` meaningful without flooding the output pane.
A **maximum** is enforced (proposed 10 000) to prevent a typo
(`seed t 1000000`) from hanging the app or bloating the project; over
the cap → friendly error stating the limit.
### D7 — Name-aware heuristics, type-gated (the catalogue)
A column's **name** selects a generator, but a name rule only fires
when the column's **type** is compatible (a column named `email` typed
`int` does **not** get a string — it falls through to type-based int).
Matching is **case-insensitive**, **token-based** (split on `_`,
camelCase, kebab), **most-specific-first**, with documented
false-positive guards. The catalogue (representative; full table lives
with the implementation):
| Column name (tokens) | Generator | Type gate |
|---|---|---|
| `first_name`/`fname` · `last_name`/`surname`/`lname` | first / last name | text |
| `name`/`full_name` · `title` | **table-context** name (D11) | text |
| `email`/`*_email` | email | text |
| `username`/`login`/`handle` | username | text |
| `password`/`pwd` | password | text |
| `phone`/`mobile`/`cell`/`tel` | phone number | text |
| `city`/`town` · `country` · `state`/`province` | address parts | text |
| `street`/`address`/`addr` · `zip`/`postcode`/`postal` | address parts | text |
| `company`/`employer`/`org` · `job`/`position`/`profession` | company / job | text |
| `description`/`bio`/`notes`/`summary`/`comment` | sentence / paragraph | text |
| `url`/`website`/`homepage` · `color`/`colour` | URL / hex colour | text |
| `price`/`amount`/`cost`/`salary`/`balance`/`total` | currency-range number | numeric |
| `age` · `quantity`/`qty`/`stock`/`count` | 18–80 · small int | numeric |
| `date`/`*_date` | date, recent ~3 yr window | date |
| `dob`/`birthday` | date, adult window (18–80 yr ago) | date |
| `timestamp`/`datetime` · `created_at`/`updated_at`/`*_at` | datetime, recent window (`updated_at` ≥ `created_at`) | datetime |
| `is_*`/`has_*`/`active`/`enabled` | boolean | bool |
| **identifier family** (D10) | unique sequential | int/text |
| **enum-ish family** (D12) | generic text + flag | (text) |
**False-positive guards (documented):** `username`/`filename`/
`table_name`/`*_name` handled before the bare `name` rule so they do
**not** resolve to person-name; the bare `name`/`title` rule requires a
standalone token or a recognised `*_name` suffix.
### D8 — Type-based fallback
When no name rule matches (or to satisfy a name rule's type gate),
generate by **type**: `text`→realistic words/short phrase, `int`→
bounded random, `real`→random double, `decimal`→formatted number,
`bool`→random, `date`/`datetime`→**bounded recent** value (never "any
point in all of history" — per the user's date concern), `serial`/
`shortid`→omitted (autogen helpers fill them), `blob`→unsupported
(nullable→NULL; `NOT NULL`→D1 block guard).
### D9 — Named generators + the `product` generator
The generators addressable via `set ... as <generator>` (D2) and
chosen by D7 form a **curated, named vocabulary** — `name`,
`first_name`, `last_name`, `email`, `username`, `phone`, `city`,
`country`, `street`, `zip`, `company`, `job`, `sentence`, `paragraph`,
`url`, `color`, `price`, `age`, `date`, `datetime`, `bool`, `product`,
… — the single source of truth shared by the executor, the completion
source, and the highlighter (mirroring `KNOWN_SQL_FUNCTIONS`,
ADR-0022 Amendment 6).
**`product`** is **hand-rolled** (the `fake` crate has no
commerce/product module — D3): `{adjective} {material} {noun}` from
three small baked-in word lists (~20 each) → "Sleek Bamboo Keyboard",
"Vintage Leather Backpack". Seedable through the D4 RNG. Always
addressable as `set <col> as product`, and auto-selected by D11 for
the `name`/`title` family in product-ish tables.
### D10 — Identifier family → unique by name (fork, user-chosen: "unique sequential")
A column in the identifier family — `id`, `*_id` **that is not an FK**,
`code`, `sku`, `ref`/`reference`, `number`/`no`, `barcode` — that is
**not** a serial/shortid autogen column and **not** the PK is treated
as an identifier and gets **unique** values: **int → sequential**
(`MAX(col)+1` ascending, reads like real ids, never collides);
**text → unique short code** (generate-with-retry). Precedence:
**FK detection wins** over this rule (an FK `user_id` *should* have
duplicates — many children per parent), so `*_id` only triggers
uniqueness when the column is not a foreign key.
**Constraint-driven uniqueness is independent and mandatory:** any
column with a `UNIQUE` constraint (or a user-fillable single-column
PK) gets guaranteed-unique generation regardless of name — a
correctness requirement, not a heuristic. Generation for such columns
uses retry/sequence to guarantee no collision within the batch and
against existing rows.
### D11 — Table-context disambiguation for `name`/`title` (fork, user-chosen: "table-context-aware")
For the `name`/`title` family **only**, the heuristic also reads the
**table** name token:
- `product`/`item`/`goods`/`merchandise`/`catalog`/`inventory`
→ `product` generator (D9)
- `company`/`companies`/`vendor`/`supplier`/`manufacturer`/`brand`
→ company name
- `user`/`customer`/`person`/`people`/`employee`/`member`/`contact`/
`author`/`student` → person name
- unrecognised table → generic word
This resolves the real ambiguity (`products.name` → "Sleek Bamboo
Keyboard"; `users.name` → "Alice Martinez"; `vendors.name` → "Globex
Corp"). It is a deliberately **scoped** use of table context — the only
place the table name influences generation.
### D12 — Enum-ish names → generic + post-seed advisory (fork, user-chosen: "flag enum-ish only")
Enum-ish names — `role`, `status`, `type`, `state`, `kind`,
`category`, `level`, `tier`, `stage`, `priority`, `gender` — have **no
sensible generic generator**, so they are **not guessed**: they fall
through to generic text (they must still be filled — a `NOT NULL`
status cannot be left empty). Seed then emits a **post-seed advisory**
(D13) naming them and pointing at the `set ... in (...)` override.
### D13 — Reporting: post-seed advisory (fork, user-chosen: "flag enum-ish only")
After a successful seed, in addition to the normal auto-show outcome
(row count + the affected rows, per ADR-0014), seed appends a
**`OutputStyleClass::Hint`** advisory **only** when one or more
enum-ish columns (D12) — **or columns guarded by a CHECK that seed
could not derive values from** (D17) — were filled generically.
The wording is **phase-aware** (DA finding: the advisory must not name
features that ship later). In **Phase 1** (no `set` clause yet) it
names the columns and explains they were filled generically. From
**Phase 2/3** it points at the concrete repair:
```
# Phase 1 wording:
✓ Seeded 20 rows into users
status, role were filled with generic text — they look like
fixed value sets you may want to choose deliberately.
# Phase 2/3 wording (set clause + column-fill exist):
✓ Seeded 20 rows into users
status, role filled generically. Fix existing rows with
seed users.status set status in ('active','inactive'),
or pass set … on the next seed.
```
Note the repair for **already-seeded rows** is the **column-fill**
form (`seed users.status set …`), not "re-seed" (which would add more
rows) — DA correction. This is a **result-time** note (cheap, reusing
ADR-0038's hint rendering), not a typing-time warning. The fuller
"per-column report" (every column → its generator) was considered and
**deferred** (see Alternatives / Out of scope).
### D14 — Foreign keys (SD1; fork on empty-parent, user-chosen: "friendly error")
- **Each FK** is filled by sampling **uniformly** from the **existing
rows** of the parent table's referenced column(s). Duplicates are
expected and correct (many children per parent). For a **compound
FK**, the referenced **tuple is sampled jointly** (a whole existing
parent key), never per-column independently — independent sampling
could fabricate a `(a, b)` pair that exists in no parent row and
would fail FK enforcement (DA refinement).
- **Empty parent** → seed **refuses with a friendly error** naming the
parent and the FK column ("seed `users` first — `orders.user_id`
references it"). Safe, predictable, teaches FK dependency order.
Recursive parent auto-seed is **deferred** to a future `--recursive`
opt-in (Out of scope).
- **Junction / compound-PK tables** (SD1's explicit case): sample
**distinct combinations** of the parent PK tuples to satisfy the
compound PK's uniqueness; if `count` exceeds the number of available
distinct combinations, **cap** at the maximum and note it in the
outcome.
- **Self-referential FK** (`manager_id → id`): if nullable, leave NULL
or point at an earlier row in the same batch; if `NOT NULL` on an
otherwise-empty table, friendly error. Documented edge case.
- **Nullable FKs** are **always filled** in v1 (predictable);
occasional-NULL injection is deferred.
### D15 — Undo: one snapshot per seed (DA finding; ADR-0006)
Seed is a mutation, so it must participate in undo. The draft omitted
this; the DA found the codebase already has the right primitive —
`BeginBatch` / `EndBatch` (`db.rs`), used by `replay` so a multi-write
run collapses to **one** boundary snapshot. `do_seed` wraps its
generated writes in `begin_batch` / `end_batch`, so **`seed users 20`
is a single undo step**, not 20 — matching ADR-0006 Amendment 1's
batch model. Column-fill's bulk UPDATE is likewise one step. (`import`
remains the only data-affecting op outside undo, per ADR-0015 §11;
seed is firmly inside it.)
### D16 — Replay: seed re-runs as a data write (fork, user-chosen)
`replay` re-executes a recorded `seed` line as a **data-write
command** — it is **not** in the app-lifecycle skip-set (see Command
classification, above). Consequence, accepted by the user: a **bare**
`seed users 20` regenerates **fresh, divergent** data on each replay;
a `seed users 20 --seed 42` line (the determinism lever, D4)
**reproduces** the original data. This keeps seed faithful to its
nature as a data write and puts reproducibility exactly where the
`--seed` flag already lives. (Seeded *data* is in any case durable
independently of replay, via the ADR-0015 CSV store + `rebuild`;
replay is the scripting re-run path, U4.) The DA confirmed the wiring
trap: because seed is *not* an `AppCommand`, it is correctly absent
from `is_app_lifecycle_entry_word` and replay dispatches it through
the normal data path rather than aborting.
### D17 — CHECK constraints: derive from simple `IN`, else friendly-fail (fork, user-chosen)
A CHECK on a generically-filled column would otherwise fail the whole
batch (DA finding — the block guard only covered `NOT NULL blob`).
Two-tier handling, per the user:
1. **Derive from simple `IN`-CHECKs.** When a column's CHECK is the
common enum-as-CHECK shape — `col IN ('a', 'b', …)` (the column's
own CHECK, single-column, literal list) — seed **parses out the
allowed values and uses them as the generator** (uniform choice).
The frequent `CHECK (status IN ('active','closed'))` case then
"just works" with no override needed.
2. **Best-effort + friendly fail for the rest.** For CHECKs seed
cannot interpret (ranges, expressions, multi-column), it generates
best-effort; if a generated row violates the CHECK, the insert
fails through the existing **H1 friendly-error layer** (ADR-0019)
naming the constraint and pointing at `set`. Such CHECK-guarded
columns are also **pre-flagged in the advisory** (D13) alongside
enum-ish names, so the user is warned before hitting the failure.
No new CHECK engine — tier 1 is a narrow literal-`IN` parse over the
CHECK text already stored in metadata; tier 2 is the existing failure
path.
### D18 — Auto-show is capped for large seeds (DA finding)
ADR-0014 auto-show renders "the affected rows" — fine for one insert,
a wall for a 10 000-row seed. Seed's outcome shows a **capped
preview** (proposed first **20** rows) with a `(showing 20 of N)`
note, not the full set. The row **count** is always reported in full;
only the rendered table is capped.
## Grammar, AST, and cross-cutting wiring
Per ADR-0024, `seed` is registered as a `CommandNode` so completion,
hints, help, and usage flow from one definition. The wiring, as
**explicit acceptance criteria** (a `/runda` pass must verify each —
ADR-0045 showed "claimed verified" is not verified):
- **AST + executor.** A dedicated command variant (`Seed { table,
target_column: Option<String>, count: Option<u32>, overrides:
Vec<SeedOverride>, rng_seed: Option<u64> }`) and a dedicated
`do_seed` worker executor. `do_seed` **reuses shared helpers**
(value binding `impl_value_for`, autogen autofill, FK enrichment,
the multi-row parameterised-insert pattern of `plan_autogen_autofill`,
the UPDATE path for column-fill, per-command persistence, the
`begin_batch`/`end_batch` undo primitive of D15) as library
functions — it does **not** emit `Command::Insert`/`Command::Update`
(X5).
- **Replay / undo classification (D15/D16).** `do_seed` brackets its
writes in one batch (one undo step). The `seed` entry word is
**deliberately absent** from `is_app_lifecycle_entry_word` and
completion's `empty_input_offers_app_command_entry_keywords` (the
`AppCommand` mirror) so replay re-runs it as a data write — an
explicit acceptance check, since the default for an unlisted
recognised command must be "replayed", not "abort".
- **Completion sources:** table-name (existing tables); `.column` and
`set`-clause column slots (columns of the named table); the
generator-name vocabulary (D9) after `as`; `count` number; `set` /
`=` / `in` / `as` / `between` / `and` keywords; `--seed` flag.
- **Syntax highlighting:** `seed` keyword; the generator-name
vocabulary highlighted as **`tok_function`** (reuse the existing
ADR-0022 Amendment 6 blue — no new theme colour).
- **Hints:** ambient per-slot "what's next" and usage hints, both
modes.
- **Help:** `help seed` topic (`help_id` + per-command block); the
general `help` list picks it up automatically via REGISTRY.
- **Parse-error pedagogy (ADR-0042):** near-miss matrix rows for `seed`
(bare / missing-table / wrong-token / malformed `set`), both modes.
- **Validity indicator (ADR-0027):** typing-time `[ERR]`/`[WRN]` for
unknown table, unknown column (in `.column` or `set`), unknown
generator name after `as`.
- **No DSL→SQL teaching echo (ADR-0038).** `seed` is a utility/app
command, not a DSL form of a SQL statement, so the echo does not
apply. (A future "show the generated INSERTs" is out of scope —
it would dump `count` statements.)
## Implementation phasing
Design is whole; the **implementation** is phased into reviewable,
test-first commits:
1. **Core whole-row seed** *(done, Phase 1)* — grammar/AST/executor;
type-based generation + the `fake`-backed name heuristics
(D7/D8/D11); identifier uniqueness (D10) + constraint uniqueness; FK
sampling (joint tuples) + empty-parent error + junction
distinct-combos (D14); `--seed` determinism (D4); default count + cap
+ zero-no-op (D6/D1); required-column block guard (D1); **undo batch
(D15)**; **replay-as-data-write classification (D16)**; **CHECK
derive / friendly-fail (D17)**; **capped auto-show (D18)**; the
enum/CHECK advisory in its **Phase-1 wording** (D12/D13); full
ambient wiring; both modes.
2. **The `set` override clause** (D2) *(done, Phase 2)* — value / list /
generator / range, type-aware, with completion + highlight +
validity for the generator-name slot.
3. **Column-fill mode** (`seed <table>.<column>`, D1 form 2) *(done,
Phase 2)* — the UPDATE path.
Each phase is independently green before the next. (Phases 2 and 3
landed together — they share the `set`-override executor machinery, so
splitting them risked a state where `set` parsed but column-fill
silently no-op'd.)
## Testing (ADR-0008 tiers 1–3; test-first)
- **Tier 1 (unit, deterministic via `--seed`):** generator selection
(name × type-gate matrix, including every false-positive guard of
D7); table-context disambiguation (D11); identifier uniqueness and
the FK-wins-over-`*_id` precedence (D10); bounded-date windows (D8);
the `product` generator shape; override resolution + precedence (D2);
the required-column block guard (D1); the count cap (D6). Exact-value
assertions are possible because `--seed` fixes the RNG.
- **Tier 2 (insta snapshots):** the seeded data table render and the
enum advisory (D13) at representative sizes, light + dark.
- **Tier 3 (integration, full event loop):** `seed users 20` end to
end (rows land in db + CSV + history, auto-show, persistence);
FK sampling against a populated parent (incl. a **compound FK** —
every child tuple exists in the parent); **empty-parent friendly
error**; **junction** seeding with distinct combinations and the
over-cap note; the `set` clause forms (quoted literals);
**column-fill** on existing rows (incl. refusal of PK/autogen targets,
empty-table no-op); reproducibility (`--seed 42` twice → identical data
from a fixed state); both modes. Plus the DA-driven cases:
**one-undo-step** (seed then a single `undo` removes all rows);
**replay** of a bare `seed` line (divergent) vs a `--seed` line
(reproduced); **`IN`-CHECK auto-derivation** ("just works") and a
**complex-CHECK friendly failure**; **capped auto-show** on a large
seed.
"All green, no skips" is the only acceptable end state; the Phase-1
baseline (2290 passing / 0 failing / 0 skipped / 1 ignored doctest) is
the regression floor.
## Out of scope / deferred (future SD2 work)
- **Recursive parent auto-seed** (`--recursive`) — D14 errors instead.
- **NULL injection** for nullable columns (teaching optional
relationships / `IS NULL`) — v1 always fills.
- **Multi-locale** generation — English only (X2).
- **User-defined custom generators** (true "override hooks" — register
a named generator) — the `set ... as <builtin>` surface covers the
common need; custom generators are a later SD2 increment.
- **Full per-column seed report** — D13 flags enum-ish only.
- **Column-restricted insert** (`seed t (a, b)`) — rejected (D1).
- **"Show the generated SQL"** teaching echo for seed.
## Alternatives considered
- **Hand-rolled generators only (no `fake`):** minimal dependency, but
synthetic-looking data (`text_a3f9`) — rejected on pedagogy
(pedagogy wins ties).
- **Type-only generation (no name awareness):** simpler, but misses
the biggest UX win (a `users` table that reads like real people) —
rejected.
- **Column-name-only `name` (no table context):** leaves
`products.name` → person names, requiring a manual override on every
product/company table — rejected for the `name`/`title` family
(D11).
- **No override clause (heuristics + type only):** could not answer
"the heuristic guessed wrong, fix it" or enum columns — rejected;
the `set` clause (D2) is the answer to the user's Q3.
- **Recursive auto-seed of empty parents:** powerful but magical and
can seed tables the user did not name — deferred behind a future
flag (D14).
- **Always-random (no `--seed`):** simplest, but no reproducible
datasets and weaker tests — rejected (D4).
- **Full per-column report by default:** a nice teaching artifact but
verbose on wide tables — deferred; flag-only advisory chosen (D13).
- **Reuse `Command::Insert`/`do_insert` directly** from seed: tempting
for code reuse, but collapses command identity and violates X5 —
rejected in favour of a dedicated `do_seed` that calls shared
*helpers*.
- **Skip seed on replay** (classify as app-lifecycle, D16): consistent
with A1's "app-level" label and avoids divergent data, but seed is a
data write and silently skipping it on a scripted re-run is
surprising — rejected; `--seed` is the determinism lever instead.
- **Bare-word `set` list items** (`in (admin, …)`, D2): matched the
early mockups and reads cleaner, but bare words are column
references in the reused grammar (would error) and would force a
custom list form — rejected for quoted literals (grammar reuse +
DSL consistency).
- **Pre-flight refuse any CHECK-bearing table** (D17): safest but
blocks seeding too many legitimate tables — rejected for the
derive-`IN`-else-friendly-fail tier.
- **`set`-driven NULL / per-column report / recursive parent seed:**
deferred — see Out of scope.
+1
View File
@@ -60,3 +60,4 @@ This directory contains the project's ADRs, recorded per
- [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from <T1> to <T2> [as <name>]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. 
Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships
- [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b`–`22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String` — **not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. 
**Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). The full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec<String>`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). **Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~40–50 cols as a `Clear` overlay** (right panels stay unchanged underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). 
Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears)
- [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54`–`2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. **Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). **Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). 
Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (3–5 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. 
OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle
- [ADR-0048 — `seed` fake-data generation command](0048-seed-fake-data-generation.md) — **Accepted 2026-06-11; Phase 1 + Phase 2 implemented 2026-06-11** (Phase 1 commits `202e25a`–`fbd219b`; design settled with the user across an extended fork dialogue, hardened by a pre-build `/runda` pass (six blockers folded in), a post-implementation `/runda` pass (eight gaps closed — FK/shortid determinism so **D4 holds with no exceptions**, plus six untested ADR decisions), a Phase-2 pre-build `/runda` pass (which caught the no-date-literal-token reality → the D2 quoted-dates amendment), and a Phase-2 post-implementation `/runda` pass (which added a friendly error for a bounded override on a UNIQUE column — see D2); **2400 tests pass, clippy clean**). Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1**. **Phase 1 shipped:** whole-row `seed <table> [count] [--seed <n>]` with realistic name-aware generation (the `fake` crate + a type-gated heuristic catalogue, table-context name disambiguation, hand-rolled `product` generator, bounded dates), identifier + constraint uniqueness incl. junction distinct-combos, FK sampling from existing parent rows (empty-parent error), `IN`-CHECK derivation + complex-CHECK advisory, a required-column block guard, `--seed` reproducibility (serial/FK/shortid all deterministic), undo as one batch step, replay as a data write, a capped auto-show preview, the enum/CHECK advisory, and an O(N) single-transaction insert path. 
**Phase 2 shipped (2026-06-11):** the `set` override clause (D2 — fixed value / pick-list / `as <generator>` / `between` range, **quoted** dates per the D2 amendment, type-aware, override drops the column from the advisory) and the `<table>.<column>` column-fill form (D1 form 2 — an UPDATE over existing rows, refusing PK/autogen targets, empty-table no-op, FK/unique-respecting, one undo step), with the new `KNOWN_GENERATORS` vocabulary (D9), a range `Generator`, full completion/highlight (`HighlightClass::Function`)/validity (`IdentSource::Generators`)/help/pedagogy wiring, and the D13 advisory's Phase-2/3 wording. Further SD2 increments (custom generators, NULL injection, multi-locale, recursive auto-seed) out of scope. Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1** (the other being `hint`/**H2**). A dedicated `seed` command (own AST variant + `do_seed` executor, **both modes**) generating **realistic, name-aware** fake data. Two forms: **`seed <table> [count]`** (new rows, default **20**, capped) and **`seed <table>.<column>`** (fill a column on existing rows, an UPDATE). Generation adds the **`fake` crate** (v5, English) driven by a **type-gated, token-matched name-heuristic catalogue** (~30 patterns, documented false-positive guards), with **table-context** disambiguating the `name`/`title` family (`products.name`→product, `users.name`→person, `vendors.name`→company), a **hand-rolled `product` generator** (`fake` has no commerce module), **bounded dates** (`date`/`timestamp`/`dob`/`*_at` recognised, recent windows — never "all of history"), the **identifier family** (`id`/`code`/`ref`/`number`, non-FK/non-PK) → **unique sequential**, and **enum-ish names** (`role`/`status`/`type`/…) left generic + a **post-seed Hint advisory** pointing at `set … in (…)`. 
A **`set` override clause** — `= value` / `in (a,b,c)` / `as <generator>` / `between a and b` (numeric **and** date), reusing ADR-0026 operators — answers the heuristic-miss case. **`--seed <n>`** makes runs reproducible (and enables exact-value tests). **FK** columns sampled uniformly from existing parent rows (**empty parent → friendly error**, no recursion v1); **junction/compound-PK** tables seeded with **distinct combinations**, capped + noted (SD1). A **required-column block guard** refuses rather than NULL-violate a `NOT NULL` column it can't fill (e.g. `NOT NULL blob`). Full ambient wiring (completion incl. a new generator-name vocabulary highlighted as `tok_function`, hints, `help seed`, ADR-0042 near-miss matrix, ADR-0027 validity); **no DSL→SQL teaching echo** (seed is a utility command, not a SQL twin). Honours **X5** — `do_seed` reuses insert/update *mechanics as helpers*, not by emitting `Command::Insert`. Implementation phased: (1) core whole-row seed → (2) `set` overrides → (3) column-fill. Deferred (future SD2): recursive auto-seed, NULL injection, multi-locale, user-defined custom generators, full per-column report
+14 -11
View File
@@ -8,9 +8,8 @@ to end across three phases + a restyle).
## §1. State at handoff
**Branch:** `main`. **HEAD `2d0f4b2`** plus an **uncommitted docs
finalization** (ADR-0047 status → implemented, README index, this
handoff — see §6). Push is the user's step.
**Branch:** `main`. **HEAD `f0afec3`** — all work committed, nothing
pending. Unpushed (push is the user's step; normal working state).
**Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1
ignored is the long-standing `friendly` doctest). **Clippy clean**
@@ -18,6 +17,7 @@ ignored is the long-standing `friendly` doctest). **Clippy clean**
**This session's commits:**
```
f0afec3 docs: session handoff 64 + ADR-0047 implemented (#22/#24)
2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4)
241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4)
2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5)
@@ -26,8 +26,9 @@ e9eb1b1 docs: ADR-0047 — demonstration overlay layer for casts/teaching (#22)
638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24)
```
**Issues closed:** **#24** (vi nav) and **#22** (demo overlays) — close
#22 once the docs finalization commit lands.
**Issues closed:** both **#24** (vi nav) and **#22** (demo overlays) are
**closed on Gitea** with closing comments — verified via the filtered
issue list. Nothing left open from this session's scope.
## §2. #24 — vi-style load-picker navigation (commit `638b4c9`)
@@ -107,13 +108,15 @@ existing `IndicatorDebounce` already takes. A future Tier-4 PTY harness
## §6. How to take over
**Nothing is pending from this session** — both issues are closed, all
docs landed (`f0afec3`), tree is green. The next session **returns to the
open requirements backlog** (§7). Suggested start: run `/whatsnext`
(it reads this handoff), or pick from §7 below.
1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/README.md`, and **ADR-0047** (fully landed).
2. **Pending:** the docs finalization commit (ADR-0047 status →
implemented; README index; this handoff). Commit as
`docs: session handoff 64 + ADR-0047 implemented (#22/#24)` (the user
confirms commit messages). Then close **#22** on Gitea.
3. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
`docs/adr/README.md`. ADR-0047 is fully landed; revisit only for
demo-overlay follow-ups.
2. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
`demo_badge_seq`, `demo_caption`, `demo_caption_capturing`,
`demo_caption_buffer`, `last_output_area`. Rendering:
`render_demo_overlays` / `render_badge_box` / `render_caption_box` /
+144
View File
@@ -0,0 +1,144 @@
# Session handoff — 2026-06-11 (65)
Sixty-fifth handover. Continues from handoff-64 (ADR-0047 demo
overlays). This session designed and shipped **ADR-0048 — the `seed`
fake-data generation command (SD1)**, Phase 1, end to end: an ADR with
an extended fork dialogue + two `/runda` passes, then a phased
test-first build.
## §1. State at handoff
**Branch:** `main`. **HEAD will be the doc-wrap-up commit** (see §6) —
all seed work committed, nothing pending. Unpushed (push is the user's
step; normal working state).
**Tests: 2358 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all targets).
+68 over handoff-64's 2290.
**`cargo sweep` run** at wrap-up: `target/` 1.6 G → 183 M.
**This session's commits:**
```
202e25a feat(seed): fake-data generation library + fake dependency (P1.1)
f1e9484 feat(seed): command plumbing + walking skeleton (P1.2)
73493fa feat(seed): FK sampling, empty-parent error, block guard (P1.3a)
9c13501 feat(seed): uniqueness, junction distinct-combos, IN-CHECK (P1.3b)
0b3ab3c feat(seed): SeedResult outcome, capped preview, advisory, count cap (P1.3c)
e6ff63d perf(seed): single-transaction multi-row insert path (P1.3d)
fbd219b feat(seed): --seed flag, ambient wiring, and /runda hardening (P1.4 + DA)
```
(plus the earlier `4d0ae77` multi-tab-scope withdrawal and `0af7f56`
ADR-0048 doc, and the wrap-up doc commit.)
## §2. What `seed` does (Phase 1 — read ADR-0048)
`seed <table> [count] [--seed <n>]` — populate a table with realistic
fake data. **Available in both modes** (A1).
- **Realistic, name-aware generation:** the **`fake` crate** (v5,
English) driven by a **type-gated heuristic catalogue**
(`src/seed/heuristics.rs`) — `email`→email, `first_name`→first name, `price`→
currency, etc., each only firing when the column *type* is
compatible. **Table-context** disambiguates `name`/`title`
(`products.name`→a hand-rolled **product** name, `users.name`→person,
`vendors.name`→company). **Bounded dates** (`dob`/`created_at`/
`date`/`timestamp` → recent windows, never "all of history", anchored
to a fixed reference epoch for reproducibility). Type-based fallback
otherwise.
- **Uniqueness (D10):** the user-fillable PK, compound UNIQUE
constraints, single-column UNIQUE, and identifier-named columns
(`id`/`code`/…) stay distinct across the batch and vs existing rows;
**junction tables** get **distinct FK combinations** (capped at the
available product, reported). Identifier ints get a monotonic
sequence.
- **FK (D14):** every FK column samples an existing parent row (compound
FK reads one consistent parent row); **empty parent → friendly
error**.
- **`IN`-CHECK (D17):** a simple `col IN ('a','b')` CHECK becomes the
value source (enum-as-CHECK just works); complex CHECKs are flagged in
the advisory and best-effort generated (a violation rolls the batch
back).
- **Reproducibility (D4):** `--seed <n>` → identical data on the same DB
state. **Holds with no exceptions** — serial (rowid/MAX+1), FK
(`ORDER BY`), **shortid (seeded RNG)**, all generators.
- **Output:** the seeded-row count, a **capped preview** (first 20
rows), and a **Hint-styled advisory** naming enum-ish /
underivable-CHECK columns filled generically. Count cap 10 000; `seed t 0` no-op.
- **Safety:** one **undo** step (snapshot wraps the whole seed);
**replay** re-runs it as a data write; the insert path is a single
transaction (O(N), atomic, commit-db-last preserved).
## §3. Where the code lives
- **`src/seed/`** — the pure generation library (no DB): `mod.rs`
(`ColumnSpec`, `Generator`, `SeedRng`, `make_rng`), `heuristics.rs`
(`choose_generator` + the catalogue + `is_enum_ish`), `generators.rs`
(`generate_value` + the `product` generator + bounded dates),
`check.rs` (`parse_in_check_values`). ~40 Tier-1 tests, deterministic.
- **`src/db.rs`** — `do_seed` (+ `SeedColPlan`,
`sample_parent_key_tuples`, `seed_value_list_key`, `seed_max_int`, `SeedResult`,
`DEFAULT_SEED_COUNT`/`MAX_SEED_COUNT`/`SEED_PREVIEW_CAP`), the new
**`insert_one_row`** core extracted from `do_insert` (shared, no
tx/persist — so seed runs N rows in one tx), and the `Request::Seed` /
`Database::seed` / worker wiring.
- **`src/dsl/grammar/data.rs`** — `SEED` `CommandNode`, `build_seed`,
the `--seed` flag grammar (`Seq[Flag("seed"), NumberLit]`, the first
DSL flag with a value). `Command::Seed` in `command.rs`.
- **Runtime/render** — `CommandOutcome::Seed`, `AppEvent::
DslSeedSucceeded`, `App::handle_dsl_seed_success`. Catalog keys
`ok.rows_seeded` / `seed.capped` / `seed.advisory_generic` /
`help.data.seed` / `parse.usage.seed`.
- **Tests** — `tests/it/seed.rs` (25 integration tests),
`tests/typing_surface/mod.rs` (`seed_completion_and_validity`),
`tests/it/parse_error_pedagogy.rs` (bare-`seed` near-miss row),
`src/app.rs` (two render tests), `src/dsl/shortid.rs`
(`generate_with_rng`).
## §4. Process notes (the two `/runda` passes)
- **Pre-build `/runda`** (on the ADR) found six blockers — undo
integration (D15), replay semantics (D16), `set`-value quoting (D2),
CHECK handling (D17), an advisory phase-ordering bug (D13), auto-show
flooding (D18) — all folded into ADR-0048 before any code; the three
genuine forks re-escalated and user-resolved.
- **Post-implementation `/runda`** (on the whole implementation) found
**eight gaps**, all closed: FK-sampling determinism (→ `ORDER BY`),
**shortid not reproducible** (→ seeded RNG, fixed not documented — the
user chose the fix), and six **untested ADR decisions** (D5 advanced
mode, D15 undo, D16 replay, D17 complex-CHECK advisory, atomic
rollback, zero-count) — tests added for each.
## §5. Phase 2 (deferred — designed in ADR-0048, NOT built)
These are the only seed pieces left; both have full designs in
ADR-0048:
1. **The `set` override clause (D2)** — `seed t 20 set role in
('a','b'), status = 'x', work_addr as email, price between 10 and
100`. Value / pick-from-list / explicit-generator / range, **quoted
literals** (grammar-consistent). This is the SD2 "override hooks"
core. The `ColumnSpec.check_in_values` → `PickFrom` plumbing and the
`Generator` vocabulary already exist; this adds the grammar + a `set`
clause that overrides the per-column plan.
2. **Column-fill (`seed <table>.<column>`, D1 form 2)** — fill one
column across *existing* rows (an UPDATE). Refuses PK/autogen targets;
empty-table no-op.
`requirements.md`: **SD1 `[x]`**, **SD2 `[/]`** (core done; the two
above open), **A1 14/15** (only `hint`/**H2** unregistered).
## §6. How to take over
1. Read handoffs 63 → 64 → 65, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/0048-seed-fake-data-generation.md` (the whole thing —
D1–D18 + the as-built status block).
2. **Seed is feature-complete for Phase 1; nothing pending.** Next
options (user's call): seed **Phase 2** (`set` clause + column-fill);
**H2 `hint`** (closes A1) — own ADR; **TT5 CI**; or the larger
**V4 journal** / **tutorial** ADRs.
3. Two minor, user-deferred observations (non-blocking): the uniqueness
retry cap (`MAX_ATTEMPTS=200`) can cap a *medium* unique domain
slightly below its true size (junction/small domains are exact);
`literal_to_value` doesn't type-check an IN-CHECK literal vs a numeric
column (a malformed `int IN ('a')` CHECK fails cleanly at bind).
+145
View File
@@ -0,0 +1,145 @@
# Session handoff — 2026-06-11 (66)
Sixty-sixth handover. Continues from handoff-65 (ADR-0048 `seed`
Phase 1). This session built **ADR-0048 Phase 2** end to end: the
**`set` override clause** (D2) and the **`<table>.<column>`
column-fill** form (D1 form 2) — the two surfaces Phase 1 deliberately
deferred. Designed-then-DA-vetted (a `/runda` pass that caught a real
ADR-vs-grammar conflict), then built test-first.
## §1. State at handoff
**Branch:** `main`. All Phase-2 work is in the working tree;
**commits are pending the user's approval** (see §6). Unpushed is the
normal working state.
**Tests: 2400 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all
targets). +42 over handoff-65's 2358.
## §2. What landed (read ADR-0048 — Status + D1/D2/D9/D13)
`seed <T>[.<col>] [count] [set <overrides>] [--seed <n>]`.
- **`set` override clause (D2):** four forms, comma-separated —
`status = 'active'` (fixed), `role in ('a','b')` (pick-list),
`work_addr as email` (named generator), `price between 10 and 100`
(range; numeric **and quoted dates**). Type-aware; an override
**drops its column from the generic-fill advisory** (D13). Value
slots reuse `update`'s typed `current_column_value` (quoting
enforced structurally — a bare word is rejected).
- **Column-fill (D1 form 2):** `seed users.email [set …]` fills one
column across **existing** rows (an UPDATE). Refuses PK / autogen
(`serial`/`shortid`/`blob`) targets; **empty table → friendly
no-op**; FK target samples the parent; UNIQUE/identifier target gets
collision-free values; **one undo step**; `set` may only adjust the
filled column; a row count is refused.
- **Named-generator vocabulary (D9):** `src/seed/vocabulary.rs` —
`KNOWN_GENERATORS` + `generator_for_name` + `is_known_generator_prefix`,
the single source of truth for completion, validity, and the executor.
- **Range generator:** `Generator::Range { low, high }` in
`src/seed/generators.rs`, interpreted per destination type;
`range_bounds_reason` validates compatibility before generation.
- **Ambient wiring:** completion (generator names after `as`, the
`set <col>` and `.col` column slots, the `set` keyword); highlight
(new `HighlightClass::Function` → existing `tok_function`); validity
(new `IdentSource::Generators` — unknown generator flagged `[ERR]`;
unknown column in `set`/`.col` flagged via the existing Columns
path); help (`help.data.seed`); parse-error pedagogy near-miss rows;
the D13 advisory's **Phase-2/3 wording** (points at `set` and the
column-fill repair). Both modes (D5).
## §3. The ADR amendment (a real DA find)
The pre-build `/runda` pass found that **ADR-0048 D2's "dates stay
unquoted" was impossible** — this DSL has **no date-literal token**
(`Value` is `Number`/`Text`; dates are quoted strings validated by
`bind_date`). Escalated to the user, who chose **quoted dates +
amend the ADR** (the grammar-consistent option). D2 now carries a
dated amendment; the range form uses `between '2023-01-01' and
'2024-12-31'`. This was the only divergence from the ADR text; numbers
remain unquoted.
## §4. Where the code lives
- **`src/dsl/command.rs`** — `Command::Seed` gains `target_column:
Option<String>` + `overrides: Vec<SeedOverride>`; new `SeedOverride`
/ `SeedOverrideKind`.
- **`src/dsl/grammar/data.rs`** — `SEED_SET_CLAUSE` + `SEED_DOT_COLUMN`
grammar; `SEED_GENERATOR` slot (`IdentSource::Generators`,
`HighlightClass::Function`); `build_seed` + the override fold
(`build_seed_overrides` / `parse_seed_override_tail`).
- **`src/dsl/grammar/mod.rs`** — `IdentSource::Generators` +
`HighlightClass::Function`.
- **`src/db.rs`** — `apply_seed_overrides` / `seed_override_plan` /
`seed_override_literal`; `do_seed_column_fill`; `do_seed` +
`Database::seed` + worker wiring threaded with the new params.
- **`src/seed/`** — `vocabulary.rs` (new); `generators.rs` (range
generator + `range_bounds_reason`); `mod.rs` (`Generator::Range`).
- **`src/completion.rs`** — generator candidates after `as`; generator
validity. **`src/input_render.rs`** — `"generator"` invalid-ident
kind. **`src/theme.rs`** — `Function → tok_function`.
- **Catalog** — `help.data.seed`, `parse.usage.seed`,
`seed.advisory_generic` (Phase-2/3 wording) in `en-US.yaml`;
`keys.rs` placeholders updated.
- **Tests** — `tests/it/seed.rs` (+~30: builder fold, executor
set/column-fill, undo, advanced mode), `src/seed/{vocabulary,
generators}.rs` (range + vocabulary units), `src/completion.rs`
(generator + column validity), `src/dsl/walker/highlight.rs`,
`tests/typing_surface/mod.rs` (completion slots),
`tests/it/parse_error_pedagogy.rs` (near-miss rows).
## §5. Two implementation refinements vs. the ADR (both met the contract)
- **Quoted dates** (the D2 amendment, §3).
- **Value slots reuse `current_column_value`** (the `update … set`
typed slot) rather than the raw ADR-0026 expression operand — no
spurious column-ref match, typed narrowing, consistent with
`update`. The user-facing contract (quoted literals, type-aware) is
fully met.
The `seed_take_value` / `seed_set_error` builder paths are
drift-guards (the typed slots only ever match value literals, so a bare
word is rejected at the grammar level) — they use the generic
`parse.error_wrapper`, mirroring `expr::build_expr`.
## §6. How to take over / next steps
1. Read handoffs 64 → 65 → 66, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/0048-…md` (Status block + D1/D2/D9/D13 + the amendment).
2. **Seed is feature-complete (SD1 + SD2).** `requirements.md`: **SD1
`[x]`, SD2 `[x]`**. The only open A1 gap is `hint`/**H2** (own ADR).
3. **Commits pending approval.** Suggested split:
- `feat(seed): set override clause + column-fill (ADR-0048 Phase 2)`
— all `src/` + `tests/` changes.
- `docs: ADR-0048 Phase 2 implemented + handoff 66` — ADR / README /
requirements / this file.
4. Next options (user's call): **H2 `hint`** (closes A1); **TT5 CI**;
the larger **V4 journal** / **tutorial** ADRs; or Tier-4 PTY (TT4).
5. Consider a `cargo sweep` at this milestone (`target/` grows).
## §7. Post-implementation `/runda` pass (done this session)
A DA pass over the completed code found **no correctness bugs and no
dropped requirements**; all D1–D18 acceptance criteria verified met,
tests confirmed to catch regressions. One **design fork** was surfaced
and **resolved by the user**:
- **Bounded override × UNIQUE column** — a fixed value / too-short
pick-list on a single-column-UNIQUE target used to silently cap the
run (e.g. `seed users 100 set email = 'x'` → 1 row). Now a **friendly
error** up front (`seed_override_capacity_guard`, `src/db.rs`), for
both whole-row and column-fill; generators/ranges stay cap-based
(unbounded sources). ADR-0048 D2 documents it; two tests pin it.
Remaining **non-blocking** edges (noted, not bugs):
- Overriding an **FK column** with a literal: the override wins (D2); a
non-parent value fails safely through the FK-error layer.
- **Column-fill of one column of a *compound* FK** samples that column
independently → an invalid tuple fails safely (UPDATE rejected,
rollback), never corrupts. Single-column FKs / non-FK columns are
exact.
- The generator slot uses the **default candidate-ladder hint** (offers
the vocabulary), not a dedicated prose intro — discoverability is met
by completion; a prose intro is optional polish.
+119
View File
@@ -0,0 +1,119 @@
# Session handoff — 2026-06-12 (67)
Sixty-seventh handover. Continues directly from handoff-66 (ADR-0048
`seed` Phase 2, committed). This was a **manual-testing pass**: the user
exercised the app, found several rough edges, and we triaged each into
*fix now* vs *file an issue*. Net result: **three bug fixes committed**
and **three enhancement issues filed**.
## §1. State at handoff
**Branch:** `main`. Working tree **clean**; all work committed. Unpushed
(push is the user's step).
**Tests: 2407 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all
targets). +7 over handoff-66's 2400.
**Commits since handoff-65:**
```
f7155ce fix(input): thread the `:` one-shot escape into live SQL feedback
4cacb82 fix(completion): don't flag a table alias used before its FROM clause
c3e0103 fix(completion): flag-aware partial so a dash completes flags, not keywords
30b2677 docs: ADR-0048 Phase 2 implemented + handoff 66
a12facc feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
```
(`a12facc`/`30b2677` are the Phase-2 work documented in handoff-66.)
## §2. Bug fixes this session (all committed, all tested)
1. **`c3e0103` — flag completion ate the dash.** Typing a flag at a
flag position (`add 1:n relationship … -`) offered the `on` keyword
and, on accept, produced `-on` / `---create-fk`: the partial-token
walk stopped at `-`, so the dash was outside the replaced range.
Fix: flag-aware partial detection (a dash-prefixed token at a word
boundary is a flag-in-progress, **gated on a flag being expected** so
`where x = -5` stays a number) + a unified flag matcher
(`trim_start_matches('-')`). Affected **all** flags. 4 tests + 2
partial-flag snapshots updated (they'd captured the latent bug).
2. **`4cacb82` — table alias flagged as an unknown column.** In a
SELECT, the projection (`sum(ol.count*…)`) can reference an alias
whose `FROM … OrderLines ol` sits *after* the cursor. The candidate
engine recovers that via the §10.6 full-input lookahead (ADR-0032),
but `invalid_ident_at_cursor` only walked text *before* the cursor —
so `ol` matched no scope and got a red "ERR" overlay on an otherwise
valid query. Fix: give the validity check the same full-input
lookahead and bail when the partial prefix-matches a binding's alias
or table. 1 test.
3. **`f7155ce` — the `:` one-shot escape broke live SQL feedback.**
Submission strips the `:` (ADR-0003), but the *live* feedback kept it
in the buffer handed to the walker, which bailed at the `:`. Effect:
under `:`, Tab completed nothing and a valid query could flash `[ERR]`
— while the same line in full `mode advanced` worked. (The hint
already stripped it, hence "hint shows the name but Tab does
nothing".) Fix: one shared `App::feedback_view()` (the `:`-stripped
SQL + mapped cursor + stripped offset) routed through completion (with
a `replaced_range` offset shift), the validity verdict, and rendering
(new `render_input_runs_feedback` highlights/overlays the view shifted
by the offset; the `:` renders as plain text); the ambient hint was
consolidated onto it (removing the duplicate `strip_one_shot_prefix`).
3 tests + the 9 existing colon tests still green.
## §3. Investigated, **no code change** (working as designed)
- **Comma-`FROM` implicit join** (`select … from A, B, C`) is
**deliberately rejected** — ADR-0032 §11 / OOS-3: *"comma-FROM teaches
habits we do not want to encourage; `CROSS JOIN` covers the same shape
explicitly."* The explicit equivalent (`CROSS JOIN … WHERE …`) works.
- **`sum(…)` returning one row** with no `GROUP BY` is **correct SQL**
(the aggregate collapses the result to one row; SQLite/the playground
allow the non-aggregated columns where Postgres would error). The
user's query needed `group by o.id`. Verified (1 row).
## §4. Open issues filed this session — **next session's candidates**
All on `git.lazyeval.net/oli/rdbms-playground`, label `enhancement`:
- **#26 — `seed <table>` hint omits the optional count.** A complete
command's optional positional *number* has no Tab candidate, so it's
invisible. `IntroProse` doesn't fit (it only fires for incomplete
required slots; the completing Seq match clears the hint). Needs a way
to advertise optional positional non-keyword args. *(I attempted +
reverted this during Phase 2; see the analysis in the issue.)*
- **#27 — Bottom status line: keybindings-only, context- and
state-aware.** Per-nav-focus keybindings (Input vs sidebar), **include
transient states** (Tab-cycle, history) — user preference — and add
`mode advanced` to the empty-input hint. May warrant a small ADR.
- **#28 — Reconsider relationship prose in `add column` (incidental DDL)
confirmations.** Currently by design (ADR-0044 §1 keeps prose, not
diagrams, for incidental DDL). **User preference: do NOT show the
`References:` / `Referenced by:` block** in the add-column
confirmation at all — focus on the change just made. This revisits a
decided area → land as a **new ADR** superseding the relevant part of
ADR-0016 §5 / ADR-0044 §1; confirm scope (just `add column`, or all
incidental DDL).
## §5. Other open work (unchanged from handoff-66 §6)
`seed` is **feature-complete** (`requirements.md` SD1 `[x]`, SD2 `[x]`).
Remaining roadmap, user's call:
- **H2 `hint`** — the last A1 gap (its own ADR).
- **TT5 CI** — test infra exists; no CI workflow yet.
- **TT4 PTY (Tier-4)** — ADR-0008 specifies it; not wired.
- Larger: **V4 journal**, **tutorial/lesson system** (each needs an ADR).
A possible quick follow-up: a friendlier "use an explicit `JOIN`"
parse-error for comma-`FROM` (see §3, first bullet) — not filed; mention if wanted.
## §6. How to take over
1. Read handoffs 65 → 66 → 67, `CLAUDE.md`, `docs/requirements.md`.
2. `seed` Phase 2 is done (ADR-0048 Status block is current). The
manual-testing fixes (§2) are committed and green.
3. Pick from §4 (filed issues #26/#27/#28) or §5 (roadmap). #28 is a
decision/ADR; #27 is UX (maybe ADR); #26 is a hint-system enhancement.
4. Consider a `cargo sweep` at this milestone (`target/` grows across
sessions).
+66 -26
View File
@@ -88,12 +88,16 @@ since ADR-0027.)
because relationships are cross-table rather than per-table, they
get their own sibling panel stacked below the tables list, not
nested items within it — user-confirmed 2026-06-10.)*
- [/] **S3** Output panel renders a visualization of the
currently selected item and supports multiple tabs.
*(Partial, verified 2026-06-07: single-element structure
visualisation renders (`output_render.rs:82-180`); **multiple
tabs are not implemented** — the output is one line buffer, no
tab abstraction. Same multi-tab gap as V2.)*
- [x] **S3** Output panel renders a visualization of the
currently selected item.
*(Satisfied: single-element structure visualisation renders
(`output_render.rs:82-180`) — select a table, see its columns /
types / keys. **Multi-tab clause withdrawn 2026-06-11** (user
decision): the original wording promised "and supports multiple
tabs", but the output model is settling on the single scrollable
**V4 journal** rather than switchable tabs, so the tab clause is
dropped from tracked scope. A future return to tabbed output would
be a fresh requirement, not this one. Same withdrawal as V2.)*
- [x] **S4** Hint area below the input field, showing hints about
the current input or last error.
*(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` /
@@ -242,13 +246,12 @@ since ADR-0027.)
available in both modes: `save`, `save as`, `load`, `new`,
`rebuild`, `export`, `import`, `seed`, `replay`, `undo`,
`redo`, `mode`, `help`, `hint`, `quit`.
*(Partial, verified 2026-06-07: 13 of 15 implemented and
available in both modes — `quit`/`q`, `mode simple|advanced`,
`help`, `save`, `save as`, `load`, `new`, `rebuild`, `export`,
`import`, `replay`, `undo`, `redo` (REGISTRY in
`grammar/app.rs:249-333`). **Missing: `seed`** (tracked as SD1)
**and `hint`** (tracked as H2) — neither is registered. A1
closes when SD1 + H2 land.)*
*(Partial: **14 of 15** implemented and available in both modes —
`quit`/`q`, `mode simple|advanced`, `help`, `save`, `save as`,
`load`, `new`, `rebuild`, `export`, `import`, `replay`, `undo`,
`redo`, and now **`seed`** (ADR-0048 / SD1, done 2026-06-11).
**Only `hint`** (tracked as H2) remains unregistered. A1 closes
when H2 lands.)*
## DSL data commands
@@ -469,15 +472,18 @@ since ADR-0027.)
"relationship-relevant" reach). The §3 last-resort helper line was
considered and rejected. Two `/runda` passes (design + implementation).
Selection-nav and the broader journal direction remain in V4.)*
- [/] **V2** SQL query results render as a dynamic table view in
the output pane, with multiple result tabs supported.
*(Partial, verified 2026-06-07: the **table view** is done —
`output_render.rs:38-72` `render_data_table` renders a
box-drawing frame with aligned columns (numeric right, text
left) and NULL/control-char sanitisation, for `show data` and
after every write (ADR-0014). **Missing: multiple result tabs**
— the output is a single `VecDeque<OutputLine>` with no tab
abstraction (same gap as S3). Multi-tab sits in V4 territory.)*
- [x] **V2** SQL query results render as a dynamic table view in
the output pane.
*(Satisfied: the **table view** is done — `output_render.rs:38-72`
`render_data_table` renders a box-drawing frame with aligned
columns (numeric right, text left) and NULL/control-char
sanitisation, for `show data` and after every write (ADR-0014).
**Multi-tab clause withdrawn 2026-06-11** (user decision): the
original wording promised "with multiple result tabs supported";
retained multi-result output, if ever wanted, now belongs to the
single scrollable **V4 journal** direction rather than switchable
tabs, so the tab clause is dropped from tracked scope. A future
return would be a new requirement. Same withdrawal as S3.)*
- [~] **V3** Full ER-diagram export (whole-database graph, viewed
outside the TUI) — low priority; design and ADR pending.
- [~] **V4** Output panel as a *scrollable per-session log* with
@@ -492,7 +498,13 @@ since ADR-0027.)
*(Partial: PageUp / PageDown scrolling of the existing line
buffer is in, with new output snapping the view to the most
recent. The full V4 scope — smart structure rendering, log
styling, Markdown export, scroll indicator — remains pending.)*
styling, Markdown export, scroll indicator — remains pending.
**As of 2026-06-11 this journal model is the sole tracked
direction for evolving the output pane:** the competing multi-tab
output alternative (the trailing clauses of S3 and V2) was
withdrawn from scope by user decision, so retained / multi-result
output, if pursued, is folded into this journal rather than into
switchable tabs.)*
- [x] **V5** `show <kind> [<name>]` family of commands for
redisplaying schema info on demand.
*(Done 2026-06-07: `show table <name>` + `show data <Table>`
@@ -652,11 +664,39 @@ since ADR-0027.)
## Sample data / seeding
- [ ] **SD1** `seed <table> [count]` generates plausible fake
- [x] **SD1** `seed <table> [count]` generates plausible fake
data; junction tables are seeded with valid foreign-key
references drawn from existing parent rows.
- [~] **SD2** Detailed seeding rules (per-type generators,
locale, determinism, override hooks) — design and ADR pending.
*(Done 2026-06-11 via **ADR-0048** (commits `202e25a`–`fbd219b`).
Whole-row `seed <table> [count] [--seed <n>]` with realistic
name-aware generation (`fake` crate + a type-gated heuristic
catalogue, table-context name disambiguation, hand-rolled
`product` generator, bounded dates), identifier + constraint
uniqueness, **junction tables seeded with valid FK references
drawn from existing parent rows** (distinct combinations, capped;
empty-parent friendly error), `IN`-CHECK derivation, a
required-column block guard, undo as one step, replay as a data
write, a capped auto-show + enum/CHECK advisory, and an O(N)
single-transaction path. The `set` override clause and
`<table>.<column>` column-fill landed in SD2 Phase 2, below.)*
- [x] **SD2** Detailed seeding rules (per-type generators,
locale, determinism, override hooks).
*(Done 2026-06-11 via **ADR-0048** (Phase 1 + Phase 2). Phase 1:
type-gated name-aware per-type generators with a `fake`-backed
catalogue + table-context disambiguation, **`--seed` determinism**
(serial/FK/shortid all reproducible — D4 holds with no
exceptions), English-only locale (X2). **Phase 2 (the "override
hooks" core):** the `set` override clause — fixed value /
pick-from-list / `as <generator>` / `between` range (numeric and
**quoted** dates, type-aware; an override drops the column from
the generic-fill advisory) — and the `<table>.<column>`
column-fill form (an UPDATE over existing rows, refusing
PK/autogen targets, empty-table no-op, FK/unique-respecting, one
undo step). Adds the `KNOWN_GENERATORS` vocabulary (D9), a range
`Generator`, and full completion / highlight / validity / help /
parse-error-pedagogy wiring. Deferred SD2 increments:
user-defined custom generators, NULL injection, multi-locale,
recursive parent auto-seed.)*
## Query analysis
+254 -19
View File
@@ -646,6 +646,44 @@ impl App {
}
}
/// Returns the input as the **live-feedback** walkers (completion,
/// ambient hint, validity verdict, highlight overlays) should see it,
/// as the triple `(view, cursor, offset)`:
///
/// - `view` — the text to hand to the walker;
/// - `cursor` — the input cursor translated into `view` and clamped
///   to `view.len()`;
/// - `offset` — the number of bytes dropped from the front of the
///   real buffer. Add it to any byte position a walker returns
///   (completion `replaced_range`, overlay spans) to get back to
///   real-buffer coordinates.
///
/// Under the `:` one-shot escape (ADR-0003) the buffer begins with a
/// `:` (and an auto-inserted space) that is *not* advanced SQL.
/// Submission strips that prefix before parsing
/// (`trimmed[1..].trim()`); live feedback has to strip it as well,
/// otherwise the walker bails at the `:` and resolves nothing (no
/// completion / hint, a spurious error overlay). Only the front is
/// stripped here — the tail of the buffer is left as typed.
///
/// Every non-one-shot input is returned untouched:
/// `(&input, cursor, 0)`.
#[must_use]
pub fn feedback_view(&self) -> (&str, usize, usize) {
    // Anything but the one-shot escape is the identity view.
    if !matches!(self.effective_mode(), EffectiveMode::AdvancedOneShot) {
        return (&self.input, self.input_cursor, 0);
    }

    // Per `effective_mode`, the first non-whitespace char is the `:`,
    // so its byte index equals the length of the leading whitespace.
    let colon_at = self.input.len() - self.input.trim_start().len();
    let after_colon = colon_at + 1;

    // Skip the run of ASCII whitespace that directly follows the `:`.
    let gap = self
        .input
        .as_bytes()
        .iter()
        .skip(after_colon)
        .take_while(|b| b.is_ascii_whitespace())
        .count();

    let offset = after_colon + gap;
    let view = &self.input[offset..];
    // A cursor still inside the stripped prefix maps to the start of
    // the view; one past the end is clamped to the end.
    let cursor = self.input_cursor.saturating_sub(offset).min(view.len());
    (view, cursor, offset)
}
/// The validity-indicator verdict for the current input
/// (ADR-0027 §3). `None` when the input would run clean.
///
@@ -667,11 +705,10 @@ impl App {
EffectiveMode::AdvancedPersistent
| EffectiveMode::AdvancedOneShot => Mode::Advanced,
};
crate::dsl::walker::input_verdict_in_mode(
&self.input,
Some(&self.schema_cache),
mode,
)
// Strip the `:` one-shot prefix so the walker verdicts the SQL
// itself, not the escape marker (which it can't parse).
let (view, _cursor, _offset) = self.feedback_view();
crate::dsl::walker::input_verdict_in_mode(view, Some(&self.schema_cache), mode)
}
/// Process one event from the runtime, mutating state and
@@ -771,6 +808,10 @@ impl App {
self.handle_dsl_insert_success(&command, &result);
Vec::new()
}
AppEvent::DslSeedSucceeded { command, result } => {
self.handle_dsl_seed_success(&command, &result);
Vec::new()
}
AppEvent::DslUpdateSucceeded {
command,
result,
@@ -1395,13 +1436,7 @@ impl App {
}
fn start_or_complete_at(&mut self, multi_start_idx: usize) {
let cursor = self.input_cursor.min(self.input.len());
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
&self.input,
cursor,
&self.schema_cache,
self.effective_mode().as_mode(),
) else {
let Some(comp) = self.completion_for_feedback() else {
return;
};
if comp.candidates.len() == 1 {
@@ -1413,13 +1448,7 @@ impl App {
}
fn start_or_complete_last(&mut self) {
let cursor = self.input_cursor.min(self.input.len());
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
&self.input,
cursor,
&self.schema_cache,
self.effective_mode().as_mode(),
) else {
let Some(comp) = self.completion_for_feedback() else {
return;
};
if comp.candidates.len() == 1 {
@@ -1430,6 +1459,22 @@ impl App {
}
}
/// Computes the completion at the cursor for live feedback.
///
/// The candidate engine is run against the `:`-stripped feedback
/// view (ADR-0003 one-shot) rather than the raw buffer. The
/// `replaced_range` it reports is therefore relative to that view, so
/// both ends are shifted by the stripped `offset` to land back in
/// real-buffer coordinates — that way the `commit_*` paths edit the
/// right span. For non-one-shot input the offset is 0 and the range
/// is unchanged.
///
/// Returns `None` when the engine yields no completion.
fn completion_for_feedback(&self) -> Option<crate::completion::Completion> {
    let (view, view_cursor, offset) = self.feedback_view();
    let clamped_cursor = view_cursor.min(view.len());
    let mode = self.effective_mode().as_mode();

    crate::completion::candidates_at_cursor_in_mode(
        view,
        clamped_cursor,
        &self.schema_cache,
        mode,
    )
    .map(|mut comp| {
        // Translate the view-relative span back to the real buffer.
        let (from, to) = comp.replaced_range;
        comp.replaced_range = (from + offset, to + offset);
        comp
    })
}
/// Single-candidate commit: insert "<text> " (with trailing
/// space) and DO NOT create a memo. The user can keep
/// typing or press Tab again to fresh-complete at the new
@@ -2072,6 +2117,39 @@ impl App {
}
}
/// Writes the output for a successful `seed` (ADR-0048), in order:
///
/// 1. the ✓ echo for `command`;
/// 2. the seeded-row count, followed by a cap note when fewer rows
///    were produced than requested (the unique-value space ran out);
/// 3. the capped preview table (D18);
/// 4. when any columns were advised, a Hint-styled advisory naming
///    the columns that were filled with generic text but look like
///    fixed value sets (D12/D13).
fn handle_dsl_seed_success(&mut self, command: &Command, result: &crate::db::SeedResult) {
    self.note_ok_summary(command);

    // Count line, with the cap note appended on the same line.
    let mut headline = crate::t!(
        "ok.rows_seeded",
        count = result.produced,
        table = result.table
    );
    let was_capped = result.produced < result.requested;
    if was_capped {
        headline.push(' ');
        headline.push_str(&crate::t!("seed.capped", requested = result.requested));
    }
    self.note_system(headline);

    // Preview table, one output line per rendered row.
    crate::output_render::render_data_table(&result.data)
        .into_iter()
        .for_each(|row| self.note_system(row));

    // No advised columns: nothing further to say.
    if result.advisory_columns.is_empty() {
        return;
    }
    // `columns` lists every advised column; `column` (the first one)
    // seeds the concrete repair examples (D13 Phase 2/3 wording).
    let advisory = crate::t!(
        "seed.advisory_generic",
        columns = result.advisory_columns.join(", "),
        column = result.advisory_columns[0],
        table = result.table
    );
    self.push_category_three_prose(advisory);
}
fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) {
self.note_ok_summary(command);
self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected));
@@ -2390,6 +2468,9 @@ impl App {
// the executor), like the named DSL drop.
C::SqlDropIndex { .. } => (Operation::DropIndex, None, None),
C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None),
// Seed generates inserts; FK/constraint failures read as
// insert errors (ADR-0048).
C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None),
C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None),
C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None),
C::ShowData { name, .. } | C::ShowTable { name } => {
@@ -4936,6 +5017,86 @@ mod tests {
assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent);
}
/// Test fixture for the `:` one-shot SQL-feedback tests: fills the
/// app's schema cache with two tables, `Orders(id, customer_id)` and
/// `Customers(id, name)`, and then appends every column name of both
/// tables (in table order) to the cache's flat `columns` list.
fn install_join_schema(app: &mut App) {
    use crate::completion::TableColumn;
    use crate::dsl::types::Type;

    let cache = &mut app.schema_cache;
    cache.tables = vec!["Orders".into(), "Customers".into()];

    let orders = vec![
        TableColumn::new("id", Type::Serial),
        TableColumn::new("customer_id", Type::Int),
    ];
    let customers = vec![
        TableColumn::new("id", Type::Serial),
        TableColumn::new("name", Type::Text),
    ];
    cache.table_columns.insert("Orders".into(), orders);
    cache.table_columns.insert("Customers".into(), customers);

    // Flatten the per-table columns into the cache-wide name list.
    for table in &cache.tables {
        for column in &cache.table_columns[table] {
            cache.columns.push(column.name.clone());
        }
    }
}
/// Regression test: Tab completion under the `:` one-shot escape must
/// behave exactly like the same line typed in persistent advanced
/// mode. Both apps share the schema from `install_join_schema` and the
/// same query body; only the way advanced mode is entered differs.
#[test]
fn colon_one_shot_gives_sql_completion_the_stripped_view() {
// Bug (manual testing): the `:` one-shot escape (ADR-0003) left
// the leading `:` in the buffer passed to the live SQL feedback,
// so the walker bailed at `:` and Tab completed nothing — while
// the identical line in full `mode advanced` completed. Now the
// feedback view strips the `:`, so both behave the same.
// The body ends mid-identifier (`o.cu`) so Tab has exactly one
// thing to complete.
let body = "select c.name from Orders o join Customers c on c.id=o.cu";
// Full advanced mode: completes `o.cu` → `o.customer_id`.
let mut adv = App::new();
adv.mode = Mode::Advanced;
install_join_schema(&mut adv);
type_str(&mut adv, body);
adv.update(key(KeyCode::Tab));
// The trailing space in the expected suffix is part of the
// assertion: the accepted completion is followed by a space.
assert!(
adv.input.ends_with("o.customer_id "),
"full advanced should complete: {:?}",
adv.input
);
// `:` one-shot from simple mode: must complete the same way, and
// the `:` prefix must be preserved in the buffer.
let mut one = App::new();
one.mode = Mode::Simple;
install_join_schema(&mut one);
// Type the `:` as a key event first, then the body, so the buffer
// is built the same way a user would build it.
one.update(key(KeyCode::Char(':')));
type_str(&mut one, body);
// Sanity check: the app really is in the one-shot state before Tab.
assert_eq!(one.effective_mode(), EffectiveMode::AdvancedOneShot);
one.update(key(KeyCode::Tab));
assert!(
one.input.trim_start().starts_with(':'),
"the `:` prefix is kept: {:?}",
one.input
);
assert!(
one.input.ends_with("o.customer_id "),
"`:` one-shot must complete the SQL column too: {:?}",
one.input
);
}
/// Regression test: a valid query typed behind the `:` one-shot
/// escape must produce no validity verdict (`None`).
#[test]
fn colon_one_shot_validity_is_clean_for_a_valid_query() {
// A *valid* `:`-prefixed query must not light the `[ERR]`
// indicator (the walker used to choke on the `:` and always
// report Error).
// NOTE(review): unlike the completion test, the mode is not set
// explicitly here — this presumably relies on `App::new()`
// starting in simple mode; confirm.
let mut app = App::new();
install_join_schema(&mut app);
app.update(key(KeyCode::Char(':')));
type_str(&mut app, "select name from Customers");
assert_eq!(
app.input_validity_verdict(),
None,
"a valid one-shot query should verdict clean, got {:?}",
app.input_validity_verdict(),
);
}
#[test]
fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() {
let mut app = App::new();
@@ -6223,6 +6384,80 @@ mod tests {
);
}
/// Feeds a `DslSeedSucceeded` event for an uncapped seed (20 requested,
/// 20 produced) with one advised column, then checks the output lines
/// for the seeded-row count and the advisory.
#[test]
fn seed_success_renders_count_preview_and_advisory() {
// ADR-0048: handle_dsl_seed_success renders the seeded-row count,
// the preview table, and the enum/CHECK advisory.
let mut app = App::new();
// An echo line for the command is placed in the output before the
// success event arrives. Presumably `note_ok_summary` acts on the
// most recent echo — confirm.
app.output
.push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple));
app.update(AppEvent::DslSeedSucceeded {
command: Command::Seed {
table: "users".to_string(),
target_column: None,
count: Some(20),
overrides: Vec::new(),
rng_seed: None,
},
result: crate::db::SeedResult {
table: "users".to_string(),
requested: 20,
produced: 20,
data: crate::db::DataResult {
table_name: "users".to_string(),
columns: vec!["name".to_string()],
column_types: vec![None],
rows: vec![vec![Some("Alice".to_string())]],
},
// A non-empty list is what triggers the advisory line.
advisory_columns: vec!["status".to_string()],
},
});
let texts: Vec<String> = app.output.iter().map(|l| l.text.clone()).collect();
assert!(
texts.iter().any(|t| t.contains("20 row(s) seeded into users")),
"seeded-row count surfaced: {texts:?}",
);
assert!(
texts.iter().any(|t| t.contains("status") && t.contains("generic text")),
"the advisory names the enum-ish column: {texts:?}",
);
// NOTE(review): the preview table itself (the "Alice" row) is not
// asserted here, although the test name mentions the preview.
}
/// Feeds a `DslSeedSucceeded` event for a capped seed (10 requested,
/// 4 produced) and checks that a single output line carries both the
/// produced count and the "of 10 requested" cap note.
#[test]
fn seed_success_reports_a_cap() {
// produced < requested → the cap note appears next to the count.
let mut app = App::new();
app.output
.push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple));
app.update(AppEvent::DslSeedSucceeded {
command: Command::Seed {
table: "J".to_string(),
target_column: None,
count: Some(10),
overrides: Vec::new(),
rng_seed: None,
},
result: crate::db::SeedResult {
table: "J".to_string(),
requested: 10,
produced: 4,
// Empty preview and no advised columns: only the count line
// matters for this test.
data: crate::db::DataResult {
table_name: "J".to_string(),
columns: Vec::new(),
column_types: Vec::new(),
rows: Vec::new(),
},
advisory_columns: Vec::new(),
},
});
let texts: Vec<String> = app.output.iter().map(|l| l.text.clone()).collect();
// Both fragments must be found in the *same* line, which pins the
// cap note to the count line rather than to a separate line.
assert!(
texts.iter().any(|t| t.contains("4 row(s) seeded into J")
&& t.contains("of 10 requested")),
"the cap note surfaces requested vs produced: {texts:?}",
);
}
#[test]
fn sql_delete_returning_renders_cascade_and_result_table() {
// ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade
+229 -18
View File
@@ -120,7 +120,13 @@ impl SchemaCache {
IdentSource::Columns => &self.columns,
IdentSource::Relationships => &self.relationships,
IdentSource::Indexes => &self.indexes,
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[],
// Curated / invented sources never come from the schema
// cache — `Generators` candidates are supplied separately
// from the `seed` vocabulary (ADR-0048 D9).
IdentSource::NewName
| IdentSource::Types
| IdentSource::Generators
| IdentSource::Free => &[],
}
}
@@ -327,6 +333,37 @@ pub fn candidates_at_cursor_with_in_mode(
break;
}
}
// Flag-aware extension. The plain walk above stops at `-`, so a
// flag the user is mid-typing (`-`, `--`, `--all`, `--create-fk`)
// leaves an *empty* partial sitting just after the dash(es) — which
// made the engine offer every keyword (a `-` prefix-matches nothing,
// so the empty-prefix path let `on` through) and, worse, replace an
// empty range so accepting produced `-on` / `---create-fk`. When a
// dash-prefixed token sits at a word boundary AND a flag is actually
// expected here, treat the whole dash-run-plus-body as the partial so
// it is matched and replaced wholesale. The "flag is expected" gate
// (one cheap probe on the pre-dash prefix) keeps a signed number /
// minus (`where x = -5`) from being mis-read as a flag.
{
let mut run = cursor;
while run > 0 {
let p = bytes[run - 1];
if p.is_ascii_alphanumeric() || p == b'_' || p == b'-' {
run -= 1;
} else {
break;
}
}
let word_boundary = run == 0 || bytes[run - 1].is_ascii_whitespace();
if run < cursor && bytes[run] == b'-' && word_boundary && run < start {
let pre = crate::dsl::walker::completion_probe_in_mode(&input[..run], cache, mode);
if pre.expected.iter().any(|e| matches!(e, Expectation::Flag(_))) {
start = run;
}
}
}
let partial_prefix = input[start..cursor].to_string();
let leading = &input[..start];
@@ -623,29 +660,19 @@ pub fn candidates_at_cursor_with_in_mode(
// Source 1.55: flag candidates (`--name`). Surfaced as a
// distinct CandidateKind so the hint panel can colour them
// with `tok_flag` (matching how they'll appear after
// insertion). The standard prefix matcher walks back over
// alphanumeric + underscore, which does NOT cross `-`, so
// when the user types `--all` the partial is `all` — match
// the flag's body against that. Otherwise match the full
// `--name` against the partial (which may be empty or start
// with `--`).
// insertion). The flag-aware partial detection above captures any
// leading dash-run, so the partial is one of: empty, all-dashes
// (`-` / `--`), or `[-]+body`. Stripping the leading dashes and
// matching the remainder against the flag *body* handles all of
// them uniformly (empty / all-dashes → match every flag).
let flag_needle = partial_prefix.trim_start_matches('-').to_lowercase();
let flags: Vec<String> = expected
.iter()
.filter_map(|e| match e {
Expectation::Flag(name) => Some(*name),
_ => None,
})
.filter(|body| {
if partial_prefix.starts_with("--") {
format!("--{body}")
.to_lowercase()
.starts_with(&lowered_prefix)
} else if partial_prefix.is_empty() {
true
} else {
body.to_lowercase().starts_with(&lowered_prefix)
}
})
.filter(|body| body.to_lowercase().starts_with(&flag_needle))
.map(|body| format!("--{body}"))
.collect();
@@ -709,6 +736,22 @@ pub fn candidates_at_cursor_with_in_mode(
} else {
Vec::new()
};
// Source 1.9: fake-data generator names (ADR-0048 D9). At the
// `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
// curated vocabulary is offered so a learner can discover `email` /
// `product` / … by Tab. Same `Function` kind / `tok_function` colour
// as SQL functions (no new theme colour — ADR-0048 §Grammar).
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
functions.extend(
crate::seed::KNOWN_GENERATORS
.iter()
.filter(|g| matches_prefix(g))
.map(|g| (*g).to_string()),
);
}
// Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation.
@@ -1200,6 +1243,45 @@ pub fn invalid_ident_at_cursor_in_mode(
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None;
}
// A bare ident at a SQL expression slot may be a **table alias / name**
// the user is mid-typing as a qualifier (`ol` in `sum(ol.count)`). The
// defining FROM clause can sit *after* the cursor — the projection
// references it — so the leading-only walk has an empty from-scope and
// would wrongly flag the alias as an unknown column. Recover the scope
// from the FULL input (mirrors the §10.6 edit-an-existing-query
// lookahead the candidate engine uses for column narrowing) and bail
// when the partial prefix-matches a binding's alias or table name.
if has_sql_expr_slot {
let full = crate::dsl::walker::completion_probe_in_mode(input, cache, mode);
let lowered = partial.to_lowercase();
let matches_qualifier = full.from_scope.iter().any(|b| {
b.alias
.as_deref()
.is_some_and(|a| a.to_lowercase().starts_with(&lowered))
|| b.table.to_lowercase().starts_with(&lowered)
});
if matches_qualifier {
return None;
}
}
// ADR-0048 D9: the `seed … set <col> as <gen>` slot is a curated
// vocabulary (`IdentSource::Generators`), not a schema source, so the
// schema-column check below would never see it. A partial that
// prefix-matches a known generator is an in-progress name; anything
// else is an unknown generator → flag it `[ERR]` while typing.
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
if crate::seed::is_known_generator_prefix(partial) {
return None;
}
return Some(InvalidIdent {
range: (start, cursor),
found: partial.to_string(),
source: IdentSource::Generators,
});
}
// Find every schema-listable source in the expected list.
let sources: Vec<IdentSource> = expected
.iter()
@@ -1488,6 +1570,71 @@ mod tests {
);
}
#[test]
fn single_dash_offers_flags_not_keywords_and_replaces_the_dash() {
    // Regression from manual testing: with a lone `-` typed after
    // `add 1:n relationship …`, the engine suggested the `on` keyword
    // next to `--create-fk`, and since the dash sat outside the replaced
    // range, accepting gave `-on` / `---create-fk`. A dash at a flag
    // position is a flag being typed: flags only, no keywords, and the
    // dash itself is replaced on accept.
    let input = "add 1:n relationship from X.a to Y.b -";
    let end = input.len();
    let schema = SchemaCache::default();
    let completion = candidates_at_cursor(input, end, &schema)
        .expect("a `-` at a flag position offers candidates");

    let texts: Vec<&str> = completion
        .candidates
        .iter()
        .map(|candidate| candidate.text.as_str())
        .collect();
    assert!(texts.contains(&"--create-fk"), "should offer --create-fk: {texts:?}");
    assert!(!texts.contains(&"on"), "must NOT offer `on` after a dash: {texts:?}");

    let dash_only = (end - 1, end);
    assert_eq!(
        completion.replaced_range, dash_only,
        "the `-` must be inside the replaced range so accept yields `--create-fk`",
    );
}
#[test]
fn double_dash_replaces_both_dashes_on_accept() {
    // With `--` already typed, the flag is offered and the replaced range
    // spans both dashes, so accepting does not double them up.
    let input = "delete from T --";
    let end = input.len();
    let schema = SchemaCache::default();
    let completion = candidates_at_cursor_in_mode(input, end, &schema, Mode::Simple)
        .expect("`--` offers the flag");

    let offers_all_rows = completion
        .candidates
        .iter()
        .any(|candidate| candidate.text == "--all-rows");
    assert!(offers_all_rows);

    let both_dashes = (end - 2, end);
    assert_eq!(
        completion.replaced_range, both_dashes,
        "both dashes are replaced so accept yields `--all-rows`, not `----all-rows`",
    );
}
#[test]
fn dash_at_a_value_position_is_not_treated_as_a_flag() {
    // In `show data T where x = -5` the `-` is a numeric sign. No flag is
    // expected at that position, so the dash must stay out of the
    // partial: the partial remains `5` (the original value-operand
    // behaviour) and no `--…` candidate is offered.
    let mut schema = SchemaCache::default();
    schema.tables.push("T".into());
    schema.columns.push("x".into());

    let input = "show data T where x = -5";
    let end = input.len();
    // No candidates at all is an acceptable outcome; the checks below
    // apply only when the engine does return a completion.
    let Some(completion) = candidates_at_cursor_in_mode(input, end, &schema, Mode::Simple)
    else {
        return;
    };

    let offers_flag = completion
        .candidates
        .iter()
        .any(|candidate| candidate.text.starts_with("--"));
    assert!(
        !offers_flag,
        "no flags at a value position: {:?}",
        completion.candidates,
    );

    let digit_only = (end - 1, end);
    assert_eq!(
        completion.replaced_range, digit_only,
        "only the `5` is the partial; the `-` (sign) is not captured",
    );
}
#[test]
fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() {
// The same optional-flag class: `drop column … [--cascade]`.
@@ -2606,6 +2753,70 @@ mod tests {
);
}
#[test]
fn invalid_ident_does_not_flag_a_table_alias_used_before_its_from_clause() {
    // Regression from manual testing: the projection of
    // `select … sum(ol.count*…) … from … OrderLines ol …` uses alias `ol`,
    // but the FROM clause that binds it sits *after* the cursor. Walking
    // only the leading text left the from-scope empty, so `ol` was
    // reported as an unknown column (a red "ERR" overlay on a valid
    // query). The full-input lookahead (ADR-0032 §10.6) has to recover
    // the scope so the alias is left alone.
    use crate::dsl::types::Type;

    let mut schema = SchemaCache::default();
    schema.tables.push("OrderLines".into());
    schema.columns.push("count".into());
    let order_line_columns = vec![TableColumn::new("count", Type::Int)];
    schema
        .table_columns
        .insert("OrderLines".into(), order_line_columns);

    let input = "select sum(ol.count) from OrderLines ol";
    // Cursor sits immediately after the `ol` qualifier in the projection.
    let cursor = input.find("ol.count").unwrap() + "ol".len();

    let flagged = invalid_ident_at_cursor_in_mode(input, cursor, &schema, Mode::Advanced);
    assert!(
        flagged.is_none(),
        "a table alias used before its FROM clause must not be flagged as a bad column",
    );
}
#[test]
fn invalid_ident_fires_for_unknown_generator_after_as() {
    // ADR-0048 D9: a name that matches no known generator at the
    // `set <col> as <gen>` slot is reported while the user is typing.
    let input = "seed a set name as bogus";
    let schema = two_table_schema();

    let flagged = invalid_ident_at_cursor(input, input.len(), &schema)
        .expect("unknown generator must flag");

    assert_eq!(flagged.found, "bogus");
    assert_eq!(flagged.source, IdentSource::Generators);
}
#[test]
fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
    // ADR-0048: both the `set <col>` slot and the `<table>.<col>`
    // column-fill slot are `IdentSource::Columns`, so an unknown column
    // in either one is flagged exactly like any other column slot.
    let schema = two_table_schema(); // table `a`; columns id, name

    // Cursor at the end of each input (byte 14 and byte 10 respectively).
    let set_input = "seed a set xyz";
    let set_in = invalid_ident_at_cursor(set_input, set_input.len(), &schema)
        .expect("unknown column in `set` must flag");
    assert_eq!(set_in.found, "xyz");
    assert_eq!(set_in.source, IdentSource::Columns);

    let fill_input = "seed a.xyz";
    let fill = invalid_ident_at_cursor(fill_input, fill_input.len(), &schema)
        .expect("unknown column in column-fill must flag");
    assert_eq!(fill.source, IdentSource::Columns);
}
#[test]
fn invalid_ident_does_not_fire_for_generator_prefix() {
    // Text that prefixes a known generator is a name still being typed,
    // not a typo; a complete known name is of course also fine.
    let schema = two_table_schema();
    let cases = [
        ("seed a set name as ema", "`ema` prefixes `email` — must not flag"),
        ("seed a set name as email", "`email` is a known generator — must not flag"),
    ];
    for (input, why) in cases {
        // Cursor at the end of the input (byte 22 and byte 24).
        let flagged = invalid_ident_at_cursor(input, input.len(), &schema);
        assert!(flagged.is_none(), "{why}");
    }
}
fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type;
let mut s = SchemaCache::default();
+999 -23
View File
File diff suppressed because it is too large Load Diff
+53
View File
@@ -402,6 +402,25 @@ pub enum Command {
filter: Option<Expr>,
limit: Option<u64>,
},
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
/// `count` defaults to 20 when omitted; `rng_seed` (from the
/// `--seed <n>` flag) makes generation reproducible.
///
/// Phase 2 surfaces (ADR-0048 D1/D2):
/// - `target_column` is `Some` for the **column-fill** form
/// `seed <table>.<column>` — fill one column across the table's
/// *existing* rows (an UPDATE), rather than generating new rows.
/// - `overrides` carries the `set <col> …` clause: per-column pins
/// that take precedence over the heuristic generator (D2).
Seed {
table: String,
/// `Some(col)` → column-fill mode (UPDATE existing rows);
/// `None` → whole-row generation (INSERT new rows).
target_column: Option<String>,
count: Option<u64>,
overrides: Vec<SeedOverride>,
rng_seed: Option<u64>,
},
/// Replay a sequence of DSL commands from a file. Each line
/// is parsed and dispatched through the same pipeline as
/// interactive input. Blank lines and lines whose first
@@ -637,6 +656,38 @@ impl RowFilter {
}
}
/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
///
/// The user can pin a column's generated values to a constant, a
/// pick-list, an explicit named generator, or a range — overriding the
/// per-column heuristic the executor would otherwise pick. `column` is
/// the user-typed column name (validated against the table at execution,
/// like every other column slot).
///
/// A `seed` command carries zero or more of these in
/// `Command::Seed::overrides`, one per `set` entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SeedOverride {
/// Column name exactly as it was typed in the `set` clause.
pub column: String,
/// Which override form was used, together with its value(s).
pub kind: SeedOverrideKind,
}
/// The four `set` override forms (ADR-0048 D2).
///
/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
/// dates are quoted text per the D2 amendment); the `Generator` name is
/// a raw string validated at execution because `src/dsl` cannot depend
/// on `src/seed` (the curated vocabulary lives there).
///
/// Each variant corresponds to one selector token after the column:
/// `=` → `Fixed`, `in` → `PickList`, `as` → `Generator`,
/// `between` → `Range`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeedOverrideKind {
/// `set status = 'pending'` — every row gets the constant.
Fixed(Value),
/// `set role in ('admin', 'editor')` — uniform pick from the list.
/// Values are kept in the order they were written.
PickList(Vec<Value>),
/// `set work_addr as email` — force the named generator (D9).
/// Holds the name as typed; it is not checked against the vocabulary
/// at this layer.
Generator(String),
/// `set price between 10 and 100` — uniform in `[low, high]`;
/// numeric or (quoted) date bounds per the destination column type.
/// `low` is the bound written before `and`, `high` the one after.
Range { low: Value, high: Value },
}
/// A complex WHERE expression (ADR-0026 §4).
///
/// Built by `grammar::expr::build_expr` from the flat
@@ -949,6 +1000,7 @@ impl Command {
} => "show index",
Self::ShowList { kind, .. } => kind.command_name(),
Self::Insert { .. } => "insert into",
Self::Seed { .. } => "seed",
Self::Update { .. } => "update",
Self::Delete { .. } => "delete from",
Self::ShowData { .. } => "show data",
@@ -997,6 +1049,7 @@ impl Command {
| Self::AddConstraint { table, .. }
| Self::DropConstraint { table, .. }
| Self::Insert { table, .. }
| Self::Seed { table, .. }
| Self::Update { table, .. }
| Self::Delete { table, .. } => table,
// For relationships we focus on the parent (1-side):
+346 -1
View File
@@ -24,7 +24,9 @@
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
use crate::dsl::command::{
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
};
use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{
@@ -425,6 +427,152 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
];
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
// =================================================================
// seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
// column-fill)
// =================================================================
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
/// non-negative integer).
///
/// The node itself is a plain number literal; the optionality comes from
/// `SEED_NODES`, which wraps it in `Node::Optional`. When the count is
/// absent `build_seed` produces `count: None`.
const SEED_COUNT: Node = Node::NumberLit {
validator: Some(LIMIT_VALIDATOR),
};
/// `--seed <n>` — a reproducible-generation flag carrying a numeric
/// seed (ADR-0048 D4). The only flag in the DSL that takes a value;
/// `build_seed` reads the number immediately after the flag.
///
/// The element order is load-bearing: `build_seed` takes the matched
/// item at `flag index + 1` as the seed value, so the number literal
/// must directly follow the flag.
const SEED_FLAG_NODES: &[Node] = &[
Node::Flag("seed"),
Node::NumberLit {
validator: Some(LIMIT_VALIDATOR),
},
];
/// The flag and its value as one sequence; made optional by `SEED_NODES`.
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
// form 2) ----------------------------------------------------
//
// `seed users.email …` fills one column across existing rows. The
// table ident stops at `.` (idents are alnum/underscore), so an
// `Optional(Seq['.', column])` after the table cleanly discriminates:
// when the next token is not `.`, the `Punct('.')` first-child
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
// column propagates as the user mid-typing `seed users.` (driver
// `walk_optional` semantics). The column resolves against
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
/// The column ident of the column-fill form `seed <T>.<col>`.
///
/// `build_seed` looks this ident up by its role (`seed_target_column`)
/// to fill `Command::Seed::target_column`, so the role string must stay
/// in sync with the builder. Every `writes_*` switch is off.
const SEED_TARGET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_target_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `.` followed by the target column — the body of the optional suffix.
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
/// The optional `.<column>` suffix placed right after the table name in
/// `SEED_NODES`.
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
//
// Each override pins one column's generation. The column slot
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
// same `current_column_value` dispatch `update … set` uses) narrow to
// the column's type — so list/range/fixed values get the column's
// typed slot (quoted text, unquoted number, quoted date) and a
// type-mismatched literal is flagged. The four tails each start with a
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
// discriminates cleanly (no Optional-first branch).
/// The `set <col>` column slot. Distinct role from `update`'s
/// `update_set_column` and the expression `expr_column`.
///
/// `build_seed_overrides` recognises the start of each override by this
/// role (`seed_set_column`), so the role string must stay in sync with
/// the builder.
const SEED_SET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_set_column",
validator: None,
highlight_override: None,
writes_table: false,
// The only switch that is on: per the `set`-clause notes above, it is
// what lets the following `PER_COLUMN_VALUE` slots narrow to this
// column's type.
writes_column: true,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `as <generator>` — the curated generator-name vocabulary (D9),
/// highlighted in the `tok_function` colour. The slot is structural
/// (any identifier matches); the name is validated at execution and
/// flagged live by the validity indicator.
///
/// `parse_seed_override_tail` requires the token after `as` to carry
/// this role (`seed_generator`) and stores its text verbatim in
/// `SeedOverrideKind::Generator`.
const SEED_GENERATOR: Node = Node::Ident {
source: IdentSource::Generators,
role: "seed_generator",
validator: None,
// Forces the `Function` highlight class for this slot.
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `= <value>` — a fixed constant for every row.
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
///
/// This is the comma-separated value list inside the parentheses;
/// `min: 1` means an empty `in ()` does not match.
const SEED_OV_IN_VALUES: Node = Node::Repeated {
inner: &PER_COLUMN_VALUE,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// The full `in ( … )` tail: keyword, opening paren, value list, closing
/// paren.
const SEED_OV_IN_NODES: &[Node] = &[
Node::Word(Word::keyword("in")),
Node::Punct('('),
SEED_OV_IN_VALUES,
Node::Punct(')'),
];
/// `between <value> and <value>` — uniform in the (typed) range.
const SEED_OV_BETWEEN_NODES: &[Node] = &[
Node::Word(Word::keyword("between")),
PER_COLUMN_VALUE,
Node::Word(Word::keyword("and")),
PER_COLUMN_VALUE,
];
/// `as <generator>` — force a named generator.
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
/// The four override tails. Each branch opens with a different token
/// (`=`, `in`, `between`, `as`), which is what `parse_seed_override_tail`
/// dispatches on.
const SEED_OV_TAIL_CHOICES: &[Node] = &[
Node::Seq(SEED_OV_FIXED_NODES),
Node::Seq(SEED_OV_IN_NODES),
Node::Seq(SEED_OV_BETWEEN_NODES),
Node::Seq(SEED_OV_AS_NODES),
];
/// Exactly one of the four tails.
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
/// One override: the column slot followed by its tail.
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
/// One or more overrides separated by commas (`min: 1` — a bare `set`
/// with nothing after it does not match).
const SEED_OVERRIDES: Node = Node::Repeated {
inner: &SEED_OVERRIDE,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// The whole clause: the `set` keyword, then the override list.
/// `build_seed` locates the clause by this `set` word.
const SEED_SET_CLAUSE_NODES: &[Node] =
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
/// Everything after the `seed` keyword, in order: table name, optional
/// `.<column>`, optional count, optional `set` clause, optional
/// `--seed <n>`.
///
/// `build_seed` depends on this order: it treats the first number
/// literal that precedes both `set` and the flag as the count.
const SEED_NODES: &[Node] = &[
// `writes_table` so the `.column` target, the `set <col>=…`
// clause's column slots, and the typed value slots all resolve
// against this table.
TABLE_NAME_WRITES,
SEED_DOT_COLUMN,
Node::Optional(&SEED_COUNT),
Node::Optional(&SEED_SET_CLAUSE),
Node::Optional(&SEED_FLAG),
];
/// The `seed` command's grammar shape, referenced by the `SEED` command
/// node.
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
const UPDATE_NODES: &[Node] = &[
TABLE_NAME_WRITES,
Node::Word(Word::keyword("set")),
@@ -708,6 +856,195 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
})
}
/// Turn a matched `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
/// path into a [`Command::Seed`] (ADR-0048, SD1 + SD2 Phase 2).
///
/// - `table` is the mandatory `table_name` ident.
/// - `target_column` is the `seed_target_column` ident, which is only
///   present for the column-fill form `seed <T>.<col>` (D1 form 2).
/// - `rng_seed` is the number literal sitting directly after the
///   `--seed` flag (D4).
/// - `count` is the first number literal found before whichever of the
///   `set` keyword or the `--seed` flag comes first. Limiting the search
///   that way stops a value such as the `18` in
///   `set age between 18 and 80` from being read as the count.
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
///
/// # Errors
///
/// Fails when the table ident is missing, when the seed or count text
/// does not parse as a `u64`, or when the `set` clause cannot be folded.
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let items = &path.items;
    let table = require_ident(path, "table_name")?;
    let target_column = ident_text(path, "seed_target_column").map(str::to_string);

    // One pass records the first `--seed` flag and the first `set` word.
    let mut flag_idx = None;
    let mut set_idx = None;
    for (idx, item) in items.iter().enumerate() {
        match &item.kind {
            MatchedKind::Flag("seed") if flag_idx.is_none() => flag_idx = Some(idx),
            MatchedKind::Word("set") if set_idx.is_none() => set_idx = Some(idx),
            _ => {}
        }
    }

    // The seed value is whatever number literal directly follows the flag.
    let rng_seed = match flag_idx.and_then(|at| items.get(at + 1)) {
        Some(item) if matches!(item.kind, MatchedKind::NumberLit) => {
            Some(parse_seed_u64(&item.text)?)
        }
        _ => None,
    };

    // Only number literals ahead of the `set` clause and the flag can be
    // the count, so numbers inside `set` (e.g. `between 18 and 80`) are
    // never picked up.
    let count_boundary = match (set_idx, flag_idx) {
        (Some(set_at), Some(flag_at)) => set_at.min(flag_at),
        (Some(only), None) | (None, Some(only)) => only,
        (None, None) => items.len(),
    };
    let count = items
        .iter()
        .take(count_boundary)
        .find(|item| matches!(item.kind, MatchedKind::NumberLit))
        .map(|item| parse_seed_u64(&item.text))
        .transpose()?;

    let overrides = build_seed_overrides(path, set_idx, flag_idx)?;

    Ok(Command::Seed {
        table,
        target_column,
        count,
        overrides,
        rng_seed,
    })
}
/// Collect the [`SeedOverride`]s of a `set` clause from the flat matched
/// terminals (ADR-0048 D2).
///
/// The clause region starts just after `Word("set")` and stops at the
/// `--seed` flag, or at the end of the path when there is no flag. Within
/// it, every `seed_set_column` ident opens one override and the token
/// right after it chooses the form (`=` / `in` / `between` / `as`). Any
/// token that is not such a column ident — in practice the top-level
/// commas between overrides — is stepped over; the `in (...)` form eats
/// its own inner commas up to `)`.
///
/// Returns an empty list when there is no `set` clause (`set_idx` is
/// `None`).
fn build_seed_overrides(
    path: &MatchedPath,
    set_idx: Option<usize>,
    flag_idx: Option<usize>,
) -> Result<Vec<SeedOverride>, ValidationError> {
    let clause_start = match set_idx {
        Some(at) => at + 1,
        None => return Ok(Vec::new()),
    };
    let clause_end = flag_idx.unwrap_or(path.items.len());
    let region = &path.items[clause_start..clause_end];

    let mut overrides = Vec::new();
    let mut cursor = 0;
    while let Some(item) = region.get(cursor) {
        // Step past this token; for a column ident that leaves the cursor
        // on the override's selector token.
        cursor += 1;
        let opens_override = matches!(
            &item.kind,
            MatchedKind::Ident {
                role: "seed_set_column",
                ..
            }
        );
        if !opens_override {
            continue;
        }
        let column = item.text.clone();
        let kind = parse_seed_override_tail(region, &mut cursor, &column)?;
        overrides.push(SeedOverride { column, kind });
    }
    Ok(overrides)
}
/// Decode the tail of a single `set` override.
///
/// On entry `region[*i]` is the token right after the column ident; that
/// token selects the form (`=`, `in`, `between`, `as`). On success `*i`
/// is left just past the last token the form consumed.
///
/// # Errors
///
/// Returns the `seed_set_error` drift-guard error for `column` when the
/// selector token is missing or unrecognised, when an expected value is
/// missing, or when `as` is not followed by a `seed_generator` ident.
fn parse_seed_override_tail(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<SeedOverrideKind, ValidationError> {
    let Some(selector) = region.get(*i) else {
        return Err(seed_set_error(column));
    };
    match &selector.kind {
        MatchedKind::Punct('=') => {
            *i += 1; // `=`
            seed_take_value(region, i, column).map(SeedOverrideKind::Fixed)
        }
        MatchedKind::Word("in") => {
            *i += 1; // `in`
            // The opening paren is stepped over when present.
            if let Some(MatchedKind::Punct('(')) = region.get(*i).map(|t| &t.kind) {
                *i += 1;
            }
            let mut picks = Vec::new();
            loop {
                match region.get(*i).map(|t| &t.kind) {
                    // Region exhausted without a closing paren.
                    None => break,
                    Some(MatchedKind::Punct(')')) => {
                        *i += 1;
                        break;
                    }
                    Some(MatchedKind::Punct(',')) => *i += 1,
                    Some(_) => picks.push(seed_take_value(region, i, column)?),
                }
            }
            Ok(SeedOverrideKind::PickList(picks))
        }
        MatchedKind::Word("between") => {
            *i += 1; // `between`
            let low = seed_take_value(region, i, column)?;
            // The `and` separator is stepped over when present.
            if let Some(MatchedKind::Word("and")) = region.get(*i).map(|t| &t.kind) {
                *i += 1;
            }
            let high = seed_take_value(region, i, column)?;
            Ok(SeedOverrideKind::Range { low, high })
        }
        MatchedKind::Word("as") => {
            *i += 1; // `as`
            match region.get(*i) {
                Some(token)
                    if matches!(
                        token.kind,
                        MatchedKind::Ident {
                            role: "seed_generator",
                            ..
                        }
                    ) =>
                {
                    *i += 1;
                    Ok(SeedOverrideKind::Generator(token.text.clone()))
                }
                _ => Err(seed_set_error(column)),
            }
        }
        _ => Err(seed_set_error(column)),
    }
}
/// Read the value literal at `region[*i]` and step `*i` past it.
///
/// The typed value slots of the grammar match value literals only: a
/// bare unquoted word fails the slot and is rejected *before* this fold
/// runs, which is how D2's quoting requirement is enforced structurally.
/// Reaching a non-literal (or the end of the region) here therefore
/// points at grammar/builder drift, and the `Err` acts as a drift guard
/// (mirrors `expr::build_expr`). `*i` is left untouched on error.
fn seed_take_value(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<Value, ValidationError> {
    match region.get(*i).and_then(|item| item_to_value(item)) {
        Some(value) => {
            *i += 1;
            Ok(value)
        }
        None => Err(seed_set_error(column)),
    }
}
/// Build the drift-guard error used by the `set`-clause fold (see
/// `seed_take_value`): a `parse.error_wrapper` message whose `detail`
/// names the offending column.
fn seed_set_error(column: &str) -> ValidationError {
    let detail = format!("malformed `set` clause for `{column}`");
    ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", detail)],
    }
}
/// Parse the text of a count or `--seed` number literal as a `u64`.
///
/// # Errors
///
/// Text that is not a valid `u64` yields a
/// `parse.custom.bind_type_mismatch` error that reports the offending
/// text as `found` and `non-negative integer` as `expected`.
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
    match text.parse::<u64>() {
        Ok(number) => Ok(number),
        Err(_) => {
            let found = ("found", text.to_string());
            let expected = ("expected", "non-negative integer".to_string());
            Err(ValidationError {
                message_key: "parse.custom.bind_type_mismatch",
                args: vec![found, expected],
            })
        }
    }
}
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
let table = require_ident(path, "table_name")?;
@@ -1452,6 +1789,14 @@ pub static SHOW: CommandNode = CommandNode {
"parse.usage.show_index",
],};
/// Command node for `seed` (ADR-0048): ties the `seed` entry keyword to
/// its grammar shape (`SEED_SHAPE`), its AST builder (`build_seed`), and
/// its help / usage message ids.
// NOTE(review): `help_id` and `usage_ids` presumably resolve to the
// `help.data.seed` and `parse.usage.seed` catalogue keys — confirm
// against the message-key registry.
pub static SEED: CommandNode = CommandNode {
entry: Word::keyword("seed"),
shape: SEED_SHAPE,
ast_builder: build_seed,
help_id: Some("data.seed"),
usage_ids: &["parse.usage.seed"],
};
pub static INSERT: CommandNode = CommandNode {
entry: Word::keyword("insert"),
shape: INSERT_SHAPE,
+17
View File
@@ -57,6 +57,12 @@ pub enum HighlightClass {
String,
Punct,
Flag,
/// A curated function-vocabulary name — the `seed … set <col> as
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
/// the existing `tok_function` colour (ADR-0022 Amd6 blue — no new
/// theme colour), assigned via a generator slot's
/// `highlight_override`, not by byte shape.
Function,
Error,
}
@@ -86,6 +92,14 @@ pub enum IdentSource {
/// content validator on column-type slots; not user-listable
/// from the schema.
Types,
/// Closed, curated set of fake-data generator names (ADR-0048
/// D9) — the `seed … set <col> as <generator>` slot. Like
/// `Types`, not user-listable from the schema; the vocabulary
/// lives in `src/seed` and the completion engine offers it. The
/// grammar slot is purely structural (matches any identifier);
/// an unknown name is flagged live (validity) and rejected at
/// execution.
Generators,
/// Any identifier shape; used by synthetic catch-all branches
/// (e.g., the unknown-value branch of `mode <value>`).
Free,
@@ -117,6 +131,7 @@ impl IdentSource {
Self::Relationships => "relationship name",
Self::Indexes => "index name",
Self::Types => "type",
Self::Generators => "generator name",
}
}
@@ -134,6 +149,7 @@ impl IdentSource {
"relationship name" => Some(Self::Relationships),
"index name" => Some(Self::Indexes),
"type" => Some(Self::Types),
"generator name" => Some(Self::Generators),
_ => None,
}
}
@@ -714,6 +730,7 @@ pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
(&ddl::CREATE, CommandCategory::Simple),
(&ddl::CREATE_M2N, CommandCategory::Simple),
(&data::SHOW, CommandCategory::Simple),
(&data::SEED, CommandCategory::Simple),
(&data::INSERT, CommandCategory::Simple),
(&data::UPDATE, CommandCategory::Simple),
(&data::DELETE, CommandCategory::Simple),
+1
View File
@@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String {
IdentSource::Relationships => "relationship name".to_string(),
IdentSource::Indexes => "index name".to_string(),
IdentSource::Types => "type".to_string(),
IdentSource::Generators => "generator name".to_string(),
IdentSource::NewName | IdentSource::Free => "identifier".to_string(),
},
Expectation::Punct(c) => format!("`{c}`"),
+10 -6
View File
@@ -18,17 +18,21 @@ const DEFAULT_LEN: usize = 10;
pub const MIN_LEN: usize = 10;
pub const MAX_LEN: usize = 12;
/// Generate a fresh shortid using thread-local RNG.
/// Generate a fresh shortid using the thread-local RNG.
#[must_use]
pub fn generate() -> String {
generate_len(DEFAULT_LEN)
generate_with_rng(&mut rand::rng())
}
/// Generate a shortid from a caller-supplied RNG.
///
/// Lets `seed --seed <n>` produce **reproducible** shortid values
/// (ADR-0048 D4) by threading its seeded RNG through, while the default
/// [`generate`] keeps its thread-RNG behaviour for ordinary inserts.
#[must_use]
fn generate_len(len: usize) -> String {
let mut rng = rand::rng();
let mut out = String::with_capacity(len);
for _ in 0..len {
pub fn generate_with_rng<R: RngExt + ?Sized>(rng: &mut R) -> String {
let mut out = String::with_capacity(DEFAULT_LEN);
for _ in 0..DEFAULT_LEN {
let idx = rng.random_range(0..ALPHABET.len());
out.push(ALPHABET[idx] as char);
}
+12
View File
@@ -240,6 +240,18 @@ mod tests {
);
}
#[test]
fn seed_generator_name_highlighted_as_function() {
    // ADR-0048 D9: the generator name in `set <col> as <gen>` gets the
    // `Function` highlight class through the slot's `highlight_override`,
    // so it renders in the shared `tok_function` colour.
    let runs = run("seed Members set role as email");
    let has_function_run = runs
        .iter()
        .any(|(_, _, class)| *class == HighlightClass::Function);
    assert!(
        has_function_run,
        "generator name `email` should be Function-highlighted: {runs:?}"
    );
}
#[test]
fn unknown_command_word_classified_by_byte_shape() {
// Walker doesn't engage; fallback classifies as Identifier.
+4
View File
@@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics(
IdentSource::Relationships
| IdentSource::Indexes
| IdentSource::Types
// `Generators` (the `set … as <gen>` slot, ADR-0048 D9) is a
// curated vocabulary; its unknown-name validity is handled by
// the completion-layer indicator, not this walker diagnostic.
| IdentSource::Generators
| IdentSource::Free => {}
}
}
+4
View File
@@ -87,6 +87,10 @@ pub enum AppEvent {
command: Command,
result: InsertResult,
},
DslSeedSucceeded {
command: Command,
result: crate::db::SeedResult,
},
DslUpdateSucceeded {
command: Command,
result: UpdateResult,
+5
View File
@@ -207,6 +207,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("help.ddl.rename", &[]),
("help.ddl.change", &[]),
("help.data.show", &[]),
("help.data.seed", &[]),
("help.data.insert", &[]),
("help.data.update", &[]),
("help.data.delete", &[]),
@@ -308,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("parse.usage.undo", &[]),
("parse.usage.save", &[]),
("parse.usage.select", &[]),
("parse.usage.seed", &[]),
("parse.usage.show_data", &[]),
("parse.usage.show_table", &[]),
("parse.usage.show_tables", &[]),
@@ -548,7 +550,10 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("ok.index_dropped_with_column", &["index"]),
("ok.rows_deleted", &["count"]),
("ok.rows_inserted", &["count"]),
("ok.rows_seeded", &["count", "table"]),
("ok.rows_updated", &["count"]),
("seed.capped", &["requested"]),
("seed.advisory_generic", &["columns", "column", "table"]),
// ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ----
("client_side.auto_fill_add_serial", &["count"]),
("client_side.auto_fill_add_shortid", &["count"]),
+24
View File
@@ -333,6 +333,17 @@ help:
show indexes — list all indexes
show relationship <name> — show one relationship's detail
show index <name> — show one index's detail
seed: |-
seed <T> [<count>] — fill a table with generated sample rows
(default 20). Existing rows are kept;
foreign keys draw from existing parent rows.
seed <T> ... set <c> = 'v' | in ('a','b') | as <gen> | between x and y
— pin how a column is generated: a fixed
value, a pick-list, a named generator
(email, name, product, ...), or a range.
seed <T>.<col> [set ...] — fill one column across the EXISTING rows
(the follow-up to `add column`).
seed <T> ... --seed <n> — reproducible: same data for the same n.
insert: |-
insert into <T> [(cols)] [values] (vals) — add a row
update: |-
@@ -569,6 +580,7 @@ parse:
change_column: |-
change column [in] [table] <Table>: <Name> (<Type>)
[--force-conversion | --dont-convert]
seed: "seed <Table> [count] [set <col> = ... | in (...) | as <gen> | between x and y] | seed <Table>.<col>"
show_data: "show data <Table>"
show_table: "show table <Table>"
show_tables: "show tables"
@@ -978,6 +990,17 @@ db:
# template couldn't provide. Re-introduce a key here if a non-English
# locale lands.)
# Seed-command notes (ADR-0048): the cap note when the unique-value
# space is exhausted, and the advisory that flags columns filled with
# generic text that look like fixed value sets.
seed:
capped: "(of {requested} requested — ran out of distinct value combinations)"
# ADR-0048 D13 (Phase 2/3 wording): name the generically-filled
# enum-ish / CHECK columns and point at the concrete repairs — the
# `set` clause on a fresh seed, or the column-fill form for the rows
# just created.
advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`."
ok:
# ADR-0040: the generic `[ok] <verb> <subject>` summary line was
# retired — a successful command's echo line now carries a ✓
@@ -985,6 +1008,7 @@ ok:
# per-operation row-count footers below still convey real payload
# and are unchanged.
rows_inserted: " {count} row(s) inserted"
rows_seeded: " {count} row(s) seeded into {table}"
rows_updated: " {count} row(s) updated"
rows_deleted: " {count} row(s) deleted"
# Shown beneath a `drop column --cascade` summary, once per
+98 -7
View File
@@ -84,16 +84,60 @@ pub fn render_input_runs_in_mode(
cache: &crate::completion::SchemaCache,
mode: Mode,
) -> Vec<StyledRun> {
let mut runs = lex_to_runs_in_mode(input, theme, mode);
// Identity feedback view — highlight/overlay the whole input.
render_input_runs_feedback(input, cursor_byte, theme, cache, mode, input, cursor_byte, 0)
}
/// [`render_input_runs_in_mode`] with a separate **feedback view** for
/// the walker-driven highlighting and overlays.
///
/// Under the `:` one-shot escape (ADR-0003) the buffer carries a leading
/// `:` that is not advanced SQL; `view` is the stripped SQL (and
/// `view_cursor` the cursor within it) so the walker highlights and
/// diagnoses the SQL itself, while the `:` prefix renders as plain text.
/// `offset` is the byte length stripped from the front — base runs and
/// overlay positions are shifted by it back into `input` coordinates.
/// Callers without a one-shot escape pass `(input, cursor, 0)` (what
/// [`render_input_runs_in_mode`] does).
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn render_input_runs_feedback(
input: &str,
cursor_byte: usize,
theme: &Theme,
cache: &crate::completion::SchemaCache,
mode: Mode,
view: &str,
view_cursor: usize,
offset: usize,
) -> Vec<StyledRun> {
// Base highlighting runs over the SQL view, shifted into buffer
// coordinates; the stripped prefix (the `:` + space) renders as
// plain foreground text.
let mut runs: Vec<StyledRun> = if offset == 0 {
lex_to_runs_in_mode(input, theme, mode)
} else {
let mut r = vec![StyledRun {
byte_range: (0, offset),
style: ratatui::style::Style::default().fg(theme.fg),
}];
r.extend(lex_to_runs_in_mode(view, theme, mode).into_iter().map(|run| {
StyledRun {
byte_range: (run.byte_range.0 + offset, run.byte_range.1 + offset),
..run
}
}));
r
};
if let InputState::DefiniteErrorAt(pos) =
classify_parse_result(parse_command_with_schema_in_mode(input, cache, mode))
classify_parse_result(parse_command_with_schema_in_mode(view, cache, mode))
{
overlay_error(&mut runs, pos, theme);
overlay_error(&mut runs, pos + offset, theme);
}
if let Some(inv) =
crate::completion::invalid_ident_at_cursor_in_mode(input, cursor_byte, cache, mode)
crate::completion::invalid_ident_at_cursor_in_mode(view, view_cursor, cache, mode)
{
overlay_error(&mut runs, inv.range.0, theme);
overlay_error(&mut runs, inv.range.0 + offset, theme);
}
// Schema-aware diagnostics (ADR-0027 §2): unknown table /
// column (ERROR), or a dubious comparison (WARNING), is
@@ -101,12 +145,12 @@ pub fn render_input_runs_in_mode(
// so a problem the user has typed past stays visible. The
// mode-aware walk picks up the SQL-specific diagnostics from
// ADR-0032 in advanced mode.
for diag in walker::input_diagnostics_in_mode(input, Some(cache), mode) {
for diag in walker::input_diagnostics_in_mode(view, Some(cache), mode) {
let colour = match diag.severity {
walker::Severity::Error => theme.tok_error,
walker::Severity::Warning => theme.warning,
};
overlay_span(&mut runs, diag.span, colour);
overlay_span(&mut runs, (diag.span.0 + offset, diag.span.1 + offset), colour);
}
inject_cursor(&mut runs, input, cursor_byte, theme);
runs
@@ -817,6 +861,9 @@ fn ambient_hint_core_in_mode(
crate::dsl::grammar::IdentSource::Tables => "table",
crate::dsl::grammar::IdentSource::Columns => "column",
crate::dsl::grammar::IdentSource::Relationships => "relationship",
// The `seed … set <col> as <gen>` curated vocabulary
// (ADR-0048 D9) flags an unknown name here.
crate::dsl::grammar::IdentSource::Generators => "generator",
// `NewName`, `Types`, `Free` are filtered out by
// `invalid_ident_at_cursor` (it only fires for
// known-set sources via `completes_from_schema`), so
@@ -1105,6 +1152,50 @@ mod tests {
assert!(reversed(&runs[0]));
}
#[test]
fn one_shot_colon_highlights_the_sql_and_overlays_no_error() {
    // ADR-0003 `:` one-shot. The feedback view is the SQL with the
    // `: ` prefix removed, so the walker sees plain advanced-mode SQL:
    // the prefix must come out as ordinary text and a valid query must
    // not be painted as an error.
    use crate::completion::{SchemaCache, TableColumn};
    use crate::dsl::types::Type;

    let theme = dark();

    // A one-table schema: `Customers(name text)`.
    let cache = {
        let mut schema = SchemaCache::default();
        schema.tables.push("Customers".into());
        schema.columns.push("name".into());
        schema
            .table_columns
            .insert("Customers".into(), vec![TableColumn::new("name", Type::Text)]);
        schema
    };

    let view = "select name from Customers";
    let input = ": select name from Customers";
    let offset = 2; // ": "

    // Cursor parked at the end of both the buffer and the view.
    let runs = render_input_runs_feedback(
        input,
        input.len(),
        &theme,
        &cache,
        Mode::Advanced,
        view,
        view.len(),
        offset,
    );

    let has_error_overlay = runs.iter().any(|r| r.style.fg == Some(theme.tok_error));
    assert!(
        !has_error_overlay,
        "a valid one-shot query must carry no error overlay: {runs:?}",
    );

    let keyword_starts_after_prefix = runs
        .iter()
        .any(|r| r.style.fg == Some(theme.tok_keyword) && r.byte_range.0 == offset);
    assert!(
        keyword_starts_after_prefix,
        "the `select` keyword (past the `: ` prefix) is keyword-coloured: {runs:?}",
    );

    let first_run_start = runs.first().unwrap().byte_range.0;
    assert_eq!(first_run_start, 0, "the `:` prefix is rendered from byte 0");
}
#[test]
fn keyword_token_takes_keyword_colour() {
let theme = dark();
+1
View File
@@ -23,6 +23,7 @@ pub mod output_render;
pub mod persistence;
pub mod project;
pub mod runtime;
pub mod seed;
pub mod theme;
pub mod type_change;
pub mod ui;
+16
View File
@@ -1492,6 +1492,10 @@ fn spawn_dsl_dispatch(
command: command.clone(),
result,
},
Ok(CommandOutcome::Seed(result)) => AppEvent::DslSeedSucceeded {
command: command.clone(),
result,
},
Ok(CommandOutcome::Update(result)) => AppEvent::DslUpdateSucceeded {
command: command.clone(),
result,
@@ -2364,6 +2368,7 @@ enum CommandOutcome {
ShowRelationship(Option<Box<crate::db::RelationshipDiagramData>>),
QueryPlan(QueryPlan),
Insert(InsertResult),
Seed(crate::db::SeedResult),
Update(UpdateResult),
Delete(DeleteResult),
ChangeColumn(ChangeColumnTypeResult),
@@ -2911,6 +2916,17 @@ async fn execute_command_typed(
.insert(table, columns, values, src)
.await
.map(CommandOutcome::Insert),
// ADR-0048 (SD1/SD2 Phase 2).
Command::Seed {
table,
target_column,
count,
overrides,
rng_seed,
} => database
.seed(table, target_column, count, overrides, rng_seed, src)
.await
.map(CommandOutcome::Seed),
Command::Update {
table,
assignments,
+193
View File
@@ -0,0 +1,193 @@
//! Parse a simple `<column> IN ('a', 'b', …)` CHECK into its allowed
//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds
//! from the permitted values instead of generic text. Anything more
//! complex (ranges, expressions, multi-column, non-literal items)
//! returns `None`; the executor then best-effort generates and lets a
//! violation surface through the friendly-error layer.
/// Pull the allowed values out of a CHECK shaped like `<column> IN ( … )`.
///
/// The `IN` keyword and the column name are matched without regard to
/// ASCII case, and the column may be wrapped in double quotes
/// (`"status"`). Each list entry has to be a single-quoted string
/// literal, where `''` stands for one embedded quote. An empty list, a
/// different left-hand side, or any other shape yields `None`.
#[must_use]
pub fn parse_in_check_values(check: &str, column: &str) -> Option<Vec<String>> {
    let (keyword_at, list_at) = find_in_paren(check)?;
    // Everything left of the keyword must be exactly the target column.
    let left_side = check[..keyword_at].trim();
    if lhs_is_column(left_side, column) {
        extract_quoted_list(&check[list_at..]).filter(|items| !items.is_empty())
    } else {
        None
    }
}
/// Whether `b` may appear in a bare identifier: an ASCII letter, an
/// ASCII digit, or an underscore.
const fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
/// Locate the first stand-alone `IN` keyword that sits outside any
/// single-quoted literal and is followed (after optional ASCII
/// whitespace) by `(`.
///
/// Returns `(byte index of the keyword, byte index of the "(")`, or
/// `None` when no such keyword exists. "Stand-alone" means the bytes on
/// either side of the two letters are not identifier bytes, so words
/// such as `min` or `index` never match.
fn find_in_paren(check: &str) -> Option<(usize, usize)> {
    let bytes = check.as_bytes();
    let mut inside_literal = false;
    for (pos, &byte) in bytes.iter().enumerate() {
        // A quote flips literal state; a doubled `''` flips it twice and
        // therefore leaves the scanner inside the literal.
        if byte == b'\'' {
            inside_literal = !inside_literal;
            continue;
        }
        if inside_literal {
            continue;
        }

        let starts_keyword = byte.eq_ignore_ascii_case(&b'i')
            && bytes
                .get(pos + 1)
                .is_some_and(|next| next.eq_ignore_ascii_case(&b'n'));
        if !starts_keyword {
            continue;
        }

        // Word-boundary checks on both sides of the two keyword bytes.
        let keyword_end = pos + 2;
        let bounded_left = pos
            .checked_sub(1)
            .is_none_or(|prev| !is_ident_byte(bytes[prev]));
        let bounded_right = bytes
            .get(keyword_end)
            .is_none_or(|&next| !is_ident_byte(next));
        if !(bounded_left && bounded_right) {
            continue;
        }

        // Skip the gap between the keyword and a possible `(`.
        let gap = bytes[keyword_end..]
            .iter()
            .take_while(|b| b.is_ascii_whitespace())
            .count();
        let paren_at = keyword_end + gap;
        if bytes.get(paren_at) == Some(&b'(') {
            return Some((pos, paren_at));
        }
    }
    None
}
/// Whether the left-hand side of the `IN` names `column`.
///
/// Surrounding whitespace is ignored, one pair of enclosing double
/// quotes is removed when both are present, and the comparison ignores
/// ASCII case.
fn lhs_is_column(lhs: &str, column: &str) -> bool {
    let candidate = lhs.trim();
    let bare = match candidate.strip_prefix('"') {
        // Only unwrap when the closing quote is there too; otherwise
        // compare the text untouched.
        Some(rest) => rest.strip_suffix('"').unwrap_or(candidate),
        None => candidate,
    };
    bare.eq_ignore_ascii_case(column)
}
/// Parse `( 'a', 'b', … )` from a string starting at `(` into the
/// unescaped literals.
///
/// Returns `None` when the text does not open with `(`, when any item
/// is not a pure single-quoted literal, when the list is never closed,
/// or when anything other than whitespace follows the closing `)`.
/// The last rule keeps a compound CHECK such as
/// `col IN ('a') AND other > 0` or `col IN ('a') IS NOT TRUE` from
/// being mistaken for a plain value list.
fn extract_quoted_list(s: &str) -> Option<Vec<String>> {
    let mut chars = s.chars().peekable();
    if chars.next()? != '(' {
        return None;
    }
    let mut values = Vec::new();
    loop {
        while chars.peek().is_some_and(|c| c.is_whitespace()) {
            chars.next();
        }
        // `peek()?` turns an unterminated list into `None`.
        match chars.peek()? {
            ')' => {
                chars.next();
                break;
            }
            '\'' => {
                let v = read_quoted(&mut chars)?;
                values.push(v);
                while chars.peek().is_some_and(|c| c.is_whitespace()) {
                    chars.next();
                }
                // After a literal only a separator or the closing paren
                // is acceptable.
                match chars.next()? {
                    ',' => {}
                    ')' => break,
                    _ => return None,
                }
            }
            _ => return None,
        }
    }
    // The list must be the whole remainder of the expression: trailing
    // operators or clauses mean this is not the simple `IN` shape.
    chars.all(char::is_whitespace).then_some(values)
}

/// Read a single-quoted string literal (cursor at the opening `'`),
/// unescaping `''` to `'`.
///
/// Returns `None` when the cursor is not on a `'` or when the input
/// ends before the closing quote.
fn read_quoted(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<String> {
    if chars.next()? != '\'' {
        return None;
    }
    let mut out = String::new();
    loop {
        match chars.next()? {
            '\'' => {
                // A quote directly followed by another quote is an
                // escaped quote; a lone quote ends the literal.
                if chars.peek() == Some(&'\'') {
                    chars.next();
                    out.push('\'');
                } else {
                    return Some(out);
                }
            }
            c => out.push(c),
        }
    }
}
// Unit tests for the `<column> IN ( … )` CHECK parser. Every case goes
// through the public entry point, `parse_in_check_values`.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
// The canonical shape: bare column, upper-case `IN`, two literals.
#[test]
fn parses_a_simple_in_check() {
assert_eq!(
parse_in_check_values("status IN ('active', 'closed')", "status"),
Some(vec!["active".to_string(), "closed".to_string()])
);
}
// Double-quoted column name, lower-case keyword, no spaces after commas.
#[test]
fn tolerates_a_quoted_column_and_lowercase_in() {
assert_eq!(
parse_in_check_values("\"status\" in ('a','b','c')", "status"),
Some(vec!["a".into(), "b".into(), "c".into()])
);
}
// `''` inside a literal decodes to a single quote.
#[test]
fn unescapes_embedded_quotes() {
assert_eq!(
parse_in_check_values("note IN ('it''s', 'ok')", "note"),
Some(vec!["it's".into(), "ok".into()])
);
}
// Separators and parentheses inside a literal are data, not structure.
#[test]
fn handles_commas_and_parens_inside_literals() {
assert_eq!(
parse_in_check_values("label IN ('a, b', 'c)d')", "label"),
Some(vec!["a, b".into(), "c)d".into()])
);
}
// Unquoted (numeric) items are outside the supported shape.
#[test]
fn rejects_non_literal_lists() {
assert_eq!(parse_in_check_values("n IN (1, 2, 3)", "n"), None);
}
// CHECKs without an `IN ( … )` at all.
#[test]
fn rejects_non_in_checks() {
assert_eq!(parse_in_check_values("age >= 0", "age"), None);
assert_eq!(parse_in_check_values("length(name) > 0", "name"), None);
}
// The list belongs to another column than the one asked about.
#[test]
fn rejects_when_lhs_is_a_different_column() {
assert_eq!(parse_in_check_values("status IN ('a')", "role"), None);
}
// NOTE(review): only the "inside a word" half of the name is asserted
// here; no case places `in (` inside a string literal.
#[test]
fn does_not_trip_on_in_inside_a_word_or_literal() {
// `min` contains "in" but is not the IN operator.
assert_eq!(parse_in_check_values("min(x) > 0", "x"), None);
}
}
+584
View File
@@ -0,0 +1,584 @@
//! Value production: turn a [`Generator`] + a seeded RNG into a
//! [`Value`] (ADR-0048 D8/D9). Realistic generators come from the
//! `fake` crate (English locale); `product` is hand-rolled (D9, no
//! commerce module exists); dates are generated against a **fixed
//! reference epoch** so a `--seed` run is fully reproducible without
//! depending on the wall clock (D8 bounded windows).
//!
//! The stateful markers ([`Generator::IdentitySequential`],
//! [`Generator::ForeignKeySample`]) are resolved by the executor with
//! database context; if one reaches here un-intercepted it falls back
//! to type-based generation rather than panicking.
use chrono::{Datelike, NaiveDate};
use fake::Fake;
use rand::RngExt;
use crate::dsl::types::Type;
use crate::dsl::value::Value;
use crate::seed::{Generator, SeedRng};
/// Fixed anchor for bounded date/datetime windows. Using a constant
/// (rather than `now()`) keeps `--seed` output reproducible across days
/// and makes tests deterministic. It advances with releases.
const REF_YEAR: i32 = 2025;
/// Month component of the reference anchor (1-based).
const REF_MONTH: u32 = 6;
/// Day-of-month component of the reference anchor.
const REF_DAY: u32 = 1;
/// `~3 years` window for "recent" dates, in days. A year is counted as
/// 365 days, so leap days are ignored.
const RECENT_WINDOW_DAYS: i64 = 3 * 365;
/// Adult birth window (≈18–80 years ago), in days: the nearest allowed
/// birth date is `ADULT_MIN_DAYS` before the anchor. Years are counted
/// as 365 days, so both bounds are approximate.
const ADULT_MIN_DAYS: i64 = 18 * 365;
/// Far end of the adult birth window: the earliest allowed birth date,
/// in days before the anchor.
const ADULT_MAX_DAYS: i64 = 80 * 365;
/// Produce one value for `generator` against destination type `ty`.
///
/// Every random draw goes through `rng`, so the result is a function of
/// the generator, the type and the RNG state. The named text generators
/// return `Value::Text` regardless of `ty`; `ty` only matters for
/// `CurrencyAmount`, `PickFrom`, `Range` and the type-based fallback.
///
/// The stateful markers (`IdentitySequential`, `ForeignKeySample`),
/// `Generic`, and an empty `PickFrom` list all fall through to
/// `generic_for_type`.
#[must_use]
pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Value {
use fake::faker::address::en as addr;
use fake::faker::company::en as company;
use fake::faker::internet::en as net;
use fake::faker::job::en as job;
use fake::faker::lorem::en as lorem;
use fake::faker::name::en as name;
use fake::faker::phone_number::en as phone;
match generator {
Generator::FirstName => Value::Text(name::FirstName().fake_with_rng(rng)),
Generator::LastName => Value::Text(name::LastName().fake_with_rng(rng)),
Generator::FullName => Value::Text(name::Name().fake_with_rng(rng)),
Generator::Email => Value::Text(net::FreeEmail().fake_with_rng(rng)),
Generator::Username => Value::Text(net::Username().fake_with_rng(rng)),
Generator::Password => Value::Text(net::Password(8..16).fake_with_rng(rng)),
Generator::Phone => Value::Text(phone::PhoneNumber().fake_with_rng(rng)),
Generator::City => Value::Text(addr::CityName().fake_with_rng(rng)),
Generator::Country => Value::Text(addr::CountryName().fake_with_rng(rng)),
Generator::StateName => Value::Text(addr::StateName().fake_with_rng(rng)),
Generator::Street => Value::Text(addr::StreetName().fake_with_rng(rng)),
Generator::ZipCode => Value::Text(addr::ZipCode().fake_with_rng(rng)),
Generator::Company => Value::Text(company::CompanyName().fake_with_rng(rng)),
Generator::JobTitle => Value::Text(job::Title().fake_with_rng(rng)),
Generator::ProductName => Value::Text(product_name(rng)),
Generator::Sentence => Value::Text(lorem::Sentence(5..12).fake_with_rng(rng)),
Generator::Paragraph => Value::Text(lorem::Paragraph(2..4).fake_with_rng(rng)),
Generator::Url => {
// Two draws, word first: the draw order is part of what a fixed
// seed reproduces.
let word: String = lorem::Word().fake_with_rng(rng);
let suffix: String = net::DomainSuffix().fake_with_rng(rng);
Value::Text(format!("https://{word}.{suffix}"))
}
// Hand-rolled — `fake`'s color module is feature-gated (it pulls
// an extra crate); a hex colour is trivial from the RNG.
// The half-open range covers every 24-bit value, rendered as six
// upper-case hex digits.
Generator::HexColor => Value::Text(format!("#{:06X}", rng.random_range(0..0x0100_0000))),
Generator::CurrencyAmount => currency_amount(ty, rng),
Generator::Age => Value::Number(rng.random_range(18..=80).to_string()),
Generator::SmallInt => Value::Number(rng.random_range(1..=100).to_string()),
Generator::DateRecent => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
Generator::DateAdult => {
Value::Text(format_date(random_past_date(rng, ADULT_MIN_DAYS, ADULT_MAX_DAYS)))
}
Generator::DateTimeRecent => Value::Text(random_recent_datetime(rng)),
Generator::Boolean => Value::Bool(rng.random_range(0..2) == 1),
// The guard keeps `pick` away from an empty slice; an empty list is
// handled by the fallback arm below.
Generator::PickFrom(values) if !values.is_empty() => {
let chosen: &String = pick(rng, values);
literal_to_value(chosen, ty)
}
// The `set <col> between low and high` override (D2). Bounds are
// interpreted per the destination type; the executor has already
// validated they parse, so a defensive parse failure here falls
// back to type-based generation rather than producing junk.
Generator::Range { low, high } => range_value(low, high, ty, rng),
// Un-intercepted markers + an empty pick list → type-based.
Generator::PickFrom(_)
| Generator::IdentitySequential
| Generator::ForeignKeySample
| Generator::Generic => generic_for_type(ty, rng),
}
}
/// Draw a value between `low` and `high` for the `between` override (D2).
///
/// The bounds are parsed according to the destination type `ty`:
/// whole numbers for `int`/`serial`, two-decimal numbers for
/// `real`/`decimal`, ISO dates for `date`, datetimes for `datetime`.
/// When the bounds do not parse, or the type has no notion of a range,
/// the type-based fallback is used instead. The executor validates the
/// bounds beforehand, so the fallback is purely defensive.
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
    let drawn = match ty {
        Type::Int | Type::Serial => parse_int_range(low, high)
            .map(|(min, max)| Value::Number(rng.random_range(min..=max).to_string())),
        Type::Real | Type::Decimal => parse_real_range(low, high).map(|(min, max)| {
            // Scale a unit-interval draw onto the requested span.
            let unit: f64 = rng.random();
            Value::Number(format!("{:.2}", unit.mul_add(max - min, min)))
        }),
        Type::Date => parse_date_range(low, high)
            .map(|(min, max)| Value::Text(format_date(random_date_between(rng, min, max)))),
        Type::DateTime => parse_datetime_range(low, high)
            .map(|(min, max)| Value::Text(random_datetime_between(rng, min, max))),
        // text / bool / blob / shortid have no range meaning.
        _ => None,
    };
    drawn.unwrap_or_else(|| generic_for_type(ty, rng))
}
/// Check that `low` and `high` are usable `between` bounds for `ty`.
///
/// The executor calls this *before* generating anything for a
/// `between` override (D2). A valid pair yields `None`; otherwise the
/// result is a short, human-readable reason that the executor wraps in
/// a friendly error naming the column. Types without a range meaning
/// (text / bool / blob / shortid) are always rejected.
#[must_use]
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
    // Pair each type's verdict with the hint shown when it fails.
    let (valid, hint) = match ty {
        Type::Int | Type::Serial => (
            parse_int_range(low, high).is_some(),
            "expected two whole numbers, e.g. `between 1 and 100`",
        ),
        Type::Real | Type::Decimal => (
            parse_real_range(low, high).is_some(),
            "expected two numbers, e.g. `between 1.0 and 9.99`",
        ),
        Type::Date => (
            parse_date_range(low, high).is_some(),
            "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`",
        ),
        Type::DateTime => (
            parse_datetime_range(low, high).is_some(),
            "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`",
        ),
        Type::Text | Type::Bool | Type::Blob | Type::ShortId => (
            false,
            "a `between` range only applies to numeric and date/datetime columns",
        ),
    };
    if valid {
        None
    } else {
        Some(hint.to_string())
    }
}
/// Read both bounds as `i64` (surrounding whitespace ignored) and
/// return them as `(smaller, larger)`. `None` if either bound is not
/// an integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    Some((first.min(second), first.max(second)))
}
/// Read both bounds as finite `f64` values (surrounding whitespace
/// ignored) and return them in ascending order. `None` if either bound
/// fails to parse or is infinite / NaN.
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let parse_finite = |text: &str| {
        text.trim()
            .parse::<f64>()
            .ok()
            .filter(|number| number.is_finite())
    };
    let first = parse_finite(low)?;
    let second = parse_finite(high)?;
    if first > second {
        Some((second, first))
    } else {
        Some((first, second))
    }
}
/// Read both bounds as `YYYY-MM-DD` dates (surrounding whitespace
/// ignored) and return them in chronological order. `None` if either
/// bound does not parse.
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
    let parse = |text: &str| NaiveDate::parse_from_str(text.trim(), "%Y-%m-%d").ok();
    let first = parse(low)?;
    let second = parse(high)?;
    Some(if first > second {
        (second, first)
    } else {
        (first, second)
    })
}
/// Parse one datetime bound, trying the `T`-separated spelling first
/// and the space-separated spelling second — the two forms the app
/// validates (`bind_datetime` / `validate_datetime`). Surrounding
/// whitespace is ignored.
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
    const ACCEPTED_LAYOUTS: [&str; 2] = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"];
    let text = s.trim();
    ACCEPTED_LAYOUTS
        .iter()
        .find_map(|layout| chrono::NaiveDateTime::parse_from_str(text, layout).ok())
}
/// Parse both datetime bounds with `parse_one_datetime` and return
/// them in chronological order. `None` if either bound does not parse.
fn parse_datetime_range(
    low: &str,
    high: &str,
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
    let first = parse_one_datetime(low)?;
    let second = parse_one_datetime(high)?;
    if first <= second {
        Some((first, second))
    } else {
        Some((second, first))
    }
}
/// Pick a date in the inclusive span `[lo, hi]` by drawing a day
/// number (days since the common era) between the two bounds.
///
/// Falls back to `lo` if the drawn day number does not map to a date.
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
    let day_span = lo.num_days_from_ce()..=hi.num_days_from_ce();
    let chosen_day = rng.random_range(day_span);
    match NaiveDate::from_num_days_from_ce_opt(chosen_day) {
        Some(date) => date,
        None => lo,
    }
}
/// Pick a datetime in the inclusive span `[lo, hi]` at one-second
/// resolution and render it as `YYYY-MM-DDTHH:MM:SS`.
///
/// The bounds are compared as UTC timestamps and swapped if they
/// arrive reversed. If the drawn timestamp cannot be turned back into
/// a datetime, `lo` is rendered instead.
fn random_datetime_between(
    rng: &mut SeedRng,
    lo: chrono::NaiveDateTime,
    hi: chrono::NaiveDateTime,
) -> String {
    let start = lo.and_utc().timestamp();
    let end = hi.and_utc().timestamp();
    let (earliest, latest) = if start <= end {
        (start, end)
    } else {
        (end, start)
    };
    let secs = rng.random_range(earliest..=latest);
    let picked = match chrono::DateTime::from_timestamp(secs, 0) {
        Some(moment) => moment.naive_utc(),
        None => lo,
    };
    picked.format("%Y-%m-%dT%H:%M:%S").to_string()
}
/// Fallback generation driven only by the column type (D8).
///
/// Produces a value for every type except `blob`, which yields NULL:
/// a few lorem words for text, a generated short id, a whole number in
/// `1..=10_000` for `int`/`serial`, two-decimal amounts for
/// `real`/`decimal`, a coin flip for bool, and a date or datetime from
/// the "recent" window. `blob`/`serial`/`shortid` are normally dealt
/// with by the executor (autogen / block guard) before reaching here.
fn generic_for_type(ty: Type, rng: &mut SeedRng) -> Value {
    use fake::faker::lorem::en as lorem;
    match ty {
        Type::Blob => Value::Null,
        Type::Bool => Value::Bool(rng.random_range(0..2) == 1),
        Type::Int | Type::Serial => Value::Number(rng.random_range(1..=10_000).to_string()),
        Type::Real => {
            // An integer number of hundredths, shown with two decimals.
            let amount: f64 = rng.random_range(0..100_000) as f64 / 100.0;
            Value::Number(format!("{amount:.2}"))
        }
        Type::Decimal => {
            // Whole part is drawn first, then the two-digit fraction.
            let whole = rng.random_range(0..10_000);
            let fraction = rng.random_range(0..100);
            Value::Number(format!("{whole}.{fraction:02}"))
        }
        Type::Text => {
            let lorem_words: Vec<String> = lorem::Words(2..4).fake_with_rng(rng);
            Value::Text(lorem_words.join(" "))
        }
        Type::ShortId => Value::Text(crate::dsl::shortid::generate_with_rng(rng)),
        Type::Date => {
            let day = random_past_date(rng, 0, RECENT_WINDOW_DAYS);
            Value::Text(format_date(day))
        }
        Type::DateTime => Value::Text(random_recent_datetime(rng)),
    }
}
/// Give a fixed-list literal the `Value` shape that suits `ty` (used by
/// `PickFrom` — enum / `IN`-CHECK values).
///
/// Numeric types carry the text through as a `Number`; bool is true
/// only for `1` or any ASCII-case spelling of `true`; everything else
/// stays text.
fn literal_to_value(s: &str, ty: Type) -> Value {
    if matches!(ty, Type::Int | Type::Serial | Type::Real | Type::Decimal) {
        return Value::Number(s.to_string());
    }
    if matches!(ty, Type::Bool) {
        return Value::Bool(s == "1" || s.eq_ignore_ascii_case("true"));
    }
    Value::Text(s.to_string())
}
/// Generate a money-like amount between 1 and 1000.
///
/// `real`/`decimal` columns get a two-digit fraction appended (one
/// extra draw); every other type gets the whole amount only.
fn currency_amount(ty: Type, rng: &mut SeedRng) -> Value {
    // The whole part is always the first draw, whatever the type.
    let whole = rng.random_range(1..=1_000);
    if matches!(ty, Type::Real | Type::Decimal) {
        let cents = rng.random_range(0..100);
        Value::Number(format!("{whole}.{cents:02}"))
    } else {
        // int / serial / anything else numeric → whole amount.
        Value::Number(whole.to_string())
    }
}
// — the hand-rolled `product` generator (D9) —
// Three word lists of 20 entries each; `product_name` combines one word
// from each as "<adjective> <material> <noun>".
/// Leading word of a product name.
const PRODUCT_ADJECTIVES: &[&str] = &[
"Sleek", "Rustic", "Ergonomic", "Handcrafted", "Refined", "Modern",
"Vintage", "Compact", "Premium", "Lightweight", "Durable", "Elegant",
"Sturdy", "Smooth", "Gorgeous", "Intelligent", "Practical", "Awesome",
"Incredible", "Recycled",
];
/// Middle word of a product name.
const PRODUCT_MATERIALS: &[&str] = &[
"Wooden", "Copper", "Granite", "Cotton", "Steel", "Leather", "Bamboo",
"Plastic", "Ceramic", "Glass", "Concrete", "Rubber", "Bronze", "Marble",
"Linen", "Silk", "Aluminum", "Wool", "Gold", "Carbon",
];
/// Final word of a product name.
const PRODUCT_NOUNS: &[&str] = &[
"Chair", "Lamp", "Table", "Bottle", "Backpack", "Keyboard", "Mug",
"Shoes", "Jacket", "Watch", "Wallet", "Bench", "Hat", "Gloves",
"Towel", "Ball", "Bike", "Knife", "Pillow", "Blanket",
];
/// Build a three-word product name, "<adjective> <material> <noun>",
/// drawing the words in that order.
fn product_name(rng: &mut SeedRng) -> String {
    let adjective = pick(rng, PRODUCT_ADJECTIVES);
    let material = pick(rng, PRODUCT_MATERIALS);
    let noun = pick(rng, PRODUCT_NOUNS);
    [*adjective, *material, *noun].join(" ")
}
// — bounded dates (D8) —
/// The fixed anchor date assembled from `REF_YEAR` / `REF_MONTH` /
/// `REF_DAY`.
///
/// # Panics
///
/// Panics if those constants do not form a valid calendar date.
const fn reference_date() -> NaiveDate {
    if let Some(anchor) = NaiveDate::from_ymd_opt(REF_YEAR, REF_MONTH, REF_DAY) {
        anchor
    } else {
        panic!("reference date constants must be valid")
    }
}
/// Pick a date that lies between `min_days_ago` and `max_days_ago`
/// (both inclusive) before the reference date.
///
/// An offset that does not fit in `i32` is treated as zero, and a day
/// number that does not map to a date falls back to the reference date.
fn random_past_date(rng: &mut SeedRng, min_days_ago: i64, max_days_ago: i64) -> NaiveDate {
    let anchor = reference_date();
    let drawn_offset = rng.random_range(min_days_ago..=max_days_ago);
    let offset = i32::try_from(drawn_offset).unwrap_or(0);
    let target_day = anchor.num_days_from_ce() - offset;
    NaiveDate::from_num_days_from_ce_opt(target_day).unwrap_or(anchor)
}
/// Render a date in ISO `YYYY-MM-DD` form.
fn format_date(date: NaiveDate) -> String {
    let rendered = date.format("%Y-%m-%d");
    rendered.to_string()
}
/// Build a recent datetime string, `YYYY-MM-DDTHH:MM:SS`: a date from
/// the "recent" window followed by an independently drawn hour, minute
/// and second.
fn random_recent_datetime(rng: &mut SeedRng) -> String {
    // Draw order (date, hour, minute, second) is part of what a fixed
    // seed reproduces.
    let day = format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS));
    let hour = rng.random_range(0..24);
    let minute = rng.random_range(0..60);
    let second = rng.random_range(0..60);
    format!("{day}T{hour:02}:{minute:02}:{second:02}")
}
/// Return a randomly chosen element of `items`.
///
/// The slice must be non-empty: the index is drawn from
/// `0..items.len()`, which is an empty range for an empty slice.
fn pick<'a, T>(rng: &mut SeedRng, items: &'a [T]) -> &'a T {
    let index = rng.random_range(0..items.len());
    &items[index]
}
#[cfg(test)]
mod tests {
use super::*;
use crate::seed::make_rng;
use pretty_assertions::assert_eq;
fn gen_once(generator: &Generator, ty: Type, seed: u64) -> Value {
let mut rng = make_rng(Some(seed));
generate_value(generator, ty, &mut rng)
}
#[test]
fn generation_is_deterministic_for_a_fixed_seed() {
for generator in [
Generator::FullName,
Generator::Email,
Generator::ProductName,
Generator::DateRecent,
Generator::CurrencyAmount,
] {
let a = gen_once(&generator, Type::Text, 7);
let b = gen_once(&generator, Type::Text, 7);
assert_eq!(a, b, "{generator:?} must reproduce for a fixed seed");
}
}
#[test]
fn text_generators_produce_nonempty_text() {
for generator in [
Generator::FirstName,
Generator::LastName,
Generator::FullName,
Generator::Email,
Generator::Username,
Generator::Company,
Generator::City,
Generator::ProductName,
] {
let v = gen_once(&generator, Type::Text, 3);
match v {
Value::Text(s) => assert!(!s.trim().is_empty(), "{generator:?} produced empty text"),
other => panic!("{generator:?} produced non-text {other:?}"),
}
}
}
#[test]
fn email_looks_like_an_email() {
let v = gen_once(&Generator::Email, Type::Text, 11);
let Value::Text(s) = v else { panic!("not text") };
assert!(s.contains('@'), "email should contain @: {s}");
}
#[test]
fn product_name_is_three_capitalised_words() {
let v = gen_once(&Generator::ProductName, Type::Text, 99);
let Value::Text(s) = v else { panic!("not text") };
let words: Vec<&str> = s.split(' ').collect();
assert_eq!(words.len(), 3, "product name should be 3 words: {s}");
for w in words {
assert!(w.chars().next().unwrap().is_ascii_uppercase(), "word `{w}` not capitalised");
}
}
#[test]
fn recent_dates_fall_within_the_bounded_window() {
let mut rng = make_rng(Some(1));
let earliest = reference_date()
.checked_sub_days(chrono::Days::new(RECENT_WINDOW_DAYS as u64))
.unwrap();
let latest = reference_date();
for _ in 0..200 {
let v = generate_value(&Generator::DateRecent, Type::Date, &mut rng);
let Value::Text(s) = v else { panic!("date not text") };
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date");
assert!(d >= earliest && d <= latest, "date {d} outside recent window");
}
}
#[test]
fn dob_dates_fall_within_the_adult_window() {
let mut rng = make_rng(Some(2));
let earliest = reference_date()
.checked_sub_days(chrono::Days::new(ADULT_MAX_DAYS as u64))
.unwrap();
let latest = reference_date()
.checked_sub_days(chrono::Days::new(ADULT_MIN_DAYS as u64))
.unwrap();
for _ in 0..200 {
let v = generate_value(&Generator::DateAdult, Type::Date, &mut rng);
let Value::Text(s) = v else { panic!("date not text") };
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date");
assert!(d >= earliest && d <= latest, "dob {d} outside adult window");
}
}
#[test]
fn datetime_is_iso_shaped() {
let v = gen_once(&Generator::DateTimeRecent, Type::DateTime, 5);
let Value::Text(s) = v else { panic!("not text") };
assert!(s.contains('T'), "datetime needs a T separator: {s}");
// Parses as a naive datetime.
chrono::NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S")
.unwrap_or_else(|e| panic!("invalid datetime {s}: {e}"));
}
#[test]
fn currency_is_whole_for_int_and_fractional_for_decimal() {
let Value::Number(int_amt) = gen_once(&Generator::CurrencyAmount, Type::Int, 4) else {
panic!("not a number")
};
assert!(!int_amt.contains('.'), "int currency should be whole: {int_amt}");
let Value::Number(dec_amt) = gen_once(&Generator::CurrencyAmount, Type::Decimal, 4) else {
panic!("not a number")
};
assert!(dec_amt.contains('.'), "decimal currency should have cents: {dec_amt}");
}
#[test]
fn age_is_in_human_range() {
let mut rng = make_rng(Some(8));
for _ in 0..100 {
let Value::Number(a) = generate_value(&Generator::Age, Type::Int, &mut rng) else {
panic!("age not a number")
};
let n: i64 = a.parse().unwrap();
assert!((18..=80).contains(&n), "age {n} out of range");
}
}
#[test]
fn pick_from_chooses_a_listed_value() {
let generator = Generator::PickFrom(vec!["active".into(), "closed".into()]);
let mut rng = make_rng(Some(6));
for _ in 0..50 {
let Value::Text(s) = generate_value(&generator, Type::Text, &mut rng) else {
panic!("not text")
};
assert!(matches!(s.as_str(), "active" | "closed"), "unexpected pick {s}");
}
}
#[test]
fn pick_from_wraps_numeric_values_as_numbers() {
let generator = Generator::PickFrom(vec!["1".into(), "2".into(), "3".into()]);
let mut rng = make_rng(Some(6));
let v = generate_value(&generator, Type::Int, &mut rng);
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
}
#[test]
fn int_range_stays_within_inclusive_bounds() {
let g = Generator::Range { low: "10".into(), high: "20".into() };
let mut rng = make_rng(Some(5));
for _ in 0..200 {
let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
panic!("int range should be a number")
};
let n: i64 = s.parse().unwrap();
assert!((10..=20).contains(&n), "int {n} out of [10,20]");
}
}
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
let g = Generator::Range { low: "1.0".into(), high: "9.0".into() };
let mut rng = make_rng(Some(5));
for _ in 0..200 {
let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else {
panic!("real range should be a number")
};
let n: f64 = s.parse().unwrap();
assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
assert!(s.contains('.'), "real should be formatted with cents: {s}");
}
}
#[test]
fn date_range_stays_within_quoted_bounds() {
let g = Generator::Range {
low: "2023-01-01".into(),
high: "2023-12-31".into(),
};
let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap();
let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap();
let mut rng = make_rng(Some(9));
for _ in 0..200 {
let Value::Text(s) = generate_value(&g, Type::Date, &mut rng) else {
panic!("date range should be text")
};
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid date");
assert!(d >= lo && d <= hi, "date {d} out of range");
}
}
// A `Range` whose `low` is greater than its `high` must still produce a
// value inside the interval spanned by the two bounds (single draw).
#[test]
fn reversed_bounds_are_tolerated() {
let g = Generator::Range { low: "20".into(), high: "10".into() };
let mut rng = make_rng(Some(1));
let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
panic!("number")
};
let n: i64 = s.parse().unwrap();
assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
// `range_bounds_reason` returns `None` when the bounds suit the column
// type and `Some(..)` (a rejection reason) when they do not.
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
// Numeric / date / datetime accept; text / bool reject.
assert!(range_bounds_reason(Type::Int, "1", "10").is_none());
assert!(range_bounds_reason(Type::Real, "1.5", "9.9").is_none());
assert!(range_bounds_reason(Type::Date, "2023-01-01", "2024-01-01").is_none());
assert!(range_bounds_reason(Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00").is_none());
// Non-numeric bound on a numeric column.
assert!(range_bounds_reason(Type::Int, "abc", "10").is_some());
// A range on a text column is meaningless.
assert!(range_bounds_reason(Type::Text, "a", "z").is_some());
assert!(range_bounds_reason(Type::Bool, "0", "1").is_some());
}
// The two executor-resolved markers must still yield a value of the
// column's type when they reach generation un-intercepted.
// NOTE(review): `gen_once` is defined outside this view; it presumably
// builds a seeded RNG and calls `generate_value` once — confirm.
#[test]
fn markers_fall_back_to_type_based_generation() {
// An un-intercepted marker must not panic; it generates by type.
let v = gen_once(&Generator::IdentitySequential, Type::Text, 1);
assert!(matches!(v, Value::Text(_)));
let v = gen_once(&Generator::ForeignKeySample, Type::Int, 1);
assert!(matches!(v, Value::Number(_)));
}
// `Generator::Generic` must produce the `Value` variant matching the
// column type: text → Text, int → Number, bool → Bool, blob → Null, and
// shortid → a Text that passes shortid validation.
// All calls share one seeded RNG, so their order affects the draws.
#[test]
fn generic_fallback_matches_each_type() {
let mut rng = make_rng(Some(0));
assert!(matches!(generate_value(&Generator::Generic, Type::Text, &mut rng), Value::Text(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Int, &mut rng), Value::Number(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Bool, &mut rng), Value::Bool(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Blob, &mut rng), Value::Null));
// shortid fallback is a valid base58 id.
let Value::Text(sid) = generate_value(&Generator::Generic, Type::ShortId, &mut rng) else {
panic!("shortid not text")
};
assert!(crate::dsl::shortid::validate(&sid).is_ok(), "invalid shortid {sid}");
}
}
+440
View File
@@ -0,0 +1,440 @@
//! Generator selection: the name-aware, type-gated catalogue (ADR-0048
//! D7), table-context disambiguation for `name`/`title` (D11), the
//! identifier-family rule (D10), and enum-ish detection (D12).
//!
//! Selection is **token-based**: a column name is split on `_`, `-` and
//! camelCase boundaries, lowercased, and matched against an
//! ordered, most-specific-first list. Each rule is **type-gated** — a
//! name match only fires when the column's type is compatible, so a
//! column called `email` typed `int` falls through to type-based
//! generation rather than producing a string. Documented false-positive
//! guards keep `username`/`filename` away from the bare person-name
//! rule.
use tracing::trace;
use crate::dsl::types::Type;
use crate::seed::{ColumnSpec, Generator};
/// Pick the generation strategy for one column (ADR-0048 D7/D10/D11/D12).
///
/// Resolution order: a foreign-key column yields the executor marker, a
/// non-empty `IN`-CHECK value list becomes a fixed pick list, then the
/// ordered name catalogue is consulted, and finally the type-based
/// fallback applies. The decision is logged at `trace` level.
///
/// * `table` — name of the table the column belongs to (used to
///   disambiguate bare `name`/`title` columns).
/// * `col` — the column's shape.
#[must_use]
pub fn choose_generator(table: &str, col: &ColumnSpec) -> Generator {
    let picked = choose_generator_inner(table, col);
    trace!(
        table,
        column = %col.name,
        ty = %col.ty,
        chosen = ?picked,
        "seed: chose generator for column"
    );
    picked
}
/// The selection logic behind [`choose_generator`], without the logging.
fn choose_generator_inner(table: &str, col: &ColumnSpec) -> Generator {
    // Foreign keys are filled by sampling existing parent rows (D14);
    // that is the executor's job, so only a marker is returned here.
    if col.is_foreign_key {
        return Generator::ForeignKeySample;
    }
    match col.check_in_values.as_ref() {
        // A simple `col IN (…)` CHECK supplies the values directly (D17),
        // so the enum-as-CHECK pattern needs no name heuristic.
        Some(allowed) if !allowed.is_empty() => Generator::PickFrom(allowed.clone()),
        // No usable CHECK list: name catalogue, then type-based fallback.
        _ => {
            let name_tokens = tokens(&col.name);
            match_name_generator(table, &name_tokens, col.ty).unwrap_or(Generator::Generic)
        }
    }
}
/// Report whether `name` reads like an enum / fixed-value-set column
/// (D12) — one for which no sensible generic generator exists.
///
/// The executor uses this to drive the post-seed advisory; such columns
/// still receive generic text. The check is token-based: it is true when
/// any token of `name` is one of the enum-ish words below.
#[must_use]
pub fn is_enum_ish(name: &str) -> bool {
    const ENUM_TOKENS: &[&str] = &[
        "role", "status", "state", "type", "kind", "category", "level",
        "tier", "stage", "priority", "gender",
    ];
    for word in tokens(name) {
        if ENUM_TOKENS.iter().any(|known| *known == word) {
            return true;
        }
    }
    false
}
/// The ordered, most-specific-first name catalogue.
///
/// * `table` — the owning table's name; only consulted for the bare
///   `name`/`title` rule (D11).
/// * `toks` — the column name, already split into lowercase tokens.
/// * `ty` — the column type; every rule is gated on it.
///
/// Returns `None` when nothing matches (→ type-based fallback) or when a
/// name matches but its type gate fails.
///
/// Ordering matters. The bare `name`/`title` rule is deliberately placed
/// *after* the address and organisation/job rules: a qualified name such
/// as `state_name`, `city_name`, `company_name` or `job_title` carries a
/// `name`/`title` token too, and must reach its specific generator rather
/// than being captured by the table-context person/product name. The
/// rule still runs before the free-text rules, so names like
/// `note_title` keep resolving by table context.
fn match_name_generator(table: &str, toks: &[String], ty: Type) -> Option<Generator> {
    let text = type_is_text(ty);
    let numeric = ty.is_numeric();

    // — Person —
    if text && (has_any(toks, &["fname", "firstname"]) || has_seq(toks, "first", "name")) {
        return Some(Generator::FirstName);
    }
    if text
        && (has_any(toks, &["lname", "lastname", "surname"]) || has_seq(toks, "last", "name"))
    {
        return Some(Generator::LastName);
    }
    if text && (has_any(toks, &["username", "login", "handle"]) || has_seq(toks, "user", "name")) {
        return Some(Generator::Username);
    }
    if text && has_any(toks, &["email", "emails"]) {
        return Some(Generator::Email);
    }
    if text && has_any(toks, &["password", "passwd", "pwd"]) {
        return Some(Generator::Password);
    }
    if text && has_any(toks, &["phone", "mobile", "cell", "tel", "telephone"]) {
        return Some(Generator::Phone);
    }

    // — Address —
    if text && has_any(toks, &["city", "town"]) {
        return Some(Generator::City);
    }
    if text && has_token(toks, "country") {
        return Some(Generator::Country);
    }
    // `province` / explicit `state_name`/`state_abbr` → a real state name.
    // Bare `state` is left to enum-ish (it usually means status), so we
    // require `province` or a `state` token paired with name/abbr.
    if text
        && (has_token(toks, "province")
            || (has_token(toks, "state") && has_any(toks, &["name", "abbr"])))
    {
        return Some(Generator::StateName);
    }
    if text && has_any(toks, &["street", "address", "addr"]) {
        return Some(Generator::Street);
    }
    if text && has_any(toks, &["zip", "zipcode", "postcode", "postal"]) {
        return Some(Generator::ZipCode);
    }

    // — Organisation / job —
    if text && has_any(toks, &["company", "employer", "org", "organization", "organisation"]) {
        return Some(Generator::Company);
    }
    if text && has_any(toks, &["job", "position", "profession", "occupation"]) {
        return Some(Generator::JobTitle);
    }

    // — bare `name` / `title` → table-context (D11) —
    // Reached only when none of the more specific person / address /
    // organisation rules above claimed the column. Structural names
    // like `table_name` are excluded by the false-positive guard.
    if text && has_any(toks, &["name", "title"]) && !is_name_false_positive(toks) {
        return Some(name_by_table_context(table));
    }

    // — Free text —
    if text
        && has_any(
            toks,
            &["description", "bio", "notes", "note", "summary", "comment", "comments", "about"],
        )
    {
        return Some(Generator::Sentence);
    }
    if text && has_any(toks, &["url", "website", "homepage", "link"]) {
        return Some(Generator::Url);
    }
    if text && has_any(toks, &["color", "colour"]) {
        return Some(Generator::HexColor);
    }

    // — Numeric —
    if numeric
        && has_any(
            toks,
            &["price", "amount", "cost", "salary", "balance", "total", "fee", "revenue"],
        )
    {
        return Some(Generator::CurrencyAmount);
    }
    if numeric && has_token(toks, "age") {
        return Some(Generator::Age);
    }
    if numeric && has_any(toks, &["quantity", "qty", "stock", "count"]) {
        return Some(Generator::SmallInt);
    }

    // — Temporal (bounded, D8) —
    if matches!(ty, Type::Date) && has_any(toks, &["dob", "birthday", "birthdate"]) {
        return Some(Generator::DateAdult);
    }
    if matches!(ty, Type::Date) && has_token(toks, "date") {
        return Some(Generator::DateRecent);
    }
    if matches!(ty, Type::DateTime) && has_any(toks, &["timestamp", "datetime", "at"]) {
        return Some(Generator::DateTimeRecent);
    }

    // — Boolean —
    // An `is_*` / `has_*` prefix, or one of the common flag words.
    if matches!(ty, Type::Bool)
        && (matches!(toks.first().map(String::as_str), Some("is" | "has"))
            || has_any(toks, &["active", "enabled", "verified", "deleted"]))
    {
        return Some(Generator::Boolean);
    }

    // — Identifier family (D10) — late so phone/email/etc. win first.
    if matches!(ty, Type::Int | Type::Text) && is_identifier_name(toks) {
        return Some(Generator::IdentitySequential);
    }

    None
}
/// Decide what a bare `name`/`title` column means from the **table** that
/// owns it (D11).
///
/// The table name is tokenised and checked against three word lists in
/// priority order: product-ish → [`Generator::ProductName`], company-ish
/// → [`Generator::Company`], person-ish → [`Generator::FullName`]. A
/// table matching none of them also gets [`Generator::FullName`].
fn name_by_table_context(table: &str) -> Generator {
    const PRODUCTY: &[&str] = &[
        "product", "products", "item", "items", "good", "goods",
        "merchandise", "catalog", "catalogue", "inventory", "sku", "skus",
    ];
    const COMPANYISH: &[&str] = &[
        "company", "companies", "vendor", "vendors", "supplier",
        "suppliers", "manufacturer", "manufacturers", "brand", "brands",
        "organization", "organisation",
    ];
    const PERSONISH: &[&str] = &[
        "user", "users", "customer", "customers", "person", "people",
        "employee", "employees", "member", "members", "contact",
        "contacts", "author", "authors", "student", "students",
    ];

    let words = tokens(table);
    let looks_like = (
        has_any(&words, PRODUCTY),
        has_any(&words, COMPANYISH),
        has_any(&words, PERSONISH),
    );
    match looks_like {
        (true, _, _) => Generator::ProductName,
        (false, true, _) => Generator::Company,
        (false, false, true) => Generator::FullName,
        // Unknown table: a person name is the most generally useful
        // default for a bare `name` column.
        (false, false, false) => Generator::FullName,
    }
}
/// Guard for the bare `name`/`title` rule: true when the tokens contain
/// `name` or `title` *together with* a structural qualifier (`table`,
/// `file`, `host`, …), i.e. the column names a thing, not a person.
///
/// The `first`/`last`/`user` compounds never get here — they are matched
/// and returned by earlier rules.
fn is_name_false_positive(toks: &[String]) -> bool {
    const NON_PERSON: &[&str] = &[
        "file", "table", "host", "domain", "field", "class", "tag",
        "event", "path", "col", "column", "db", "schema", "index", "key",
        "page", "node", "type",
    ];
    const NAME_WORDS: &[&str] = &["name", "title"];

    let has_structural_qualifier = has_any(toks, NON_PERSON);
    let has_name_word = has_any(toks, NAME_WORDS);
    has_structural_qualifier && has_name_word
}
/// Whether the tokens name a member of the identifier family (D10), which
/// is treated as a unique identifier. FK columns never reach here (they
/// are handled in [`choose_generator`]).
///
/// Two ways to qualify:
/// * any token is an outright identifier word (`id`, `code`, `sku`, …);
/// * the name has at least two tokens and one is `number`/`no` — a bare
///   `number`/`no` is too ambiguous, and `phone_number` has already been
///   claimed by the phone rule by the time this runs.
fn is_identifier_name(toks: &[String]) -> bool {
    const ID_TOKENS: &[&str] = &["id", "code", "sku", "ref", "reference", "barcode"];
    const QUALIFIED_ONLY: &[&str] = &["number", "no"];

    let is_qualified = toks.len() >= 2;
    has_any(toks, ID_TOKENS) || (is_qualified && has_any(toks, QUALIFIED_ONLY))
}
// — token utilities —
/// Break a column or table name into lowercase tokens.
///
/// Token boundaries are `_`, `-`, a space, and a camelCase step (an ASCII
/// uppercase letter directly after an ASCII lowercase letter or digit).
/// Separators are dropped and never produce empty tokens. Examples:
/// `created_at` → [`created`, `at`]; `firstName` → [`first`, `name`];
/// `DOB` → [`dob`]. A run of capitals is *not* split, so `HTTPStatus`
/// stays one token.
fn tokens(name: &str) -> Vec<String> {
    /// Move the pending token (if any) onto the finished list.
    fn flush(pending: &mut String, done: &mut Vec<String>) {
        if !pending.is_empty() {
            done.push(std::mem::take(pending));
        }
    }

    let mut done = Vec::new();
    let mut pending = String::new();
    let mut after_lower_or_digit = false;

    for ch in name.chars() {
        match ch {
            '_' | '-' | ' ' => {
                flush(&mut pending, &mut done);
                after_lower_or_digit = false;
            }
            _ => {
                // camelCase step: close the current token before the capital.
                if after_lower_or_digit && ch.is_ascii_uppercase() {
                    flush(&mut pending, &mut done);
                }
                pending.push(ch.to_ascii_lowercase());
                after_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
            }
        }
    }
    flush(&mut pending, &mut done);
    done
}
/// True when `t` appears verbatim (whole-token, case-sensitive) in `toks`.
fn has_token(toks: &[String], t: &str) -> bool {
    toks.iter().map(String::as_str).any(|tok| tok == t)
}
/// True when at least one of `candidates` is present as a token in `toks`.
fn has_any(toks: &[String], candidates: &[&str]) -> bool {
    for wanted in candidates {
        if has_token(toks, wanted) {
            return true;
        }
    }
    false
}
/// True when some token equal to `a` is directly followed by a token
/// equal to `b` — used for compound names that the tokenizer has split,
/// such as `first name` / `user name`.
fn has_seq(toks: &[String], a: &str, b: &str) -> bool {
    toks.iter()
        .zip(toks.iter().skip(1))
        .any(|(left, right)| left == a && right == b)
}
/// Whether a type accepts the free-text name generators.
///
/// Only `text` and `shortid` qualify. The text-backed `decimal`, `date`
/// and `datetime` types are deliberately excluded: they have their own
/// dedicated gates in the catalogue.
const fn type_is_text(ty: Type) -> bool {
    match ty {
        Type::Text | Type::ShortId => true,
        _ => false,
    }
}
// Unit tests for the name-aware generator selection. Every case goes
// through the public `choose_generator` entry point (via the `choose`
// helper) except the tokenizer test, which calls `tokens` directly.
#[cfg(test)]
mod tests {
use super::*;
use crate::seed::ColumnSpec;
use pretty_assertions::assert_eq;
// Select a generator for a plain (unconstrained, non-FK) column.
fn choose(table: &str, name: &str, ty: Type) -> Generator {
choose_generator(table, &ColumnSpec::plain(name, ty))
}
#[test]
fn person_name_fields_map_to_name_generators() {
assert_eq!(choose("users", "first_name", Type::Text), Generator::FirstName);
assert_eq!(choose("users", "firstName", Type::Text), Generator::FirstName);
assert_eq!(choose("users", "last_name", Type::Text), Generator::LastName);
assert_eq!(choose("users", "surname", Type::Text), Generator::LastName);
}
#[test]
fn contact_fields_map_correctly() {
assert_eq!(choose("users", "email", Type::Text), Generator::Email);
assert_eq!(choose("users", "work_email", Type::Text), Generator::Email);
assert_eq!(choose("users", "username", Type::Text), Generator::Username);
assert_eq!(choose("users", "user_name", Type::Text), Generator::Username);
assert_eq!(choose("users", "phone", Type::Text), Generator::Phone);
assert_eq!(choose("accounts", "password", Type::Text), Generator::Password);
}
#[test]
fn address_fields_map_correctly() {
assert_eq!(choose("a", "city", Type::Text), Generator::City);
assert_eq!(choose("a", "country", Type::Text), Generator::Country);
assert_eq!(choose("a", "street", Type::Text), Generator::Street);
assert_eq!(choose("a", "zip", Type::Text), Generator::ZipCode);
assert_eq!(choose("a", "postcode", Type::Text), Generator::ZipCode);
assert_eq!(choose("a", "province", Type::Text), Generator::StateName);
}
#[test]
fn bare_name_uses_table_context() {
// D11 — the same column name resolves differently by table.
assert_eq!(choose("products", "name", Type::Text), Generator::ProductName);
assert_eq!(choose("items", "title", Type::Text), Generator::ProductName);
assert_eq!(choose("users", "name", Type::Text), Generator::FullName);
assert_eq!(choose("customers", "name", Type::Text), Generator::FullName);
assert_eq!(choose("vendors", "name", Type::Text), Generator::Company);
// Unknown table → person name default.
assert_eq!(choose("widgets", "name", Type::Text), Generator::FullName);
}
#[test]
fn name_false_positives_do_not_become_person_names() {
// These must NOT resolve to a person/product name.
assert_ne!(choose("files", "filename", Type::Text), Generator::FullName);
assert_ne!(choose("meta", "table_name", Type::Text), Generator::FullName);
// They fall through to a generic / non-person generator.
assert_eq!(choose("files", "filename", Type::Text), Generator::Generic);
}
#[test]
fn numeric_name_heuristics_are_type_gated() {
// `price` on a numeric column → currency; on text → falls through.
assert_eq!(choose("p", "price", Type::Int), Generator::CurrencyAmount);
assert_eq!(choose("p", "price", Type::Decimal), Generator::CurrencyAmount);
assert_eq!(choose("p", "price", Type::Text), Generator::Generic);
assert_eq!(choose("u", "age", Type::Int), Generator::Age);
assert_eq!(choose("o", "quantity", Type::Int), Generator::SmallInt);
}
#[test]
fn email_on_wrong_type_falls_through() {
// The type gate: an `email` int column does NOT get a string —
// it falls through to type-based generation.
assert_eq!(choose("u", "email", Type::Int), Generator::Generic);
}
#[test]
fn temporal_fields_are_bounded_and_type_gated() {
assert_eq!(choose("u", "dob", Type::Date), Generator::DateAdult);
assert_eq!(choose("o", "order_date", Type::Date), Generator::DateRecent);
assert_eq!(choose("o", "created_at", Type::DateTime), Generator::DateTimeRecent);
assert_eq!(choose("o", "timestamp", Type::DateTime), Generator::DateTimeRecent);
// Wrong type → not a date generator.
assert_eq!(choose("o", "order_date", Type::Int), Generator::Generic);
}
#[test]
fn boolean_fields_map_to_boolean() {
assert_eq!(choose("u", "is_active", Type::Bool), Generator::Boolean);
assert_eq!(choose("u", "has_paid", Type::Bool), Generator::Boolean);
assert_eq!(choose("u", "enabled", Type::Bool), Generator::Boolean);
}
#[test]
fn identifier_family_is_unique_sequential() {
assert_eq!(choose("t", "code", Type::Text), Generator::IdentitySequential);
assert_eq!(choose("t", "sku", Type::Text), Generator::IdentitySequential);
assert_eq!(choose("t", "order_number", Type::Int), Generator::IdentitySequential);
assert_eq!(choose("t", "external_id", Type::Int), Generator::IdentitySequential);
}
#[test]
fn foreign_key_columns_defer_to_executor() {
// The FK flag wins even though `user_id` also looks like an identifier.
let mut spec = ColumnSpec::plain("user_id", Type::Int);
spec.is_foreign_key = true;
assert_eq!(choose_generator("orders", &spec), Generator::ForeignKeySample);
}
#[test]
fn check_in_values_become_pick_from() {
let mut spec = ColumnSpec::plain("status", Type::Text);
spec.check_in_values = Some(vec!["active".into(), "closed".into()]);
assert_eq!(
choose_generator("orders", &spec),
Generator::PickFrom(vec!["active".into(), "closed".into()])
);
}
#[test]
fn enum_ish_names_are_detected_for_the_advisory() {
assert!(is_enum_ish("status"));
assert!(is_enum_ish("role"));
assert!(is_enum_ish("order_state"));
assert!(is_enum_ish("priority"));
assert!(!is_enum_ish("email"));
assert!(!is_enum_ish("first_name"));
}
#[test]
fn enum_ish_columns_fall_through_to_generic() {
// No special generator — generic text + the advisory flags them.
assert_eq!(choose("orders", "status", Type::Text), Generator::Generic);
assert_eq!(choose("users", "role", Type::Text), Generator::Generic);
}
#[test]
fn unmatched_columns_use_type_based_fallback() {
assert_eq!(choose("t", "some_freeform_field", Type::Text), Generator::Generic);
}
#[test]
fn tokenizer_splits_on_all_boundaries() {
assert_eq!(tokens("created_at"), vec!["created", "at"]);
assert_eq!(tokens("firstName"), vec!["first", "name"]);
assert_eq!(tokens("DOB"), vec!["dob"]);
assert_eq!(tokens("user-email"), vec!["user", "email"]);
// A run of capitals is not a camelCase boundary: stays one token.
assert_eq!(tokens("HTTPStatus"), vec!["httpstatus"]);
}
}
+213
View File
@@ -0,0 +1,213 @@
//! Pure fake-data generation library for the `seed` command (ADR-0048).
//!
//! This module is the **generation half** of `seed`: given a column's
//! shape (name, type, constraints), it chooses a *generator* and turns
//! a seeded RNG into plausible [`Value`]s. It is deliberately decoupled
//! from `db.rs` — it knows nothing about SQLite, the worker thread, or
//! persistence — so it stays pure and unit-testable, with exact-value
//! assertions made possible by the seedable RNG (ADR-0048 D4).
//!
//! The executor (`db.rs::do_seed`) adapts the real schema into
//! [`ColumnSpec`]s, calls [`choose_generator`] per column, and then
//! [`generate_value`] per row — except for the *stateful* markers
//! ([`Generator::IdentitySequential`], [`Generator::ForeignKeySample`])
//! which need database context (existing rows, the running sequence)
//! and so are resolved by the executor, not here.
//!
//! Layout:
//! - this file — the public types ([`ColumnSpec`], [`Generator`],
//! [`SeedRng`]) and the RNG constructor.
//! - [`heuristics`] — [`choose_generator`] + the name-aware catalogue
//! (D7), table-context disambiguation (D11), identifier (D10) and
//! enum-ish (D12) detection.
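//! - [`generators`] — [`generate_value`]: per-generator value
//!   production, the hand-rolled `product` generator (D9) and the
//!   bounded date windows (D8).
//! - [`check`] — [`parse_in_check_values`]: the value list of a simple
//!   `col IN (…)` CHECK constraint (D17).
//! - [`vocabulary`] — the curated generator names a learner can write
//!   after `set <col> as …` and their mapping to a [`Generator`].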
mod check;
mod generators;
mod heuristics;
mod vocabulary;
pub use check::parse_in_check_values;
pub use generators::{generate_value, range_bounds_reason};
pub use heuristics::{choose_generator, is_enum_ish};
pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS};
use rand::rngs::StdRng;
use rand::{RngExt, SeedableRng};
use crate::dsl::types::Type;
/// The RNG that drives all seed generation.
///
/// A single seeded `StdRng` feeds both `fake`'s `fake_with_rng` and the
/// hand-rolled generators, so a `--seed` value fully determines the
/// output (ADR-0048 D4). `rand 0.10`'s `StdRng` satisfies `fake`'s
/// `RngExt` bound (it re-exports `rand::RngExt`), so the same handle
/// works on both sides.
pub type SeedRng = StdRng;
/// Construct the RNG used for seed generation.
///
/// * `Some(seed)` — the stream is fully reproducible for that seed.
/// * `None` — a fresh `u64` is drawn from the thread RNG and used as the
///   seed, so each run differs.
///
/// Both paths seed `StdRng` from a single `u64`, which keeps
/// construction uniform and sidesteps `rand`'s churn-prone from-entropy
/// constructors.
#[must_use]
pub fn make_rng(seed: Option<u64>) -> SeedRng {
    let seed_value = match seed {
        Some(fixed) => fixed,
        None => rand::rng().random::<u64>(),
    };
    StdRng::seed_from_u64(seed_value)
}
/// A column described in just enough detail to choose and run a
/// generator. Built by the executor from the real schema; kept
/// independent of `db.rs`'s `ReadColumn` so this library stays pure.
///
/// Of these fields, [`choose_generator`] reads `name`, `ty`,
/// `is_foreign_key` and `check_in_values`; the remaining flags are
/// carried for the executor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnSpec {
/// The column's name — the primary signal for generator choice.
pub name: String,
/// The user-facing playground type — gates every name heuristic.
pub ty: Type,
/// `NOT NULL` — the executor uses this for the block guard (D1);
/// generation always produces a value, so it is informational here.
pub not_null: bool,
/// Part of the table's primary key.
pub primary_key: bool,
/// Carries a `UNIQUE` constraint (or is a single-column PK).
pub unique: bool,
/// A foreign-key column — generation is the executor's job
/// (sample an existing parent row, D14), so [`choose_generator`]
/// returns [`Generator::ForeignKeySample`].
pub is_foreign_key: bool,
/// Values parsed from a simple `col IN ('a', 'b', …)` CHECK
/// constraint (D17). When present, generation draws from them so
/// the common enum-as-CHECK pattern "just works".
/// `None` and `Some(empty)` are treated alike by [`choose_generator`]:
/// neither produces a pick list.
pub check_in_values: Option<Vec<String>>,
}
impl ColumnSpec {
    /// Test-only shortcut: a column with the given name and type and no
    /// constraints at all (nullable, not a key, not unique, not a foreign
    /// key, no CHECK value list).
    #[cfg(test)]
    #[must_use]
    pub fn plain(name: &str, ty: Type) -> Self {
        let name = name.to_owned();
        Self {
            name,
            ty,
            not_null: false,
            primary_key: false,
            unique: false,
            is_foreign_key: false,
            check_in_values: None,
        }
    }
}
/// The chosen generation strategy for a column.
///
/// Most variants are *stateless* — [`generate_value`] turns them into a
/// [`Value`] from the RNG alone. Two are *stateful markers* that the
/// executor must intercept (they need database context):
/// [`Self::IdentitySequential`] (the running `MAX+offset` sequence,
/// D10) and [`Self::ForeignKeySample`] (draw from existing parent
/// rows, D14). For safety [`generate_value`] treats an un-intercepted
/// marker as [`Self::Generic`] rather than panicking.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Generator {
// — Person —
FirstName,
LastName,
/// A full person name (table-context default for `name`/`title`).
FullName,
Email,
Username,
Password,
Phone,
// — Address —
City,
Country,
StateName,
Street,
ZipCode,
// — Organisation / commerce —
Company,
JobTitle,
/// Hand-rolled `{adjective} {material} {noun}` (D9) — `fake` has no
/// commerce module.
ProductName,
// — Free text —
Sentence,
Paragraph,
Url,
HexColor,
// — Numeric —
/// A money-shaped amount (whole for `int`, two-decimal otherwise).
CurrencyAmount,
/// A plausible human age (18–80).
Age,
/// A small positive integer (quantities, counts).
SmallInt,
// — Temporal (bounded windows, D8) —
/// A date within the last few years.
DateRecent,
/// A date in an adult birth window (≈18–80 years ago) — for `dob`.
DateAdult,
/// A datetime within the last few years.
DateTimeRecent,
// — Boolean —
Boolean,
// — Stateful markers (executor-resolved) —
/// Unique sequential identifier (D10): the executor supplies
/// `MAX(col)+offset`. Chosen for identifier-named non-FK columns.
IdentitySequential,
/// FK column (D14): the executor samples an existing parent key.
ForeignKeySample,
// — List / range (the `set` override clause, D2) —
/// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
/// enum, or a `set <col> in (…)` / `= <value>` override (D2).
PickFrom(Vec<String>),
/// Uniform value in `[low, high]` — the `set <col> between low and
/// high` override (D2). Bounds are the raw literal strings; their
/// interpretation (int / real / date / datetime) follows the
/// destination column type at generation time. The executor
/// validates type-compatibility *before* generation (a bound that
/// does not parse for the column type is a friendly error), so
/// [`generate_value`] only ever sees parseable bounds; a defensive
/// parse failure falls back to type-based generation.
Range { low: String, high: String },
/// Type-based fallback (D8) when no name heuristic matches.
Generic,
}
// Tests for `make_rng`: determinism under a fixed seed, divergence
// between seeds, and a smoke test of the unseeded path.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn same_seed_yields_identical_rng_streams() {
// Two independently built RNGs with the same seed must emit the
// same first eight values.
let mut a = make_rng(Some(42));
let mut b = make_rng(Some(42));
let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
assert_eq!(xs, ys, "a fixed seed must reproduce the stream");
}
#[test]
fn different_seeds_yield_different_streams() {
let mut a = make_rng(Some(1));
let mut b = make_rng(Some(2));
let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
assert_ne!(xs, ys);
}
#[test]
fn unseeded_rng_constructs_without_panicking() {
// Entropy-seeded path: just exercise it.
let mut rng = make_rng(None);
let _ = rng.random::<u64>();
}
}
+149
View File
@@ -0,0 +1,149 @@
//! The curated named-generator vocabulary (ADR-0048 D9).
//!
//! This is the **single source of truth** for "what generator names can
//! a learner write after `set <col> as …`", shared by three consumers
//! (mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amendment 6):
//!
//! - **Tab completion** — the `seed … set <col> as ⟨here⟩` slot offers
//! these names (`src/completion.rs`).
//! - **The typing-time validity indicator (ADR-0027)** — an unknown
//! name after `as` is flagged `[ERR]` while typing.
//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`]
//! via [`generator_for_name`]; an unknown name is a friendly error.
//!
//! The list is a deliberately *curated pedagogical set* — the generators
//! a learner reaches for, not every internal [`Generator`] variant
//! (stateful markers like `ForeignKeySample` are executor-only and have
//! no name). It is lowercase + sorted (pinned by a unit test).
use crate::seed::Generator;
/// The curated generator names, lowercase and **sorted** (invariant
/// pinned by a test — completion relies on stable order and a
/// case-insensitive prefix match against these canonical spellings).
///
/// Every entry must have a matching arm in [`generator_for_name`]; a
/// unit test checks that each listed name maps to a generator. Aliases
/// accepted by [`generator_for_name`] (e.g. `colour`) are intentionally
/// not listed here.
pub const KNOWN_GENERATORS: &[&str] = &[
"age",
"bool",
"city",
"color",
"company",
"country",
"date",
"datetime",
"email",
"first_name",
"job",
"last_name",
"name",
"paragraph",
"password",
"phone",
"price",
"product",
"sentence",
"state",
"street",
"url",
"username",
"zip",
];
/// Resolve a learner-written generator name to its [`Generator`],
/// ignoring ASCII case.
///
/// Returns `None` for a name that is not recognised — the executor turns
/// that into a friendly "unknown generator" error naming the curated
/// set. Besides the canonical spellings, a few variants are accepted as
/// aliases (`full_name`, `firstname`, `lastname`, `surname`, `colour`)
/// even though only the canonical spelling is offered for completion.
#[must_use]
pub fn generator_for_name(name: &str) -> Option<Generator> {
    let canonical = name.to_ascii_lowercase();
    match canonical.as_str() {
        // — Person —
        "name" | "full_name" => Some(Generator::FullName),
        "first_name" | "firstname" => Some(Generator::FirstName),
        "last_name" | "lastname" | "surname" => Some(Generator::LastName),
        "email" => Some(Generator::Email),
        "username" => Some(Generator::Username),
        "password" => Some(Generator::Password),
        "phone" => Some(Generator::Phone),
        // — Address —
        "city" => Some(Generator::City),
        "country" => Some(Generator::Country),
        "state" => Some(Generator::StateName),
        "street" => Some(Generator::Street),
        "zip" => Some(Generator::ZipCode),
        // — Organisation / commerce —
        "company" => Some(Generator::Company),
        "job" => Some(Generator::JobTitle),
        "product" => Some(Generator::ProductName),
        // — Free text —
        "sentence" => Some(Generator::Sentence),
        "paragraph" => Some(Generator::Paragraph),
        "url" => Some(Generator::Url),
        "color" | "colour" => Some(Generator::HexColor),
        // — Numeric / temporal / boolean —
        "price" => Some(Generator::CurrencyAmount),
        "age" => Some(Generator::Age),
        "date" => Some(Generator::DateRecent),
        "datetime" => Some(Generator::DateTimeRecent),
        "bool" => Some(Generator::Boolean),
        _ => None,
    }
}
/// Report whether at least one known generator name begins with
/// `partial`, compared without regard to ASCII case.
///
/// The empty string is a prefix of every name, so it matches (this
/// mirrors `is_known_function_prefix`). The validity indicator relies on
/// this to avoid flagging a name the user is still typing.
#[must_use]
pub fn is_known_generator_prefix(partial: &str) -> bool {
    let needle = partial.to_ascii_lowercase();
    for known in KNOWN_GENERATORS {
        if known.starts_with(needle.as_str()) {
            return true;
        }
    }
    false
}
// Tests pinning the vocabulary invariants: list ordering/case, list ↔
// mapping agreement, alias handling, and prefix matching.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn known_generators_is_sorted_and_lowercase() {
// Compare against a sorted copy rather than checking pairwise.
let mut sorted = KNOWN_GENERATORS.to_vec();
sorted.sort_unstable();
assert_eq!(KNOWN_GENERATORS, sorted.as_slice(), "must be sorted");
for g in KNOWN_GENERATORS {
assert_eq!(*g, g.to_ascii_lowercase(), "must be lowercase: {g}");
}
}
#[test]
fn every_listed_name_maps_to_a_generator() {
// Guards against adding a name to the list without a mapping arm.
for g in KNOWN_GENERATORS {
assert!(
generator_for_name(g).is_some(),
"listed generator name `{g}` has no mapping"
);
}
}
#[test]
fn mapping_is_case_insensitive_and_has_aliases() {
assert_eq!(generator_for_name("EMAIL"), Some(Generator::Email));
assert_eq!(generator_for_name("FirstName"), Some(Generator::FirstName));
assert_eq!(generator_for_name("colour"), Some(Generator::HexColor));
assert_eq!(generator_for_name("full_name"), Some(Generator::FullName));
}
#[test]
fn unknown_name_has_no_mapping() {
assert_eq!(generator_for_name("bogus"), None);
assert_eq!(generator_for_name(""), None);
}
#[test]
fn prefix_check_matches_known_and_rejects_unknown() {
assert!(is_known_generator_prefix("ema"));
assert!(is_known_generator_prefix("EMA"));
assert!(is_known_generator_prefix("")); // empty is a prefix of all
assert!(!is_known_generator_prefix("zzz"));
}
}
+2
View File
@@ -163,6 +163,7 @@ impl Theme {
HighlightClass::String => self.tok_string,
HighlightClass::Punct => self.tok_punct,
HighlightClass::Flag => self.tok_flag,
HighlightClass::Function => self.tok_function,
HighlightClass::Error => self.tok_error,
}
}
@@ -228,6 +229,7 @@ mod tests {
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function);
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
}
+19 -27
View File
@@ -1438,12 +1438,19 @@ fn render_input_one_row(
let offset = input_scroll_offset(line_cols, cursor_col, tw, app.input_scroll_offset);
app.input_scroll_offset = offset;
let runs = crate::input_render::render_input_runs_in_mode(
// Strip the `:` one-shot prefix for the SQL highlighting/overlays
// (ADR-0003); the `:` itself renders as plain text. Identity for
// non-one-shot input.
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
let runs = crate::input_render::render_input_runs_feedback(
&app.input,
cursor,
theme,
&app.schema_cache,
mode_for_render,
fb_view,
fb_cursor,
fb_off,
);
let spans = runs_to_spans(&app.input, &runs);
@@ -1507,12 +1514,19 @@ fn render_input_two_rows(
let offset = input_scroll_offset(line_cols, cursor_col, capacity, app.input_scroll_offset);
app.input_scroll_offset = offset;
let runs = crate::input_render::render_input_runs_in_mode(
// Strip the `:` one-shot prefix for the SQL highlighting/overlays
// (ADR-0003); the `:` itself renders as plain text. Identity for
// non-one-shot input.
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
let runs = crate::input_render::render_input_runs_feedback(
&app.input,
cursor,
theme,
&app.schema_cache,
mode_for_render,
fb_view,
fb_cursor,
fb_off,
);
let cells = expand_runs_to_cells(&app.input, &runs);
let len = cells.len();
@@ -1621,23 +1635,6 @@ fn runs_to_spans<'a>(
.collect()
}
/// Strip a leading one-shot `:` sigil (and the whitespace after
/// it) from `input`, returning the advanced command slice and the
/// cursor remapped into it. Mirrors `App::submit`'s `:` handling
/// so the hint panel hints at the command, not the sigil
/// (ADR-0022 Amendment 1). Used only when the effective mode is
/// `AdvancedOneShot`, where `input` is expected to start (after
/// any leading whitespace) with `:`.
///
/// * `input` — the raw input line.
/// * `cursor` — byte offset of the cursor within `input`.
///
/// Returns `(command, cursor_in_command)`. A cursor that sat inside the
/// stripped prefix maps to `0`; one past the end is clamped to the
/// command's length. If `input` does not actually start with the sigil
/// (empty, whitespace-only, or some other first character) it is
/// returned unchanged with the cursor clamped to its length, instead of
/// slicing past the end or through the middle of a character.
fn strip_one_shot_prefix(input: &str, cursor: usize) -> (&str, usize) {
    let Some(after_colon) = input.trim_start().strip_prefix(':') else {
        // No sigil to strip: identity view.
        return (input, cursor.min(input.len()));
    };
    let effective = after_colon.trim_start();
    // `effective` is a suffix of `input`, so the stripped prefix length
    // is simply the difference in byte lengths.
    let prefix_len = input.len() - effective.len();
    let effective_cursor = cursor.saturating_sub(prefix_len).min(effective.len());
    (effective, effective_cursor)
}
/// Resolve the Hint panel body into its rendered lines, pre-wrapped
/// to the panel's inner width and clamped to `max_rows` with an
/// ellipsis backstop (issue #12). `max_rows` is the geometry-fixed row
@@ -1679,14 +1676,9 @@ fn resolve_hint_lines(
// In one-shot advanced mode (`:` prefix in simple mode) the
// raw input carries the `:` sigil, which is not part of the
// grammar. Strip it for the ambient computation so the hint
// reflects the advanced command — mirroring `App::submit`.
let (hint_input, hint_cursor) = match app.effective_mode() {
EffectiveMode::AdvancedOneShot => {
strip_one_shot_prefix(&app.input, app.input_cursor)
}
_ => (app.input.as_str(), app.input_cursor),
};
// grammar. The shared feedback view strips it so the hint reflects
// the advanced command — mirroring `App::submit` (ADR-0003).
let (hint_input, hint_cursor, _off) = app.feedback_view();
let ambient = crate::input_render::ambient_hint_in_mode(
hint_input,
hint_cursor,
+1
View File
@@ -23,6 +23,7 @@ mod m2n;
mod parse_error_pedagogy;
mod project_lifecycle;
mod replay_command;
mod seed;
mod sql_alter_table;
mod sql_create_index;
mod sql_create_table;
+9
View File
@@ -109,6 +109,14 @@ fn near_miss_matrix_simple_mode() {
("delete", &["after `delete`, expected `from`", "delete from <Table>"]),
("delete from", &["after `delete from`, expected table name", "delete from <Table>"]),
("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]),
("seed", &["after `seed`, expected table name", "seed <Table> [count]"]),
// Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill.
("seed T set", &["after `seed T set`, expected column name", "seed <Table>.<col>"]),
(
"seed T set role",
&["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed <Table>.<col>"],
),
("seed T.", &["after `seed T.`, expected column name", "seed <Table>.<col>"]),
("replay", &["after `replay`, expected string literal or path", "replay <path>"]),
("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]),
// advanced-only entry word typed in simple mode → "this is SQL" rail
@@ -539,3 +547,4 @@ fn caret_aligns_under_offending_token() {
+1277
View File
File diff suppressed because it is too large Load Diff
+66
View File
@@ -237,6 +237,7 @@ fn command_kind_label(cmd: &rdbms_playground::dsl::Command) -> String {
ShowTable { .. } => "ShowTable".into(),
ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()),
Insert { .. } => "Insert".into(),
Seed { .. } => "Seed".into(),
Update { .. } => "Update".into(),
Delete { .. } => "Delete".into(),
ShowData { .. } => "ShowData".into(),
@@ -440,3 +441,68 @@ fn smoke_assess_parse_label_round_trips() {
assert_eq!(a.parse_result.as_deref(), Ok("Insert"));
assert!(matches!(a.state, InputState::Valid));
}
/// `seed` (ADR-0048) gets the standard ambient surface for free from
/// grammar registration: table-name completion, the validity indicator
/// flagging an unknown table, and the `--seed` flag offered as a
/// candidate.
///
/// Also covers the Phase 2 surface: `set` offered after the count,
/// column completion after `set `, generator names after `as `, and
/// column completion for the `seed <Table>.` column-fill form.
#[test]
fn seed_completion_and_validity() {
let schema = schema_serial_pk(); // Customers(id serial, Name, Email)
// Completion: `seed ` offers existing table names.
let cands = completion_candidate_texts(&assess_at_end("seed ", &schema));
assert!(
cands.iter().any(|c| c == "Customers"),
"`seed ` should complete table names, got {cands:?}"
);
// Validity (ADR-0027): a known table seeds clean; an unknown one is
// flagged (same table slot as update/delete/show data).
let ok = assess_at_end("seed Customers 5", &schema);
assert!(matches!(ok.state, InputState::Valid), "known table: {:?}", ok.state);
// seed's unknown-table behaviour must match its closest sibling
// `show data` (same table-only slot), whatever that is.
// Only the enum variant is compared (via `discriminant`), not any
// payload the two states may carry.
let seed_ghost = assess_at_end("seed Ghost 5", &schema).state;
let show_ghost = assess_at_end("show data Ghost", &schema).state;
assert_eq!(
std::mem::discriminant(&seed_ghost),
std::mem::discriminant(&show_ghost),
"seed should treat an unknown table like `show data`: seed={seed_ghost:?}, show={show_ghost:?}"
);
// The `--seed` reproducibility flag is offered after the count.
// Substring match: this passes for any candidate whose text
// contains "seed", not only an exact `--seed`.
let flag_cands = completion_candidate_texts(&assess_at_end("seed Customers 5 ", &schema));
assert!(
flag_cands.iter().any(|c| c.contains("seed")),
"`--seed` should be offered as a candidate, got {flag_cands:?}"
);
// Phase 2 (ADR-0048 D2): the `set` clause is offered after the count.
// Reuses the candidates gathered for `seed Customers 5 ` above.
assert!(
flag_cands.iter().any(|c| c == "set"),
"`set` should be offered after the count, got {flag_cands:?}"
);
// `set ` offers the active table's columns (narrowed to Customers).
let set_cands = completion_candidate_texts(&assess_at_end("seed Customers set ", &schema));
assert!(
set_cands.iter().any(|c| c == "Name") && set_cands.iter().any(|c| c == "Email"),
"`set ` should complete this table's columns, got {set_cands:?}"
);
// `set <col> as ` offers the curated generator vocabulary (D9).
// Two generator names (`email`, `product`) are spot-checked here.
let gen_cands =
completion_candidate_texts(&assess_at_end("seed Customers set Email as ", &schema));
assert!(
gen_cands.iter().any(|c| c == "email") && gen_cands.iter().any(|c| c == "product"),
"`as ` should complete generator names, got {gen_cands:?}"
);
// Column-fill (D1 form 2): `seed Customers.` offers the columns.
let fill_cands = completion_candidate_texts(&assess_at_end("seed Customers.", &schema));
assert!(
fill_cands.iter().any(|c| c == "Name"),
"`seed Customers.` should complete column names, got {fill_cands:?}"
);
}
@@ -24,10 +24,10 @@ Assessment {
completion: Some(
Completion {
replaced_range: (
24,
22,
27,
),
partial_prefix: "all",
partial_prefix: "--all",
candidates: [
Candidate {
text: "--all-rows",
@@ -24,10 +24,10 @@ Assessment {
completion: Some(
Completion {
replaced_range: (
33,
31,
36,
),
partial_prefix: "all",
partial_prefix: "--all",
candidates: [
Candidate {
text: "--all-rows",