Merge branch 'main' into website

This commit is contained in:
claude@clouddev1
2026-06-12 13:22:52 +00:00
38 changed files with 6222 additions and 142 deletions
Generated
+18
View File
@@ -419,6 +419,12 @@ dependencies = [
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "deunicode"
version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
[[package]] [[package]]
name = "diff" name = "diff"
version = "0.1.13" version = "0.1.13"
@@ -518,6 +524,17 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "fake"
version = "5.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453"
dependencies = [
"deunicode",
"either",
"rand 0.10.1",
]
[[package]] [[package]]
name = "fallible-iterator" name = "fallible-iterator"
version = "0.3.0" version = "0.3.0"
@@ -1527,6 +1544,7 @@ dependencies = [
"crossterm", "crossterm",
"csv", "csv",
"directories", "directories",
"fake",
"futures-util", "futures-util",
"gethostname", "gethostname",
"insta", "insta",
+8
View File
@@ -24,6 +24,14 @@ chrono = { version = "0.4.44", default-features = false, features = ["clock"] }
crossterm = { version = "0.29.0", features = ["event-stream"] } crossterm = { version = "0.29.0", features = ["event-stream"] }
csv = "1.4.0" csv = "1.4.0"
directories = "6.0.0" directories = "6.0.0"
# Realistic fake-data generators for the `seed` command (ADR-0048):
# names, emails, addresses, companies, lorem, etc. Default features
# only — the basic fakers need no flags; date/datetime values are
# generated in-house (rand + the existing `chrono`) for the bounded
# windows ADR-0048 D8 requires, so `fake`'s `chrono` feature is
# deliberately omitted. No commerce/product module exists, so the
# `product` generator is hand-rolled (D9).
fake = "5"
futures-util = "0.3.32" futures-util = "0.3.32"
gethostname = "1.1.0" gethostname = "1.1.0"
rand = "0.10.1" rand = "0.10.1"
+677
View File
@@ -0,0 +1,677 @@
# ADR-0048: `seed` — fake-data generation command (SD1, opens SD2)
## Status
**Accepted (2026-06-11); Phase 1 + Phase 2 implemented (2026-06-11).** Design
settled with the user across an extended fork dialogue (every decision
below was escalated and user-chosen), then hardened by a pre-build
`/runda` Devil's-Advocate pass that found six blockers — undo
integration (D15), replay semantics (D16), `set` value quoting (D2),
CHECK-constraint handling (D17), a phase-ordering bug in the advisory
(D13), and auto-show flooding (D18) — plus refinements (state-relative
reproducibility, compound-FK tuple sampling, column-fill constraint
rules, the `fake` dependency scan), all folded in.
**Phase 1 shipped** test-first across commits `202e25a` (generation
library + `fake` dependency) → `f1e9484` (command skeleton) →
`73493fa` (FK sampling) → `9c13501` (uniqueness / junction / IN-CHECK) →
`0b3ab3c` (`SeedResult` / preview / advisory / count cap) →
`e6ff63d` (single-transaction O(N) path) → `fbd219b` (`--seed` flag,
ambient wiring, and a whole-implementation `/runda` pass). The
post-implementation `/runda` found eight gaps — FK-sampling
determinism (now `ORDER BY`), shortid reproducibility (now from the
seeded RNG, so **D4 holds with no exceptions**), and six untested
ADR decisions (D5/D15/D16/D17 + atomicity + zero-count), all closed.
**2358 tests pass / 0 fail / 0 skip; clippy clean.**
**Implemented in Phase 1:** the whole-row `seed <table> [count]
[--seed <n>]` form and every D1–D18 decision *except* the two
Phase-2 surfaces.
**Phase 2 implemented (2026-06-11):** both remaining surfaces — the
**`set` override clause** (D2: fixed value / pick-list / named
generator / range, quoted literals, type-aware) and the
**`<table>.<column>` column-fill** form (D1 form 2: an UPDATE over
existing rows, refusing PK/autogen targets, empty-table no-op, one undo
step). The named-generator vocabulary (D9) lives in `src/seed`
(`KNOWN_GENERATORS` / `generator_for_name`); a new range `Generator`
(`src/seed/generators.rs`) backs `between`; the override clause is
folded from the flat matched path (`build_seed_overrides`,
`src/dsl/grammar/data.rs`) and applied to the per-column plan
(`apply_seed_overrides`, `src/db.rs`), with column-fill in
`do_seed_column_fill`. Full ambient wiring: completion (the generator
vocabulary after `as`, the `set`/`.col` column slots), highlighting
(`HighlightClass::Function` → `tok_function`, the generator slot), the
validity indicator (`IdentSource::Generators` — an unknown name flagged
`[ERR]`), help, and parse-error pedagogy rows. The D13 advisory now
carries its Phase-2/3 wording (points at `set` and the column-fill
repair). A post-implementation `/runda` pass then added one
user-chosen refinement: a **bounded override on a UNIQUE column** (a
fixed value / too-short pick-list) is now a **friendly error** rather
than a silent uniqueness cap (see D2). **2400 tests pass / 0 fail / 0
skip; clippy clean.** Two
implementation refinements vs. this ADR's wording, both met the
user-facing contract: dates in the range form are **quoted** (the D2
amendment, above — no date-literal token exists); and the `set` value
slots reuse `update`'s typed `current_column_value` (no spurious
column-ref match) rather than the raw expression operand.
Further SD2 increments (custom user generators, NULL injection,
multi-locale, recursive parent auto-seed) remain out of scope (see Out
of scope).
Closes `requirements.md` **SD1** and delivers the core of **SD2**
(per-type generators, determinism, the `fake`-backed catalogue). It
also closes one of the two remaining gaps in **A1** ("all canonical
app-level commands") — `seed`; the other, `hint` (**H2**), is
separate.
Builds on: ADR-0014 (data operations, the `Value`/`Bound` value model,
the auto-show pattern, FK-error enrichment), ADR-0005/0011 (the type
vocabulary and `Type::fk_target_type()`), ADR-0012/0013 (the column /
relationship metadata tables, the rebuild-table primitive — *read* by
seed for schema introspection), ADR-0024 (the unified grammar tree /
`CommandNode` registration that gives completion, hints, help-id,
usage-id for free), ADR-0022 (ambient typing assistance — the
`KNOWN_SQL_FUNCTIONS` curated-vocabulary pattern that the
generator-name list mirrors), ADR-0026 (the `in (...)` / `between ...
and ...` expression grammar the override clause reuses), ADR-0027 (the
validity-indicator diagnostics model), and ADR-0038 (the
`OutputStyleClass::Hint` styled output used for the post-seed
advisory). Honours ADR-0003 (both modes, no sigil), ADR-0009 (DSL
conventions — keyword grammar, `--` flags for opt-in choices, one
sigil only), ADR-0002 (no engine name in user-facing strings), and
ADR-0015 (per-command write-through persistence).
## Context
`seed <table> [count]` is the last unbuilt **data-authoring** command
in the requirements. The pedagogical value is high: a learner who has
just modelled a schema wants rows to query against *now*, without
hand-typing dozens of `insert`s. A teacher wants a one-liner that
fills a demo database with believable data. SD1 commits to "plausible
fake data; junction tables seeded with valid foreign-key references
drawn from existing parent rows." SD2 deferred the *how* — "per-type
generators, locale, determinism, override hooks" — explicitly pending
this ADR.
The design conversation widened the scope deliberately, with the user
confirming each step:
- **Realism matters more than minimalism** for a teaching tool. Random
`text_a3f9` values teach nothing; `Alice Martinez` /
`alice.m@example.com` make queries feel real. → adopt a faker
library and make generation **name-aware**.
- **The column *name* is the strongest signal** for what a value should
look like, but it is **ambiguous** without the **table** for the
`name`/`title` family (`products.name` ≠ `users.name`).
- **Heuristics will miss**, so a **manual override** surface is
required, not optional — this is SD2's "override hooks", brought
forward.
- **Identifiers and enums** are special: `id`-ish columns want
uniqueness; `status`-ish columns have no sensible generic value and
should be *flagged*, not guessed.
The novel work is the **generation layer**. Everything downstream —
type validation, autogen autofill (`serial`/`shortid`), FK
enforcement, per-command persistence, the auto-show outcome — is
reused from the existing insert/update machinery as **shared helper
functions**, per the X5 architecture preference (unique commands, with
mechanics shared as library functions — *not* by emitting
`Command::Insert` to borrow `do_insert`).
## Decision
Add a dedicated **`seed`** command (its own AST variant and its own
`do_seed` worker executor) available in **both modes**, with the
surface and behaviour below. Generation is realistic, name- and
table-aware, type-gated, with a manual override clause and a
reproducibility flag.
**Command classification (important, set by the replay decision
D16).** Although `requirements.md` A1 lists `seed` among the
"app-level commands" (meaning: part of the canonical command surface,
no sigil, both modes), `seed` is architecturally a **data-authoring
command** — a sibling of `insert`/`update`/`delete`, **not** an
app-lifecycle `AppCommand`. It is therefore **not** added to
`is_app_lifecycle_entry_word` / completion's
`empty_input_offers_app_command_entry_keywords` (those mirror the
`AppCommand` set and must match — `seed` belongs in neither): `replay`
re-runs it as a data write (D16).
### D1 — Command surface (fork, user-chosen: "whole-row + column-fill")
Two forms:
1. **Whole-row generation** — `seed <table> [count]`
Generates `count` new rows (an INSERT path). `count` **defaults to
20** (D6) when omitted. Every user-fillable column is filled per the
generation rules (D7–D12); `serial`/`shortid` autogen columns are
left to the existing autofill helpers.
2. **Column-fill on existing rows** — `seed <table>.<column>`
Fills `<column>` across the table's **existing** rows (an UPDATE
path) — the natural follow-up to `add column`. Combined with the
`set` clause (D2) this is also the precise repair for a single
mis-guessed column: `seed users.work_addr set work_addr as email`.
Column-fill **refuses** PK columns and autogen (`serial`/`shortid`)
columns (a friendly error — you don't "fill" an identity column),
and **respects** the same UNIQUE / FK / required rules as whole-row
generation (a UNIQUE target gets collision-free values; an FK
target samples from the parent, D14). On an **empty** table it is a
friendly no-op ("no rows to fill").
**Zero / over-cap counts.** `seed <table> 0` is a friendly no-op;
`count` over the maximum (D6) is a friendly error.
The column-restricted-*insert* form (`seed t (a, b)` — new rows, only
some columns filled) was considered and **rejected** as marginal and
constraint-fragile (see Alternatives).
**Required-column block guard (user requirement).** If seed cannot
produce a value for a `NOT NULL` column — the only real case is a
`NOT NULL blob` column, which has no DSL value path — it **refuses the
whole operation with a friendly error** naming the column, rather than
attempting a NULL insert that would violate the constraint. The check
is a pre-flight over the resolved per-column plan, before any write.
### D2 — Manual override: the `set` clause (fork, user-chosen: "value + list + generator + range")
An optional, comma-separated `set` clause overrides generation per
column. Four forms, all reusing existing grammar vocabulary so there
is nothing new to learn:
| Form | Example | Meaning |
|---|---|---|
| Fixed value | `set status = 'pending'` | every row gets the constant |
| Pick-from-list | `set role in ('admin', 'editor', 'viewer')` | uniform random choice from the list |
| Explicit generator | `set work_addr as email` | force a named generator (D9) |
| Range | `set price between 10 and 100` | uniform in range; **also dates** — `set signup between '2023-01-01' and '2024-12-31'` |
Multiple clauses combine: `seed users 20 set role in ('admin',
'user'), status = 'active', signup between '2023-01-01' and
'2024-12-31'`.
**Override × UNIQUE capacity (post-implementation `/runda`, user-chosen:
"friendly error").** A *bounded* override — a fixed value, or a
pick-list — on a **single-column-UNIQUE** target (a `UNIQUE` column or a
single-column PK) that offers fewer **distinct** values than the row
count cannot fill the run; rather than let the D10 uniqueness machinery
silently cap it (e.g. `seed users 100 set email = 'x'` → 1 row), seed
**refuses up front** with a friendly error pointing at the fixes (use a
generator, or a longer list). Generators and ranges are treated as
effectively unbounded sources — if one genuinely exhausts, the D14
distinct-combination cap still applies. Compound uniqueness is exempt
(the *other* key columns can still vary).
**Quoting (fork, user-chosen: "quoted, grammar-consistent").** Text
values and list items are **quoted string literals** (`'admin'`),
exactly as everywhere else in the DSL — only **numbers** stay
unquoted. **Amendment (2026-06-11, Phase 2 build):** the original
wording said "numbers *and dates* stay unquoted", but this DSL has
**no date-literal token** — `Value` is `Number`/`Text` only, and a
date is a **quoted string** validated by `bind_date` (`'2023-01-01'`)
everywhere else (insert / update / `where`). An unquoted `2023-01-01`
lexes as `2023`,`-`,`01`,… and cannot parse. So **dates in the range
form are quoted** (`between '2023-01-01' and '2024-12-31'`) — which is
in fact *more* faithful to this decision's own "quoted,
grammar-consistent" principle. Numbers remain unquoted (`NumberLit`).
This reuses the ADR-0026 expression grammar **unchanged**:
the DA pass confirmed that the `in (...)` form's operands are typed
value slots, so a *bare* `admin` would parse as a **column reference**
(→ "unknown column"), not a string. Quoting is therefore not a style
preference but a correctness requirement of grammar reuse. The range
form is **type-aware**: numeric bounds for numeric columns, date
bounds for date/datetime columns; a type-incompatible bound is a
friendly error. `=`, `in (...)`, and `between ... and ...` are the
ADR-0026 expression operators; `set` is the ADR-0014 UPDATE keyword;
`as` is borrowed from the SQL alias slot. The `as <generator>` operand
is a bare name from the curated generator vocabulary (D9), not a
value. The override takes precedence over every heuristic.
### D3 — Generation library: `fake` crate + hand-rolled gaps (fork, user-chosen: "name-aware + realistic")
Add the **`fake`** crate (v5.x at time of writing; English locale for
v1 per X2) for realistic values: names, emails, usernames, addresses,
companies, phone numbers, lorem text, dates. Generation is driven by a
per-column **generator** chosen by the heuristics (D7) or the override
(D2), falling back to **type-based** generation (D8).
**Implementation-time verifications (resolved 2026-06-11 when the
dependency was added):**
- **`rand` de-duplication — clean.** `fake` 5.1.0 depends on
`rand = "0.10"`, the **same major** as the project's `rand 0.10.1`,
so `cargo tree -e normal` resolves a **single** `rand 0.10.1` (no
runtime duplication; the `rand 0.8.6` visible to `cargo tree -i
rand` is only `fake`'s own dev-dependency, never compiled for us).
Consequence for D4: one seeded `rand 0.10` `StdRng` can drive
**both** `fake`'s `fake_with_rng` and the hand-rolled generators —
determinism is single-RNG, single-version, and shares `shortid.rs`'s
`rand` version.
- **`fake` module inventory / features — confirmed.** Default features
(`["either"]`) cover the core string fakers used here
(Name/Internet/Address/Company/Lorem/PhoneNumber); `fake`'s `chrono`
feature is **deliberately omitted** (dates generated in-house for
D8's bounded windows). No commerce/product module exists → `product`
is hand-rolled (D9). (The exact faker call sites are pinned when the
generation library is built.)
- **Security (new-dependency posture) — clean.** The `fake` tree (296
packages total) scanned clean by **all three** mandated scanners:
`osv-scanner` (no issues), `grype` (no vulnerabilities), `trivy fs
--scanners vuln` (0). No findings to document or accept.
### D4 — Determinism: `--seed <n>` (fork, user-chosen: "optional flag")
Generation is **random by default**. The optional `--seed <n>` flag
makes a run **reproducible**: **same database state + same `--seed` →
identical data**. The "database state" qualifier matters (DA
refinement) — FK sampling (D14), identifier sequencing (D10), and
UNIQUE collision-avoidance all *read existing rows*, so reproducibility
is relative to the data already present, not absolute. Value: teachers
hand out one dataset; demos are stable; and the feature's own tests
can assert **exact** output (against a known starting state).
Implemented with a seedable RNG threaded through every generator (no
`thread_rng` on the seeded path). `--` flag per ADR-0009 (opt-in
choice). Naming note: the flag `--seed` and the command `seed` share a
word but never collide grammatically (`seed users 20 --seed 42` parses
unambiguously). This flag is also the determinism lever for **replay**
(D16): a recorded `seed … --seed N` line reproduces on replay; a bare
`seed …` line regenerates fresh data.
### D5 — Both modes (A1)
`seed` is a canonical app-level command, available in **simple and
advanced** mode, no sigil — like `save`/`load`/`export`/`replay`.
### D6 — Default count: 20; bounded maximum
Omitted `count` → **20** rows: enough to make `where`, `group by`,
`order by`, and `limit` meaningful without flooding the output pane.
A **maximum** is enforced (proposed 10 000) to prevent a typo
(`seed t 1000000`) from hanging the app or bloating the project; over
the cap → friendly error stating the limit.
### D7 — Name-aware heuristics, type-gated (the catalogue)
A column's **name** selects a generator, but a name rule only fires
when the column's **type** is compatible (a column named `email` typed
`int` does **not** get a string — it falls through to type-based int).
Matching is **case-insensitive**, **token-based** (split on `_`,
camelCase, kebab), **most-specific-first**, with documented
false-positive guards. The catalogue (representative; full table lives
with the implementation):
| Column name (tokens) | Generator | Type gate |
|---|---|---|
| `first_name`/`fname` · `last_name`/`surname`/`lname` | first / last name | text |
| `name`/`full_name` · `title` | **table-context** name (D11) | text |
| `email`/`*_email` | email | text |
| `username`/`login`/`handle` | username | text |
| `password`/`pwd` | password | text |
| `phone`/`mobile`/`cell`/`tel` | phone number | text |
| `city`/`town` · `country` · `state`/`province` | address parts | text |
| `street`/`address`/`addr` · `zip`/`postcode`/`postal` | address parts | text |
| `company`/`employer`/`org` · `job`/`position`/`profession` | company / job | text |
| `description`/`bio`/`notes`/`summary`/`comment` | sentence / paragraph | text |
| `url`/`website`/`homepage` · `color`/`colour` | URL / hex colour | text |
| `price`/`amount`/`cost`/`salary`/`balance`/`total` | currency-range number | numeric |
| `age` · `quantity`/`qty`/`stock`/`count` | 18–80 · small int | numeric |
| `date`/`*_date` | date, recent ~3 yr window | date |
| `dob`/`birthday` | date, adult window (18–80 yr ago) | date |
| `timestamp`/`datetime` · `created_at`/`updated_at`/`*_at` | datetime, recent window (`updated_at` ≥ `created_at`) | datetime |
| `is_*`/`has_*`/`active`/`enabled` | boolean | bool |
| **identifier family** (D10) | unique sequential | int/text |
| **enum-ish family** (D12) | generic text + flag | (text) |
**False-positive guards (documented):** `username`/`filename`/
`table_name`/`*_name` handled before the bare `name` rule so they do
**not** resolve to person-name; the bare `name`/`title` rule requires a
standalone token or a recognised `*_name` suffix.
### D8 — Type-based fallback
When no name rule matches (or to satisfy a name rule's type gate),
generate by **type**: `text`→realistic words/short phrase, `int`→
bounded random, `real`→random double, `decimal`→formatted number,
`bool`→random, `date`/`datetime`→**bounded recent** value (never "any
point in all of history" — per the user's date concern), `serial`/
`shortid`→omitted (autogen helpers fill them), `blob`→unsupported
(nullable→NULL; `NOT NULL`→D1 block guard).
### D9 — Named generators + the `product` generator
The generators addressable via `set ... as <generator>` (D2) and
chosen by D7 form a **curated, named vocabulary** — `name`,
`first_name`, `last_name`, `email`, `username`, `phone`, `city`,
`country`, `street`, `zip`, `company`, `job`, `sentence`, `paragraph`,
`url`, `color`, `price`, `age`, `date`, `datetime`, `bool`, `product`,
… — the single source of truth shared by the executor, the completion
source, and the highlighter (mirroring `KNOWN_SQL_FUNCTIONS`,
ADR-0022 Amendment 6).
**`product`** is **hand-rolled** (the `fake` crate has no
commerce/product module — D3): `{adjective} {material} {noun}` from
three small baked-in word lists (~20 each) → "Sleek Bamboo Keyboard",
"Vintage Leather Backpack". Seedable through the D4 RNG. Always
addressable as `set <col> as product`, and auto-selected by D11 for
the `name`/`title` family in product-ish tables.
### D10 — Identifier family → unique by name (fork, user-chosen: "unique sequential")
A column in the identifier family — `id`, `*_id` **that is not an FK**,
`code`, `sku`, `ref`/`reference`, `number`/`no`, `barcode` — that is
**not** a serial/shortid autogen column and **not** the PK is treated
as an identifier and gets **unique** values: **int → sequential**
(`MAX(col)+1` ascending, reads like real ids, never collides);
**text → unique short code** (generate-with-retry). Precedence:
**FK detection wins** over this rule (an FK `user_id` *should* have
duplicates — many children per parent), so `*_id` only triggers
uniqueness when the column is not a foreign key.
**Constraint-driven uniqueness is independent and mandatory:** any
column with a `UNIQUE` constraint (or a user-fillable single-column
PK) gets guaranteed-unique generation regardless of name — a
correctness requirement, not a heuristic. Generation for such columns
uses retry/sequence to guarantee no collision within the batch and
against existing rows.
### D11 — Table-context disambiguation for `name`/`title` (fork, user-chosen: "table-context-aware")
For the `name`/`title` family **only**, the heuristic also reads the
**table** name token:
- `product`/`item`/`goods`/`merchandise`/`catalog`/`inventory` →
`product` generator (D9)
- `company`/`companies`/`vendor`/`supplier`/`manufacturer`/`brand` →
company name
- `user`/`customer`/`person`/`people`/`employee`/`member`/`contact`/
`author`/`student` → person name
- unrecognised table → generic word
This resolves the real ambiguity (`products.name` → "Sleek Bamboo
Keyboard"; `users.name` → "Alice Martinez"; `vendors.name` → "Globex
Corp"). It is a deliberately **scoped** use of table context — the only
place the table name influences generation.
### D12 — Enum-ish names → generic + post-seed advisory (fork, user-chosen: "flag enum-ish only")
Enum-ish names — `role`, `status`, `type`, `state`, `kind`,
`category`, `level`, `tier`, `stage`, `priority`, `gender` — have **no
sensible generic generator**, so they are **not guessed**: they fall
through to generic text (they must still be filled — a `NOT NULL`
status cannot be left empty). Seed then emits a **post-seed advisory**
(D13) naming them and pointing at the `set ... in (...)` override.
### D13 — Reporting: post-seed advisory (fork, user-chosen: "flag enum-ish only")
After a successful seed, in addition to the normal auto-show outcome
(row count + the affected rows, per ADR-0014), seed appends a
**`OutputStyleClass::Hint`** advisory **only** when one or more
enum-ish columns (D12) — **or columns guarded by a CHECK that seed
could not derive values from** (D17) — were filled generically.
The wording is **phase-aware** (DA finding: the advisory must not name
features that ship later). In **Phase 1** (no `set` clause yet) it
names the columns and explains they were filled generically. From
**Phase 2/3** it points at the concrete repair:
```
# Phase 1 wording:
✓ Seeded 20 rows into users
status, role were filled with generic text — they look like
fixed value sets you may want to choose deliberately.
# Phase 2/3 wording (set clause + column-fill exist):
✓ Seeded 20 rows into users
status, role filled generically. Fix existing rows with
seed users.status set status in ('active','inactive'),
or pass set … on the next seed.
```
Note the repair for **already-seeded rows** is the **column-fill**
form (`seed users.status set …`), not "re-seed" (which would add more
rows) — DA correction. This is a **result-time** note (cheap, reusing
ADR-0038's hint rendering), not a typing-time warning. The fuller
"per-column report" (every column → its generator) was considered and
**deferred** (see Alternatives / Out of scope).
### D14 — Foreign keys (SD1; fork on empty-parent, user-chosen: "friendly error")
- **Each FK** is filled by sampling **uniformly** from the **existing
rows** of the parent table's referenced column(s). Duplicates are
expected and correct (many children per parent). For a **compound
FK**, the referenced **tuple is sampled jointly** (a whole existing
parent key), never per-column independently — independent sampling
could fabricate a `(a, b)` pair that exists in no parent row and
would fail FK enforcement (DA refinement).
- **Empty parent** → seed **refuses with a friendly error** naming the
parent and the FK column ("seed `users` first — `orders.user_id`
references it"). Safe, predictable, teaches FK dependency order.
Recursive parent auto-seed is **deferred** to a future `--recursive`
opt-in (Out of scope).
- **Junction / compound-PK tables** (SD1's explicit case): sample
**distinct combinations** of the parent PK tuples to satisfy the
compound PK's uniqueness; if `count` exceeds the number of available
distinct combinations, **cap** at the maximum and note it in the
outcome.
- **Self-referential FK** (`manager_id → id`): if nullable, leave NULL
or point at an earlier row in the same batch; if `NOT NULL` on an
otherwise-empty table, friendly error. Documented edge case.
- **Nullable FKs** are **always filled** in v1 (predictable);
occasional-NULL injection is deferred.
### D15 — Undo: one snapshot per seed (DA finding; ADR-0006)
Seed is a mutation, so it must participate in undo. The draft omitted
this; the DA found the codebase already has the right primitive —
`BeginBatch` / `EndBatch` (`db.rs`), used by `replay` so a multi-write
run collapses to **one** boundary snapshot. `do_seed` wraps its
generated writes in `begin_batch` / `end_batch`, so **`seed users 20`
is a single undo step**, not 20 — matching ADR-0006 Amendment 1's
batch model. Column-fill's bulk UPDATE is likewise one step. (`import`
remains the only data-affecting op outside undo, per ADR-0015 §11;
seed is firmly inside it.)
### D16 — Replay: seed re-runs as a data write (fork, user-chosen)
`replay` re-executes a recorded `seed` line as a **data-write
command** — it is **not** in the app-lifecycle skip-set (see Command
classification, above). Consequence, accepted by the user: a **bare**
`seed users 20` regenerates **fresh, divergent** data on each replay;
a `seed users 20 --seed 42` line (the determinism lever, D4)
**reproduces** the original data. This keeps seed faithful to its
nature as a data write and puts reproducibility exactly where the
`--seed` flag already lives. (Seeded *data* is in any case durable
independently of replay, via the ADR-0015 CSV store + `rebuild`;
replay is the scripting re-run path, U4.) The DA confirmed the wiring
trap: because seed is *not* an `AppCommand`, it is correctly absent
from `is_app_lifecycle_entry_word` and replay dispatches it through
the normal data path rather than aborting.
### D17 — CHECK constraints: derive from simple `IN`, else friendly-fail (fork, user-chosen)
A CHECK on a generically-filled column would otherwise fail the whole
batch (DA finding — the block guard only covered `NOT NULL blob`).
Two-tier handling, per the user:
1. **Derive from simple `IN`-CHECKs.** When a column's CHECK is the
common enum-as-CHECK shape — `col IN ('a', 'b', …)` (the column's
own CHECK, single-column, literal list) — seed **parses out the
allowed values and uses them as the generator** (uniform choice).
The frequent `CHECK (status IN ('active','closed'))` case then
"just works" with no override needed.
2. **Best-effort + friendly fail for the rest.** For CHECKs seed
cannot interpret (ranges, expressions, multi-column), it generates
best-effort; if a generated row violates the CHECK, the insert
fails through the existing **H1 friendly-error layer** (ADR-0019)
naming the constraint and pointing at `set`. Such CHECK-guarded
columns are also **pre-flagged in the advisory** (D13) alongside
enum-ish names, so the user is warned before hitting the failure.
No new CHECK engine — tier 1 is a narrow literal-`IN` parse over the
CHECK text already stored in metadata; tier 2 is the existing failure
path.
### D18 — Auto-show is capped for large seeds (DA finding)
ADR-0014 auto-show renders "the affected rows" — fine for one insert,
a wall for a 10 000-row seed. Seed's outcome shows a **capped
preview** (proposed first **20** rows) with a `(showing 20 of N)`
note, not the full set. The row **count** is always reported in full;
only the rendered table is capped.
## Grammar, AST, and cross-cutting wiring
Per ADR-0024, `seed` is registered as a `CommandNode` so completion,
hints, help, and usage flow from one definition. The wiring, as
**explicit acceptance criteria** (a `/runda` pass must verify each —
ADR-0045 showed "claimed verified" is not verified):
- **AST + executor.** A dedicated command variant (`Seed { table,
target_column: Option<String>, count: Option<u32>, overrides:
Vec<SeedOverride>, rng_seed: Option<u64> }`) and a dedicated
`do_seed` worker executor. `do_seed` **reuses shared helpers**
(value binding `impl_value_for`, autogen autofill, FK enrichment,
the multi-row parameterised-insert pattern of `plan_autogen_autofill`,
the UPDATE path for column-fill, per-command persistence, the
`begin_batch`/`end_batch` undo primitive of D15) as library
functions — it does **not** emit `Command::Insert`/`Command::Update`
(X5).
- **Replay / undo classification (D15/D16).** `do_seed` brackets its
writes in one batch (one undo step). The `seed` entry word is
**deliberately absent** from `is_app_lifecycle_entry_word` and
completion's `empty_input_offers_app_command_entry_keywords` (the
`AppCommand` mirror) so replay re-runs it as a data write — an
explicit acceptance check, since the default for an unlisted
recognised command must be "replayed", not "abort".
- **Completion sources:** table-name (existing tables); `.column` and
`set`-clause column slots (columns of the named table); the
generator-name vocabulary (D9) after `as`; `count` number; `set` /
`=` / `in` / `as` / `between` / `and` keywords; `--seed` flag.
- **Syntax highlighting:** `seed` keyword; the generator-name
vocabulary highlighted as **`tok_function`** (reuse the existing
ADR-0022 Amendment 6 blue — no new theme colour).
- **Hints:** ambient per-slot "what's next" and usage hints, both
modes.
- **Help:** `help seed` topic (`help_id` + per-command block); the
general `help` list picks it up automatically via REGISTRY.
- **Parse-error pedagogy (ADR-0042):** near-miss matrix rows for `seed`
(bare / missing-table / wrong-token / malformed `set`), both modes.
- **Validity indicator (ADR-0027):** typing-time `[ERR]`/`[WRN]` for
unknown table, unknown column (in `.column` or `set`), unknown
generator name after `as`.
- **No DSL→SQL teaching echo (ADR-0038).** `seed` is a utility/app
command, not a DSL form of a SQL statement, so the echo does not
apply. (A future "show the generated INSERTs" is out of scope —
it would dump `count` statements.)
## Implementation phasing
Design is whole; the **implementation** is phased into reviewable,
test-first commits:
1. **Core whole-row seed** *(done, Phase 1)* — grammar/AST/executor;
type-based generation + the `fake`-backed name heuristics
(D7/D8/D11); identifier uniqueness (D10) + constraint uniqueness; FK
sampling (joint tuples) + empty-parent error + junction
distinct-combos (D14); `--seed` determinism (D4); default count + cap
+ zero-no-op (D6/D1); required-column block guard (D1); **undo batch
(D15)**; **replay-as-data-write classification (D16)**; **CHECK
derive / friendly-fail (D17)**; **capped auto-show (D18)**; the
enum/CHECK advisory in its **Phase-1 wording** (D12/D13); full
ambient wiring; both modes.
2. **The `set` override clause** (D2) *(done, Phase 2)* — value / list /
generator / range, type-aware, with completion + highlight +
validity for the generator-name slot.
3. **Column-fill mode** (`seed <table>.<column>`, D1 form 2) *(done,
Phase 2)* — the UPDATE path.
Each phase is independently green before the next. (Phases 2 and 3
landed together — they share the `set`-override executor machinery, so
splitting them risked a state where `set` parsed but column-fill
silently no-op'd.)
## Testing (ADR-0008 tiers 1–3; test-first)
- **Tier 1 (unit, deterministic via `--seed`):** generator selection
(name × type-gate matrix, including every false-positive guard of
D7); table-context disambiguation (D11); identifier uniqueness and
the FK-wins-over-`*_id` precedence (D10); bounded-date windows (D8);
the `product` generator shape; override resolution + precedence (D2);
the required-column block guard (D1); the count cap (D6). Exact-value
assertions are possible because `--seed` fixes the RNG.
- **Tier 2 (insta snapshots):** the seeded data table render and the
enum advisory (D13) at representative sizes, light + dark.
- **Tier 3 (integration, full event loop):** `seed users 20` end to
end (rows land in db + CSV + history, auto-show, persistence);
FK sampling against a populated parent (incl. a **compound FK** —
every child tuple exists in the parent); **empty-parent friendly
error**; **junction** seeding with distinct combinations and the
over-cap note; the `set` clause forms (quoted literals);
**column-fill** on existing rows (incl. refusal of PK/autogen targets,
empty-table no-op); reproducibility (`--seed 42` twice → identical data
from a fixed state); both modes. Plus the DA-driven cases:
**one-undo-step** (seed then a single `undo` removes all rows);
**replay** of a bare `seed` line (divergent) vs a `--seed` line
(reproduced); **`IN`-CHECK auto-derivation** ("just works") and a
**complex-CHECK friendly failure**; **capped auto-show** on a large
seed.
"All green, no skips" is the only acceptable end state; the Phase-1
baseline (2290 passing / 0 failing / 0 skipped / 1 ignored doctest) is
the regression floor.
## Out of scope / deferred (future SD2 work)
- **Recursive parent auto-seed** (`--recursive`) — D14 errors instead.
- **NULL injection** for nullable columns (teaching optional
relationships / `IS NULL`) — v1 always fills.
- **Multi-locale** generation — English only (X2).
- **User-defined custom generators** (true "override hooks" — register
a named generator) — the `set ... as <builtin>` surface covers the
common need; custom generators are a later SD2 increment.
- **Full per-column seed report** — D13 flags enum-ish only.
- **Column-restricted insert** (`seed t (a, b)`) — rejected (D1).
- **"Show the generated SQL"** teaching echo for seed.
## Alternatives considered
- **Hand-rolled generators only (no `fake`):** minimal dependency, but
synthetic-looking data (`text_a3f9`) — rejected on pedagogy
(pedagogy wins ties).
- **Type-only generation (no name awareness):** simpler, but misses
the biggest UX win (a `users` table that reads like real people) —
rejected.
- **Column-name-only `name` (no table context):** leaves
`products.name` → person names, requiring a manual override on every
product/company table — rejected for the `name`/`title` family
(D11).
- **No override clause (heuristics + type only):** could not answer
"the heuristic guessed wrong, fix it" or enum columns — rejected;
the `set` clause (D2) is the answer to the user's Q3.
- **Recursive auto-seed of empty parents:** powerful but magical and
can seed tables the user did not name — deferred behind a future
flag (D14).
- **Always-random (no `--seed`):** simplest, but no reproducible
datasets and weaker tests — rejected (D4).
- **Full per-column report by default:** a nice teaching artifact but
verbose on wide tables — deferred; flag-only advisory chosen (D13).
- **Reuse `Command::Insert`/`do_insert` directly** from seed: tempting
for code reuse, but collapses command identity and violates X5 —
rejected in favour of a dedicated `do_seed` that calls shared
*helpers*.
- **Skip seed on replay** (classify as app-lifecycle, D16): consistent
with A1's "app-level" label and avoids divergent data, but seed is a
data write and silently skipping it on a scripted re-run is
surprising — rejected; `--seed` is the determinism lever instead.
- **Bare-word `set` list items** (`in (admin, …)`, D2): matched the
early mockups and reads cleaner, but bare words are column
references in the reused grammar (would error) and would force a
custom list form — rejected for quoted literals (grammar reuse +
DSL consistency).
- **Pre-flight refuse any CHECK-bearing table** (D17): safest but
blocks seeding too many legitimate tables — rejected for the
derive-`IN`-else-friendly-fail tier.
- **`set`-driven NULL / per-column report / recursive parent seed:**
deferred — see Out of scope.
+1
View File
@@ -60,3 +60,4 @@ This directory contains the project's ADRs, recorded per
- [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from <T1> to <T2> [as <name>]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. 
Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships - [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from <T1> to <T2> [as <name>]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). 
Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships
- [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b``22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String`**not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. 
**Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). the full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec<String>`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). **Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~4050 cols as a `Clear` overlay** (right panels stay unchanging underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). 
Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears) - [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b``22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). 
Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String`**not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. **Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). the full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec<String>`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). 
**Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~4050 cols as a `Clear` overlay** (right panels stay unchanging underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears)
- [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54``2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. **Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). **Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). 
Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (35 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. 
OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle - [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54``2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. **Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). 
**Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (35 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. 
over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle
- [ADR-0048 — `seed` fake-data generation command](0048-seed-fake-data-generation.md) — **Accepted 2026-06-11; Phase 1 + Phase 2 implemented 2026-06-11** (Phase 1 commits `202e25a`–`fbd219b`; design settled with the user across an extended fork dialogue, hardened by a pre-build `/runda` pass (six blockers folded in), a post-implementation `/runda` pass (eight gaps closed — FK/shortid determinism so **D4 holds with no exceptions**, plus six untested ADR decisions), a Phase-2 pre-build `/runda` pass (which caught the no-date-literal-token reality → the D2 quoted-dates amendment), and a post-implementation `/runda` pass (which added a friendly error for a bounded override on a UNIQUE column — see D2); **2400 tests pass, clippy clean**). Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1**. **Phase 1 shipped:** whole-row `seed <table> [count] [--seed <n>]` with realistic name-aware generation (the `fake` crate + a type-gated heuristic catalogue, table-context name disambiguation, hand-rolled `product` generator, bounded dates), identifier + constraint uniqueness incl. junction distinct-combos, FK sampling from existing parent rows (empty-parent error), `IN`-CHECK derivation + complex-CHECK advisory, a required-column block guard, `--seed` reproducibility (serial/FK/shortid all deterministic), undo as one batch step, replay as a data write, a capped auto-show preview, the enum/CHECK advisory, and an O(N) single-transaction insert path. 
**Phase 2 shipped (2026-06-11):** the `set` override clause (D2 — fixed value / pick-list / `as <generator>` / `between` range, **quoted** dates per the D2 amendment, type-aware, override drops the column from the advisory) and the `<table>.<column>` column-fill form (D1 form 2 — an UPDATE over existing rows, refusing PK/autogen targets, empty-table no-op, FK/unique-respecting, one undo step), with the new `KNOWN_GENERATORS` vocabulary (D9), a range `Generator`, full completion/highlight (`HighlightClass::Function`)/validity (`IdentSource::Generators`)/help/pedagogy wiring, and the D13 advisory's Phase-2/3 wording. Further SD2 increments (custom generators, NULL injection, multi-locale, recursive auto-seed) out of scope. Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1** (the other being `hint`/**H2**). A dedicated `seed` command (own AST variant + `do_seed` executor, **both modes**) generating **realistic, name-aware** fake data. Two forms: **`seed <table> [count]`** (new rows, default **20**, capped) and **`seed <table>.<column>`** (fill a column on existing rows, an UPDATE). Generation adds the **`fake` crate** (v5, English) driven by a **type-gated, token-matched name-heuristic catalogue** (~30 patterns, documented false-positive guards), with **table-context** disambiguating the `name`/`title` family (`products.name`→product, `users.name`→person, `vendors.name`→company), a **hand-rolled `product` generator** (`fake` has no commerce module), **bounded dates** (`date`/`timestamp`/`dob`/`*_at` recognised, recent windows — never "all of history"), the **identifier family** (`id`/`code`/`ref`/`number`, non-FK/non-PK) → **unique sequential**, and **enum-ish names** (`role`/`status`/`type`/…) left generic + a **post-seed Hint advisory** pointing at `set … in (…)`. 
A **`set` override clause** — `= value` / `in (a,b,c)` / `as <generator>` / `between a and b` (numeric **and** date), reusing ADR-0026 operators — answers the heuristic-miss case. **`--seed <n>`** makes runs reproducible (and enables exact-value tests). **FK** columns sampled uniformly from existing parent rows (**empty parent → friendly error**, no recursion v1); **junction/compound-PK** tables seeded with **distinct combinations**, capped + noted (SD1). A **required-column block guard** refuses rather than NULL-violate a `NOT NULL` column it can't fill (e.g. `NOT NULL blob`). Full ambient wiring (completion incl. a new generator-name vocabulary highlighted as `tok_function`, hints, `help seed`, ADR-0042 near-miss matrix, ADR-0027 validity); **no DSL→SQL teaching echo** (seed is a utility command, not a SQL twin). Honours **X5** — `do_seed` reuses insert/update *mechanics as helpers*, not by emitting `Command::Insert`. Implementation phased: (1) core whole-row seed → (2) `set` overrides → (3) column-fill. Deferred (future SD2): recursive auto-seed, NULL injection, multi-locale, user-defined custom generators, full per-column report
+14 -11
View File
@@ -8,9 +8,8 @@ to end across three phases + a restyle).
## §1. State at handoff ## §1. State at handoff
**Branch:** `main`. **HEAD `2d0f4b2`** plus an **uncommitted docs **Branch:** `main`. **HEAD `f0afec3`** — all work committed, nothing
finalization** (ADR-0047 status → implemented, README index, this pending. Unpushed (push is the user's step; normal working state).
handoff — see §6). Push is the user's step.
**Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1 **Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1
ignored is the long-standing `friendly` doctest). **Clippy clean** ignored is the long-standing `friendly` doctest). **Clippy clean**
@@ -18,6 +17,7 @@ ignored is the long-standing `friendly` doctest). **Clippy clean**
**This session's commits:** **This session's commits:**
``` ```
f0afec3 docs: session handoff 64 + ADR-0047 implemented (#22/#24)
2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4) 2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4)
241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4) 241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4)
2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5) 2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5)
@@ -26,8 +26,9 @@ e9eb1b1 docs: ADR-0047 — demonstration overlay layer for casts/teaching (#22)
638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24) 638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24)
``` ```
**Issues closed:** **#24** (vi nav) and **#22** (demo overlays) — close **Issues closed:** both **#24** (vi nav) and **#22** (demo overlays) are
#22 once the docs finalization commit lands. **closed on Gitea** with closing comments — verified via the filtered
issue list. Nothing left open from this session's scope.
## §2. #24 — vi-style load-picker navigation (commit `638b4c9`) ## §2. #24 — vi-style load-picker navigation (commit `638b4c9`)
@@ -107,13 +108,15 @@ existing `IndicatorDebounce` already takes. A future Tier-4 PTY harness
## §6. How to take over ## §6. How to take over
**Nothing is pending from this session** — both issues are closed, all
docs landed (`f0afec3`), tree is green. The next session **returns to the
open requirements backlog** (§7). Suggested start: run `/whatsnext`
(it reads this handoff), or pick from §7 below.
1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`, 1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/README.md`, and **ADR-0047** (fully landed). `docs/adr/README.md`. ADR-0047 is fully landed; revisit only for
2. **Pending:** the docs finalization commit (ADR-0047 status → demo-overlay follow-ups.
implemented; README index; this handoff). Commit as 2. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
`docs: session handoff 64 + ADR-0047 implemented (#22/#24)` (the user
confirms commit messages). Then close **#22** on Gitea.
3. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
`demo_badge_seq`, `demo_caption`, `demo_caption_capturing`, `demo_badge_seq`, `demo_caption`, `demo_caption_capturing`,
`demo_caption_buffer`, `last_output_area`. Rendering: `demo_caption_buffer`, `last_output_area`. Rendering:
`render_demo_overlays` / `render_badge_box` / `render_caption_box` / `render_demo_overlays` / `render_badge_box` / `render_caption_box` /
+144
View File
@@ -0,0 +1,144 @@
# Session handoff — 2026-06-11 (65)
Sixty-fifth handover. Continues from handoff-64 (ADR-0047 demo
overlays). This session designed and shipped **ADR-0048 — the `seed`
fake-data generation command (SD1)**, Phase 1, end to end: an ADR with
an extended fork dialogue + two `/runda` passes, then a phased
test-first build.
## §1. State at handoff
**Branch:** `main`. **HEAD will be the doc-wrap-up commit** (see §6) —
all seed work committed, nothing pending. Unpushed (push is the user's
step; normal working state).
**Tests: 2358 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all targets).
+68 over handoff-64's 2290.
**`cargo sweep` run** at wrap-up: `target/` 1.6 G → 183 M.
**This session's commits:**
```
202e25a feat(seed): fake-data generation library + fake dependency (P1.1)
f1e9484 feat(seed): command plumbing + walking skeleton (P1.2)
73493fa feat(seed): FK sampling, empty-parent error, block guard (P1.3a)
9c13501 feat(seed): uniqueness, junction distinct-combos, IN-CHECK (P1.3b)
0b3ab3c feat(seed): SeedResult outcome, capped preview, advisory, count cap (P1.3c)
e6ff63d perf(seed): single-transaction multi-row insert path (P1.3d)
fbd219b feat(seed): --seed flag, ambient wiring, and /runda hardening (P1.4 + DA)
```
(plus the earlier `4d0ae77` multi-tab-scope withdrawal and `0af7f56`
ADR-0048 doc, and the wrap-up doc commit.)
## §2. What `seed` does (Phase 1 — read ADR-0048)
`seed <table> [count] [--seed <n>]` — populate a table with realistic
fake data. **Available in both modes** (A1).
- **Realistic, name-aware generation:** the **`fake` crate** (v5,
English) driven by a **type-gated heuristic catalogue** (`src/seed/
heuristics.rs`) — `email`→email, `first_name`→first name,
`price`→currency, etc., each only firing when the column *type* is
compatible. **Table-context** disambiguates `name`/`title`
(`products.name`→a hand-rolled **product** name, `users.name`→person,
`vendors.name`→company). **Bounded dates** (`dob`/`created_at`/
`date`/`timestamp` → recent windows, never "all of history", anchored
to a fixed reference epoch for reproducibility). Type-based fallback
otherwise.
- **Uniqueness (D10):** the user-fillable PK, compound UNIQUE
constraints, single-column UNIQUE, and identifier-named columns
(`id`/`code`/…) stay distinct across the batch and vs existing rows;
**junction tables** get **distinct FK combinations** (capped at the
available product, reported). Identifier ints get a monotonic
sequence.
- **FK (D14):** every FK column samples an existing parent row (compound
FK reads one consistent parent row); **empty parent → friendly
error**.
- **`IN`-CHECK (D17):** a simple `col IN ('a','b')` CHECK becomes the
value source (enum-as-CHECK just works); complex CHECKs are flagged in
the advisory and best-effort generated (a violation rolls the batch
back).
- **Reproducibility (D4):** `--seed <n>` → identical data on the same DB
state. **Holds with no exceptions** — serial (rowid/MAX+1), FK
(`ORDER BY`), **shortid (seeded RNG)**, all generators.
- **Output:** the seeded-row count, a **capped preview** (first 20
rows), and a **Hint-styled advisory** naming enum-ish / underivable-
CHECK columns filled generically. Count cap 10 000; `seed t 0` no-op.
- **Safety:** one **undo** step (snapshot wraps the whole seed);
**replay** re-runs it as a data write; the insert path is a single
transaction (O(N), atomic, commit-db-last preserved).
## §3. Where the code lives
- **`src/seed/`** — the pure generation library (no DB): `mod.rs`
(`ColumnSpec`, `Generator`, `SeedRng`, `make_rng`), `heuristics.rs`
(`choose_generator` + the catalogue + `is_enum_ish`), `generators.rs`
(`generate_value` + the `product` generator + bounded dates),
`check.rs` (`parse_in_check_values`). ~40 Tier-1 tests, deterministic.
- **`src/db.rs`** — `do_seed` (+ `SeedColPlan`, `sample_parent_key_
tuples`, `seed_value_list_key`, `seed_max_int`, `SeedResult`,
`DEFAULT_SEED_COUNT`/`MAX_SEED_COUNT`/`SEED_PREVIEW_CAP`), the new
**`insert_one_row`** core extracted from `do_insert` (shared, no
tx/persist — so seed runs N rows in one tx), and the `Request::Seed` /
`Database::seed` / worker wiring.
- **`src/dsl/grammar/data.rs`** — `SEED` `CommandNode`, `build_seed`,
the `--seed` flag grammar (`Seq[Flag("seed"), NumberLit]`, the first
DSL flag with a value). `Command::Seed` in `command.rs`.
- **Runtime/render** — `CommandOutcome::Seed`, `AppEvent::
DslSeedSucceeded`, `App::handle_dsl_seed_success`. Catalog keys
`ok.rows_seeded` / `seed.capped` / `seed.advisory_generic` /
`help.data.seed` / `parse.usage.seed`.
- **Tests** — `tests/it/seed.rs` (25 integration tests),
`tests/typing_surface/mod.rs` (`seed_completion_and_validity`),
`tests/it/parse_error_pedagogy.rs` (bare-`seed` near-miss row),
`src/app.rs` (two render tests), `src/dsl/shortid.rs`
(`generate_with_rng`).
## §4. Process notes (the two `/runda` passes)
- **Pre-build `/runda`** (on the ADR) found six blockers — undo
integration (D15), replay semantics (D16), `set`-value quoting (D2),
CHECK handling (D17), an advisory phase-ordering bug (D13), auto-show
flooding (D18) — all folded into ADR-0048 before any code; the three
genuine forks re-escalated and user-resolved.
- **Post-implementation `/runda`** (on the whole implementation) found
**eight gaps**, all closed: FK-sampling determinism (→ `ORDER BY`),
**shortid not reproducible** (→ seeded RNG; fixed rather than merely documented — the
user chose the fix), and six **untested ADR decisions** (D5 advanced
mode, D15 undo, D16 replay, D17 complex-CHECK advisory, atomic
rollback, zero-count) — tests added for each.
## §5. Phase 2 (deferred — designed in ADR-0048, NOT built)
These are the only seed pieces left; both have full designs in
ADR-0048:
1. **The `set` override clause (D2)** — `seed t 20 set role in
('a','b'), status = 'x', work_addr as email, price between 10 and
100`. Value / pick-from-list / explicit-generator / range, **quoted
literals** (grammar-consistent). This is the SD2 "override hooks"
core. The `ColumnSpec.check_in_values` → `PickFrom` plumbing and the
`Generator` vocabulary already exist; this adds the grammar + a `set`
clause that overrides the per-column plan.
2. **Column-fill (`seed <table>.<column>`, D1 form 2)** — fill one
column across *existing* rows (an UPDATE). Refuses PK/autogen targets;
empty-table no-op.
`requirements.md`: **SD1 `[x]`**, **SD2 `[/]`** (core done; the two
above open), **A1 14/15** (only `hint`/**H2** unregistered).
## §6. How to take over
1. Read handoffs 63 → 64 → 65, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/0048-seed-fake-data-generation.md` (the whole thing —
D1–D18 + the as-built status block).
2. **Seed is feature-complete for Phase 1; nothing pending.** Next
options (user's call): seed **Phase 2** (`set` clause + column-fill);
**H2 `hint`** (closes A1) — own ADR; **TT5 CI**; or the larger
**V4 journal** / **tutorial** ADRs.
3. Two minor, user-deferred observations (non-blocking): the uniqueness
retry cap (`MAX_ATTEMPTS=200`) can cap a *medium* unique domain
slightly below its true size (junction/small domains are exact);
`literal_to_value` doesn't type-check an IN-CHECK literal vs a numeric
column (a malformed `int IN ('a')` CHECK fails cleanly at bind).
+145
View File
@@ -0,0 +1,145 @@
# Session handoff — 2026-06-11 (66)
Sixty-sixth handover. Continues from handoff-65 (ADR-0048 `seed`
Phase 1). This session built **ADR-0048 Phase 2** end to end: the
**`set` override clause** (D2) and the **`<table>.<column>`
column-fill** form (D1 form 2) — the two surfaces Phase 1 deliberately
deferred. Designed-then-DA-vetted (a `/runda` pass that caught a real
ADR-vs-grammar conflict), then built test-first.
## §1. State at handoff
**Branch:** `main`. All Phase-2 work is in the working tree;
**commits are pending the user's approval** (see §6). Unpushed is the
normal working state.
**Tests: 2400 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all
targets). +42 over handoff-65's 2358.
## §2. What landed (read ADR-0048 — Status + D1/D2/D9/D13)
`seed <T>[.<col>] [count] [set <overrides>] [--seed <n>]`.
- **`set` override clause (D2):** four forms, comma-separated —
`status = 'active'` (fixed), `role in ('a','b')` (pick-list),
`work_addr as email` (named generator), `price between 10 and 100`
(range; numeric **and quoted dates**). Type-aware; an override
**drops its column from the generic-fill advisory** (D13). Value
slots reuse `update`'s typed `current_column_value` (quoting
enforced structurally — a bare word is rejected).
- **Column-fill (D1 form 2):** `seed users.email [set …]` fills one
column across **existing** rows (an UPDATE). Refuses PK / autogen
(`serial`/`shortid`/`blob`) targets; **empty table → friendly
no-op**; FK target samples the parent; UNIQUE/identifier target gets
collision-free values; **one undo step**; `set` may only adjust the
filled column; a row count is refused.
- **Named-generator vocabulary (D9):** `src/seed/vocabulary.rs`
`KNOWN_GENERATORS` + `generator_for_name` + `is_known_generator_prefix`,
the single source of truth for completion, validity, and the executor.
- **Range generator:** `Generator::Range { low, high }` in
`src/seed/generators.rs`, interpreted per destination type;
`range_bounds_reason` validates compatibility before generation.
- **Ambient wiring:** completion (generator names after `as`, the
`set <col>` and `.col` column slots, the `set` keyword); highlight
(new `HighlightClass::Function` → existing `tok_function`); validity
(new `IdentSource::Generators` — unknown generator flagged `[ERR]`;
unknown column in `set`/`.col` flagged via the existing Columns
path); help (`help.data.seed`); parse-error pedagogy near-miss rows;
the D13 advisory's **Phase-2/3 wording** (points at `set` and the
column-fill repair). Both modes (D5).
## §3. The ADR amendment (a real DA find)
The pre-build `/runda` pass found that **ADR-0048 D2's "dates stay
unquoted" was impossible** — this DSL has **no date-literal token**
(`Value` is `Number`/`Text`; dates are quoted strings validated by
`bind_date`). Escalated to the user, who chose **quoted dates +
amend the ADR** (the grammar-consistent option). D2 now carries a
dated amendment; the range form uses `between '2023-01-01' and
'2024-12-31'`. This was the only divergence from the ADR text; numbers
remain unquoted.
## §4. Where the code lives
- **`src/dsl/command.rs`** — `Command::Seed` gains `target_column:
Option<String>` + `overrides: Vec<SeedOverride>`; new `SeedOverride`
/ `SeedOverrideKind`.
- **`src/dsl/grammar/data.rs`** — `SEED_SET_CLAUSE` + `SEED_DOT_COLUMN`
grammar; `SEED_GENERATOR` slot (`IdentSource::Generators`,
`HighlightClass::Function`); `build_seed` + the override fold
(`build_seed_overrides` / `parse_seed_override_tail`).
- **`src/dsl/grammar/mod.rs`** — `IdentSource::Generators` +
`HighlightClass::Function`.
- **`src/db.rs`** — `apply_seed_overrides` / `seed_override_plan` /
`seed_override_literal`; `do_seed_column_fill`; `do_seed` +
`Database::seed` + worker wiring threaded with the new params.
- **`src/seed/`** — `vocabulary.rs` (new); `generators.rs` (range
generator + `range_bounds_reason`); `mod.rs` (`Generator::Range`).
- **`src/completion.rs`** — generator candidates after `as`; generator
validity. **`src/input_render.rs`** — `"generator"` invalid-ident
kind. **`src/theme.rs`** — `Function → tok_function`.
- **Catalog** — `help.data.seed`, `parse.usage.seed`,
`seed.advisory_generic` (Phase-2/3 wording) in `en-US.yaml`;
`keys.rs` placeholders updated.
- **Tests** — `tests/it/seed.rs` (+~30: builder fold, executor
set/column-fill, undo, advanced mode), `src/seed/{vocabulary,
generators}.rs` (range + vocabulary units), `src/completion.rs`
(generator + column validity), `src/dsl/walker/highlight.rs`,
`tests/typing_surface/mod.rs` (completion slots),
`tests/it/parse_error_pedagogy.rs` (near-miss rows).
## §5. Two implementation refinements vs. the ADR (both met the contract)
- **Quoted dates** (the D2 amendment, §3).
- **Value slots reuse `current_column_value`** (the `update … set`
typed slot) rather than the raw ADR-0026 expression operand — no
spurious column-ref match, typed narrowing, consistent with
`update`. The user-facing contract (quoted literals, type-aware) is
fully met.
The `seed_take_value` / `seed_set_error` builder paths are
drift-guards (the typed slots only ever match value literals, so a bare
word is rejected at the grammar level) — they use the generic
`parse.error_wrapper`, mirroring `expr::build_expr`.
## §6. How to take over / next steps
1. Read handoffs 64 → 65 → 66, `CLAUDE.md`, `docs/requirements.md`,
`docs/adr/0048-…md` (Status block + D1/D2/D9/D13 + the amendment).
2. **Seed is feature-complete (SD1 + SD2).** `requirements.md`: **SD1
`[x]`, SD2 `[x]`**. The only open A1 gap is `hint`/**H2** (own ADR).
3. **Commits pending approval.** Suggested split:
- `feat(seed): set override clause + column-fill (ADR-0048 Phase 2)`
— all `src/` + `tests/` changes.
- `docs: ADR-0048 Phase 2 implemented + handoff 66` — ADR / README /
requirements / this file.
4. Next options (user's call): **H2 `hint`** (closes A1); **TT5 CI**;
the larger **V4 journal** / **tutorial** ADRs; or Tier-4 PTY (TT4).
5. Consider a `cargo sweep` at this milestone (`target/` grows).
## §7. Post-implementation `/runda` pass (done this session)
A DA pass over the completed code found **no correctness bugs and no
dropped requirements**; all D1–D18 acceptance criteria verified met,
tests confirmed to catch regressions. One **design fork** was surfaced
and **resolved by the user**:
- **Bounded override × UNIQUE column** — a fixed value / too-short
pick-list on a single-column-UNIQUE target used to silently cap the
run (e.g. `seed users 100 set email = 'x'` → 1 row). Now a **friendly
error** up front (`seed_override_capacity_guard`, `src/db.rs`), for
both whole-row and column-fill; generators/ranges stay cap-based
(unbounded sources). ADR-0048 D2 documents it; two tests pin it.
Remaining **non-blocking** edges (noted, not bugs):
- Overriding an **FK column** with a literal: the override wins (D2); a
non-parent value fails safely through the FK-error layer.
- **Column-fill of one column of a *compound* FK** samples that column
independently → an invalid tuple fails safely (UPDATE rejected,
rollback), never corrupts. Single-column FKs / non-FK columns are
exact.
- The generator slot uses the **default candidate-ladder hint** (offers
the vocabulary), not a dedicated prose intro — discoverability is met
by completion; a prose intro is optional polish.
+119
View File
@@ -0,0 +1,119 @@
# Session handoff — 2026-06-12 (67)
Sixty-seventh handover. Continues directly from handoff-66 (ADR-0048
`seed` Phase 2, committed). This was a **manual-testing pass**: the user
exercised the app, found several rough edges, and we triaged each into
*fix now* vs *file an issue*. Net result: **three bug fixes committed**
and **three enhancement issues filed**.
## §1. State at handoff
**Branch:** `main`. Working tree **clean**; all work committed. Unpushed
(push is the user's step).
**Tests: 2407 passing / 0 failing / 0 skipped / 1 ignored** (the
long-standing `friendly` doctest). **Clippy clean** (nursery, all
targets). +7 over handoff-66's 2400.
**Commits since handoff-65:**
```
f7155ce fix(input): thread the `:` one-shot escape into live SQL feedback
4cacb82 fix(completion): don't flag a table alias used before its FROM clause
c3e0103 fix(completion): flag-aware partial so a dash completes flags, not keywords
30b2677 docs: ADR-0048 Phase 2 implemented + handoff 66
a12facc feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
```
(`a12facc`/`30b2677` are the Phase-2 work documented in handoff-66.)
## §2. Bug fixes this session (all committed, all tested)
1. **`c3e0103` — flag completion ate the dash.** Typing a flag at a
flag position (`add 1:n relationship … -`) offered the `on` keyword
and, on accept, produced `-on` / `---create-fk`: the partial-token
walk stopped at `-`, so the dash was outside the replaced range.
Fix: flag-aware partial detection (a dash-prefixed token at a word
boundary is a flag-in-progress, **gated on a flag being expected** so
`where x = -5` stays a number) + a unified flag matcher
(`trim_start_matches('-')`). Affected **all** flags. 4 tests + 2
partial-flag snapshots updated (they'd captured the latent bug).
2. **`4cacb82` — table alias flagged as an unknown column.** In a
SELECT, the projection (`sum(ol.count*…)`) can reference an alias
whose `FROM … OrderLines ol` sits *after* the cursor. The candidate
engine recovers that via the §10.6 full-input lookahead (ADR-0032),
but `invalid_ident_at_cursor` only walked text *before* the cursor —
so `ol` matched no scope and got a red "ERR" overlay on an otherwise
valid query. Fix: give the validity check the same full-input
lookahead and bail when the partial prefix-matches a binding's alias
or table. 1 test.
3. **`f7155ce` — the `:` one-shot escape broke live SQL feedback.**
Submission strips the `:` (ADR-0003), but the *live* feedback kept it
in the buffer handed to the walker, which bailed at the `:`. Effect:
under `:`, Tab completed nothing and a valid query could flash `[ERR]`
— while the same line in full `mode advanced` worked. (The hint
already stripped it, hence "hint shows the name but Tab does
nothing".) Fix: one shared `App::feedback_view()` (the `:`-stripped
SQL + mapped cursor + stripped offset) routed through completion (with
a `replaced_range` offset shift), the validity verdict, and rendering
(new `render_input_runs_feedback` highlights/overlays the view shifted
by the offset; the `:` renders as plain text); the ambient hint was
consolidated onto it (removing the duplicate `strip_one_shot_prefix`).
3 tests + the 9 existing colon tests still green.
## §3. Investigated, **no code change** (working as designed)
- **Comma-`FROM` implicit join** (`select … from A, B, C`) is
**deliberately rejected** — ADR-0032 §11 / OOS-3: *"comma-FROM teaches
habits we do not want to encourage; `CROSS JOIN` covers the same shape
explicitly."* The explicit equivalent (`CROSS JOIN … WHERE …`) works.
- **`sum(…)` returning one row** with no `GROUP BY` is **correct SQL**
(the aggregate collapses the result to one row; SQLite/the playground
allow the non-aggregated columns where Postgres would error). The
user's query needed `group by o.id`. Verified (1 row).
## §4. Open issues filed this session — **next session's candidates**
All on `git.lazyeval.net/oli/rdbms-playground`, label `enhancement`:
- **#26 — `seed <table>` hint omits the optional count.** A complete
command's optional positional *number* has no Tab candidate, so it's
invisible. `IntroProse` doesn't fit (it only fires for incomplete
required slots; the completing Seq match clears the hint). Needs a way
to advertise optional positional non-keyword args. *(I attempted +
reverted this during Phase 2; see the analysis in the issue.)*
- **#27 — Bottom status line: keybindings-only, context- and
state-aware.** Per-nav-focus keybindings (Input vs sidebar), **include
transient states** (Tab-cycle, history) — user preference — and add
`mode advanced` to the empty-input hint. May warrant a small ADR.
- **#28 — Reconsider relationship prose in `add column` (incidental DDL)
confirmations.** Currently by design (ADR-0044 §1 keeps prose, not
diagrams, for incidental DDL). **User preference: do NOT show the
`References:` / `Referenced by:` block** in the add-column
confirmation at all — focus on the change just made. This revisits a
decided area → land as a **new ADR** superseding the relevant part of
ADR-0016 §5 / ADR-0044 §1; confirm scope (just `add column`, or all
incidental DDL).
## §5. Other open work (unchanged from handoff-66 §6)
`seed` is **feature-complete** (`requirements.md` SD1 `[x]`, SD2 `[x]`).
Remaining roadmap, user's call:
- **H2 `hint`** — the last A1 gap (its own ADR).
- **TT5 CI** — test infra exists; no CI workflow yet.
- **TT4 PTY (Tier-4)** — ADR-0008 specifies it; not wired.
- Larger: **V4 journal**, **tutorial/lesson system** (each needs an ADR).
A possible quick follow-up: a friendlier "use an explicit `JOIN`"
parse-error for comma-`FROM` (§3, first bullet) — not filed; mention if wanted.
## §6. How to take over
1. Read handoffs 65 → 66 → 67, `CLAUDE.md`, `docs/requirements.md`.
2. `seed` Phase 2 is done (ADR-0048 Status block is current). The
manual-testing fixes (§2) are committed and green.
3. Pick from §4 (filed issues #26/#27/#28) or §5 (roadmap). #28 is a
decision/ADR; #27 is UX (maybe ADR); #26 is a hint-system enhancement.
4. Consider a `cargo sweep` at this milestone (`target/` grows across
sessions).
+66 -26
View File
@@ -88,12 +88,16 @@ since ADR-0027.)
because relationships are cross-table rather than per-table, they because relationships are cross-table rather than per-table, they
get their own sibling panel stacked below the tables list, not get their own sibling panel stacked below the tables list, not
nested items within it — user-confirmed 2026-06-10.)* nested items within it — user-confirmed 2026-06-10.)*
- [/] **S3** Output panel renders a visualization of the - [x] **S3** Output panel renders a visualization of the
currently selected item and supports multiple tabs. currently selected item.
*(Partial, verified 2026-06-07: single-element structure *(Satisfied: single-element structure visualisation renders
visualisation renders (`output_render.rs:82-180`); **multiple (`output_render.rs:82-180`) — select a table, see its columns /
tabs are not implemented** — the output is one line buffer, no types / keys. **Multi-tab clause withdrawn 2026-06-11** (user
tab abstraction. Same multi-tab gap as V2.)* decision): the original wording promised "and supports multiple
tabs", but the output model is settling on the single scrollable
**V4 journal** rather than switchable tabs, so the tab clause is
dropped from tracked scope. A future return to tabbed output would
be a fresh requirement, not this one. Same withdrawal as V2.)*
- [x] **S4** Hint area below the input field, showing hints about - [x] **S4** Hint area below the input field, showing hints about
the current input or last error. the current input or last error.
*(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` / *(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` /
@@ -242,13 +246,12 @@ since ADR-0027.)
available in both modes: `save`, `save as`, `load`, `new`, available in both modes: `save`, `save as`, `load`, `new`,
`rebuild`, `export`, `import`, `seed`, `replay`, `undo`, `rebuild`, `export`, `import`, `seed`, `replay`, `undo`,
`redo`, `mode`, `help`, `hint`, `quit`. `redo`, `mode`, `help`, `hint`, `quit`.
*(Partial, verified 2026-06-07: 13 of 15 implemented and *(Partial: **14 of 15** implemented and available in both modes —
available in both modes — `quit`/`q`, `mode simple|advanced`, `quit`/`q`, `mode simple|advanced`, `help`, `save`, `save as`,
`help`, `save`, `save as`, `load`, `new`, `rebuild`, `export`, `load`, `new`, `rebuild`, `export`, `import`, `replay`, `undo`,
`import`, `replay`, `undo`, `redo` (REGISTRY in `redo`, and now **`seed`** (ADR-0048 / SD1, done 2026-06-11).
`grammar/app.rs:249-333`). **Missing: `seed`** (tracked as SD1) **Only `hint`** (tracked as H2) remains unregistered. A1 closes
**and `hint`** (tracked as H2) — neither is registered. A1 when H2 lands.)*
closes when SD1 + H2 land.)*
## DSL data commands ## DSL data commands
@@ -469,15 +472,18 @@ since ADR-0027.)
"relationship-relevant" reach). The §3 last-resort helper line was "relationship-relevant" reach). The §3 last-resort helper line was
considered and rejected. Two `/runda` passes (design + implementation). considered and rejected. Two `/runda` passes (design + implementation).
Selection-nav and the broader journal direction remain in V4.)* Selection-nav and the broader journal direction remain in V4.)*
- [/] **V2** SQL query results render as a dynamic table view in - [x] **V2** SQL query results render as a dynamic table view in
the output pane, with multiple result tabs supported. the output pane.
*(Partial, verified 2026-06-07: the **table view** is done — *(Satisfied: the **table view** is done — `output_render.rs:38-72`
`output_render.rs:38-72` `render_data_table` renders a `render_data_table` renders a box-drawing frame with aligned
box-drawing frame with aligned columns (numeric right, text columns (numeric right, text left) and NULL/control-char
left) and NULL/control-char sanitisation, for `show data` and sanitisation, for `show data` and after every write (ADR-0014).
after every write (ADR-0014). **Missing: multiple result tabs** **Multi-tab clause withdrawn 2026-06-11** (user decision): the
— the output is a single `VecDeque<OutputLine>` with no tab original wording promised "with multiple result tabs supported";
abstraction (same gap as S3). Multi-tab sits in V4 territory.)* retained multi-result output, if ever wanted, now belongs to the
single scrollable **V4 journal** direction rather than switchable
tabs, so the tab clause is dropped from tracked scope. A future
return would be a new requirement. Same withdrawal as S3.)*
- [~] **V3** Full ER-diagram export (whole-database graph, viewed - [~] **V3** Full ER-diagram export (whole-database graph, viewed
outside the TUI) — low priority; design and ADR pending. outside the TUI) — low priority; design and ADR pending.
- [~] **V4** Output panel as a *scrollable per-session log* with - [~] **V4** Output panel as a *scrollable per-session log* with
@@ -492,7 +498,13 @@ since ADR-0027.)
*(Partial: PageUp / PageDown scrolling of the existing line *(Partial: PageUp / PageDown scrolling of the existing line
buffer is in, with new output snapping the view to the most buffer is in, with new output snapping the view to the most
recent. The full V4 scope — smart structure rendering, log recent. The full V4 scope — smart structure rendering, log
styling, Markdown export, scroll indicator — remains pending.)* styling, Markdown export, scroll indicator — remains pending.
**As of 2026-06-11 this journal model is the sole tracked
direction for evolving the output pane:** the competing multi-tab
output alternative (the trailing clauses of S3 and V2) was
withdrawn from scope by user decision, so retained / multi-result
output, if pursued, is folded into this journal rather than into
switchable tabs.)*
- [x] **V5** `show <kind> [<name>]` family of commands for - [x] **V5** `show <kind> [<name>]` family of commands for
redisplaying schema info on demand. redisplaying schema info on demand.
*(Done 2026-06-07: `show table <name>` + `show data <Table>` *(Done 2026-06-07: `show table <name>` + `show data <Table>`
@@ -652,11 +664,39 @@ since ADR-0027.)
## Sample data / seeding ## Sample data / seeding
- [ ] **SD1** `seed <table> [count]` generates plausible fake - [x] **SD1** `seed <table> [count]` generates plausible fake
data; junction tables are seeded with valid foreign-key data; junction tables are seeded with valid foreign-key
references drawn from existing parent rows. references drawn from existing parent rows.
- [~] **SD2** Detailed seeding rules (per-type generators, *(Done 2026-06-11 via **ADR-0048** (commits `202e25a`–`fbd219b`).
locale, determinism, override hooks) — design and ADR pending. Whole-row `seed <table> [count] [--seed <n>]` with realistic
name-aware generation (`fake` crate + a type-gated heuristic
catalogue, table-context name disambiguation, hand-rolled
`product` generator, bounded dates), identifier + constraint
uniqueness, **junction tables seeded with valid FK references
drawn from existing parent rows** (distinct combinations, capped;
empty-parent friendly error), `IN`-CHECK derivation, a
required-column block guard, undo as one step, replay as a data
write, a capped auto-show + enum/CHECK advisory, and an O(N)
single-transaction path. The `set` override clause and
`<table>.<column>` column-fill landed in SD2 Phase 2, below.)*
- [x] **SD2** Detailed seeding rules (per-type generators,
locale, determinism, override hooks).
*(Done 2026-06-11 via **ADR-0048** (Phase 1 + Phase 2). Phase 1:
type-gated name-aware per-type generators with a `fake`-backed
catalogue + table-context disambiguation, **`--seed` determinism**
(serial/FK/shortid all reproducible — D4 holds with no
exceptions), English-only locale (X2). **Phase 2 (the "override
hooks" core):** the `set` override clause — fixed value /
pick-from-list / `as <generator>` / `between` range (numeric and
**quoted** dates, type-aware; an override drops the column from
the generic-fill advisory) — and the `<table>.<column>`
column-fill form (an UPDATE over existing rows, refusing
PK/autogen targets, empty-table no-op, FK/unique-respecting, one
undo step). Adds the `KNOWN_GENERATORS` vocabulary (D9), a range
`Generator`, and full completion / highlight / validity / help /
parse-error-pedagogy wiring. Deferred SD2 increments:
user-defined custom generators, NULL injection, multi-locale,
recursive parent auto-seed.)*
## Query analysis ## Query analysis
+254 -19
View File
@@ -646,6 +646,44 @@ impl App {
} }
} }
/// The input view the **live-feedback** walkers (completion, ambient
/// hint, validity verdict, highlight overlays) should see, plus the
/// cursor mapped into that view and the byte offset stripped from the
/// front.
///
/// Under the `:` one-shot escape (ADR-0003) the buffer carries a
/// leading `:` (and an auto-inserted space) that is *not* advanced
/// SQL — submission already strips it before parsing, but the live
/// feedback did not, so the walker bailed at the `:` and resolved
/// nothing (no completion / hint, a spurious error overlay). This
/// returns the SQL with that prefix stripped the same way submission
/// strips it, so the feedback matches a real advanced-mode session.
///
/// # Returns
///
/// `(view, cursor, offset)` where:
/// - `view` is the suffix of the input buffer after the leading
///   whitespace, the `:`, and any whitespace following it;
/// - `cursor` is the input cursor translated into `view` coordinates,
///   clamped to `0..=view.len()`;
/// - `offset` is the number of bytes stripped from the front. Add it to
///   any walker-returned byte position (completion `replaced_range`,
///   overlay spans) to get back to real-buffer coordinates.
///
/// For every non-one-shot input this is the identity
/// `(&input, cursor, 0)`.
#[must_use]
pub fn feedback_view(&self) -> (&str, usize, usize) {
    if matches!(self.effective_mode(), EffectiveMode::AdvancedOneShot) {
        // The first non-whitespace char is the `:` (per
        // `effective_mode`). `strip_prefix` makes that explicit: should
        // the invariant ever not hold we fall through to the identity
        // view instead of slicing past the buffer.
        if let Some(after_colon) = self.input.trim_start().strip_prefix(':') {
            // `trim_start` uses the same (Unicode) whitespace definition
            // as submission's `trimmed[1..].trim()`, so the live view and
            // the submitted SQL start at the same byte. A byte-wise
            // ASCII check would leave e.g. a vertical tab or a non-ASCII
            // space at the front of the view.
            let view = after_colon.trim_start();
            // `view` is a suffix of the buffer, so the stripped prefix is
            // exactly the length difference.
            let offset = self.input.len() - view.len();
            // A cursor still inside the stripped prefix maps to the
            // start of the view.
            let cursor = self.input_cursor.saturating_sub(offset).min(view.len());
            return (view, cursor, offset);
        }
    }
    (&self.input, self.input_cursor, 0)
}
/// The validity-indicator verdict for the current input /// The validity-indicator verdict for the current input
/// (ADR-0027 §3). `None` when the input would run clean. /// (ADR-0027 §3). `None` when the input would run clean.
/// ///
@@ -667,11 +705,10 @@ impl App {
EffectiveMode::AdvancedPersistent EffectiveMode::AdvancedPersistent
| EffectiveMode::AdvancedOneShot => Mode::Advanced, | EffectiveMode::AdvancedOneShot => Mode::Advanced,
}; };
crate::dsl::walker::input_verdict_in_mode( // Strip the `:` one-shot prefix so the walker verdicts the SQL
&self.input, // itself, not the escape marker (which it can't parse).
Some(&self.schema_cache), let (view, _cursor, _offset) = self.feedback_view();
mode, crate::dsl::walker::input_verdict_in_mode(view, Some(&self.schema_cache), mode)
)
} }
/// Process one event from the runtime, mutating state and /// Process one event from the runtime, mutating state and
@@ -771,6 +808,10 @@ impl App {
self.handle_dsl_insert_success(&command, &result); self.handle_dsl_insert_success(&command, &result);
Vec::new() Vec::new()
} }
AppEvent::DslSeedSucceeded { command, result } => {
self.handle_dsl_seed_success(&command, &result);
Vec::new()
}
AppEvent::DslUpdateSucceeded { AppEvent::DslUpdateSucceeded {
command, command,
result, result,
@@ -1395,13 +1436,7 @@ impl App {
} }
fn start_or_complete_at(&mut self, multi_start_idx: usize) { fn start_or_complete_at(&mut self, multi_start_idx: usize) {
let cursor = self.input_cursor.min(self.input.len()); let Some(comp) = self.completion_for_feedback() else {
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
&self.input,
cursor,
&self.schema_cache,
self.effective_mode().as_mode(),
) else {
return; return;
}; };
if comp.candidates.len() == 1 { if comp.candidates.len() == 1 {
@@ -1413,13 +1448,7 @@ impl App {
} }
fn start_or_complete_last(&mut self) { fn start_or_complete_last(&mut self) {
let cursor = self.input_cursor.min(self.input.len()); let Some(comp) = self.completion_for_feedback() else {
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
&self.input,
cursor,
&self.schema_cache,
self.effective_mode().as_mode(),
) else {
return; return;
}; };
if comp.candidates.len() == 1 { if comp.candidates.len() == 1 {
@@ -1430,6 +1459,22 @@ impl App {
} }
} }
/// Compute the completion at the cursor against the feedback view
/// (the input with any ADR-0003 `:` one-shot prefix stripped) and
/// translate the resulting `replaced_range` back into real-buffer
/// coordinates, so the `commit_*` helpers edit the correct span.
///
/// Returns `None` when the completion engine has nothing to offer.
/// For non-one-shot input the stripped offset is 0, so the range is
/// passed through unchanged.
fn completion_for_feedback(&self) -> Option<crate::completion::Completion> {
    let (sql, sql_cursor, shift) = self.feedback_view();
    // The engine indexes into `sql`, so never hand it a cursor past the end.
    let clamped_cursor = sql_cursor.min(sql.len());
    let mode = self.effective_mode().as_mode();
    crate::completion::candidates_at_cursor_in_mode(sql, clamped_cursor, &self.schema_cache, mode)
        .map(|mut found| {
            // View coordinates → buffer coordinates.
            let (from, to) = found.replaced_range;
            found.replaced_range = (from + shift, to + shift);
            found
        })
}
/// Single-candidate commit: insert "<text> " (with trailing /// Single-candidate commit: insert "<text> " (with trailing
/// space) and DO NOT create a memo. The user can keep /// space) and DO NOT create a memo. The user can keep
/// typing or press Tab again to fresh-complete at the new /// typing or press Tab again to fresh-complete at the new
@@ -2072,6 +2117,39 @@ impl App {
} }
} }
/// Report a successful `seed` command (ADR-0048) to the output pane.
///
/// Emits, in order:
/// 1. the ✓ echo for `command`;
/// 2. the seeded-row count, with a cap note appended when fewer rows
///    were produced than requested;
/// 3. the rendered preview table for `result.data` (D18);
/// 4. when `result.advisory_columns` is non-empty, a Hint-styled
///    advisory naming the columns filled with generic text (D12/D13).
fn handle_dsl_seed_success(&mut self, command: &Command, result: &crate::db::SeedResult) {
    self.note_ok_summary(command);

    // Headline: "<n> row(s) seeded into <table>" plus an optional cap note.
    let mut headline = crate::t!(
        "ok.rows_seeded",
        count = result.produced,
        table = result.table
    );
    let hit_cap = result.produced < result.requested;
    if hit_cap {
        headline.push(' ');
        headline.push_str(&crate::t!("seed.capped", requested = result.requested));
    }
    self.note_system(headline);

    // Preview table, one output line per rendered row.
    let preview = crate::output_render::render_data_table(&result.data);
    for rendered in preview {
        self.note_system(rendered);
    }

    // Nothing to advise on → done.
    if result.advisory_columns.is_empty() {
        return;
    }
    // `column` (the first advised column) seeds the concrete repair
    // examples (D13 Phase 2/3 wording); `columns` lists them all. The
    // index is safe: the list was just checked to be non-empty.
    self.push_category_three_prose(crate::t!(
        "seed.advisory_generic",
        columns = result.advisory_columns.join(", "),
        column = result.advisory_columns[0],
        table = result.table
    ));
}
fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) { fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) {
self.note_ok_summary(command); self.note_ok_summary(command);
self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected)); self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected));
@@ -2390,6 +2468,9 @@ impl App {
// the executor), like the named DSL drop. // the executor), like the named DSL drop.
C::SqlDropIndex { .. } => (Operation::DropIndex, None, None), C::SqlDropIndex { .. } => (Operation::DropIndex, None, None),
C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None), C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None),
// Seed generates inserts; FK/constraint failures read as
// insert errors (ADR-0048).
C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None),
C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None), C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None),
C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None), C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None),
C::ShowData { name, .. } | C::ShowTable { name } => { C::ShowData { name, .. } | C::ShowTable { name } => {
@@ -4936,6 +5017,86 @@ mod tests {
assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent); assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent);
} }
/// Test fixture: populate `app.schema_cache` with two joinable tables,
/// `Orders(id, customer_id)` and `Customers(id, name)`, for the `:`
/// one-shot SQL-feedback tests. The flat `columns` list is then filled
/// with every column name, table by table, in `tables` order.
fn install_join_schema(app: &mut App) {
    use crate::completion::TableColumn;
    use crate::dsl::types::Type;

    let schema = &mut app.schema_cache;
    schema.tables = vec!["Orders".into(), "Customers".into()];

    let orders = vec![TableColumn::new("id", Type::Serial), TableColumn::new("customer_id", Type::Int)];
    schema.table_columns.insert("Orders".into(), orders);

    let customers = vec![TableColumn::new("id", Type::Serial), TableColumn::new("name", Type::Text)];
    schema.table_columns.insert("Customers".into(), customers);

    // Flatten the per-table columns into the global column list.
    for table in schema.tables.clone() {
        let names = schema.table_columns[&table].iter().map(|col| col.name.clone());
        schema.columns.extend(names);
    }
}
#[test]
fn colon_one_shot_gives_sql_completion_the_stripped_view() {
    // Regression (found in manual testing): under the `:` one-shot
    // escape (ADR-0003) the leading `:` used to be handed to the live
    // SQL feedback, so the walker bailed at `:` and Tab completed
    // nothing, while the very same line in full `mode advanced`
    // completed fine. With the feedback view stripping the `:`, both
    // paths must now behave identically.
    let sql = "select c.name from Orders o join Customers c on c.id=o.cu";

    // Baseline — persistent advanced mode completes `o.cu` → `o.customer_id`.
    let mut persistent = App::new();
    persistent.mode = Mode::Advanced;
    install_join_schema(&mut persistent);
    type_str(&mut persistent, sql);
    persistent.update(key(KeyCode::Tab));
    let baseline_completed = persistent.input.ends_with("o.customer_id ");
    assert!(
        baseline_completed,
        "full advanced should complete: {:?}",
        persistent.input
    );

    // Subject — `:` one-shot typed from simple mode: same completion,
    // and the `:` prefix has to survive the edit.
    let mut one_shot = App::new();
    one_shot.mode = Mode::Simple;
    install_join_schema(&mut one_shot);
    one_shot.update(key(KeyCode::Char(':')));
    type_str(&mut one_shot, sql);
    assert_eq!(one_shot.effective_mode(), EffectiveMode::AdvancedOneShot);
    one_shot.update(key(KeyCode::Tab));

    let prefix_kept = one_shot.input.trim_start().starts_with(':');
    assert!(prefix_kept, "the `:` prefix is kept: {:?}", one_shot.input);

    let column_completed = one_shot.input.ends_with("o.customer_id ");
    assert!(
        column_completed,
        "`:` one-shot must complete the SQL column too: {:?}",
        one_shot.input
    );
}
#[test]
fn colon_one_shot_validity_is_clean_for_a_valid_query() {
    // The `[ERR]` indicator must stay dark for a *valid* `:`-prefixed
    // query. Previously the walker choked on the `:` itself and the
    // verdict was always Error.
    let mut app = App::new();
    install_join_schema(&mut app);
    app.update(key(KeyCode::Char(':')));
    type_str(&mut app, "select name from Customers");

    let verdict = app.input_validity_verdict();
    assert_eq!(
        verdict, None,
        "a valid one-shot query should verdict clean, got {:?}",
        verdict,
    );
}
#[test] #[test]
fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() { fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() {
let mut app = App::new(); let mut app = App::new();
@@ -6223,6 +6384,80 @@ mod tests {
); );
} }
#[test]
fn seed_success_renders_count_preview_and_advisory() {
    // ADR-0048: a `DslSeedSucceeded` event must surface the seeded-row
    // count, the preview table, and the enum/CHECK advisory.
    let mut app = App::new();
    app.output
        .push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple));

    let command = Command::Seed {
        table: "users".to_string(),
        target_column: None,
        count: Some(20),
        overrides: Vec::new(),
        rng_seed: None,
    };
    // One preview row, and one column (`status`) flagged for the advisory.
    let preview = crate::db::DataResult {
        table_name: "users".to_string(),
        columns: vec!["name".to_string()],
        column_types: vec![None],
        rows: vec![vec![Some("Alice".to_string())]],
    };
    let result = crate::db::SeedResult {
        table: "users".to_string(),
        requested: 20,
        produced: 20,
        data: preview,
        advisory_columns: vec!["status".to_string()],
    };
    app.update(AppEvent::DslSeedSucceeded { command, result });

    let texts: Vec<String> = app.output.iter().map(|line| line.text.clone()).collect();

    let count_shown = texts
        .iter()
        .any(|t| t.contains("20 row(s) seeded into users"));
    assert!(count_shown, "seeded-row count surfaced: {texts:?}");

    let advisory_shown = texts
        .iter()
        .any(|t| t.contains("status") && t.contains("generic text"));
    assert!(
        advisory_shown,
        "the advisory names the enum-ish column: {texts:?}",
    );
}
#[test]
fn seed_success_reports_a_cap() {
    // When fewer rows were produced than requested, the cap note must
    // appear on the same line as the count.
    let mut app = App::new();
    app.output
        .push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple));

    let command = Command::Seed {
        table: "J".to_string(),
        target_column: None,
        count: Some(10),
        overrides: Vec::new(),
        rng_seed: None,
    };
    // Empty preview: only the headline matters for this test.
    let empty_preview = crate::db::DataResult {
        table_name: "J".to_string(),
        columns: Vec::new(),
        column_types: Vec::new(),
        rows: Vec::new(),
    };
    let result = crate::db::SeedResult {
        table: "J".to_string(),
        requested: 10,
        produced: 4,
        data: empty_preview,
        advisory_columns: Vec::new(),
    };
    app.update(AppEvent::DslSeedSucceeded { command, result });

    let texts: Vec<String> = app.output.iter().map(|line| line.text.clone()).collect();
    let cap_shown = texts
        .iter()
        .any(|t| t.contains("4 row(s) seeded into J") && t.contains("of 10 requested"));
    assert!(
        cap_shown,
        "the cap note surfaces requested vs produced: {texts:?}",
    );
}
#[test] #[test]
fn sql_delete_returning_renders_cascade_and_result_table() { fn sql_delete_returning_renders_cascade_and_result_table() {
// ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade // ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade
+229 -18
View File
@@ -120,7 +120,13 @@ impl SchemaCache {
IdentSource::Columns => &self.columns, IdentSource::Columns => &self.columns,
IdentSource::Relationships => &self.relationships, IdentSource::Relationships => &self.relationships,
IdentSource::Indexes => &self.indexes, IdentSource::Indexes => &self.indexes,
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[], // Curated / invented sources never come from the schema
// cache — `Generators` candidates are supplied separately
// from the `seed` vocabulary (ADR-0048 D9).
IdentSource::NewName
| IdentSource::Types
| IdentSource::Generators
| IdentSource::Free => &[],
} }
} }
@@ -327,6 +333,37 @@ pub fn candidates_at_cursor_with_in_mode(
break; break;
} }
} }
// Flag-aware extension. The plain walk above stops at `-`, so a
// flag the user is mid-typing (`-`, `--`, `--all`, `--create-fk`)
// leaves an *empty* partial sitting just after the dash(es) — which
// made the engine offer every keyword (a `-` prefix-matches nothing,
// so the empty-prefix path let `on` through) and, worse, replace an
// empty range so accepting produced `-on` / `---create-fk`. When a
// dash-prefixed token sits at a word boundary AND a flag is actually
// expected here, treat the whole dash-run-plus-body as the partial so
// it is matched and replaced wholesale. The "flag is expected" gate
// (one cheap probe on the pre-dash prefix) keeps a signed number /
// minus (`where x = -5`) from being mis-read as a flag.
{
let mut run = cursor;
while run > 0 {
let p = bytes[run - 1];
if p.is_ascii_alphanumeric() || p == b'_' || p == b'-' {
run -= 1;
} else {
break;
}
}
let word_boundary = run == 0 || bytes[run - 1].is_ascii_whitespace();
if run < cursor && bytes[run] == b'-' && word_boundary && run < start {
let pre = crate::dsl::walker::completion_probe_in_mode(&input[..run], cache, mode);
if pre.expected.iter().any(|e| matches!(e, Expectation::Flag(_))) {
start = run;
}
}
}
let partial_prefix = input[start..cursor].to_string(); let partial_prefix = input[start..cursor].to_string();
let leading = &input[..start]; let leading = &input[..start];
@@ -623,29 +660,19 @@ pub fn candidates_at_cursor_with_in_mode(
// Source 1.55: flag candidates (`--name`). Surfaced as a // Source 1.55: flag candidates (`--name`). Surfaced as a
// distinct CandidateKind so the hint panel can colour them // distinct CandidateKind so the hint panel can colour them
// with `tok_flag` (matching how they'll appear after // with `tok_flag` (matching how they'll appear after
// insertion). The standard prefix matcher walks back over // insertion). The flag-aware partial detection above captures any
// alphanumeric + underscore, which does NOT cross `-`, so // leading dash-run, so the partial is one of: empty, all-dashes
// when the user types `--all` the partial is `all` — match // (`-` / `--`), or `[-]+body`. Stripping the leading dashes and
// the flag's body against that. Otherwise match the full // matching the remainder against the flag *body* handles all of
// `--name` against the partial (which may be empty or start // them uniformly (empty / all-dashes → match every flag).
// with `--`). let flag_needle = partial_prefix.trim_start_matches('-').to_lowercase();
let flags: Vec<String> = expected let flags: Vec<String> = expected
.iter() .iter()
.filter_map(|e| match e { .filter_map(|e| match e {
Expectation::Flag(name) => Some(*name), Expectation::Flag(name) => Some(*name),
_ => None, _ => None,
}) })
.filter(|body| { .filter(|body| body.to_lowercase().starts_with(&flag_needle))
if partial_prefix.starts_with("--") {
format!("--{body}")
.to_lowercase()
.starts_with(&lowered_prefix)
} else if partial_prefix.is_empty() {
true
} else {
body.to_lowercase().starts_with(&lowered_prefix)
}
})
.map(|body| format!("--{body}")) .map(|body| format!("--{body}"))
.collect(); .collect();
@@ -709,6 +736,22 @@ pub fn candidates_at_cursor_with_in_mode(
} else { } else {
Vec::new() Vec::new()
}; };
// Source 1.9: fake-data generator names (ADR-0048 D9). At the
// `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
// curated vocabulary is offered so a learner can discover `email` /
// `product` / … by Tab. Same `Function` kind / `tok_function` colour
// as SQL functions (no new theme colour — ADR-0048 §Grammar).
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
functions.extend(
crate::seed::KNOWN_GENERATORS
.iter()
.filter(|g| matches_prefix(g))
.map(|g| (*g).to_string()),
);
}
// Source 2: schema identifiers — accumulated across every // Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation. // matching schema-listable `Ident { source }` expectation.
@@ -1200,6 +1243,45 @@ pub fn invalid_ident_at_cursor_in_mode(
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) { if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None; return None;
} }
// A bare ident at a SQL expression slot may be a **table alias / name**
// the user is mid-typing as a qualifier (`ol` in `sum(ol.count)`). The
// defining FROM clause can sit *after* the cursor — the projection
// references it — so the leading-only walk has an empty from-scope and
// would wrongly flag the alias as an unknown column. Recover the scope
// from the FULL input (mirrors the §10.6 edit-an-existing-query
// lookahead the candidate engine uses for column narrowing) and bail
// when the partial prefix-matches a binding's alias or table name.
if has_sql_expr_slot {
let full = crate::dsl::walker::completion_probe_in_mode(input, cache, mode);
let lowered = partial.to_lowercase();
let matches_qualifier = full.from_scope.iter().any(|b| {
b.alias
.as_deref()
.is_some_and(|a| a.to_lowercase().starts_with(&lowered))
|| b.table.to_lowercase().starts_with(&lowered)
});
if matches_qualifier {
return None;
}
}
// ADR-0048 D9: the `seed … set <col> as <gen>` slot is a curated
// vocabulary (`IdentSource::Generators`), not a schema source, so the
// schema-column check below would never see it. A partial that
// prefix-matches a known generator is an in-progress name; anything
// else is an unknown generator → flag it `[ERR]` while typing.
let has_generator_slot = expected
.iter()
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
if has_generator_slot {
if crate::seed::is_known_generator_prefix(partial) {
return None;
}
return Some(InvalidIdent {
range: (start, cursor),
found: partial.to_string(),
source: IdentSource::Generators,
});
}
// Find every schema-listable source in the expected list. // Find every schema-listable source in the expected list.
let sources: Vec<IdentSource> = expected let sources: Vec<IdentSource> = expected
.iter() .iter()
@@ -1488,6 +1570,71 @@ mod tests {
); );
} }
#[test]
fn single_dash_offers_flags_not_keywords_and_replaces_the_dash() {
    // Regression (found in manual testing): with a single trailing dash,
    // `add 1:n relationship … -` offered the `on` keyword alongside
    // `--create-fk`, and accepting a candidate produced `-on` /
    // `---create-fk` because the lone `-` sat outside the replaced range.
    // A dash at a flag position is a flag in progress: flags are offered,
    // keywords are not, and the dash is replaced on accept.
    let input = "add 1:n relationship from X.a to Y.b -";
    let end = input.len();
    let cache = SchemaCache::default();

    let completion = candidates_at_cursor(input, end, &cache)
        .expect("a `-` at a flag position offers candidates");
    let texts: Vec<&str> = completion
        .candidates
        .iter()
        .map(|cand| cand.text.as_str())
        .collect();

    let offers_flag = texts.contains(&"--create-fk");
    assert!(offers_flag, "should offer --create-fk: {texts:?}");

    let offers_keyword = texts.contains(&"on");
    assert!(!offers_keyword, "must NOT offer `on` after a dash: {texts:?}");

    assert_eq!(
        completion.replaced_range,
        (end - 1, end),
        "the `-` must be inside the replaced range so accept yields `--create-fk`",
    );
}
#[test]
fn double_dash_replaces_both_dashes_on_accept() {
    // With `--` already typed, the flag is still offered and the
    // replaced range must span both dashes.
    let input = "delete from T --";
    let end = input.len();
    let cache = SchemaCache::default();

    let completion = candidates_at_cursor_in_mode(input, end, &cache, Mode::Simple)
        .expect("`--` offers the flag");

    let offers_all_rows = completion
        .candidates
        .iter()
        .any(|cand| cand.text == "--all-rows");
    assert!(offers_all_rows);

    assert_eq!(
        completion.replaced_range,
        (end - 2, end),
        "both dashes are replaced so accept yields `--all-rows`, not `----all-rows`",
    );
}
#[test]
fn dash_at_a_value_position_is_not_treated_as_a_flag() {
    // In `show data T where x = -5` the `-` is a numeric sign, not the
    // start of a flag. No flag is expected at that position, so the dash
    // must stay out of the partial: the partial remains `5` (the
    // original value-operand behaviour) and no `--…` candidate shows up.
    let mut schema = SchemaCache::default();
    schema.tables.push("T".into());
    schema.columns.push("x".into());

    let input = "show data T where x = -5";
    let end = input.len();

    // No completion at all is an acceptable outcome here; the checks
    // below only apply when the engine does return something.
    let Some(completion) = candidates_at_cursor_in_mode(input, end, &schema, Mode::Simple) else {
        return;
    };

    let offers_flag = completion
        .candidates
        .iter()
        .any(|cand| cand.text.starts_with("--"));
    assert!(
        !offers_flag,
        "no flags at a value position: {:?}",
        completion.candidates,
    );
    assert_eq!(
        completion.replaced_range,
        (end - 1, end),
        "only the `5` is the partial; the `-` (sign) is not captured",
    );
}
#[test] #[test]
fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() { fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() {
// The same optional-flag class: `drop column … [--cascade]`. // The same optional-flag class: `drop column … [--cascade]`.
@@ -2606,6 +2753,70 @@ mod tests {
); );
} }
#[test]
fn invalid_ident_does_not_flag_a_table_alias_used_before_its_from_clause() {
    // Regression (found in manual testing): in
    // `select … sum(ol.count*…) … from … OrderLines ol …` the projection
    // uses the alias `ol`, but the FROM clause that binds it sits
    // *after* the cursor. Walking only the leading text gave an empty
    // from-scope, so `ol` was wrongly reported as an unknown column (a
    // red "ERR" overlay on a valid query). The full-input lookahead
    // (ADR-0032 §10.6) has to recover the scope so `ol` passes.
    use crate::dsl::types::Type;

    let mut schema = SchemaCache::default();
    schema.tables.push("OrderLines".into());
    schema.columns.push("count".into());
    schema
        .table_columns
        .insert("OrderLines".into(), vec![TableColumn::new("count", Type::Int)]);

    let input = "select sum(ol.count) from OrderLines ol";
    // Place the cursor immediately after the `ol` qualifier.
    let after_alias = input.find("ol.count").unwrap() + 2;

    let flagged = invalid_ident_at_cursor_in_mode(input, after_alias, &schema, Mode::Advanced);
    assert!(
        flagged.is_none(),
        "a table alias used before its FROM clause must not be flagged as a bad column",
    );
}
#[test]
fn invalid_ident_fires_for_unknown_generator_after_as() {
    // ADR-0048 D9: a name that is not a known generator, typed at the
    // `set <col> as <gen>` slot, is flagged `[ERR]` while typing.
    let input = "seed a set name as bogus";
    let schema = two_table_schema();

    let flagged = invalid_ident_at_cursor(input, input.len(), &schema)
        .expect("unknown generator must flag");

    assert_eq!(flagged.found, "bogus");
    assert_eq!(flagged.source, IdentSource::Generators);
}
#[test]
fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
    // ADR-0048: both the `set <col>` slot and the `<table>.<col>`
    // column-fill slot are `IdentSource::Columns`, so an unknown column
    // in either is flagged exactly like any other column slot.
    let schema = two_table_schema(); // table `a`; columns id, name

    // `set <col>` slot — cursor at end of input.
    let set_input = "seed a set xyz";
    let set_flag = invalid_ident_at_cursor(set_input, set_input.len(), &schema)
        .expect("unknown column in `set` must flag");
    assert_eq!(set_flag.found, "xyz");
    assert_eq!(set_flag.source, IdentSource::Columns);

    // `<table>.<col>` column-fill slot — cursor at end of input.
    let fill_input = "seed a.xyz";
    let fill_flag = invalid_ident_at_cursor(fill_input, fill_input.len(), &schema)
        .expect("unknown column in column-fill must flag");
    assert_eq!(fill_flag.source, IdentSource::Columns);
}
#[test]
fn invalid_ident_does_not_fire_for_generator_prefix() {
    // A partial that prefixes a known generator — or is one in full —
    // is a name in progress, not a typo. Cursor is at end of input in
    // every case.
    let schema = two_table_schema();
    let cases = [
        (
            "seed a set name as ema",
            "`ema` prefixes `email` — must not flag",
        ),
        (
            "seed a set name as email",
            "`email` is a known generator — must not flag",
        ),
    ];
    for (input, why) in cases {
        let flagged = invalid_ident_at_cursor(input, input.len(), &schema);
        assert!(flagged.is_none(), "{why}");
    }
}
fn two_table_schema() -> SchemaCache { fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type; use crate::dsl::types::Type;
let mut s = SchemaCache::default(); let mut s = SchemaCache::default();
+999 -23
View File
File diff suppressed because it is too large Load Diff
+53
View File
@@ -402,6 +402,25 @@ pub enum Command {
filter: Option<Expr>, filter: Option<Expr>,
limit: Option<u64>, limit: Option<u64>,
}, },
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
/// `count` defaults to 20 when omitted; `rng_seed` (from the
/// `--seed <n>` flag) makes generation reproducible.
///
/// Phase 2 surfaces (ADR-0048 D1/D2):
/// - `target_column` is `Some` for the **column-fill** form
/// `seed <table>.<column>` — fill one column across the table's
/// *existing* rows (an UPDATE), rather than generating new rows.
/// - `overrides` carries the `set <col> …` clause: per-column pins
/// that take precedence over the heuristic generator (D2).
Seed {
table: String,
/// `Some(col)` → column-fill mode (UPDATE existing rows);
/// `None` → whole-row generation (INSERT new rows).
target_column: Option<String>,
count: Option<u64>,
overrides: Vec<SeedOverride>,
rng_seed: Option<u64>,
},
/// Replay a sequence of DSL commands from a file. Each line /// Replay a sequence of DSL commands from a file. Each line
/// is parsed and dispatched through the same pipeline as /// is parsed and dispatched through the same pipeline as
/// interactive input. Blank lines and lines whose first /// interactive input. Blank lines and lines whose first
@@ -637,6 +656,38 @@ impl RowFilter {
} }
} }
/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
///
/// The user can pin a column's generated values to a constant, a
/// pick-list, an explicit named generator, or a range — overriding the
/// per-column heuristic the executor would otherwise pick. `column` is
/// the user-typed column name (validated against the table at execution,
/// like every other column slot).
///
/// A `seed` command carries zero or more of these in
/// `Command::Seed::overrides`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SeedOverride {
/// The column being pinned, exactly as the user typed it.
pub column: String,
/// Which of the four override forms applies, with its payload.
pub kind: SeedOverrideKind,
}
/// The four `set` override forms (ADR-0048 D2).
///
/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
/// dates are quoted text per the D2 amendment); the `Generator` name is
/// a raw string validated at execution because `src/dsl` cannot depend
/// on `src/seed` (the curated vocabulary lives there).
///
/// Each variant corresponds to one tail of the `set <col> …` grammar:
/// `= <value>`, `in (…)`, `as <generator>`, `between … and …`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SeedOverrideKind {
/// `set status = 'pending'` — every row gets the constant.
Fixed(Value),
/// `set role in ('admin', 'editor')` — uniform pick from the list.
PickList(Vec<Value>),
/// `set work_addr as email` — force the named generator (D9).
/// The payload is the generator name as typed; it is not checked
/// against the known vocabulary at this layer.
Generator(String),
/// `set price between 10 and 100` — uniform in `[low, high]`;
/// numeric or (quoted) date bounds per the destination column type.
Range { low: Value, high: Value },
}
/// A complex WHERE expression (ADR-0026 §4). /// A complex WHERE expression (ADR-0026 §4).
/// ///
/// Built by `grammar::expr::build_expr` from the flat /// Built by `grammar::expr::build_expr` from the flat
@@ -949,6 +1000,7 @@ impl Command {
} => "show index", } => "show index",
Self::ShowList { kind, .. } => kind.command_name(), Self::ShowList { kind, .. } => kind.command_name(),
Self::Insert { .. } => "insert into", Self::Insert { .. } => "insert into",
Self::Seed { .. } => "seed",
Self::Update { .. } => "update", Self::Update { .. } => "update",
Self::Delete { .. } => "delete from", Self::Delete { .. } => "delete from",
Self::ShowData { .. } => "show data", Self::ShowData { .. } => "show data",
@@ -997,6 +1049,7 @@ impl Command {
| Self::AddConstraint { table, .. } | Self::AddConstraint { table, .. }
| Self::DropConstraint { table, .. } | Self::DropConstraint { table, .. }
| Self::Insert { table, .. } | Self::Insert { table, .. }
| Self::Seed { table, .. }
| Self::Update { table, .. } | Self::Update { table, .. }
| Self::Delete { table, .. } => table, | Self::Delete { table, .. } => table,
// For relationships we focus on the parent (1-side): // For relationships we focus on the parent (1-side):
+346 -1
View File
@@ -24,7 +24,9 @@
//! later swap that capture for the same typed slots used here, adding //! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting. //! live hints/highlighting.
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind}; use crate::dsl::command::{
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
};
use crate::dsl::grammar::{ use crate::dsl::grammar::{
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr, CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
shared::{ shared::{
@@ -425,6 +427,152 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
]; ];
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES); const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
// =================================================================
// seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
// column-fill)
// =================================================================
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
/// non-negative integer).
///
/// The node itself is a plain number literal; the "optional" part comes
/// from the `Node::Optional` wrapper applied where it is placed in
/// `SEED_NODES`.
const SEED_COUNT: Node = Node::NumberLit {
validator: Some(LIMIT_VALIDATOR),
};
/// `--seed <n>` — a reproducible-generation flag carrying a numeric
/// seed (ADR-0048 D4). The only flag in the DSL that takes a value;
/// `build_seed` reads the number immediately after the flag.
const SEED_FLAG_NODES: &[Node] = &[
// The flag itself, named by its body (`seed`).
Node::Flag("seed"),
// The seed value: validated with the same validator as the row count.
Node::NumberLit {
validator: Some(LIMIT_VALIDATOR),
},
];
/// The flag and its value as one sequence, so the two are matched
/// together (wrapped in `Node::Optional` in `SEED_NODES`).
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
// form 2) ----------------------------------------------------
//
// `seed users.email …` fills one column across existing rows. The
// table ident stops at `.` (idents are alnum/underscore), so an
// `Optional(Seq['.', column])` after the table cleanly discriminates:
// when the next token is not `.`, the `Punct('.')` first-child
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
// column propagates as the user mid-typing `seed users.` (driver
// `walk_optional` semantics). The column resolves against
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
/// The column named after the `.` in the column-fill form. A read-only
/// column slot: every `writes_*` flag is off, so matching it records
/// nothing into the walk context.
const SEED_TARGET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_target_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `.` followed by the target column.
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
/// The whole `.<column>` suffix, optional so plain `seed <table>` still
/// matches.
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
//
// Each override pins one column's generation. The column slot
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
// same `current_column_value` dispatch `update … set` uses) narrow to
// the column's type — so list/range/fixed values get the column's
// typed slot (quoted text, unquoted number, quoted date) and a
// type-mismatched literal is flagged. The four tails each start with a
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
// discriminates cleanly (no Optional-first branch).
/// The `set <col>` column slot. Distinct role from `update`'s
/// `update_set_column` and the expression `expr_column`.
///
/// `writes_column` is the only write flag that is on: the matched
/// column is recorded so the value slots that follow in the same
/// override can be typed against it.
const SEED_SET_COLUMN: Node = Node::Ident {
source: IdentSource::Columns,
role: "seed_set_column",
validator: None,
highlight_override: None,
writes_table: false,
writes_column: true,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `as <generator>` — the curated generator-name vocabulary (D9),
/// highlighted in the `tok_function` colour. The slot is structural
/// (any identifier matches); the name is validated at execution and
/// flagged live by the validity indicator.
///
/// `source` is `IdentSource::Generators` rather than a schema source,
/// and `validator` is `None`, so nothing at the grammar level rejects
/// an unknown name.
const SEED_GENERATOR: Node = Node::Ident {
source: IdentSource::Generators,
role: "seed_generator",
validator: None,
// Render generator names with the function highlight class.
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
writes_table: false,
writes_column: false,
writes_user_listed_column: false,
writes_table_alias: false,
writes_cte_name: false,
writes_projection_alias: false,
};
/// `= <value>` — a fixed constant for every row.
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
///
/// This is the comma-separated value list itself; `min: 1` means an
/// empty `in ()` does not match.
const SEED_OV_IN_VALUES: Node = Node::Repeated {
inner: &PER_COLUMN_VALUE,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// The full `in ( … )` tail: keyword, opening paren, value list,
/// closing paren.
const SEED_OV_IN_NODES: &[Node] = &[
Node::Word(Word::keyword("in")),
Node::Punct('('),
SEED_OV_IN_VALUES,
Node::Punct(')'),
];
/// `between <value> and <value>` — uniform in the (typed) range.
const SEED_OV_BETWEEN_NODES: &[Node] = &[
Node::Word(Word::keyword("between")),
PER_COLUMN_VALUE,
Node::Word(Word::keyword("and")),
PER_COLUMN_VALUE,
];
/// `as <generator>` — force a named generator.
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
/// The four alternative override tails. Each begins with a different
/// token (`=`, `in`, `between`, `as`), so the first token alone decides
/// the branch.
const SEED_OV_TAIL_CHOICES: &[Node] = &[
Node::Seq(SEED_OV_FIXED_NODES),
Node::Seq(SEED_OV_IN_NODES),
Node::Seq(SEED_OV_BETWEEN_NODES),
Node::Seq(SEED_OV_AS_NODES),
];
/// Exactly one of the override tails.
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
/// One override: the column slot followed by its tail.
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
/// One or more overrides separated by commas (`min: 1`: a bare `set`
/// with no override does not match).
const SEED_OVERRIDES: Node = Node::Repeated {
inner: &SEED_OVERRIDE,
separator: Some(&Node::Punct(',')),
min: 1,
};
/// The whole clause: the `set` keyword followed by the override list.
const SEED_SET_CLAUSE_NODES: &[Node] =
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
/// Grammar of everything after the `seed` keyword, in match order:
/// table name, optional `.<col>` target, optional count, optional `set`
/// clause, optional flag. The AST builder relies on this order — the
/// count comes before `set`, and the flag comes last.
const SEED_NODES: &[Node] = &[
// `writes_table` so the `.column` target, the `set <col>=…`
// clause's column slots, and the typed value slots all resolve
// against this table.
TABLE_NAME_WRITES,
SEED_DOT_COLUMN,
Node::Optional(&SEED_COUNT),
Node::Optional(&SEED_SET_CLAUSE),
Node::Optional(&SEED_FLAG),
];
/// The `seed` command's shape: the sequence above, matched as a unit.
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
const UPDATE_NODES: &[Node] = &[ const UPDATE_NODES: &[Node] = &[
TABLE_NAME_WRITES, TABLE_NAME_WRITES,
Node::Word(Word::keyword("set")), Node::Word(Word::keyword("set")),
@@ -708,6 +856,195 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
}) })
} }
/// Assemble the [`Command::Seed`] AST for
/// `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
/// (ADR-0048, SD1 + SD2 Phase 2).
///
/// How each field is recovered from the flat matched path:
///
/// - `table` — the required `table_name` ident.
/// - `target_column` — the `seed_target_column` ident; only the
///   column-fill form `seed <T>.<col>` (D1 form 2) produces one.
/// - `rng_seed` — the `NumberLit` sitting immediately after the
///   `--seed` flag (D4).
/// - `count` — the first `NumberLit` found *before* both the `set`
///   keyword and the `--seed` flag. Limiting the search to that prefix
///   stops a numeric override value (`set age between 18 and 80`) or the
///   flag's own argument from being taken for the row count.
/// - `overrides` — folded from the `set`-clause terminals (D2).
///
/// # Errors
///
/// Propagates the error for a missing table ident, for a count or seed
/// that does not parse as a `u64`, and for a malformed `set` clause.
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let items = &path.items;
    let table = require_ident(path, "table_name")?;
    let target_column = ident_text(path, "seed_target_column").map(str::to_string);

    let flag_idx = items
        .iter()
        .position(|item| matches!(&item.kind, MatchedKind::Flag("seed")));
    let set_idx = items
        .iter()
        .position(|item| matches!(&item.kind, MatchedKind::Word("set")));

    // The flag's argument: only a number literal directly after the flag
    // counts; anything else leaves the RNG seed unset.
    let rng_seed = match flag_idx.and_then(|at| items.get(at + 1)) {
        Some(item) if matches!(item.kind, MatchedKind::NumberLit) => {
            Some(parse_seed_u64(&item.text)?)
        }
        _ => None,
    };

    // The positional count lives strictly before whichever of `set` /
    // `--seed` comes first (or anywhere, when neither is present).
    let count_boundary = match (set_idx, flag_idx) {
        (Some(set_at), Some(flag_at)) => set_at.min(flag_at),
        (Some(only), None) | (None, Some(only)) => only,
        (None, None) => items.len(),
    };
    let count = match items[..count_boundary]
        .iter()
        .find(|item| matches!(item.kind, MatchedKind::NumberLit))
    {
        Some(item) => Some(parse_seed_u64(&item.text)?),
        None => None,
    };

    let overrides = build_seed_overrides(path, set_idx, flag_idx)?;

    Ok(Command::Seed {
        table,
        target_column,
        count,
        overrides,
        rng_seed,
    })
}
/// Turn the flat terminals of a `set` clause into a list of
/// [`SeedOverride`]s (ADR-0048 D2).
///
/// The clause occupies the items strictly after `Word("set")` up to the
/// `--seed` flag, or to the end of the path when there is no flag. With
/// no `set` keyword at all the result is an empty list.
///
/// Inside that region every override starts at a `seed_set_column`
/// ident, and the token following it picks the form (`=` / `in` /
/// `between` / `as`). Anything that is not such an ident — the commas
/// separating overrides, or any stray token — is stepped over. Commas
/// inside an `in (...)` list never reach this loop: the tail parser
/// consumes them up to the closing `)`.
///
/// # Errors
///
/// Propagates the error from a malformed override tail.
fn build_seed_overrides(
    path: &MatchedPath,
    set_idx: Option<usize>,
    flag_idx: Option<usize>,
) -> Result<Vec<SeedOverride>, ValidationError> {
    let Some(start) = set_idx.map(|at| at + 1) else {
        return Ok(Vec::new());
    };
    let stop = flag_idx.unwrap_or(path.items.len());
    let region = &path.items[start..stop];

    let mut collected = Vec::new();
    let mut cursor = 0;
    while let Some(item) = region.get(cursor) {
        // Step past the current token either way: it is a separator to
        // ignore, or the column ident whose tail starts right after it.
        cursor += 1;
        let is_column = matches!(
            &item.kind,
            MatchedKind::Ident {
                role: "seed_set_column",
                ..
            }
        );
        if !is_column {
            continue;
        }
        let column = item.text.clone();
        let kind = parse_seed_override_tail(region, &mut cursor, &column)?;
        collected.push(SeedOverride { column, kind });
    }
    Ok(collected)
}
/// Decode the tail of a single `set` override.
///
/// On entry `region[*i]` is the token right after the column ident. On
/// success `*i` has been advanced past every token the tail consumed.
/// The first token selects the form:
///
/// - `=` → [`SeedOverrideKind::Fixed`] with the one value that follows.
/// - `in` → [`SeedOverrideKind::PickList`]: an optional `(`, then
///   values with their separating commas skipped, until `)` or the end
///   of the region.
/// - `between` → [`SeedOverrideKind::Range`]: a value, an optional
///   `and`, a second value.
/// - `as` → [`SeedOverrideKind::Generator`] with the text of the
///   `seed_generator` ident that must follow.
///
/// # Errors
///
/// Returns the `set`-clause drift-guard error (naming `column`) when the
/// region ends early, the first token is none of the four above, a
/// value slot does not hold a value, or `as` is not followed by a
/// generator ident.
fn parse_seed_override_tail(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<SeedOverrideKind, ValidationError> {
    let Some(head) = region.get(*i) else {
        return Err(seed_set_error(column));
    };
    match &head.kind {
        MatchedKind::Punct('=') => {
            *i += 1;
            seed_take_value(region, i, column).map(SeedOverrideKind::Fixed)
        }
        MatchedKind::Word("in") => {
            *i += 1; // the `in` keyword
            if let Some(MatchedKind::Punct('(')) = region.get(*i).map(|tok| &tok.kind) {
                *i += 1;
            }
            let mut picks = Vec::new();
            loop {
                match region.get(*i).map(|tok| &tok.kind) {
                    None => break,
                    Some(MatchedKind::Punct(')')) => {
                        *i += 1;
                        break;
                    }
                    Some(MatchedKind::Punct(',')) => *i += 1,
                    Some(_) => picks.push(seed_take_value(region, i, column)?),
                }
            }
            Ok(SeedOverrideKind::PickList(picks))
        }
        MatchedKind::Word("between") => {
            *i += 1; // the `between` keyword
            let low = seed_take_value(region, i, column)?;
            if let Some(MatchedKind::Word("and")) = region.get(*i).map(|tok| &tok.kind) {
                *i += 1;
            }
            let high = seed_take_value(region, i, column)?;
            Ok(SeedOverrideKind::Range { low, high })
        }
        MatchedKind::Word("as") => {
            *i += 1; // the `as` keyword
            match region.get(*i) {
                Some(tok)
                    if matches!(
                        tok.kind,
                        MatchedKind::Ident {
                            role: "seed_generator",
                            ..
                        }
                    ) =>
                {
                    *i += 1;
                    Ok(SeedOverrideKind::Generator(tok.text.clone()))
                }
                _ => Err(seed_set_error(column)),
            }
        }
        _ => Err(seed_set_error(column)),
    }
}
/// Consume the value literal at `region[*i]` and step `*i` past it.
///
/// The typed value slots in the grammar only match value literals — an
/// unquoted bare word fails the slot and is rejected *before* this fold
/// runs, which is how D2's quoting requirement is enforced structurally.
/// A non-literal (or a missing token) at this position therefore signals
/// that grammar and builder have drifted apart; the `Err` is a drift
/// guard, mirroring `expr::build_expr`.
///
/// `*i` is only advanced on success.
fn seed_take_value(
    region: &[MatchedItem],
    i: &mut usize,
    column: &str,
) -> Result<Value, ValidationError> {
    let Some(item) = region.get(*i) else {
        return Err(seed_set_error(column));
    };
    let Some(value) = item_to_value(item) else {
        return Err(seed_set_error(column));
    };
    *i += 1;
    Ok(value)
}
/// The error raised when the `set`-clause fold meets a token sequence
/// the grammar should never have let through (see `seed_take_value`).
/// The message names the column whose override was being read.
fn seed_set_error(column: &str) -> ValidationError {
    let detail = format!("malformed `set` clause for `{column}`");
    ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", detail)],
    }
}
/// Parse a number literal's text as a `u64` (used for both the row
/// count and the `--seed` value).
///
/// # Errors
///
/// Anything `u64`'s parser rejects — a sign, a fraction, an
/// out-of-range value — is reported as a type mismatch carrying the
/// offending text and the expectation "non-negative integer".
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
    match text.parse::<u64>() {
        Ok(number) => Ok(number),
        Err(_) => Err(ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", text.to_string()),
                ("expected", "non-negative integer".to_string()),
            ],
        }),
    }
}
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> { fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
let table = require_ident(path, "table_name")?; let table = require_ident(path, "table_name")?;
@@ -1452,6 +1789,14 @@ pub static SHOW: CommandNode = CommandNode {
"parse.usage.show_index", "parse.usage.show_index",
],}; ],};
/// Command-node definition for `seed`: the entry keyword, the grammar
/// shape matched after it, the builder that turns a matched path into a
/// `Command::Seed`, and the ids of its help and usage messages.
pub static SEED: CommandNode = CommandNode {
entry: Word::keyword("seed"),
shape: SEED_SHAPE,
ast_builder: build_seed,
help_id: Some("data.seed"),
usage_ids: &["parse.usage.seed"],
};
pub static INSERT: CommandNode = CommandNode { pub static INSERT: CommandNode = CommandNode {
entry: Word::keyword("insert"), entry: Word::keyword("insert"),
shape: INSERT_SHAPE, shape: INSERT_SHAPE,
+17
View File
@@ -57,6 +57,12 @@ pub enum HighlightClass {
String, String,
Punct, Punct,
Flag, Flag,
/// A curated function-vocabulary name — the `seed … set <col> as
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
/// the existing `tok_function` colour (ADR-0022 Amd6 blue — no new
/// theme colour), assigned via a generator slot's
/// `highlight_override`, not by byte shape.
Function,
Error, Error,
} }
@@ -86,6 +92,14 @@ pub enum IdentSource {
/// content validator on column-type slots; not user-listable /// content validator on column-type slots; not user-listable
/// from the schema. /// from the schema.
Types, Types,
/// Closed, curated set of fake-data generator names (ADR-0048
/// D9) — the `seed … set <col> as <generator>` slot. Like
/// `Types`, not user-listable from the schema; the vocabulary
/// lives in `src/seed` and the completion engine offers it. The
/// grammar slot is purely structural (matches any identifier);
/// an unknown name is flagged live (validity) and rejected at
/// execution.
Generators,
/// Any identifier shape; used by synthetic catch-all branches /// Any identifier shape; used by synthetic catch-all branches
/// (e.g., the unknown-value branch of `mode <value>`). /// (e.g., the unknown-value branch of `mode <value>`).
Free, Free,
@@ -117,6 +131,7 @@ impl IdentSource {
Self::Relationships => "relationship name", Self::Relationships => "relationship name",
Self::Indexes => "index name", Self::Indexes => "index name",
Self::Types => "type", Self::Types => "type",
Self::Generators => "generator name",
} }
} }
@@ -134,6 +149,7 @@ impl IdentSource {
"relationship name" => Some(Self::Relationships), "relationship name" => Some(Self::Relationships),
"index name" => Some(Self::Indexes), "index name" => Some(Self::Indexes),
"type" => Some(Self::Types), "type" => Some(Self::Types),
"generator name" => Some(Self::Generators),
_ => None, _ => None,
} }
} }
@@ -714,6 +730,7 @@ pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
(&ddl::CREATE, CommandCategory::Simple), (&ddl::CREATE, CommandCategory::Simple),
(&ddl::CREATE_M2N, CommandCategory::Simple), (&ddl::CREATE_M2N, CommandCategory::Simple),
(&data::SHOW, CommandCategory::Simple), (&data::SHOW, CommandCategory::Simple),
(&data::SEED, CommandCategory::Simple),
(&data::INSERT, CommandCategory::Simple), (&data::INSERT, CommandCategory::Simple),
(&data::UPDATE, CommandCategory::Simple), (&data::UPDATE, CommandCategory::Simple),
(&data::DELETE, CommandCategory::Simple), (&data::DELETE, CommandCategory::Simple),
+1
View File
@@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String {
IdentSource::Relationships => "relationship name".to_string(), IdentSource::Relationships => "relationship name".to_string(),
IdentSource::Indexes => "index name".to_string(), IdentSource::Indexes => "index name".to_string(),
IdentSource::Types => "type".to_string(), IdentSource::Types => "type".to_string(),
IdentSource::Generators => "generator name".to_string(),
IdentSource::NewName | IdentSource::Free => "identifier".to_string(), IdentSource::NewName | IdentSource::Free => "identifier".to_string(),
}, },
Expectation::Punct(c) => format!("`{c}`"), Expectation::Punct(c) => format!("`{c}`"),
+10 -6
View File
@@ -18,17 +18,21 @@ const DEFAULT_LEN: usize = 10;
pub const MIN_LEN: usize = 10; pub const MIN_LEN: usize = 10;
pub const MAX_LEN: usize = 12; pub const MAX_LEN: usize = 12;
/// Generate a fresh shortid using thread-local RNG. /// Generate a fresh shortid using the thread-local RNG.
#[must_use] #[must_use]
pub fn generate() -> String { pub fn generate() -> String {
generate_len(DEFAULT_LEN) generate_with_rng(&mut rand::rng())
} }
/// Generate a shortid from a caller-supplied RNG.
///
/// Lets `seed --seed <n>` produce **reproducible** shortid values
/// (ADR-0048 D4) by threading its seeded RNG through, while the default
/// [`generate`] keeps its thread-RNG behaviour for ordinary inserts.
#[must_use] #[must_use]
fn generate_len(len: usize) -> String { pub fn generate_with_rng<R: RngExt + ?Sized>(rng: &mut R) -> String {
let mut rng = rand::rng(); let mut out = String::with_capacity(DEFAULT_LEN);
let mut out = String::with_capacity(len); for _ in 0..DEFAULT_LEN {
for _ in 0..len {
let idx = rng.random_range(0..ALPHABET.len()); let idx = rng.random_range(0..ALPHABET.len());
out.push(ALPHABET[idx] as char); out.push(ALPHABET[idx] as char);
} }
+12
View File
@@ -240,6 +240,18 @@ mod tests {
); );
} }
#[test]
fn seed_generator_name_highlighted_as_function() {
    // ADR-0048 D9: the generator slot of `set <col> as <gen>` sets a
    // `highlight_override`, so the name typed there must come back with
    // the `Function` class (drawn in the shared `tok_function` colour).
    let runs = run("seed Members set role as email");
    let has_function_run = runs
        .iter()
        .any(|(_, _, class)| *class == HighlightClass::Function);
    assert!(
        has_function_run,
        "generator name `email` should be Function-highlighted: {runs:?}"
    );
}
#[test] #[test]
fn unknown_command_word_classified_by_byte_shape() { fn unknown_command_word_classified_by_byte_shape() {
// Walker doesn't engage; fallback classifies as Identifier. // Walker doesn't engage; fallback classifies as Identifier.
+4
View File
@@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics(
IdentSource::Relationships IdentSource::Relationships
| IdentSource::Indexes | IdentSource::Indexes
| IdentSource::Types | IdentSource::Types
// `Generators` (the `set … as <gen>` slot, ADR-0048 D9) is a
// curated vocabulary; its unknown-name validity is handled by
// the completion-layer indicator, not this walker diagnostic.
| IdentSource::Generators
| IdentSource::Free => {} | IdentSource::Free => {}
} }
} }
+4
View File
@@ -87,6 +87,10 @@ pub enum AppEvent {
command: Command, command: Command,
result: InsertResult, result: InsertResult,
}, },
DslSeedSucceeded {
command: Command,
result: crate::db::SeedResult,
},
DslUpdateSucceeded { DslUpdateSucceeded {
command: Command, command: Command,
result: UpdateResult, result: UpdateResult,
+5
View File
@@ -207,6 +207,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("help.ddl.rename", &[]), ("help.ddl.rename", &[]),
("help.ddl.change", &[]), ("help.ddl.change", &[]),
("help.data.show", &[]), ("help.data.show", &[]),
("help.data.seed", &[]),
("help.data.insert", &[]), ("help.data.insert", &[]),
("help.data.update", &[]), ("help.data.update", &[]),
("help.data.delete", &[]), ("help.data.delete", &[]),
@@ -308,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("parse.usage.undo", &[]), ("parse.usage.undo", &[]),
("parse.usage.save", &[]), ("parse.usage.save", &[]),
("parse.usage.select", &[]), ("parse.usage.select", &[]),
("parse.usage.seed", &[]),
("parse.usage.show_data", &[]), ("parse.usage.show_data", &[]),
("parse.usage.show_table", &[]), ("parse.usage.show_table", &[]),
("parse.usage.show_tables", &[]), ("parse.usage.show_tables", &[]),
@@ -548,7 +550,10 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
("ok.index_dropped_with_column", &["index"]), ("ok.index_dropped_with_column", &["index"]),
("ok.rows_deleted", &["count"]), ("ok.rows_deleted", &["count"]),
("ok.rows_inserted", &["count"]), ("ok.rows_inserted", &["count"]),
("ok.rows_seeded", &["count", "table"]),
("ok.rows_updated", &["count"]), ("ok.rows_updated", &["count"]),
("seed.capped", &["requested"]),
("seed.advisory_generic", &["columns", "column", "table"]),
// ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ---- // ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ----
("client_side.auto_fill_add_serial", &["count"]), ("client_side.auto_fill_add_serial", &["count"]),
("client_side.auto_fill_add_shortid", &["count"]), ("client_side.auto_fill_add_shortid", &["count"]),
+24
View File
@@ -333,6 +333,17 @@ help:
show indexes — list all indexes show indexes — list all indexes
show relationship <name> — show one relationship's detail show relationship <name> — show one relationship's detail
show index <name> — show one index's detail show index <name> — show one index's detail
seed: |-
seed <T> [<count>] — fill a table with generated sample rows
(default 20). Existing rows are kept;
foreign keys draw from existing parent rows.
seed <T> ... set <c> = 'v' | in ('a','b') | as <gen> | between x and y
— pin how a column is generated: a fixed
value, a pick-list, a named generator
(email, name, product, ...), or a range.
seed <T>.<col> [set ...] — fill one column across the EXISTING rows
(the follow-up to `add column`).
seed <T> ... --seed <n> — reproducible: same data for the same n.
insert: |- insert: |-
insert into <T> [(cols)] [values] (vals) — add a row insert into <T> [(cols)] [values] (vals) — add a row
update: |- update: |-
@@ -569,6 +580,7 @@ parse:
change_column: |- change_column: |-
change column [in] [table] <Table>: <Name> (<Type>) change column [in] [table] <Table>: <Name> (<Type>)
[--force-conversion | --dont-convert] [--force-conversion | --dont-convert]
seed: "seed <Table> [count] [set <col> = ... | in (...) | as <gen> | between x and y] | seed <Table>.<col>"
show_data: "show data <Table>" show_data: "show data <Table>"
show_table: "show table <Table>" show_table: "show table <Table>"
show_tables: "show tables" show_tables: "show tables"
@@ -978,6 +990,17 @@ db:
# template couldn't provide. Re-introduce a key here if a non-English # template couldn't provide. Re-introduce a key here if a non-English
# locale lands.) # locale lands.)
# Seed-command notes (ADR-0048): the cap note when the unique-value
# space is exhausted, and the advisory that flags columns filled with
# generic text that look like fixed value sets.
seed:
capped: "(of {requested} requested — ran out of distinct value combinations)"
# ADR-0048 D13 (Phase 2/3 wording): name the generically-filled
# enum-ish / CHECK columns and point at the concrete repairs — the
# `set` clause on a fresh seed, or the column-fill form for the rows
# just created.
advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`."
ok: ok:
# ADR-0040: the generic `[ok] <verb> <subject>` summary line was # ADR-0040: the generic `[ok] <verb> <subject>` summary line was
# retired — a successful command's echo line now carries a ✓ # retired — a successful command's echo line now carries a ✓
@@ -985,6 +1008,7 @@ ok:
# per-operation row-count footers below still convey real payload # per-operation row-count footers below still convey real payload
# and are unchanged. # and are unchanged.
rows_inserted: " {count} row(s) inserted" rows_inserted: " {count} row(s) inserted"
rows_seeded: " {count} row(s) seeded into {table}"
rows_updated: " {count} row(s) updated" rows_updated: " {count} row(s) updated"
rows_deleted: " {count} row(s) deleted" rows_deleted: " {count} row(s) deleted"
# Shown beneath a `drop column --cascade` summary, once per # Shown beneath a `drop column --cascade` summary, once per
+98 -7
View File
@@ -84,16 +84,60 @@ pub fn render_input_runs_in_mode(
cache: &crate::completion::SchemaCache, cache: &crate::completion::SchemaCache,
mode: Mode, mode: Mode,
) -> Vec<StyledRun> { ) -> Vec<StyledRun> {
let mut runs = lex_to_runs_in_mode(input, theme, mode); // Identity feedback view — highlight/overlay the whole input.
render_input_runs_feedback(input, cursor_byte, theme, cache, mode, input, cursor_byte, 0)
}
/// [`render_input_runs_in_mode`] with a separate **feedback view** for
/// the walker-driven highlighting and overlays.
///
/// Under the `:` one-shot escape (ADR-0003) the buffer carries a leading
/// `:` that is not advanced SQL; `view` is the stripped SQL (and
/// `view_cursor` the cursor within it) so the walker highlights and
/// diagnoses the SQL itself, while the `:` prefix renders as plain text.
/// `offset` is the byte length stripped from the front — base runs and
/// overlay positions are shifted by it back into `input` coordinates.
/// Callers without a one-shot escape pass `(input, cursor, 0)` (what
/// [`render_input_runs_in_mode`] does).
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn render_input_runs_feedback(
input: &str,
cursor_byte: usize,
theme: &Theme,
cache: &crate::completion::SchemaCache,
mode: Mode,
view: &str,
view_cursor: usize,
offset: usize,
) -> Vec<StyledRun> {
// Base highlighting runs over the SQL view, shifted into buffer
// coordinates; the stripped prefix (the `:` + space) renders as
// plain foreground text.
let mut runs: Vec<StyledRun> = if offset == 0 {
lex_to_runs_in_mode(input, theme, mode)
} else {
let mut r = vec![StyledRun {
byte_range: (0, offset),
style: ratatui::style::Style::default().fg(theme.fg),
}];
r.extend(lex_to_runs_in_mode(view, theme, mode).into_iter().map(|run| {
StyledRun {
byte_range: (run.byte_range.0 + offset, run.byte_range.1 + offset),
..run
}
}));
r
};
if let InputState::DefiniteErrorAt(pos) = if let InputState::DefiniteErrorAt(pos) =
classify_parse_result(parse_command_with_schema_in_mode(input, cache, mode)) classify_parse_result(parse_command_with_schema_in_mode(view, cache, mode))
{ {
overlay_error(&mut runs, pos, theme); overlay_error(&mut runs, pos + offset, theme);
} }
if let Some(inv) = if let Some(inv) =
crate::completion::invalid_ident_at_cursor_in_mode(input, cursor_byte, cache, mode) crate::completion::invalid_ident_at_cursor_in_mode(view, view_cursor, cache, mode)
{ {
overlay_error(&mut runs, inv.range.0, theme); overlay_error(&mut runs, inv.range.0 + offset, theme);
} }
// Schema-aware diagnostics (ADR-0027 §2): unknown table / // Schema-aware diagnostics (ADR-0027 §2): unknown table /
// column (ERROR), or a dubious comparison (WARNING), is // column (ERROR), or a dubious comparison (WARNING), is
@@ -101,12 +145,12 @@ pub fn render_input_runs_in_mode(
// so a problem the user has typed past stays visible. The // so a problem the user has typed past stays visible. The
// mode-aware walk picks up the SQL-specific diagnostics from // mode-aware walk picks up the SQL-specific diagnostics from
// ADR-0032 in advanced mode. // ADR-0032 in advanced mode.
for diag in walker::input_diagnostics_in_mode(input, Some(cache), mode) { for diag in walker::input_diagnostics_in_mode(view, Some(cache), mode) {
let colour = match diag.severity { let colour = match diag.severity {
walker::Severity::Error => theme.tok_error, walker::Severity::Error => theme.tok_error,
walker::Severity::Warning => theme.warning, walker::Severity::Warning => theme.warning,
}; };
overlay_span(&mut runs, diag.span, colour); overlay_span(&mut runs, (diag.span.0 + offset, diag.span.1 + offset), colour);
} }
inject_cursor(&mut runs, input, cursor_byte, theme); inject_cursor(&mut runs, input, cursor_byte, theme);
runs runs
@@ -817,6 +861,9 @@ fn ambient_hint_core_in_mode(
crate::dsl::grammar::IdentSource::Tables => "table", crate::dsl::grammar::IdentSource::Tables => "table",
crate::dsl::grammar::IdentSource::Columns => "column", crate::dsl::grammar::IdentSource::Columns => "column",
crate::dsl::grammar::IdentSource::Relationships => "relationship", crate::dsl::grammar::IdentSource::Relationships => "relationship",
// The `seed … set <col> as <gen>` curated vocabulary
// (ADR-0048 D9) flags an unknown name here.
crate::dsl::grammar::IdentSource::Generators => "generator",
// `NewName`, `Types`, `Free` are filtered out by // `NewName`, `Types`, `Free` are filtered out by
// `invalid_ident_at_cursor` (it only fires for // `invalid_ident_at_cursor` (it only fires for
// known-set sources via `completes_from_schema`), so // known-set sources via `completes_from_schema`), so
@@ -1105,6 +1152,50 @@ mod tests {
assert!(reversed(&runs[0])); assert!(reversed(&runs[0]));
} }
#[test]
fn one_shot_colon_highlights_the_sql_and_overlays_no_error() {
    use crate::completion::{SchemaCache, TableColumn};
    use crate::dsl::types::Type;

    // ADR-0003 `:` one-shot. The SQL that follows the `:` has to be
    // highlighted and diagnosed exactly as in real advanced mode: the
    // prefix itself is plain text, and a valid query gets no error
    // overlay. (Previously the walker choked on the `:` and painted it
    // red.)
    let theme = dark();

    // A schema in which the query below is fully valid.
    let mut cache = SchemaCache::default();
    cache.tables.push("Customers".into());
    cache.columns.push("name".into());
    cache
        .table_columns
        .insert("Customers".into(), vec![TableColumn::new("name", Type::Text)]);

    let view = "select name from Customers";
    let input = ": select name from Customers";
    let offset = input.len() - view.len(); // the stripped ": " prefix

    let runs = render_input_runs_feedback(
        input,
        input.len(),
        &theme,
        &cache,
        Mode::Advanced,
        view,
        view.len(),
        offset,
    );

    let has_error_run = runs.iter().any(|r| r.style.fg == Some(theme.tok_error));
    assert!(
        !has_error_run,
        "a valid one-shot query must carry no error overlay: {runs:?}",
    );

    let select_is_keyword = runs
        .iter()
        .any(|r| r.byte_range.0 == offset && r.style.fg == Some(theme.tok_keyword));
    assert!(
        select_is_keyword,
        "the `select` keyword (past the `: ` prefix) is keyword-coloured: {runs:?}",
    );

    assert_eq!(
        runs.first().unwrap().byte_range.0,
        0,
        "the `:` prefix is rendered from byte 0",
    );
}
#[test] #[test]
fn keyword_token_takes_keyword_colour() { fn keyword_token_takes_keyword_colour() {
let theme = dark(); let theme = dark();
+1
View File
@@ -23,6 +23,7 @@ pub mod output_render;
pub mod persistence; pub mod persistence;
pub mod project; pub mod project;
pub mod runtime; pub mod runtime;
pub mod seed;
pub mod theme; pub mod theme;
pub mod type_change; pub mod type_change;
pub mod ui; pub mod ui;
+16
View File
@@ -1492,6 +1492,10 @@ fn spawn_dsl_dispatch(
command: command.clone(), command: command.clone(),
result, result,
}, },
Ok(CommandOutcome::Seed(result)) => AppEvent::DslSeedSucceeded {
command: command.clone(),
result,
},
Ok(CommandOutcome::Update(result)) => AppEvent::DslUpdateSucceeded { Ok(CommandOutcome::Update(result)) => AppEvent::DslUpdateSucceeded {
command: command.clone(), command: command.clone(),
result, result,
@@ -2364,6 +2368,7 @@ enum CommandOutcome {
ShowRelationship(Option<Box<crate::db::RelationshipDiagramData>>), ShowRelationship(Option<Box<crate::db::RelationshipDiagramData>>),
QueryPlan(QueryPlan), QueryPlan(QueryPlan),
Insert(InsertResult), Insert(InsertResult),
Seed(crate::db::SeedResult),
Update(UpdateResult), Update(UpdateResult),
Delete(DeleteResult), Delete(DeleteResult),
ChangeColumn(ChangeColumnTypeResult), ChangeColumn(ChangeColumnTypeResult),
@@ -2911,6 +2916,17 @@ async fn execute_command_typed(
.insert(table, columns, values, src) .insert(table, columns, values, src)
.await .await
.map(CommandOutcome::Insert), .map(CommandOutcome::Insert),
// ADR-0048 (SD1/SD2 Phase 2).
Command::Seed {
table,
target_column,
count,
overrides,
rng_seed,
} => database
.seed(table, target_column, count, overrides, rng_seed, src)
.await
.map(CommandOutcome::Seed),
Command::Update { Command::Update {
table, table,
assignments, assignments,
+193
View File
@@ -0,0 +1,193 @@
//! Parse a simple `<column> IN ('a', 'b', …)` CHECK into its allowed
//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds
//! from the permitted values instead of generic text. Anything more
//! complex (ranges, expressions, multi-column, non-literal items)
//! returns `None`; the executor then best-effort generates and lets a
//! violation surface through the friendly-error layer.
/// Return the allowed values of a `<column> IN ( … )` CHECK expression.
///
/// The text before the `IN` keyword must be exactly `column`, compared
/// ASCII-case-insensitively and optionally wrapped in double quotes
/// (`"status"`); the keyword itself may be in either case. Each list
/// item must be a single-quoted string literal, with `''` standing for
/// one embedded quote.
///
/// Yields `None` when no `IN (` is found, when the left-hand side is
/// not `column`, when the list contains anything but string literals,
/// or when the list is empty.
#[must_use]
pub fn parse_in_check_values(check: &str, column: &str) -> Option<Vec<String>> {
    let (keyword_at, paren_at) = find_in_paren(check)?;
    let lhs = check[..keyword_at].trim();
    if lhs_is_column(lhs, column) {
        extract_quoted_list(&check[paren_at..]).filter(|values| !values.is_empty())
    } else {
        None
    }
}
/// Whether `b` can be part of a bare identifier: an ASCII letter, an
/// ASCII digit, or `_`. Used to test word boundaries around `IN`.
const fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
/// Locate the first `IN` keyword that stands as its own word, lies
/// outside single-quoted literals, and is followed (after optional
/// ASCII whitespace) by `(`.
///
/// Returns `(byte index of the keyword, byte index of the "(")`, or
/// `None` when the text has no such keyword.
fn find_in_paren(check: &str) -> Option<(usize, usize)> {
    let bytes = check.as_bytes();
    let mut inside_literal = false;
    for (pos, &byte) in bytes.iter().enumerate() {
        // Every `'` flips the literal state; an escaped `''` flips it
        // twice and so leaves it unchanged.
        if byte == b'\'' {
            inside_literal = !inside_literal;
            continue;
        }
        if inside_literal {
            continue;
        }

        let spells_in = byte.eq_ignore_ascii_case(&b'i')
            && bytes
                .get(pos + 1)
                .is_some_and(|next| next.eq_ignore_ascii_case(&b'n'));
        if !spells_in {
            continue;
        }

        // Word boundaries: `min(` or `index` must not count as `IN`.
        let keyword_end = pos + 2;
        let starts_word = pos
            .checked_sub(1)
            .is_none_or(|prev| !is_ident_byte(bytes[prev]));
        let ends_word = bytes
            .get(keyword_end)
            .is_none_or(|&next| !is_ident_byte(next));
        if !(starts_word && ends_word) {
            continue;
        }

        let gap = bytes[keyword_end..]
            .iter()
            .take_while(|b| b.is_ascii_whitespace())
            .count();
        let paren = keyword_end + gap;
        if bytes.get(paren) == Some(&b'(') {
            return Some((pos, paren));
        }
    }
    None
}
/// Whether the CHECK's left-hand side is just `column`.
///
/// Surrounding whitespace is ignored and one pair of enclosing double
/// quotes is removed when both are present; the comparison is
/// ASCII-case-insensitive. A lone or unbalanced quote is left in place
/// and therefore does not match.
fn lhs_is_column(lhs: &str, column: &str) -> bool {
    let name = lhs.trim();
    let unquoted = match name.strip_prefix('"') {
        Some(rest) => rest.strip_suffix('"').unwrap_or(name),
        None => name,
    };
    unquoted.eq_ignore_ascii_case(column)
}
/// Parse `( 'a', 'b', … )` into the unescaped literals.
///
/// `s` must start at the opening `(` and must contain nothing after the
/// closing `)` except whitespace. Whitespace around items is ignored.
///
/// Returns `None` when:
/// - `s` does not start with `(`, or the list is never closed;
/// - any item is not a pure single-quoted literal (numbers,
///   identifiers, expressions);
/// - anything other than whitespace follows the closing `)`. Trailing
///   text means the CHECK is a compound expression
///   (`… IN ('a') AND qty > 0`), not the simple enum shape this parser
///   recognises, so it is rejected rather than silently truncated.
///
/// An empty list `()` parses to an empty vector.
fn extract_quoted_list(s: &str) -> Option<Vec<String>> {
    let mut chars = s.chars().peekable();
    if chars.next()? != '(' {
        return None;
    }
    let mut values = Vec::new();
    loop {
        while chars.next_if(|c| c.is_whitespace()).is_some() {}
        match chars.peek()? {
            ')' => {
                chars.next();
                break;
            }
            '\'' => {
                values.push(read_quoted(&mut chars)?);
                while chars.next_if(|c| c.is_whitespace()).is_some() {}
                // After a literal only a separator or the end of the
                // list is acceptable.
                match chars.next()? {
                    ',' => {}
                    ')' => break,
                    _ => return None,
                }
            }
            _ => return None,
        }
    }
    // The list must be the whole remainder of the expression.
    chars.all(char::is_whitespace).then_some(values)
}
/// Read one single-quoted string literal, unescaping `''` to `'`.
///
/// The cursor must be on the opening `'`; on success it is left just
/// past the closing `'`. Returns `None` if the next character is not
/// `'` or the input ends before the literal is closed.
fn read_quoted(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
    if chars.next()? != '\'' {
        return None;
    }
    let mut out = String::new();
    loop {
        match chars.next()? {
            // A doubled quote is an escaped quote inside the literal.
            '\'' if chars.peek() == Some(&'\'') => {
                chars.next();
                out.push('\'');
            }
            // A single quote closes the literal.
            '\'' => return Some(out),
            c => out.push(c),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Build the expected `Vec<String>` from string literals.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    #[test]
    fn parses_a_simple_in_check() {
        let parsed = parse_in_check_values("status IN ('active', 'closed')", "status");
        assert_eq!(parsed, Some(owned(&["active", "closed"])));
    }

    #[test]
    fn tolerates_a_quoted_column_and_lowercase_in() {
        let parsed = parse_in_check_values("\"status\" in ('a','b','c')", "status");
        assert_eq!(parsed, Some(owned(&["a", "b", "c"])));
    }

    #[test]
    fn unescapes_embedded_quotes() {
        let parsed = parse_in_check_values("note IN ('it''s', 'ok')", "note");
        assert_eq!(parsed, Some(owned(&["it's", "ok"])));
    }

    #[test]
    fn handles_commas_and_parens_inside_literals() {
        let parsed = parse_in_check_values("label IN ('a, b', 'c)d')", "label");
        assert_eq!(parsed, Some(owned(&["a, b", "c)d"])));
    }

    #[test]
    fn rejects_non_literal_lists() {
        let parsed = parse_in_check_values("n IN (1, 2, 3)", "n");
        assert_eq!(parsed, None);
    }

    #[test]
    fn rejects_non_in_checks() {
        let comparison = parse_in_check_values("age >= 0", "age");
        assert_eq!(comparison, None);
        let function_call = parse_in_check_values("length(name) > 0", "name");
        assert_eq!(function_call, None);
    }

    #[test]
    fn rejects_when_lhs_is_a_different_column() {
        let parsed = parse_in_check_values("status IN ('a')", "role");
        assert_eq!(parsed, None);
    }

    #[test]
    fn does_not_trip_on_in_inside_a_word_or_literal() {
        // The "in" inside `min` is part of a longer word, not the IN
        // operator.
        let parsed = parse_in_check_values("min(x) > 0", "x");
        assert_eq!(parsed, None);
    }
}
+584
View File
@@ -0,0 +1,584 @@
//! Value production: turn a [`Generator`] + a seeded RNG into a
//! [`Value`] (ADR-0048 D8/D9). Realistic generators come from the
//! `fake` crate (English locale); `product` is hand-rolled (D9, no
//! commerce module exists); dates are generated against a **fixed
//! reference epoch** so a `--seed` run is fully reproducible without
//! depending on the wall clock (D8 bounded windows).
//!
//! The stateful markers ([`Generator::IdentitySequential`],
//! [`Generator::ForeignKeySample`]) are resolved by the executor with
//! database context; if one reaches here un-intercepted it falls back
//! to type-based generation rather than panicking.
use chrono::{Datelike, NaiveDate};
use fake::Fake;
use rand::RngExt;
use crate::dsl::types::Type;
use crate::dsl::value::Value;
use crate::seed::{Generator, SeedRng};
/// Fixed anchor for bounded date/datetime windows. Using a constant
/// (rather than `now()`) keeps `--seed` output reproducible across days
/// and makes tests deterministic. It advances with releases.
const REF_YEAR: i32 = 2025;
/// Month component of the fixed reference date.
const REF_MONTH: u32 = 6;
/// Day-of-month component of the fixed reference date.
const REF_DAY: u32 = 1;
/// `~3 years` window for "recent" dates, in days.
const RECENT_WINDOW_DAYS: i64 = 3 * 365;
/// Adult birth window (≈18–80 years ago), in days: the near edge
/// (youngest, 18 × 365 days before the reference date).
const ADULT_MIN_DAYS: i64 = 18 * 365;
/// Far edge of the adult birth window (oldest, 80 × 365 days before the
/// reference date).
const ADULT_MAX_DAYS: i64 = 80 * 365;
/// Produce one value for `generator` against destination type `ty`.
///
/// Text-shaped generators return [`Value::Text`], numeric ones return
/// [`Value::Number`] (the number rendered as a string), and
/// [`Generator::Boolean`] returns [`Value::Bool`]. `ty` only influences
/// the generators that adapt to the destination column:
/// `CurrencyAmount`, `PickFrom`, `Range`, and the type-based fallback.
///
/// All randomness is drawn from `rng`, so the same generator sequence on
/// an identically seeded RNG yields the same values.
#[must_use]
pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Value {
use fake::faker::address::en as addr;
use fake::faker::company::en as company;
use fake::faker::internet::en as net;
use fake::faker::job::en as job;
use fake::faker::lorem::en as lorem;
use fake::faker::name::en as name;
use fake::faker::phone_number::en as phone;
match generator {
Generator::FirstName => Value::Text(name::FirstName().fake_with_rng(rng)),
Generator::LastName => Value::Text(name::LastName().fake_with_rng(rng)),
Generator::FullName => Value::Text(name::Name().fake_with_rng(rng)),
Generator::Email => Value::Text(net::FreeEmail().fake_with_rng(rng)),
Generator::Username => Value::Text(net::Username().fake_with_rng(rng)),
Generator::Password => Value::Text(net::Password(8..16).fake_with_rng(rng)),
Generator::Phone => Value::Text(phone::PhoneNumber().fake_with_rng(rng)),
Generator::City => Value::Text(addr::CityName().fake_with_rng(rng)),
Generator::Country => Value::Text(addr::CountryName().fake_with_rng(rng)),
Generator::StateName => Value::Text(addr::StateName().fake_with_rng(rng)),
Generator::Street => Value::Text(addr::StreetName().fake_with_rng(rng)),
Generator::ZipCode => Value::Text(addr::ZipCode().fake_with_rng(rng)),
Generator::Company => Value::Text(company::CompanyName().fake_with_rng(rng)),
Generator::JobTitle => Value::Text(job::Title().fake_with_rng(rng)),
Generator::ProductName => Value::Text(product_name(rng)),
Generator::Sentence => Value::Text(lorem::Sentence(5..12).fake_with_rng(rng)),
Generator::Paragraph => Value::Text(lorem::Paragraph(2..4).fake_with_rng(rng)),
// URL = `https://<lorem word>.<domain suffix>`; the word is drawn
// before the suffix.
Generator::Url => {
let word: String = lorem::Word().fake_with_rng(rng);
let suffix: String = net::DomainSuffix().fake_with_rng(rng);
Value::Text(format!("https://{word}.{suffix}"))
}
// Hand-rolled — `fake`'s color module is feature-gated (it pulls
// an extra crate); a hex colour is trivial from the RNG.
Generator::HexColor => Value::Text(format!("#{:06X}", rng.random_range(0..0x0100_0000))),
Generator::CurrencyAmount => currency_amount(ty, rng),
Generator::Age => Value::Number(rng.random_range(18..=80).to_string()),
Generator::SmallInt => Value::Number(rng.random_range(1..=100).to_string()),
Generator::DateRecent => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
Generator::DateAdult => {
Value::Text(format_date(random_past_date(rng, ADULT_MIN_DAYS, ADULT_MAX_DAYS)))
}
Generator::DateTimeRecent => Value::Text(random_recent_datetime(rng)),
Generator::Boolean => Value::Bool(rng.random_range(0..2) == 1),
// A non-empty fixed list: pick one entry and wrap it in the `Value`
// shape that matches `ty`. The guard keeps `pick` away from an
// empty slice; the empty case is handled by the fallback arm below.
Generator::PickFrom(values) if !values.is_empty() => {
let chosen: &String = pick(rng, values);
literal_to_value(chosen, ty)
}
// The `set <col> between low and high` override (D2). Bounds are
// interpreted per the destination type; the executor has already
// validated they parse, so a defensive parse failure here falls
// back to type-based generation rather than producing junk.
Generator::Range { low, high } => range_value(low, high, ty, rng),
// Un-intercepted markers + an empty pick list → type-based.
Generator::PickFrom(_)
| Generator::IdentitySequential
| Generator::ForeignKeySample
| Generator::Generic => generic_for_type(ty, rng),
}
}
/// Draw a value between `low` and `high` for the `between` override (D2).
///
/// How the bounds are read depends on the destination type `ty`:
/// integers for `int`/`serial`, floats (rendered with two decimals) for
/// `real`/`decimal`, ISO dates for `date`, and datetimes for `datetime`.
/// When the bounds do not parse for `ty`, or `ty` has no range meaning,
/// the type-based fallback is returned instead. The executor validates
/// bounds up front, so that path is defensive only.
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
    match ty {
        Type::Int | Type::Serial => {
            if let Some((lo, hi)) = parse_int_range(low, high) {
                return Value::Number(rng.random_range(lo..=hi).to_string());
            }
        }
        Type::Real | Type::Decimal => {
            if let Some((lo, hi)) = parse_real_range(low, high) {
                // lo + unit * (hi - lo), fused into one rounding step.
                let sample = rng.random::<f64>().mul_add(hi - lo, lo);
                return Value::Number(format!("{sample:.2}"));
            }
        }
        Type::Date => {
            if let Some((lo, hi)) = parse_date_range(low, high) {
                return Value::Text(format_date(random_date_between(rng, lo, hi)));
            }
        }
        Type::DateTime => {
            if let Some((lo, hi)) = parse_datetime_range(low, high) {
                return Value::Text(random_datetime_between(rng, lo, hi));
            }
        }
        // text / bool / blob / shortid have no range meaning.
        _ => {}
    }
    generic_for_type(ty, rng)
}
/// Check that `low`/`high` are usable `between` bounds for a column of
/// type `ty`.
///
/// The executor calls this *before* generation (D2). Returns `None` when
/// the bounds are valid; otherwise a short human-readable reason, which
/// the executor wraps in a friendly error naming the column.
#[must_use]
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
    // One arm per type family: whether the bounds parse, and the hint to
    // show when they do not.
    let (bounds_parse, hint): (bool, &str) = match ty {
        Type::Int | Type::Serial => (
            parse_int_range(low, high).is_some(),
            "expected two whole numbers, e.g. `between 1 and 100`",
        ),
        Type::Real | Type::Decimal => (
            parse_real_range(low, high).is_some(),
            "expected two numbers, e.g. `between 1.0 and 9.99`",
        ),
        Type::Date => (
            parse_date_range(low, high).is_some(),
            "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`",
        ),
        Type::DateTime => (
            parse_datetime_range(low, high).is_some(),
            "expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`",
        ),
        // text / bool / blob / shortid have no range meaning.
        Type::Text | Type::Bool | Type::Blob | Type::ShortId => (
            false,
            "a `between` range only applies to numeric and date/datetime columns",
        ),
    };
    if bounds_parse {
        None
    } else {
        Some(hint.to_string())
    }
}
/// Parse both bounds as `i64` (surrounding whitespace ignored) and
/// return them as `(smaller, larger)`; `None` if either bound is not an
/// integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    Some((first.min(second), first.max(second)))
}
/// Parse both bounds as finite `f64` values (surrounding whitespace
/// ignored) and return them as `(smaller, larger)`.
///
/// Returns `None` if either bound fails to parse or is not finite
/// (`inf`, `NaN`).
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let first = low.trim().parse::<f64>().ok()?;
    let second = high.trim().parse::<f64>().ok()?;
    if !(first.is_finite() && second.is_finite()) {
        return None;
    }
    if first > second {
        Some((second, first))
    } else {
        Some((first, second))
    }
}
/// Parse both bounds as `YYYY-MM-DD` dates (surrounding whitespace
/// ignored) and return them as `(earlier, later)`; `None` if either
/// bound is not a valid date in that format.
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
    let parse = |text: &str| NaiveDate::parse_from_str(text.trim(), "%Y-%m-%d").ok();
    let first = parse(low)?;
    let second = parse(high)?;
    if first > second {
        Some((second, first))
    } else {
        Some((first, second))
    }
}
/// Parse one datetime bound, accepting both the `T`-separated and the
/// space-separated spellings the app validates (`bind_datetime` /
/// `validate_datetime`). Surrounding whitespace is ignored; the
/// `T` form is tried first.
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
    const FORMATS: [&str; 2] = ["%Y-%m-%dT%H:%M:%S", "%Y-%m-%d %H:%M:%S"];
    let text = s.trim();
    FORMATS
        .iter()
        .find_map(|format| chrono::NaiveDateTime::parse_from_str(text, format).ok())
}
/// Parse both bounds with [`parse_one_datetime`] and return them as
/// `(earlier, later)`; `None` if either bound does not parse.
fn parse_datetime_range(
    low: &str,
    high: &str,
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
    let first = parse_one_datetime(low)?;
    let second = parse_one_datetime(high)?;
    if first > second {
        Some((second, first))
    } else {
        Some((first, second))
    }
}
/// Uniform date in `[lo, hi]` (inclusive).
///
/// The bounds are ordered before sampling, so a reversed pair is
/// tolerated rather than panicking on an empty range. This matches
/// [`random_datetime_between`]. For `lo <= hi` the draw is unchanged.
///
/// Falls back to the earlier bound in the (not expected) case that the
/// sampled day number does not map back to a valid date.
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
    let (first, last) = if lo <= hi { (lo, hi) } else { (hi, lo) };
    // Sample in "days since the common era" space, then map back.
    let day = rng.random_range(first.num_days_from_ce()..=last.num_days_from_ce());
    NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(first)
}
/// Uniform datetime between `lo` and `hi` (inclusive, second
/// resolution), rendered as `YYYY-MM-DDTHH:MM:SS`.
///
/// Both bounds are treated as UTC to get epoch seconds; a reversed pair
/// is sampled as if it had been given in order. If the sampled second
/// cannot be turned back into a datetime, `lo` is rendered instead.
fn random_datetime_between(
    rng: &mut SeedRng,
    lo: chrono::NaiveDateTime,
    hi: chrono::NaiveDateTime,
) -> String {
    let start = lo.and_utc().timestamp();
    let end = hi.and_utc().timestamp();
    let (first, last) = if start <= end { (start, end) } else { (end, start) };
    let secs = rng.random_range(first..=last);
    let moment = match chrono::DateTime::from_timestamp(secs, 0) {
        Some(utc) => utc.naive_utc(),
        None => lo,
    };
    moment.format("%Y-%m-%dT%H:%M:%S").to_string()
}
/// Type-based fallback generation (D8).
///
/// Produces a value shaped for `ty` when no name-aware generator
/// applies:
///
/// - `text`: two to three lorem words joined by spaces
/// - `shortid`: a freshly generated short id
/// - `int` / `serial`: a whole number in `1..=10_000`
/// - `real`: a two-decimal number in `0.00..1000.00`
/// - `decimal`: `<0..10_000>.<two-digit cents>`
/// - `bool`: a coin flip
/// - `date` / `datetime`: a value inside the recent window
/// - `blob`: `Value::Null`, the only type with no generated value here
///
/// `serial`, `shortid` and `blob` columns are normally handled by the
/// executor (autogen / block guard) before reaching this function; the
/// arms here exist so an un-intercepted column still gets a sane value.
fn generic_for_type(ty: Type, rng: &mut SeedRng) -> Value {
    use fake::faker::lorem::en as lorem;
    match ty {
        Type::Text => {
            let words: Vec<String> = lorem::Words(2..4).fake_with_rng(rng);
            Value::Text(words.join(" "))
        }
        Type::ShortId => Value::Text(crate::dsl::shortid::generate_with_rng(rng)),
        Type::Int | Type::Serial => Value::Number(rng.random_range(1..=10_000).to_string()),
        Type::Real => {
            // Draw hundredths as an integer so the result has exactly two
            // meaningful decimals; `i32 -> f64` is lossless.
            let hundredths: i32 = rng.random_range(0..100_000);
            Value::Number(format!("{:.2}", f64::from(hundredths) / 100.0))
        }
        Type::Decimal => {
            let dollars = rng.random_range(0..10_000);
            let cents = rng.random_range(0..100);
            Value::Number(format!("{dollars}.{cents:02}"))
        }
        Type::Bool => Value::Bool(rng.random_range(0..2) == 1),
        Type::Date => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
        Type::DateTime => Value::Text(random_recent_datetime(rng)),
        Type::Blob => Value::Null,
    }
}
/// Convert a fixed-list literal into the `Value` variant that suits
/// `ty` (used by `PickFrom` — enum / `IN`-CHECK values).
///
/// Numeric types keep the literal text as a `Number`; `bool` is true
/// for `"1"` or a case-insensitive `"true"`; every other type gets the
/// literal as `Text`.
fn literal_to_value(s: &str, ty: Type) -> Value {
    if matches!(ty, Type::Int | Type::Serial | Type::Real | Type::Decimal) {
        return Value::Number(s.to_string());
    }
    if matches!(ty, Type::Bool) {
        return Value::Bool(s.eq_ignore_ascii_case("true") || s == "1");
    }
    Value::Text(s.to_string())
}
/// A money-shaped amount in `1..=1000`: `dollars.cents` (two-digit
/// cents) for `real`/`decimal`, a whole number for every other type.
fn currency_amount(ty: Type, rng: &mut SeedRng) -> Value {
    // The whole part is always the first draw, whatever the type.
    let whole: i32 = rng.random_range(1..=1_000);
    if matches!(ty, Type::Real | Type::Decimal) {
        let cents: i32 = rng.random_range(0..100);
        Value::Number(format!("{whole}.{cents:02}"))
    } else {
        // int / serial / anything else numeric → whole amount.
        Value::Number(whole.to_string())
    }
}
// — the hand-rolled `product` generator (D9) —
/// First word of a generated product name: a capitalised adjective.
const PRODUCT_ADJECTIVES: &[&str] = &[
"Sleek", "Rustic", "Ergonomic", "Handcrafted", "Refined", "Modern",
"Vintage", "Compact", "Premium", "Lightweight", "Durable", "Elegant",
"Sturdy", "Smooth", "Gorgeous", "Intelligent", "Practical", "Awesome",
"Incredible", "Recycled",
];
/// Second word of a generated product name: a capitalised material.
const PRODUCT_MATERIALS: &[&str] = &[
"Wooden", "Copper", "Granite", "Cotton", "Steel", "Leather", "Bamboo",
"Plastic", "Ceramic", "Glass", "Concrete", "Rubber", "Bronze", "Marble",
"Linen", "Silk", "Aluminum", "Wool", "Gold", "Carbon",
];
/// Third word of a generated product name: a capitalised noun.
const PRODUCT_NOUNS: &[&str] = &[
"Chair", "Lamp", "Table", "Bottle", "Backpack", "Keyboard", "Mug",
"Shoes", "Jacket", "Watch", "Wallet", "Bench", "Hat", "Gloves",
"Towel", "Ball", "Bike", "Knife", "Pillow", "Blanket",
];
/// Build a product name as `<adjective> <material> <noun>`, one random
/// pick from each word list, drawn in that order.
fn product_name(rng: &mut SeedRng) -> String {
    let adjective = *pick(rng, PRODUCT_ADJECTIVES);
    let material = *pick(rng, PRODUCT_MATERIALS);
    let noun = *pick(rng, PRODUCT_NOUNS);
    [adjective, material, noun].join(" ")
}
// — bounded dates (D8) —
/// The fixed reference date built from `REF_YEAR`/`REF_MONTH`/`REF_DAY`.
///
/// # Panics
///
/// Panics if those constants do not form a valid calendar date.
const fn reference_date() -> NaiveDate {
    let Some(date) = NaiveDate::from_ymd_opt(REF_YEAR, REF_MONTH, REF_DAY) else {
        panic!("reference date constants must be valid");
    };
    date
}
/// A date between `min_days_ago` and `max_days_ago` days before the
/// reference date (both ends inclusive).
///
/// An offset that does not fit in `i32` is treated as zero, and a day
/// number that maps to no valid date yields the reference date itself.
fn random_past_date(rng: &mut SeedRng, min_days_ago: i64, max_days_ago: i64) -> NaiveDate {
    let offset = rng.random_range(min_days_ago..=max_days_ago);
    let offset = i32::try_from(offset).unwrap_or(0);
    let anchor = reference_date();
    NaiveDate::from_num_days_from_ce_opt(anchor.num_days_from_ce() - offset).unwrap_or(anchor)
}
/// Render `date` as an ISO `YYYY-MM-DD` string.
fn format_date(date: NaiveDate) -> String {
date.format("%Y-%m-%d").to_string()
}
/// A recent datetime rendered as `YYYY-MM-DDTHH:MM:SS`: a date from the
/// recent window followed by an independently drawn hour, minute and
/// second (in that draw order).
fn random_recent_datetime(rng: &mut SeedRng) -> String {
    let day = format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS));
    let hour = rng.random_range(0..24);
    let minute = rng.random_range(0..60);
    let second = rng.random_range(0..60);
    format!("{day}T{hour:02}:{minute:02}:{second:02}")
}
/// Return a uniformly chosen element of `items`.
///
/// `items` must be non-empty: the index is sampled from
/// `0..items.len()`.
fn pick<'a, T>(rng: &mut SeedRng, items: &'a [T]) -> &'a T {
    let index = rng.random_range(0..items.len());
    &items[index]
}
// Unit tests for value generation. Every test seeds its own RNG through
// `make_rng(Some(seed))`, so results do not depend on test order.
#[cfg(test)]
mod tests {
use super::*;
use crate::seed::make_rng;
use pretty_assertions::assert_eq;
// Generate a single value from a freshly seeded RNG.
fn gen_once(generator: &Generator, ty: Type, seed: u64) -> Value {
let mut rng = make_rng(Some(seed));
generate_value(generator, ty, &mut rng)
}
#[test]
fn generation_is_deterministic_for_a_fixed_seed() {
for generator in [
Generator::FullName,
Generator::Email,
Generator::ProductName,
Generator::DateRecent,
Generator::CurrencyAmount,
] {
let a = gen_once(&generator, Type::Text, 7);
let b = gen_once(&generator, Type::Text, 7);
assert_eq!(a, b, "{generator:?} must reproduce for a fixed seed");
}
}
#[test]
fn text_generators_produce_nonempty_text() {
for generator in [
Generator::FirstName,
Generator::LastName,
Generator::FullName,
Generator::Email,
Generator::Username,
Generator::Company,
Generator::City,
Generator::ProductName,
] {
let v = gen_once(&generator, Type::Text, 3);
match v {
Value::Text(s) => assert!(!s.trim().is_empty(), "{generator:?} produced empty text"),
other => panic!("{generator:?} produced non-text {other:?}"),
}
}
}
#[test]
fn email_looks_like_an_email() {
let v = gen_once(&Generator::Email, Type::Text, 11);
let Value::Text(s) = v else { panic!("not text") };
assert!(s.contains('@'), "email should contain @: {s}");
}
#[test]
fn product_name_is_three_capitalised_words() {
let v = gen_once(&Generator::ProductName, Type::Text, 99);
let Value::Text(s) = v else { panic!("not text") };
let words: Vec<&str> = s.split(' ').collect();
assert_eq!(words.len(), 3, "product name should be 3 words: {s}");
for w in words {
assert!(w.chars().next().unwrap().is_ascii_uppercase(), "word `{w}` not capitalised");
}
}
// Window tests: the expected bounds are derived from the same constants
// the generator uses, measured back from `reference_date()`.
#[test]
fn recent_dates_fall_within_the_bounded_window() {
let mut rng = make_rng(Some(1));
let earliest = reference_date()
.checked_sub_days(chrono::Days::new(RECENT_WINDOW_DAYS as u64))
.unwrap();
let latest = reference_date();
for _ in 0..200 {
let v = generate_value(&Generator::DateRecent, Type::Date, &mut rng);
let Value::Text(s) = v else { panic!("date not text") };
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date");
assert!(d >= earliest && d <= latest, "date {d} outside recent window");
}
}
#[test]
fn dob_dates_fall_within_the_adult_window() {
let mut rng = make_rng(Some(2));
let earliest = reference_date()
.checked_sub_days(chrono::Days::new(ADULT_MAX_DAYS as u64))
.unwrap();
let latest = reference_date()
.checked_sub_days(chrono::Days::new(ADULT_MIN_DAYS as u64))
.unwrap();
for _ in 0..200 {
let v = generate_value(&Generator::DateAdult, Type::Date, &mut rng);
let Value::Text(s) = v else { panic!("date not text") };
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid ISO date");
assert!(d >= earliest && d <= latest, "dob {d} outside adult window");
}
}
#[test]
fn datetime_is_iso_shaped() {
let v = gen_once(&Generator::DateTimeRecent, Type::DateTime, 5);
let Value::Text(s) = v else { panic!("not text") };
assert!(s.contains('T'), "datetime needs a T separator: {s}");
// Parses as a naive datetime.
chrono::NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S")
.unwrap_or_else(|e| panic!("invalid datetime {s}: {e}"));
}
#[test]
fn currency_is_whole_for_int_and_fractional_for_decimal() {
let Value::Number(int_amt) = gen_once(&Generator::CurrencyAmount, Type::Int, 4) else {
panic!("not a number")
};
assert!(!int_amt.contains('.'), "int currency should be whole: {int_amt}");
let Value::Number(dec_amt) = gen_once(&Generator::CurrencyAmount, Type::Decimal, 4) else {
panic!("not a number")
};
assert!(dec_amt.contains('.'), "decimal currency should have cents: {dec_amt}");
}
#[test]
fn age_is_in_human_range() {
let mut rng = make_rng(Some(8));
for _ in 0..100 {
let Value::Number(a) = generate_value(&Generator::Age, Type::Int, &mut rng) else {
panic!("age not a number")
};
let n: i64 = a.parse().unwrap();
assert!((18..=80).contains(&n), "age {n} out of range");
}
}
#[test]
fn pick_from_chooses_a_listed_value() {
let generator = Generator::PickFrom(vec!["active".into(), "closed".into()]);
let mut rng = make_rng(Some(6));
for _ in 0..50 {
let Value::Text(s) = generate_value(&generator, Type::Text, &mut rng) else {
panic!("not text")
};
assert!(matches!(s.as_str(), "active" | "closed"), "unexpected pick {s}");
}
}
#[test]
fn pick_from_wraps_numeric_values_as_numbers() {
let generator = Generator::PickFrom(vec!["1".into(), "2".into(), "3".into()]);
let mut rng = make_rng(Some(6));
let v = generate_value(&generator, Type::Int, &mut rng);
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
}
// `Generator::Range` tests: bounds are passed as strings and interpreted
// according to the destination type.
#[test]
fn int_range_stays_within_inclusive_bounds() {
let g = Generator::Range { low: "10".into(), high: "20".into() };
let mut rng = make_rng(Some(5));
for _ in 0..200 {
let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
panic!("int range should be a number")
};
let n: i64 = s.parse().unwrap();
assert!((10..=20).contains(&n), "int {n} out of [10,20]");
}
}
#[test]
fn real_range_stays_within_bounds_and_has_cents() {
let g = Generator::Range { low: "1.0".into(), high: "9.0".into() };
let mut rng = make_rng(Some(5));
for _ in 0..200 {
let Value::Number(s) = generate_value(&g, Type::Real, &mut rng) else {
panic!("real range should be a number")
};
let n: f64 = s.parse().unwrap();
assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
assert!(s.contains('.'), "real should be formatted with cents: {s}");
}
}
#[test]
fn date_range_stays_within_quoted_bounds() {
let g = Generator::Range {
low: "2023-01-01".into(),
high: "2023-12-31".into(),
};
let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap();
let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap();
let mut rng = make_rng(Some(9));
for _ in 0..200 {
let Value::Text(s) = generate_value(&g, Type::Date, &mut rng) else {
panic!("date range should be text")
};
let d = NaiveDate::parse_from_str(&s, "%Y-%m-%d").expect("valid date");
assert!(d >= lo && d <= hi, "date {d} out of range");
}
}
// `low` greater than `high` is reordered rather than rejected.
#[test]
fn reversed_bounds_are_tolerated() {
let g = Generator::Range { low: "20".into(), high: "10".into() };
let mut rng = make_rng(Some(1));
let Value::Number(s) = generate_value(&g, Type::Int, &mut rng) else {
panic!("number")
};
let n: i64 = s.parse().unwrap();
assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
}
#[test]
fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
// Numeric / date / datetime accept; text / bool reject.
assert!(range_bounds_reason(Type::Int, "1", "10").is_none());
assert!(range_bounds_reason(Type::Real, "1.5", "9.9").is_none());
assert!(range_bounds_reason(Type::Date, "2023-01-01", "2024-01-01").is_none());
assert!(range_bounds_reason(Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00").is_none());
// Non-numeric bound on a numeric column.
assert!(range_bounds_reason(Type::Int, "abc", "10").is_some());
// A range on a text column is meaningless.
assert!(range_bounds_reason(Type::Text, "a", "z").is_some());
assert!(range_bounds_reason(Type::Bool, "0", "1").is_some());
}
#[test]
fn markers_fall_back_to_type_based_generation() {
// An un-intercepted marker must not panic; it generates by type.
let v = gen_once(&Generator::IdentitySequential, Type::Text, 1);
assert!(matches!(v, Value::Text(_)));
let v = gen_once(&Generator::ForeignKeySample, Type::Int, 1);
assert!(matches!(v, Value::Number(_)));
}
#[test]
fn generic_fallback_matches_each_type() {
let mut rng = make_rng(Some(0));
assert!(matches!(generate_value(&Generator::Generic, Type::Text, &mut rng), Value::Text(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Int, &mut rng), Value::Number(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Bool, &mut rng), Value::Bool(_)));
assert!(matches!(generate_value(&Generator::Generic, Type::Blob, &mut rng), Value::Null));
// shortid fallback is a valid base58 id.
let Value::Text(sid) = generate_value(&Generator::Generic, Type::ShortId, &mut rng) else {
panic!("shortid not text")
};
assert!(crate::dsl::shortid::validate(&sid).is_ok(), "invalid shortid {sid}");
}
}
+440
View File
@@ -0,0 +1,440 @@
//! Generator selection: the name-aware, type-gated catalogue (ADR-0048
//! D7), table-context disambiguation for `name`/`title` (D11), the
//! identifier-family rule (D10), and enum-ish detection (D12).
//!
//! Selection is **token-based**: a column name is split on `_`, `-` and
//! camelCase boundaries, lowercased, and matched against an
//! ordered, most-specific-first list. Each rule is **type-gated** — a
//! name match only fires when the column's type is compatible, so a
//! column called `email` typed `int` falls through to type-based
//! generation rather than producing a string. Documented false-positive
//! guards keep `username`/`filename` away from the bare person-name
//! rule.
use tracing::trace;
use crate::dsl::types::Type;
use crate::seed::{ColumnSpec, Generator};
/// Pick the generator for column `col` of `table` (ADR-0048
/// D7/D10/D11/D12) and emit a `trace!` event recording the decision.
///
/// Precedence: foreign keys and `IN`-CHECK columns are resolved first
/// (the executor / a fixed list), then the ordered name catalogue, then
/// the type-based fallback.
#[must_use]
pub fn choose_generator(table: &str, col: &ColumnSpec) -> Generator {
    let chosen = choose_generator_inner(table, col);
    trace!(
        table,
        column = %col.name,
        ty = %col.ty,
        ?chosen,
        "seed: chose generator for column"
    );
    chosen
}
/// The selection logic behind [`choose_generator`], without tracing.
fn choose_generator_inner(table: &str, col: &ColumnSpec) -> Generator {
    // FK columns are filled by sampling existing parent rows (D14) —
    // the executor owns that; generation here would be wrong.
    if col.is_foreign_key {
        return Generator::ForeignKeySample;
    }
    match &col.check_in_values {
        // A simple `col IN (…)` CHECK becomes the value source (D17), so
        // the common enum-as-CHECK pattern just works.
        Some(values) if !values.is_empty() => Generator::PickFrom(values.clone()),
        // Otherwise: name catalogue first, type-based fallback last.
        _ => {
            let name_tokens = tokens(&col.name);
            match_name_generator(table, &name_tokens, col.ty).unwrap_or(Generator::Generic)
        }
    }
}
/// Whether a column name looks like an enum / fixed-value set that has
/// no sensible generic generator (D12). Used by the executor to drive
/// the post-seed advisory; such columns still receive generic text.
#[must_use]
pub fn is_enum_ish(name: &str) -> bool {
const ENUM_TOKENS: &[&str] = &[
"role", "status", "state", "type", "kind", "category", "level",
"tier", "stage", "priority", "gender",
];
let toks = tokens(name);
toks.iter().any(|t| ENUM_TOKENS.contains(&t.as_str()))
}
/// The ordered, most-specific-first name catalogue. Returns `None` when
/// nothing matches (→ type-based fallback) or when a name matches but
/// its type gate fails.
///
/// `toks` are the lowercase tokens of the column name, `table` is only
/// consulted for the bare `name`/`title` rule, and `ty` gates each rule.
///
/// Ordering note: the generic `name`/`title` rule runs *after* the
/// address and organisation/job rules. Otherwise a qualified name such
/// as `state_name`, `city_name`, `company_name` or `job_title` would be
/// swallowed by the table-context rule and never reach its dedicated
/// generator.
fn match_name_generator(table: &str, toks: &[String], ty: Type) -> Option<Generator> {
    let text = type_is_text(ty);
    let numeric = ty.is_numeric();
    // — Person —
    if text && (has_any(toks, &["fname", "firstname"]) || has_seq(toks, "first", "name")) {
        return Some(Generator::FirstName);
    }
    if text
        && (has_any(toks, &["lname", "lastname", "surname"]) || has_seq(toks, "last", "name"))
    {
        return Some(Generator::LastName);
    }
    if text && (has_any(toks, &["username", "login", "handle"]) || has_seq(toks, "user", "name")) {
        return Some(Generator::Username);
    }
    if text && has_any(toks, &["email", "emails"]) {
        return Some(Generator::Email);
    }
    if text && has_any(toks, &["password", "passwd", "pwd"]) {
        return Some(Generator::Password);
    }
    if text && has_any(toks, &["phone", "mobile", "cell", "tel", "telephone"]) {
        return Some(Generator::Phone);
    }
    // — Address —
    if text && has_any(toks, &["city", "town"]) {
        return Some(Generator::City);
    }
    if text && has_token(toks, "country") {
        return Some(Generator::Country);
    }
    // `province` / explicit `state_name`/`state_abbr` → a real state name.
    // Bare `state` is left to enum-ish (it usually means status), so we
    // require `province` or a `state` token paired with name/abbr.
    if text
        && (has_token(toks, "province")
            || (has_token(toks, "state") && has_any(toks, &["name", "abbr"])))
    {
        return Some(Generator::StateName);
    }
    if text && has_any(toks, &["street", "address", "addr"]) {
        return Some(Generator::Street);
    }
    if text && has_any(toks, &["zip", "zipcode", "postcode", "postal"]) {
        return Some(Generator::ZipCode);
    }
    // — Organisation / job —
    if text && has_any(toks, &["company", "employer", "org", "organization", "organisation"]) {
        return Some(Generator::Company);
    }
    if text && has_any(toks, &["job", "position", "profession", "occupation"]) {
        return Some(Generator::JobTitle);
    }
    // — remaining `name` / `title` → table-context (D11) —
    // Every more specific `*_name` / `*_title` rule above has already
    // returned; structural names like `file_name`/`table_name` are
    // filtered out by the false-positive guard.
    if text && has_any(toks, &["name", "title"]) && !is_name_false_positive(toks) {
        return Some(name_by_table_context(table));
    }
    // — Free text —
    if text
        && has_any(
            toks,
            &["description", "bio", "notes", "note", "summary", "comment", "comments", "about"],
        )
    {
        return Some(Generator::Sentence);
    }
    if text && has_any(toks, &["url", "website", "homepage", "link"]) {
        return Some(Generator::Url);
    }
    if text && has_any(toks, &["color", "colour"]) {
        return Some(Generator::HexColor);
    }
    // — Numeric —
    if numeric
        && has_any(
            toks,
            &["price", "amount", "cost", "salary", "balance", "total", "fee", "revenue"],
        )
    {
        return Some(Generator::CurrencyAmount);
    }
    if numeric && has_token(toks, "age") {
        return Some(Generator::Age);
    }
    if numeric && has_any(toks, &["quantity", "qty", "stock", "count"]) {
        return Some(Generator::SmallInt);
    }
    // — Temporal (bounded, D8) —
    if matches!(ty, Type::Date) && has_any(toks, &["dob", "birthday", "birthdate"]) {
        return Some(Generator::DateAdult);
    }
    if matches!(ty, Type::Date) && has_token(toks, "date") {
        return Some(Generator::DateRecent);
    }
    if matches!(ty, Type::DateTime) && has_any(toks, &["timestamp", "datetime", "at"]) {
        return Some(Generator::DateTimeRecent);
    }
    // — Boolean —
    if matches!(ty, Type::Bool)
        && (toks.first().map(String::as_str) == Some("is")
            || toks.first().map(String::as_str) == Some("has")
            || has_any(toks, &["active", "enabled", "verified", "deleted"]))
    {
        return Some(Generator::Boolean);
    }
    // — Identifier family (D10) — late so phone/email/etc. win first.
    if matches!(ty, Type::Int | Type::Text) && is_identifier_name(toks) {
        return Some(Generator::IdentitySequential);
    }
    None
}
/// Resolve a bare `name`/`title` column by the **table** it lives in
/// (D11).
///
/// The table name is tokenised and checked against three word lists in
/// order: product-ish → a product name, company-ish → a company name,
/// person-ish → a person name. A table matching none of them also gets
/// a person name.
fn name_by_table_context(table: &str) -> Generator {
    const PRODUCTY: &[&str] = &[
        "product", "products", "item", "items", "good", "goods",
        "merchandise", "catalog", "catalogue", "inventory", "sku", "skus",
    ];
    const COMPANYISH: &[&str] = &[
        "company", "companies", "vendor", "vendors", "supplier",
        "suppliers", "manufacturer", "manufacturers", "brand", "brands",
        "organization", "organisation",
    ];
    const PERSONISH: &[&str] = &[
        "user", "users", "customer", "customers", "person", "people",
        "employee", "employees", "member", "members", "contact",
        "contacts", "author", "authors", "student", "students",
    ];
    let table_tokens = tokens(table);
    let table_mentions = |words: &[&str]| has_any(&table_tokens, words);
    if table_mentions(PRODUCTY) {
        return Generator::ProductName;
    }
    if table_mentions(COMPANYISH) {
        return Generator::Company;
    }
    if table_mentions(PERSONISH) {
        return Generator::FullName;
    }
    // Unknown table: a person name is the most generally useful
    // default for a bare `name` column.
    Generator::FullName
}
/// Whether a `name`/`title` column is really a structural name
/// (`file_name`, `table_name`, `page_title`, …) rather than a
/// person/product/company name.
///
/// True when the tokens contain `name` or `title` *and* one of the
/// structural qualifiers below. The specific `first`/`last`/`user`
/// cases are matched earlier and never get here.
fn is_name_false_positive(toks: &[String]) -> bool {
    const NON_PERSON: &[&str] = &[
        "file", "table", "host", "domain", "field", "class", "tag",
        "event", "path", "col", "column", "db", "schema", "index", "key",
        "page", "node", "type",
    ];
    let mentions_name = has_any(toks, &["name", "title"]);
    let mentions_structure = has_any(toks, NON_PERSON);
    mentions_name && mentions_structure
}
/// Identifier-family names (D10): columns treated as unique
/// identifiers. FK columns never reach here (handled in
/// [`choose_generator`]).
///
/// Matches any of the explicit identifier tokens, or a *qualified*
/// `number`/`no` (at least two tokens). A bare `number`/`no` is too
/// ambiguous, and `phone_number` has already matched the phone rule.
fn is_identifier_name(toks: &[String]) -> bool {
    const ID_TOKENS: &[&str] = &["id", "code", "sku", "ref", "reference", "barcode"];
    let qualified_number = toks.len() >= 2 && has_any(toks, &["number", "no"]);
    has_any(toks, ID_TOKENS) || qualified_number
}
// — token utilities —
/// Break a column/table name into lowercase tokens.
///
/// Tokens are separated by `_`, `-`, spaces, and camelCase boundaries
/// (an ASCII uppercase letter that directly follows an ASCII lowercase
/// letter or digit). Empty tokens are never produced.
/// `created_at` → [`created`, `at`]; `firstName` → [`first`, `name`];
/// `DOB` → [`dob`].
fn tokens(name: &str) -> Vec<String> {
    let mut parts: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut after_lower_or_digit = false;
    for ch in name.chars() {
        let is_separator = matches!(ch, '_' | '-' | ' ');
        let is_camel_break = ch.is_ascii_uppercase() && after_lower_or_digit;
        // Both a separator and a camelCase boundary close the token
        // collected so far (if there is one).
        if (is_separator || is_camel_break) && !current.is_empty() {
            parts.push(std::mem::take(&mut current));
        }
        if is_separator {
            after_lower_or_digit = false;
        } else {
            current.push(ch.to_ascii_lowercase());
            after_lower_or_digit = ch.is_ascii_lowercase() || ch.is_ascii_digit();
        }
    }
    if !current.is_empty() {
        parts.push(current);
    }
    parts
}
/// Whether the token list contains exactly `t`.
fn has_token(toks: &[String], t: &str) -> bool {
    toks.iter().map(String::as_str).any(|tok| tok == t)
}
/// Whether the token list contains at least one of `candidates`.
fn has_any(toks: &[String], candidates: &[&str]) -> bool {
    toks.iter().any(|tok| candidates.contains(&tok.as_str()))
}
/// Whether some token equal to `a` is directly followed by a token
/// equal to `b` — used for split compound names such as `first name`
/// or `user name`.
fn has_seq(toks: &[String], a: &str, b: &str) -> bool {
    toks.iter()
        .zip(toks.iter().skip(1))
        .any(|(left, right)| left == a && right == b)
}
/// Whether `ty` counts as text for the name heuristics, i.e. whether it
/// may receive a free-text generator.
///
/// Only `text` and `shortid` qualify. The text-backed
/// `decimal`/`date`/`datetime` types are deliberately excluded because
/// they have their own dedicated gates.
const fn type_is_text(ty: Type) -> bool {
matches!(ty, Type::Text | Type::ShortId)
}
// Unit tests for the name heuristics. Each case pins which `Generator`
// a (table name, column name, column type) triple resolves to, plus the
// behaviour of the tokenizer and the enum-ish detector.
#[cfg(test)]
mod tests {
use super::*;
use crate::seed::ColumnSpec;
use pretty_assertions::assert_eq;
/// Shorthand for the assertions below: run `choose_generator` on a
/// plain (unconstrained) column with the given name and type.
fn choose(table: &str, name: &str, ty: Type) -> Generator {
choose_generator(table, &ColumnSpec::plain(name, ty))
}
#[test]
fn person_name_fields_map_to_name_generators() {
assert_eq!(choose("users", "first_name", Type::Text), Generator::FirstName);
assert_eq!(choose("users", "firstName", Type::Text), Generator::FirstName);
assert_eq!(choose("users", "last_name", Type::Text), Generator::LastName);
assert_eq!(choose("users", "surname", Type::Text), Generator::LastName);
}
#[test]
fn contact_fields_map_correctly() {
assert_eq!(choose("users", "email", Type::Text), Generator::Email);
assert_eq!(choose("users", "work_email", Type::Text), Generator::Email);
assert_eq!(choose("users", "username", Type::Text), Generator::Username);
assert_eq!(choose("users", "user_name", Type::Text), Generator::Username);
assert_eq!(choose("users", "phone", Type::Text), Generator::Phone);
assert_eq!(choose("accounts", "password", Type::Text), Generator::Password);
}
#[test]
fn address_fields_map_correctly() {
assert_eq!(choose("a", "city", Type::Text), Generator::City);
assert_eq!(choose("a", "country", Type::Text), Generator::Country);
assert_eq!(choose("a", "street", Type::Text), Generator::Street);
assert_eq!(choose("a", "zip", Type::Text), Generator::ZipCode);
assert_eq!(choose("a", "postcode", Type::Text), Generator::ZipCode);
assert_eq!(choose("a", "province", Type::Text), Generator::StateName);
}
#[test]
fn bare_name_uses_table_context() {
// D11 — the same column name resolves differently by table.
assert_eq!(choose("products", "name", Type::Text), Generator::ProductName);
assert_eq!(choose("items", "title", Type::Text), Generator::ProductName);
assert_eq!(choose("users", "name", Type::Text), Generator::FullName);
assert_eq!(choose("customers", "name", Type::Text), Generator::FullName);
assert_eq!(choose("vendors", "name", Type::Text), Generator::Company);
// Unknown table → person name default.
assert_eq!(choose("widgets", "name", Type::Text), Generator::FullName);
}
#[test]
fn name_false_positives_do_not_become_person_names() {
// These must NOT resolve to a person/product name.
assert_ne!(choose("files", "filename", Type::Text), Generator::FullName);
assert_ne!(choose("meta", "table_name", Type::Text), Generator::FullName);
// They fall through to a generic / non-person generator.
assert_eq!(choose("files", "filename", Type::Text), Generator::Generic);
}
#[test]
fn numeric_name_heuristics_are_type_gated() {
// `price` on a numeric column → currency; on text → falls through.
assert_eq!(choose("p", "price", Type::Int), Generator::CurrencyAmount);
assert_eq!(choose("p", "price", Type::Decimal), Generator::CurrencyAmount);
assert_eq!(choose("p", "price", Type::Text), Generator::Generic);
assert_eq!(choose("u", "age", Type::Int), Generator::Age);
assert_eq!(choose("o", "quantity", Type::Int), Generator::SmallInt);
}
#[test]
fn email_on_wrong_type_falls_through() {
// The type gate: an `email` int column does NOT get a string —
// it falls through to type-based generation.
assert_eq!(choose("u", "email", Type::Int), Generator::Generic);
}
#[test]
fn temporal_fields_are_bounded_and_type_gated() {
assert_eq!(choose("u", "dob", Type::Date), Generator::DateAdult);
assert_eq!(choose("o", "order_date", Type::Date), Generator::DateRecent);
assert_eq!(choose("o", "created_at", Type::DateTime), Generator::DateTimeRecent);
assert_eq!(choose("o", "timestamp", Type::DateTime), Generator::DateTimeRecent);
// Wrong type → not a date generator.
assert_eq!(choose("o", "order_date", Type::Int), Generator::Generic);
}
#[test]
fn boolean_fields_map_to_boolean() {
assert_eq!(choose("u", "is_active", Type::Bool), Generator::Boolean);
assert_eq!(choose("u", "has_paid", Type::Bool), Generator::Boolean);
assert_eq!(choose("u", "enabled", Type::Bool), Generator::Boolean);
}
#[test]
fn identifier_family_is_unique_sequential() {
// Identifier-like names map to the sequential marker on both text
// and int columns.
assert_eq!(choose("t", "code", Type::Text), Generator::IdentitySequential);
assert_eq!(choose("t", "sku", Type::Text), Generator::IdentitySequential);
assert_eq!(choose("t", "order_number", Type::Int), Generator::IdentitySequential);
assert_eq!(choose("t", "external_id", Type::Int), Generator::IdentitySequential);
}
#[test]
fn foreign_key_columns_defer_to_executor() {
// `plain` leaves `is_foreign_key` false, so set it explicitly.
let mut spec = ColumnSpec::plain("user_id", Type::Int);
spec.is_foreign_key = true;
assert_eq!(choose_generator("orders", &spec), Generator::ForeignKeySample);
}
#[test]
fn check_in_values_become_pick_from() {
// Parsed `IN`-CHECK values are carried verbatim into `PickFrom`.
let mut spec = ColumnSpec::plain("status", Type::Text);
spec.check_in_values = Some(vec!["active".into(), "closed".into()]);
assert_eq!(
choose_generator("orders", &spec),
Generator::PickFrom(vec!["active".into(), "closed".into()])
);
}
#[test]
fn enum_ish_names_are_detected_for_the_advisory() {
assert!(is_enum_ish("status"));
assert!(is_enum_ish("role"));
assert!(is_enum_ish("order_state"));
assert!(is_enum_ish("priority"));
assert!(!is_enum_ish("email"));
assert!(!is_enum_ish("first_name"));
}
#[test]
fn enum_ish_columns_fall_through_to_generic() {
// No special generator — generic text + the advisory flags them.
assert_eq!(choose("orders", "status", Type::Text), Generator::Generic);
assert_eq!(choose("users", "role", Type::Text), Generator::Generic);
}
#[test]
fn unmatched_columns_use_type_based_fallback() {
assert_eq!(choose("t", "some_freeform_field", Type::Text), Generator::Generic);
}
#[test]
fn tokenizer_splits_on_all_boundaries() {
assert_eq!(tokens("created_at"), vec!["created", "at"]);
assert_eq!(tokens("firstName"), vec!["first", "name"]);
assert_eq!(tokens("DOB"), vec!["dob"]);
assert_eq!(tokens("user-email"), vec!["user", "email"]);
// A run of capitals is not split: the camelCase boundary only fires
// after a lowercase letter or digit, so this stays one token.
assert_eq!(tokens("HTTPStatus"), vec!["httpstatus"]);
}
}
+213
View File
@@ -0,0 +1,213 @@
//! Pure fake-data generation library for the `seed` command (ADR-0048).
//!
//! This module is the **generation half** of `seed`: given a column's
//! shape (name, type, constraints), it chooses a *generator* and turns
//! a seeded RNG into plausible [`Value`]s. It is deliberately decoupled
//! from `db.rs` — it knows nothing about SQLite, the worker thread, or
//! persistence — so it stays pure and unit-testable, with exact-value
//! assertions made possible by the seedable RNG (ADR-0048 D4).
//!
//! The executor (`db.rs::do_seed`) adapts the real schema into
//! [`ColumnSpec`]s, calls [`choose_generator`] per column, and then
//! [`generate_value`] per row — except for the *stateful* markers
//! ([`Generator::IdentitySequential`], [`Generator::ForeignKeySample`])
//! which need database context (existing rows, the running sequence)
//! and so are resolved by the executor, not here.
//!
//! Layout:
//! - this file — the public types ([`ColumnSpec`], [`Generator`],
//! [`SeedRng`]) and the RNG constructor.
//! - [`heuristics`] — [`choose_generator`] + the name-aware catalogue
//! (D7), table-context disambiguation (D11), identifier (D10) and
//! enum-ish (D12) detection.
//! - [`generators`] — [`generate_value`]: per-generator value
//! production, the hand-rolled `product` generator (D9) and the
//! bounded date windows (D8).
mod check;
mod generators;
mod heuristics;
mod vocabulary;
pub use check::parse_in_check_values;
pub use generators::{generate_value, range_bounds_reason};
pub use heuristics::{choose_generator, is_enum_ish};
pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS};
use rand::rngs::StdRng;
use rand::{RngExt, SeedableRng};
use crate::dsl::types::Type;
/// The RNG that drives all seed generation.
///
/// A single seeded `StdRng` feeds both `fake`'s `fake_with_rng` and the
/// hand-rolled generators, so a `--seed` value fully determines the
/// output (ADR-0048 D4). `rand 0.10`'s `StdRng` satisfies the `RngExt`
/// bound that `fake` requires (`fake` re-exports `rand::RngExt`), so
/// the same handle works on both sides.
pub type SeedRng = StdRng;
/// Construct the RNG used for seed generation.
///
/// `Some(seed)` yields a reproducible stream. `None` draws one `u64`
/// from the thread RNG and seeds from that, so every run differs.
/// Both paths end in the same `seed_from_u64` call, which keeps
/// construction uniform and avoids `rand`'s churn-prone from-entropy
/// constructors.
#[must_use]
pub fn make_rng(seed: Option<u64>) -> SeedRng {
    let seed_value = match seed {
        Some(fixed) => fixed,
        // No explicit seed: pick one at random from the thread RNG.
        None => rand::rng().random::<u64>(),
    };
    StdRng::seed_from_u64(seed_value)
}
/// A column described in just enough detail to choose and run a
/// generator. Built by the executor from the real schema; kept
/// independent of `db.rs`'s `ReadColumn` so this library stays pure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ColumnSpec {
/// The column's name — the primary signal for generator choice.
pub name: String,
/// The user-facing playground type — gates every name heuristic
/// (a name match on the wrong type falls through to type-based
/// generation).
pub ty: Type,
/// `NOT NULL` — the executor uses this for the block guard (D1);
/// generation always produces a value, so it is informational here.
pub not_null: bool,
/// Part of the table's primary key.
pub primary_key: bool,
/// Carries a `UNIQUE` constraint (or is a single-column PK).
pub unique: bool,
/// A foreign-key column — generation is the executor's job
/// (sample an existing parent row, D14), so [`choose_generator`]
/// returns [`Generator::ForeignKeySample`].
pub is_foreign_key: bool,
/// Values parsed from a simple `col IN ('a', 'b', …)` CHECK
/// constraint (D17). When present, generation draws from them
/// (via [`Generator::PickFrom`]) so the common enum-as-CHECK
/// pattern "just works". `None` when the column has no such CHECK.
pub check_in_values: Option<Vec<String>>,
}
impl ColumnSpec {
    /// Test-only shorthand for a column with the given name and type
    /// and no constraints at all: nullable, not a key, not unique, not
    /// a foreign key, and without `IN`-CHECK values.
    #[cfg(test)]
    #[must_use]
    pub fn plain(name: &str, ty: Type) -> Self {
        let name = name.to_owned();
        Self {
            name,
            ty,
            check_in_values: None,
            is_foreign_key: false,
            unique: false,
            primary_key: false,
            not_null: false,
        }
    }
}
/// The chosen generation strategy for a column.
///
/// Most variants are *stateless* — [`generate_value`] turns them into a
/// [`Value`] from the RNG alone. Two are *stateful markers* that the
/// executor must intercept (they need database context):
/// [`Self::IdentitySequential`] (the running `MAX+offset` sequence,
/// D10) and [`Self::ForeignKeySample`] (draw from existing parent
/// rows, D14). For safety [`generate_value`] treats an un-intercepted
/// marker as [`Self::Generic`] rather than panicking.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Generator {
// — Person —
FirstName,
LastName,
/// A full person name (table-context default for `name`/`title`).
FullName,
Email,
Username,
Password,
Phone,
// — Address —
City,
Country,
StateName,
Street,
ZipCode,
// — Organisation / commerce —
Company,
JobTitle,
/// Hand-rolled `{adjective} {material} {noun}` (D9) — `fake` has no
/// commerce module.
ProductName,
// — Free text —
Sentence,
Paragraph,
Url,
HexColor,
// — Numeric —
/// A money-shaped amount (whole for `int`, two-decimal otherwise).
CurrencyAmount,
/// A plausible human age (18–80).
Age,
/// A small positive integer (quantities, counts).
SmallInt,
// — Temporal (bounded windows, D8) —
/// A date within the last few years.
DateRecent,
/// A date in an adult birth window (≈18–80 years ago) — for `dob`.
DateAdult,
/// A datetime within the last few years.
DateTimeRecent,
// — Boolean —
Boolean,
// — Stateful markers (executor-resolved) —
/// Unique sequential identifier (D10): the executor supplies
/// `MAX(col)+offset`. Chosen for identifier-named non-FK columns.
IdentitySequential,
/// FK column (D14): the executor samples an existing parent key.
ForeignKeySample,
// — List / range (the `set` override clause, D2) —
/// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
/// enum, or a `set <col> in (…)` / `= <value>` override (D2).
PickFrom(Vec<String>),
/// Uniform value in `[low, high]` — the `set <col> between low and
/// high` override (D2). Bounds are the raw literal strings; their
/// interpretation (int / real / date / datetime) follows the
/// destination column type at generation time. The executor
/// validates type-compatibility *before* generation (a bound that
/// does not parse for the column type is a friendly error), so
/// [`generate_value`] only ever sees parseable bounds; a defensive
/// parse failure falls back to type-based generation.
Range { low: String, high: String },
/// Type-based fallback (D8) when no name heuristic matches.
Generic,
}
// Unit tests for `make_rng`: seeded streams are reproducible, distinct
// seeds diverge, and the unseeded path constructs.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn same_seed_yields_identical_rng_streams() {
let mut a = make_rng(Some(42));
let mut b = make_rng(Some(42));
// Draw the same number of values from both handles and compare.
let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
assert_eq!(xs, ys, "a fixed seed must reproduce the stream");
}
#[test]
fn different_seeds_yield_different_streams() {
let mut a = make_rng(Some(1));
let mut b = make_rng(Some(2));
let xs: Vec<u64> = (0..8).map(|_| a.random::<u64>()).collect();
let ys: Vec<u64> = (0..8).map(|_| b.random::<u64>()).collect();
assert_ne!(xs, ys);
}
#[test]
fn unseeded_rng_constructs_without_panicking() {
// Entropy-seeded path: just exercise it. The value is random, so
// there is nothing to assert on beyond "it did not panic".
let mut rng = make_rng(None);
let _ = rng.random::<u64>();
}
}
+149
View File
@@ -0,0 +1,149 @@
//! The curated named-generator vocabulary (ADR-0048 D9).
//!
//! This is the **single source of truth** for "what generator names can
//! a learner write after `set <col> as …`", shared by three consumers
//! (mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amendment 6):
//!
//! - **Tab completion** — the `seed … set <col> as ⟨here⟩` slot offers
//! these names (`src/completion.rs`).
//! - **The typing-time validity indicator (ADR-0027)** — an unknown
//! name after `as` is flagged `[ERR]` while typing.
//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`]
//! via [`generator_for_name`]; an unknown name is a friendly error.
//!
//! The list is a deliberately *curated pedagogical set* — the generators
//! a learner reaches for, not every internal [`Generator`] variant
//! (stateful markers like `ForeignKeySample` are executor-only and have
//! no name). It is lowercase + sorted (pinned by a unit test).
use crate::seed::Generator;
/// The curated generator names, lowercase and **sorted** (invariant
/// pinned by a test — completion relies on stable order and a
/// case-insensitive prefix match against these canonical spellings).
///
/// Only canonical spellings are listed. The extra aliases that
/// [`generator_for_name`] accepts (e.g. `colour`, `full_name`) are
/// intentionally absent. A name added here needs a matching arm in
/// [`generator_for_name`], which a unit test also checks.
pub const KNOWN_GENERATORS: &[&str] = &[
"age",
"bool",
"city",
"color",
"company",
"country",
"date",
"datetime",
"email",
"first_name",
"job",
"last_name",
"name",
"paragraph",
"password",
"phone",
"price",
"product",
"sentence",
"state",
"street",
"url",
"username",
"zip",
];
/// Resolve a generator name, ignoring ASCII case, to its [`Generator`].
///
/// Returns `None` when the name is not recognised — the executor turns
/// that into a friendly "unknown generator" error naming the curated
/// set. Besides the canonical spellings, a few common variants are
/// accepted as aliases (`full_name`, `firstname`, `lastname`,
/// `surname`, `colour`) even though only the canonical spelling is
/// offered for completion.
#[must_use]
pub fn generator_for_name(name: &str) -> Option<Generator> {
    let canonical = name.to_ascii_lowercase();
    Some(match canonical.as_str() {
        // — Person —
        "name" | "full_name" => Generator::FullName,
        "first_name" | "firstname" => Generator::FirstName,
        "last_name" | "lastname" | "surname" => Generator::LastName,
        "email" => Generator::Email,
        "username" => Generator::Username,
        "password" => Generator::Password,
        "phone" => Generator::Phone,
        // — Address —
        "city" => Generator::City,
        "country" => Generator::Country,
        "state" => Generator::StateName,
        "street" => Generator::Street,
        "zip" => Generator::ZipCode,
        // — Organisation / commerce —
        "company" => Generator::Company,
        "job" => Generator::JobTitle,
        "product" => Generator::ProductName,
        // — Free text —
        "sentence" => Generator::Sentence,
        "paragraph" => Generator::Paragraph,
        "url" => Generator::Url,
        "color" | "colour" => Generator::HexColor,
        // — Numeric / temporal / boolean —
        "price" => Generator::CurrencyAmount,
        "age" => Generator::Age,
        "date" => Generator::DateRecent,
        "datetime" => Generator::DateTimeRecent,
        "bool" => Generator::Boolean,
        // Anything else is not part of the vocabulary.
        _ => return None,
    })
}
/// Whether at least one known generator name starts with `partial`,
/// compared without regard to ASCII case.
///
/// The empty string is a prefix of every name, so it always matches —
/// mirrors `is_known_function_prefix`. The validity indicator uses this
/// to avoid flagging a name that is still being typed.
#[must_use]
pub fn is_known_generator_prefix(partial: &str) -> bool {
    let needle = partial.to_ascii_lowercase();
    for known in KNOWN_GENERATORS {
        if known.starts_with(needle.as_str()) {
            return true;
        }
    }
    false
}
// Unit tests for the generator vocabulary: list invariants, the
// name → generator mapping, and the prefix check.
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn known_generators_is_sorted_and_lowercase() {
// Compare the list against a sorted copy of itself.
let mut sorted = KNOWN_GENERATORS.to_vec();
sorted.sort_unstable();
assert_eq!(KNOWN_GENERATORS, sorted.as_slice(), "must be sorted");
for g in KNOWN_GENERATORS {
assert_eq!(*g, g.to_ascii_lowercase(), "must be lowercase: {g}");
}
}
#[test]
fn every_listed_name_maps_to_a_generator() {
// Guards against a name being offered for completion without a
// matching arm in `generator_for_name`.
for g in KNOWN_GENERATORS {
assert!(
generator_for_name(g).is_some(),
"listed generator name `{g}` has no mapping"
);
}
}
#[test]
fn mapping_is_case_insensitive_and_has_aliases() {
assert_eq!(generator_for_name("EMAIL"), Some(Generator::Email));
assert_eq!(generator_for_name("FirstName"), Some(Generator::FirstName));
assert_eq!(generator_for_name("colour"), Some(Generator::HexColor));
assert_eq!(generator_for_name("full_name"), Some(Generator::FullName));
}
#[test]
fn unknown_name_has_no_mapping() {
// The empty string is a valid *prefix* but not a valid *name*.
assert_eq!(generator_for_name("bogus"), None);
assert_eq!(generator_for_name(""), None);
}
#[test]
fn prefix_check_matches_known_and_rejects_unknown() {
assert!(is_known_generator_prefix("ema"));
assert!(is_known_generator_prefix("EMA"));
assert!(is_known_generator_prefix("")); // empty is a prefix of all
assert!(!is_known_generator_prefix("zzz"));
}
}
+2
View File
@@ -163,6 +163,7 @@ impl Theme {
HighlightClass::String => self.tok_string, HighlightClass::String => self.tok_string,
HighlightClass::Punct => self.tok_punct, HighlightClass::Punct => self.tok_punct,
HighlightClass::Flag => self.tok_flag, HighlightClass::Flag => self.tok_flag,
HighlightClass::Function => self.tok_function,
HighlightClass::Error => self.tok_error, HighlightClass::Error => self.tok_error,
} }
} }
@@ -228,6 +229,7 @@ mod tests {
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string); assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct); assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag); assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function);
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error); assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
} }
+19 -27
View File
@@ -1438,12 +1438,19 @@ fn render_input_one_row(
let offset = input_scroll_offset(line_cols, cursor_col, tw, app.input_scroll_offset); let offset = input_scroll_offset(line_cols, cursor_col, tw, app.input_scroll_offset);
app.input_scroll_offset = offset; app.input_scroll_offset = offset;
let runs = crate::input_render::render_input_runs_in_mode( // Strip the `:` one-shot prefix for the SQL highlighting/overlays
// (ADR-0003); the `:` itself renders as plain text. Identity for
// non-one-shot input.
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
let runs = crate::input_render::render_input_runs_feedback(
&app.input, &app.input,
cursor, cursor,
theme, theme,
&app.schema_cache, &app.schema_cache,
mode_for_render, mode_for_render,
fb_view,
fb_cursor,
fb_off,
); );
let spans = runs_to_spans(&app.input, &runs); let spans = runs_to_spans(&app.input, &runs);
@@ -1507,12 +1514,19 @@ fn render_input_two_rows(
let offset = input_scroll_offset(line_cols, cursor_col, capacity, app.input_scroll_offset); let offset = input_scroll_offset(line_cols, cursor_col, capacity, app.input_scroll_offset);
app.input_scroll_offset = offset; app.input_scroll_offset = offset;
let runs = crate::input_render::render_input_runs_in_mode( // Strip the `:` one-shot prefix for the SQL highlighting/overlays
// (ADR-0003); the `:` itself renders as plain text. Identity for
// non-one-shot input.
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
let runs = crate::input_render::render_input_runs_feedback(
&app.input, &app.input,
cursor, cursor,
theme, theme,
&app.schema_cache, &app.schema_cache,
mode_for_render, mode_for_render,
fb_view,
fb_cursor,
fb_off,
); );
let cells = expand_runs_to_cells(&app.input, &runs); let cells = expand_runs_to_cells(&app.input, &runs);
let len = cells.len(); let len = cells.len();
@@ -1621,23 +1635,6 @@ fn runs_to_spans<'a>(
.collect() .collect()
} }
/// Strip a leading one-shot `:` sigil (and the whitespace after
/// it) from `input`, returning the advanced command slice and the
/// cursor remapped into it. Mirrors `App::submit`'s `:` handling
/// so the hint panel hints at the command, not the sigil
/// (ADR-0022 Amendment 1).
///
/// Intended for the `AdvancedOneShot` mode, where `input` starts (after
/// any leading whitespace) with `:`. If the sigil is nevertheless
/// missing — e.g. empty or whitespace-only input — `input` is returned
/// unchanged with the cursor clamped to its length, rather than
/// slicing past the end (a panic) or dropping a real character.
///
/// The returned cursor is a byte offset into the returned slice, never
/// greater than its length; a cursor that sat inside the stripped
/// prefix maps to `0`.
fn strip_one_shot_prefix(input: &str, cursor: usize) -> (&str, usize) {
    // Leading whitespace, then the sigil itself.
    let Some(after_sigil) = input.trim_start().strip_prefix(':') else {
        return (input, cursor.min(input.len()));
    };
    // Whitespace between the sigil and the command is part of the prefix.
    let effective = after_sigil.trim_start();
    // `effective` is a suffix of `input`, so the difference in byte
    // length is exactly the length of the stripped prefix.
    let prefix_len = input.len() - effective.len();
    let effective_cursor = cursor.saturating_sub(prefix_len).min(effective.len());
    (effective, effective_cursor)
}
/// Resolve the Hint panel body into its rendered lines, pre-wrapped /// Resolve the Hint panel body into its rendered lines, pre-wrapped
/// to the panel's inner width and clamped to `max_rows` with an /// to the panel's inner width and clamped to `max_rows` with an
/// ellipsis backstop (issue #12). `max_rows` is the geometry-fixed row /// ellipsis backstop (issue #12). `max_rows` is the geometry-fixed row
@@ -1679,14 +1676,9 @@ fn resolve_hint_lines(
// In one-shot advanced mode (`:` prefix in simple mode) the // In one-shot advanced mode (`:` prefix in simple mode) the
// raw input carries the `:` sigil, which is not part of the // raw input carries the `:` sigil, which is not part of the
// grammar. Strip it for the ambient computation so the hint // grammar. The shared feedback view strips it so the hint reflects
// reflects the advanced command — mirroring `App::submit`. // the advanced command — mirroring `App::submit` (ADR-0003).
let (hint_input, hint_cursor) = match app.effective_mode() { let (hint_input, hint_cursor, _off) = app.feedback_view();
EffectiveMode::AdvancedOneShot => {
strip_one_shot_prefix(&app.input, app.input_cursor)
}
_ => (app.input.as_str(), app.input_cursor),
};
let ambient = crate::input_render::ambient_hint_in_mode( let ambient = crate::input_render::ambient_hint_in_mode(
hint_input, hint_input,
hint_cursor, hint_cursor,
+1
View File
@@ -23,6 +23,7 @@ mod m2n;
mod parse_error_pedagogy; mod parse_error_pedagogy;
mod project_lifecycle; mod project_lifecycle;
mod replay_command; mod replay_command;
mod seed;
mod sql_alter_table; mod sql_alter_table;
mod sql_create_index; mod sql_create_index;
mod sql_create_table; mod sql_create_table;
+9
View File
@@ -109,6 +109,14 @@ fn near_miss_matrix_simple_mode() {
("delete", &["after `delete`, expected `from`", "delete from <Table>"]), ("delete", &["after `delete`, expected `from`", "delete from <Table>"]),
("delete from", &["after `delete from`, expected table name", "delete from <Table>"]), ("delete from", &["after `delete from`, expected table name", "delete from <Table>"]),
("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]), ("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]),
("seed", &["after `seed`, expected table name", "seed <Table> [count]"]),
// Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill.
("seed T set", &["after `seed T set`, expected column name", "seed <Table>.<col>"]),
(
"seed T set role",
&["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed <Table>.<col>"],
),
("seed T.", &["after `seed T.`, expected column name", "seed <Table>.<col>"]),
("replay", &["after `replay`, expected string literal or path", "replay <path>"]), ("replay", &["after `replay`, expected string literal or path", "replay <path>"]),
("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]), ("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]),
// advanced-only entry word typed in simple mode → "this is SQL" rail // advanced-only entry word typed in simple mode → "this is SQL" rail
@@ -539,3 +547,4 @@ fn caret_aligns_under_offending_token() {
+1277
View File
File diff suppressed because it is too large Load Diff
+66
View File
@@ -237,6 +237,7 @@ fn command_kind_label(cmd: &rdbms_playground::dsl::Command) -> String {
ShowTable { .. } => "ShowTable".into(), ShowTable { .. } => "ShowTable".into(),
ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()), ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()),
Insert { .. } => "Insert".into(), Insert { .. } => "Insert".into(),
Seed { .. } => "Seed".into(),
Update { .. } => "Update".into(), Update { .. } => "Update".into(),
Delete { .. } => "Delete".into(), Delete { .. } => "Delete".into(),
ShowData { .. } => "ShowData".into(), ShowData { .. } => "ShowData".into(),
@@ -440,3 +441,68 @@ fn smoke_assess_parse_label_round_trips() {
assert_eq!(a.parse_result.as_deref(), Ok("Insert")); assert_eq!(a.parse_result.as_deref(), Ok("Insert"));
assert!(matches!(a.state, InputState::Valid)); assert!(matches!(a.state, InputState::Valid));
} }
/// `seed` (ADR-0048) gets the standard ambient surface for free from
/// grammar registration: table-name completion, the validity indicator
/// on the table slot, and the `--seed` flag offered as a candidate.
/// The Phase 2 surfaces are covered too: the `set` clause keyword,
/// column completion after `set`, generator names after `as`, and
/// column completion for the `seed <Table>.` column-fill form.
#[test]
fn seed_completion_and_validity() {
let schema = schema_serial_pk(); // Customers(id serial, Name, Email)
// Completion: `seed ` offers existing table names.
let cands = completion_candidate_texts(&assess_at_end("seed ", &schema));
assert!(
cands.iter().any(|c| c == "Customers"),
"`seed ` should complete table names, got {cands:?}"
);
// Validity (ADR-0027): a known table seeds clean.
let ok = assess_at_end("seed Customers 5", &schema);
assert!(matches!(ok.state, InputState::Valid), "known table: {:?}", ok.state);
// seed's unknown-table behaviour must match its closest sibling
// `show data` (same table-only slot), whatever that is. Only the
// state's variant is compared, not its payload.
let seed_ghost = assess_at_end("seed Ghost 5", &schema).state;
let show_ghost = assess_at_end("show data Ghost", &schema).state;
assert_eq!(
std::mem::discriminant(&seed_ghost),
std::mem::discriminant(&show_ghost),
"seed should treat an unknown table like `show data`: seed={seed_ghost:?}, show={show_ghost:?}"
);
// The `--seed` reproducibility flag is offered after the count.
let flag_cands = completion_candidate_texts(&assess_at_end("seed Customers 5 ", &schema));
assert!(
flag_cands.iter().any(|c| c.contains("seed")),
"`--seed` should be offered as a candidate, got {flag_cands:?}"
);
// Phase 2 (ADR-0048 D2): the `set` clause is offered after the count.
assert!(
flag_cands.iter().any(|c| c == "set"),
"`set` should be offered after the count, got {flag_cands:?}"
);
// `set ` offers the active table's columns (narrowed to Customers).
let set_cands = completion_candidate_texts(&assess_at_end("seed Customers set ", &schema));
assert!(
set_cands.iter().any(|c| c == "Name") && set_cands.iter().any(|c| c == "Email"),
"`set ` should complete this table's columns, got {set_cands:?}"
);
// `set <col> as ` offers the curated generator vocabulary (D9).
let gen_cands =
completion_candidate_texts(&assess_at_end("seed Customers set Email as ", &schema));
assert!(
gen_cands.iter().any(|c| c == "email") && gen_cands.iter().any(|c| c == "product"),
"`as ` should complete generator names, got {gen_cands:?}"
);
// Column-fill (D1 form 2): `seed Customers.` offers the columns.
let fill_cands = completion_candidate_texts(&assess_at_end("seed Customers.", &schema));
assert!(
fill_cands.iter().any(|c| c == "Name"),
"`seed Customers.` should complete column names, got {fill_cands:?}"
);
}
@@ -24,10 +24,10 @@ Assessment {
completion: Some( completion: Some(
Completion { Completion {
replaced_range: ( replaced_range: (
24, 22,
27, 27,
), ),
partial_prefix: "all", partial_prefix: "--all",
candidates: [ candidates: [
Candidate { Candidate {
text: "--all-rows", text: "--all-rows",
@@ -24,10 +24,10 @@ Assessment {
completion: Some( completion: Some(
Completion { Completion {
replaced_range: ( replaced_range: (
33, 31,
36, 36,
), ),
partial_prefix: "all", partial_prefix: "--all",
candidates: [ candidates: [
Candidate { Candidate {
text: "--all-rows", text: "--all-rows",