Merge branch 'main' into website
This commit is contained in:
Generated
+18
@@ -419,6 +419,12 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deunicode"
|
||||
version = "1.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
|
||||
|
||||
[[package]]
|
||||
name = "diff"
|
||||
version = "0.1.13"
|
||||
@@ -518,6 +524,17 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake"
|
||||
version = "5.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ea6be833b323a56361118a747470a45a1bcd5c52a2ec9b1e40c83dafe687e453"
|
||||
dependencies = [
|
||||
"deunicode",
|
||||
"either",
|
||||
"rand 0.10.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fallible-iterator"
|
||||
version = "0.3.0"
|
||||
@@ -1527,6 +1544,7 @@ dependencies = [
|
||||
"crossterm",
|
||||
"csv",
|
||||
"directories",
|
||||
"fake",
|
||||
"futures-util",
|
||||
"gethostname",
|
||||
"insta",
|
||||
|
||||
@@ -24,6 +24,14 @@ chrono = { version = "0.4.44", default-features = false, features = ["clock"] }
|
||||
crossterm = { version = "0.29.0", features = ["event-stream"] }
|
||||
csv = "1.4.0"
|
||||
directories = "6.0.0"
|
||||
# Realistic fake-data generators for the `seed` command (ADR-0048):
|
||||
# names, emails, addresses, companies, lorem, etc. Default features
|
||||
# only — the basic fakers need no flags; date/datetime values are
|
||||
# generated in-house (rand + the existing `chrono`) for the bounded
|
||||
# windows ADR-0048 D8 requires, so `fake`'s `chrono` feature is
|
||||
# deliberately omitted. No commerce/product module exists, so the
|
||||
# `product` generator is hand-rolled (D9).
|
||||
fake = "5"
|
||||
futures-util = "0.3.32"
|
||||
gethostname = "1.1.0"
|
||||
rand = "0.10.1"
|
||||
|
||||
@@ -0,0 +1,677 @@
|
||||
# ADR-0048: `seed` — fake-data generation command (SD1, opens SD2)
|
||||
|
||||
## Status
|
||||
|
||||
**Accepted (2026-06-11); Phase 1 + Phase 2 implemented (2026-06-11).** Design
|
||||
settled with the user across an extended fork dialogue (every decision
|
||||
below was escalated and user-chosen), then hardened by a pre-build
|
||||
`/runda` Devil's-Advocate pass that found six blockers — undo
|
||||
integration (D15), replay semantics (D16), `set` value quoting (D2),
|
||||
CHECK-constraint handling (D17), a phase-ordering bug in the advisory
|
||||
(D13), and auto-show flooding (D18) — plus refinements (state-relative
|
||||
reproducibility, compound-FK tuple sampling, column-fill constraint
|
||||
rules, the `fake` dependency scan), all folded in.
|
||||
|
||||
**Phase 1 shipped** test-first across commits `202e25a` (generation
|
||||
library + `fake` dependency) → `f1e9484` (command skeleton) →
|
||||
`73493fa` (FK sampling) → `9c13501` (uniqueness / junction / IN-CHECK)
|
||||
→ `0b3ab3c` (`SeedResult` / preview / advisory / count cap) →
|
||||
`e6ff63d` (single-transaction O(N) path) → `fbd219b` (`--seed` flag,
|
||||
ambient wiring, and a whole-implementation `/runda` pass). The
|
||||
post-implementation `/runda` found eight gaps — FK-sampling
|
||||
determinism (now `ORDER BY`), shortid reproducibility (now from the
|
||||
seeded RNG, so **D4 holds with no exceptions**), and six untested
|
||||
ADR decisions (D5/D15/D16/D17 + atomicity + zero-count), all closed.
|
||||
**2358 tests pass / 0 fail / 0 skip; clippy clean.**
|
||||
|
||||
**Implemented in Phase 1:** the whole-row `seed <table> [count]
|
||||
[--seed <n>]` form and every D1–D18 decision *except* the two
|
||||
Phase-2 surfaces.
|
||||
|
||||
**Phase 2 implemented (2026-06-11):** both remaining surfaces — the
|
||||
**`set` override clause** (D2: fixed value / pick-list / named
|
||||
generator / range, quoted literals, type-aware) and the
|
||||
**`<table>.<column>` column-fill** form (D1 form 2: an UPDATE over
|
||||
existing rows, refusing PK/autogen targets, empty-table no-op, one undo
|
||||
step). The named-generator vocabulary (D9) lives in `src/seed`
|
||||
(`KNOWN_GENERATORS` / `generator_for_name`); a new range `Generator`
|
||||
(`src/seed/generators.rs`) backs `between`; the override clause is
|
||||
folded from the flat matched path (`build_seed_overrides`,
|
||||
`src/dsl/grammar/data.rs`) and applied to the per-column plan
|
||||
(`apply_seed_overrides`, `src/db.rs`), with column-fill in
|
||||
`do_seed_column_fill`. Full ambient wiring: completion (the generator
|
||||
vocabulary after `as`, the `set`/`.col` column slots), highlighting
|
||||
(`HighlightClass::Function` → `tok_function`, the generator slot), the
|
||||
validity indicator (`IdentSource::Generators` — an unknown name flagged
|
||||
`[ERR]`), help, and parse-error pedagogy rows. The D13 advisory now
|
||||
carries its Phase-2/3 wording (points at `set` and the column-fill
|
||||
repair). A post-implementation `/runda` pass then added one
|
||||
user-chosen refinement: a **bounded override on a UNIQUE column** (a
|
||||
fixed value / too-short pick-list) is now a **friendly error** rather
|
||||
than a silent uniqueness cap (see D2). **2400 tests pass / 0 fail / 0
|
||||
skip; clippy clean.** Two
|
||||
implementation refinements vs. this ADR's wording, both still meeting the
|
||||
user-facing contract: dates in the range form are **quoted** (the D2
|
||||
amendment, below — no date-literal token exists); and the `set` value
|
||||
slots reuse `update`'s typed `current_column_value` (no spurious
|
||||
column-ref match) rather than the raw expression operand.
|
||||
|
||||
Further SD2 increments (custom user generators, NULL injection,
|
||||
multi-locale, recursive parent auto-seed) remain out of scope (see Out
|
||||
of scope).
|
||||
|
||||
Closes `requirements.md` **SD1** and delivers the core of **SD2**
|
||||
(per-type generators, determinism, the `fake`-backed catalogue). It
|
||||
also closes one of the two remaining gaps in **A1** ("all canonical
|
||||
app-level commands") — `seed`; the other, `hint` (**H2**), is
|
||||
separate.
|
||||
|
||||
Builds on: ADR-0014 (data operations, the `Value`/`Bound` value model,
|
||||
the auto-show pattern, FK-error enrichment), ADR-0005/0011 (the type
|
||||
vocabulary and `Type::fk_target_type()`), ADR-0012/0013 (the column /
|
||||
relationship metadata tables, the rebuild-table primitive — *read* by
|
||||
seed for schema introspection), ADR-0024 (the unified grammar tree /
|
||||
`CommandNode` registration that gives completion, hints, help-id,
|
||||
usage-id for free), ADR-0022 (ambient typing assistance — the
|
||||
`KNOWN_SQL_FUNCTIONS` curated-vocabulary pattern that the
|
||||
generator-name list mirrors), ADR-0026 (the `in (...)` / `between ...
|
||||
and ...` expression grammar the override clause reuses), ADR-0027 (the
|
||||
validity-indicator diagnostics model), and ADR-0038 (the
|
||||
`OutputStyleClass::Hint` styled output used for the post-seed
|
||||
advisory). Honours ADR-0003 (both modes, no sigil), ADR-0009 (DSL
|
||||
conventions — keyword grammar, `--` flags for opt-in choices, one
|
||||
sigil only), ADR-0002 (no engine name in user-facing strings), and
|
||||
ADR-0015 (per-command write-through persistence).
|
||||
|
||||
## Context
|
||||
|
||||
`seed <table> [count]` is the last unbuilt **data-authoring** command
|
||||
in the requirements. The pedagogical value is high: a learner who has
|
||||
just modelled a schema wants rows to query against *now*, without
|
||||
hand-typing dozens of `insert`s. A teacher wants a one-liner that
|
||||
fills a demo database with believable data. SD1 commits to "plausible
|
||||
fake data; junction tables seeded with valid foreign-key references
|
||||
drawn from existing parent rows." SD2 deferred the *how* — "per-type
|
||||
generators, locale, determinism, override hooks" — explicitly pending
|
||||
this ADR.
|
||||
|
||||
The design conversation widened the scope deliberately, with the user
|
||||
confirming each step:
|
||||
|
||||
- **Realism matters more than minimalism** for a teaching tool. Random
|
||||
`text_a3f9` values teach nothing; `Alice Martinez` /
|
||||
`alice.m@example.com` make queries feel real. → adopt a faker
|
||||
library and make generation **name-aware**.
|
||||
- **The column *name* is the strongest signal** for what a value should
|
||||
look like, but it is **ambiguous** without the **table** for the
|
||||
`name`/`title` family (`products.name` ≠ `users.name`).
|
||||
- **Heuristics will miss**, so a **manual override** surface is
|
||||
required, not optional — this is SD2's "override hooks", brought
|
||||
forward.
|
||||
- **Identifiers and enums** are special: `id`-ish columns want
|
||||
uniqueness; `status`-ish columns have no sensible generic value and
|
||||
should be *flagged*, not guessed.
|
||||
|
||||
The novel work is the **generation layer**. Everything downstream —
|
||||
type validation, autogen autofill (`serial`/`shortid`), FK
|
||||
enforcement, per-command persistence, the auto-show outcome — is
|
||||
reused from the existing insert/update machinery as **shared helper
|
||||
functions**, per the X5 architecture preference (unique commands, with
|
||||
mechanics shared as library functions — *not* by emitting
|
||||
`Command::Insert` to borrow `do_insert`).
|
||||
|
||||
## Decision
|
||||
|
||||
Add a dedicated **`seed`** command (its own AST variant and its own
|
||||
`do_seed` worker executor) available in **both modes**, with the
|
||||
surface and behaviour below. Generation is realistic, name- and
|
||||
table-aware, type-gated, with a manual override clause and a
|
||||
reproducibility flag.
|
||||
|
||||
**Command classification (important, set by the replay decision
|
||||
D16).** Although `requirements.md` A1 lists `seed` among the
|
||||
"app-level commands" (meaning: part of the canonical command surface,
|
||||
no sigil, both modes), `seed` is architecturally a **data-authoring
|
||||
command** — a sibling of `insert`/`update`/`delete`, **not** an
|
||||
app-lifecycle `AppCommand`. It is therefore **not** added to
|
||||
`is_app_lifecycle_entry_word` / completion's
|
||||
`empty_input_offers_app_command_entry_keywords` (those mirror the
|
||||
`AppCommand` set and must match — `seed` belongs in neither): `replay`
|
||||
re-runs it as a data write (D16).
|
||||
|
||||
### D1 — Command surface (fork, user-chosen: "whole-row + column-fill")
|
||||
|
||||
Two forms:
|
||||
|
||||
1. **Whole-row generation** — `seed <table> [count]`
|
||||
Generates `count` new rows (an INSERT path). `count` **defaults to
|
||||
20** (D6) when omitted. Every user-fillable column is filled per the
|
||||
generation rules (D7–D12); `serial`/`shortid` autogen columns are
|
||||
left to the existing autofill helpers.
|
||||
|
||||
2. **Column-fill on existing rows** — `seed <table>.<column>`
|
||||
Fills `<column>` across the table's **existing** rows (an UPDATE
|
||||
path) — the natural follow-up to `add column`. Combined with the
|
||||
`set` clause (D2) this is also the precise repair for a single
|
||||
mis-guessed column: `seed users.work_addr set work_addr as email`.
|
||||
Column-fill **refuses** PK columns and autogen (`serial`/`shortid`)
|
||||
columns (a friendly error — you don't "fill" an identity column),
|
||||
and **respects** the same UNIQUE / FK / required rules as whole-row
|
||||
generation (a UNIQUE target gets collision-free values; an FK
|
||||
target samples from the parent, D14). On an **empty** table it is a
|
||||
friendly no-op ("no rows to fill").
|
||||
|
||||
**Zero / over-cap counts.** `seed <table> 0` is a friendly no-op;
|
||||
`count` over the maximum (D6) is a friendly error.
|
||||
|
||||
The column-restricted-*insert* form (`seed t (a, b)` — new rows, only
|
||||
some columns filled) was considered and **rejected** as marginal and
|
||||
constraint-fragile (see Alternatives).
|
||||
|
||||
**Required-column block guard (user requirement).** If seed cannot
|
||||
produce a value for a `NOT NULL` column — the only real case is a
|
||||
`NOT NULL blob` column, which has no DSL value path — it **refuses the
|
||||
whole operation with a friendly error** naming the column, rather than
|
||||
attempting a NULL insert that would violate the constraint. The check
|
||||
is a pre-flight over the resolved per-column plan, before any write.
|
||||
|
||||
### D2 — Manual override: the `set` clause (fork, user-chosen: "value + list + generator + range")
|
||||
|
||||
An optional, comma-separated `set` clause overrides generation per
|
||||
column. Four forms, all reusing existing grammar vocabulary so there
|
||||
is nothing new to learn:
|
||||
|
||||
| Form | Example | Meaning |
|
||||
|---|---|---|
|
||||
| Fixed value | `set status = 'pending'` | every row gets the constant |
|
||||
| Pick-from-list | `set role in ('admin', 'editor', 'viewer')` | uniform random choice from the list |
|
||||
| Explicit generator | `set work_addr as email` | force a named generator (D9) |
|
||||
| Range | `set price between 10 and 100` | uniform in range; **also dates** — `set signup between '2023-01-01' and '2024-12-31'` |
|
||||
|
||||
Multiple clauses combine: `seed users 20 set role in ('admin',
|
||||
'user'), status = 'active', signup between '2023-01-01' and
|
||||
'2024-12-31'`.
|
||||
|
||||
**Override × UNIQUE capacity (post-implementation `/runda`, user-chosen:
|
||||
"friendly error").** A *bounded* override — a fixed value, or a
|
||||
pick-list — on a **single-column-UNIQUE** target (a `UNIQUE` column or a
|
||||
single-column PK) that offers fewer **distinct** values than the row
|
||||
count cannot fill the run; rather than let the D10 uniqueness machinery
|
||||
silently cap it (e.g. `seed users 100 set email = 'x'` → 1 row), seed
|
||||
**refuses up front** with a friendly error pointing at the fixes (use a
|
||||
generator, or a longer list). Generators and ranges are treated as
|
||||
effectively unbounded sources — if one genuinely exhausts, the D14
|
||||
distinct-combination cap still applies. Compound uniqueness is exempt
|
||||
(the *other* key columns can still vary).
|
||||
|
||||
**Quoting (fork, user-chosen: "quoted, grammar-consistent").** Text
|
||||
values and list items are **quoted string literals** (`'admin'`),
|
||||
exactly as everywhere else in the DSL — only **numbers** stay
|
||||
unquoted. **Amendment (2026-06-11, Phase 2 build):** the original
|
||||
wording said "numbers *and dates* stay unquoted", but this DSL has
|
||||
**no date-literal token** — `Value` is `Number`/`Text` only, and a
|
||||
date is a **quoted string** validated by `bind_date` (`'2023-01-01'`)
|
||||
everywhere else (insert / update / `where`). An unquoted `2023-01-01`
|
||||
lexes as `2023`,`-`,`01`,… and cannot parse. So **dates in the range
|
||||
form are quoted** (`between '2023-01-01' and '2024-12-31'`) — which is
|
||||
in fact *more* faithful to this decision's own "quoted,
|
||||
grammar-consistent" principle. Numbers remain unquoted (`NumberLit`).
|
||||
This reuses the ADR-0026 expression grammar **unchanged**:
|
||||
the DA pass confirmed that the `in (...)` form's operands are typed
|
||||
value slots, so a *bare* `admin` would parse as a **column reference**
|
||||
(→ "unknown column"), not a string. Quoting is therefore not a style
|
||||
preference but a correctness requirement of grammar reuse. The range
|
||||
form is **type-aware**: numeric bounds for numeric columns, date
|
||||
bounds for date/datetime columns; a type-incompatible bound is a
|
||||
friendly error. `=`, `in (...)`, and `between ... and ...` are the
|
||||
ADR-0026 expression operators; `set` is the ADR-0014 UPDATE keyword;
|
||||
`as` is borrowed from the SQL alias slot. The `as <generator>` operand
|
||||
is a bare name from the curated generator vocabulary (D9), not a
|
||||
value. The override takes precedence over every heuristic.
|
||||
|
||||
### D3 — Generation library: `fake` crate + hand-rolled gaps (fork, user-chosen: "name-aware + realistic")
|
||||
|
||||
Add the **`fake`** crate (v5.x at time of writing; English locale for
|
||||
v1 per X2) for realistic values: names, emails, usernames, addresses,
|
||||
companies, phone numbers, lorem text, dates. Generation is driven by a
|
||||
per-column **generator** chosen by the heuristics (D7) or the override
|
||||
(D2), falling back to **type-based** generation (D8).
|
||||
|
||||
**Implementation-time verifications (resolved 2026-06-11 when the
|
||||
dependency was added):**
|
||||
|
||||
- **`rand` de-duplication — clean.** `fake` 5.1.0 depends on
|
||||
`rand = "0.10"`, the **same semver-compatible 0.10.x line** as the project's `rand 0.10.1`,
|
||||
so `cargo tree -e normal` resolves a **single** `rand 0.10.1` (no
|
||||
runtime duplication; the `rand 0.8.6` visible to `cargo tree -i
|
||||
rand` is only `fake`'s own dev-dependency, never compiled for us).
|
||||
Consequence for D4: one seeded `rand 0.10` `StdRng` can drive
|
||||
**both** `fake`'s `fake_with_rng` and the hand-rolled generators —
|
||||
determinism is single-RNG, single-version, and shares `shortid.rs`'s
|
||||
`rand` version.
|
||||
- **`fake` module inventory / features — confirmed.** Default features
|
||||
(`["either"]`) cover the core string fakers used here
|
||||
(Name/Internet/Address/Company/Lorem/PhoneNumber); `fake`'s `chrono`
|
||||
feature is **deliberately omitted** (dates generated in-house for
|
||||
D8's bounded windows). No commerce/product module exists → `product`
|
||||
is hand-rolled (D9). (The exact faker call sites are pinned when the
|
||||
generation library is built.)
|
||||
- **Security (new-dependency posture) — clean.** The `fake` tree (296
|
||||
packages total) scanned clean by **all three** mandated scanners:
|
||||
`osv-scanner` (no issues), `grype` (no vulnerabilities), `trivy fs
|
||||
--scanners vuln` (0). No findings to document or accept.
|
||||
|
||||
### D4 — Determinism: `--seed <n>` (fork, user-chosen: "optional flag")
|
||||
|
||||
Generation is **random by default**. The optional `--seed <n>` flag
|
||||
makes a run **reproducible**: **same database state + same `--seed` →
|
||||
identical data**. The "database state" qualifier matters (DA
|
||||
refinement) — FK sampling (D14), identifier sequencing (D10), and
|
||||
UNIQUE collision-avoidance all *read existing rows*, so reproducibility
|
||||
is relative to the data already present, not absolute. Value: teachers
|
||||
hand out one dataset; demos are stable; and the feature's own tests
|
||||
can assert **exact** output (against a known starting state).
|
||||
Implemented with a seedable RNG threaded through every generator (no
|
||||
`thread_rng` on the seeded path). `--` flag per ADR-0009 (opt-in
|
||||
choice). Naming note: the flag `--seed` and the command `seed` share a
|
||||
word but never collide grammatically (`seed users 20 --seed 42` parses
|
||||
unambiguously). This flag is also the determinism lever for **replay**
|
||||
(D16): a recorded `seed … --seed N` line reproduces on replay; a bare
|
||||
`seed …` line regenerates fresh data.
|
||||
|
||||
### D5 — Both modes (A1)
|
||||
|
||||
`seed` is a canonical app-level command, available in **simple and
|
||||
advanced** mode, no sigil — like `save`/`load`/`export`/`replay`.
|
||||
|
||||
### D6 — Default count: 20; bounded maximum
|
||||
|
||||
Omitted `count` → **20** rows: enough to make `where`, `group by`,
|
||||
`order by`, and `limit` meaningful without flooding the output pane.
|
||||
A **maximum** is enforced (proposed 10 000) to prevent a typo
|
||||
(`seed t 1000000`) from hanging the app or bloating the project; over
|
||||
the cap → friendly error stating the limit.
|
||||
|
||||
### D7 — Name-aware heuristics, type-gated (the catalogue)
|
||||
|
||||
A column's **name** selects a generator, but a name rule only fires
|
||||
when the column's **type** is compatible (a column named `email` typed
|
||||
`int` does **not** get a string — it falls through to type-based int).
|
||||
Matching is **case-insensitive**, **token-based** (split on `_`,
|
||||
camelCase, kebab), **most-specific-first**, with documented
|
||||
false-positive guards. The catalogue (representative; full table lives
|
||||
with the implementation):
|
||||
|
||||
| Column name (tokens) | Generator | Type gate |
|
||||
|---|---|---|
|
||||
| `first_name`/`fname` · `last_name`/`surname`/`lname` | first / last name | text |
|
||||
| `name`/`full_name` · `title` | **table-context** name (D11) | text |
|
||||
| `email`/`*_email` | email | text |
|
||||
| `username`/`login`/`handle` | username | text |
|
||||
| `password`/`pwd` | password | text |
|
||||
| `phone`/`mobile`/`cell`/`tel` | phone number | text |
|
||||
| `city`/`town` · `country` · `state`/`province` | address parts | text |
|
||||
| `street`/`address`/`addr` · `zip`/`postcode`/`postal` | address parts | text |
|
||||
| `company`/`employer`/`org` · `job`/`position`/`profession` | company / job | text |
|
||||
| `description`/`bio`/`notes`/`summary`/`comment` | sentence / paragraph | text |
|
||||
| `url`/`website`/`homepage` · `color`/`colour` | URL / hex colour | text |
|
||||
| `price`/`amount`/`cost`/`salary`/`balance`/`total` | currency-range number | numeric |
|
||||
| `age` · `quantity`/`qty`/`stock`/`count` | 18–80 · small int | numeric |
|
||||
| `date`/`*_date` | date, recent ~3 yr window | date |
|
||||
| `dob`/`birthday` | date, adult window (18–80 yr ago) | date |
|
||||
| `timestamp`/`datetime` · `created_at`/`updated_at`/`*_at` | datetime, recent window (`updated_at` ≥ `created_at`) | datetime |
|
||||
| `is_*`/`has_*`/`active`/`enabled` | boolean | bool |
|
||||
| **identifier family** (D10) | unique sequential | int/text |
|
||||
| **enum-ish family** (D12) | generic text + flag | (text) |
|
||||
|
||||
**False-positive guards (documented):** `username`/`filename`/
|
||||
`table_name`/`*_name` are handled before the bare `name` rule so they do
|
||||
**not** resolve to person-name; the bare `name`/`title` rule requires a
|
||||
standalone token or a recognised `*_name` suffix.
|
||||
|
||||
### D8 — Type-based fallback
|
||||
|
||||
When no name rule matches (or a matching rule's type gate rejects the column's type),
|
||||
generate by **type**: `text`→realistic words/short phrase, `int`→
|
||||
bounded random, `real`→random double, `decimal`→formatted number,
|
||||
`bool`→random, `date`/`datetime`→**bounded recent** value (never "any
|
||||
point in all of history" — per the user's date concern), `serial`/
|
||||
`shortid`→omitted (autogen helpers fill them), `blob`→unsupported
|
||||
(nullable→NULL; `NOT NULL`→D1 block guard).
|
||||
|
||||
### D9 — Named generators + the `product` generator
|
||||
|
||||
The generators addressable via `set ... as <generator>` (D2) and
|
||||
chosen by D7 form a **curated, named vocabulary** — `name`,
|
||||
`first_name`, `last_name`, `email`, `username`, `phone`, `city`,
|
||||
`country`, `street`, `zip`, `company`, `job`, `sentence`, `paragraph`,
|
||||
`url`, `color`, `price`, `age`, `date`, `datetime`, `bool`, `product`,
|
||||
… — the single source of truth shared by the executor, the completion
|
||||
source, and the highlighter (mirroring `KNOWN_SQL_FUNCTIONS`,
|
||||
ADR-0022 Amendment 6).
|
||||
|
||||
**`product`** is **hand-rolled** (the `fake` crate has no
|
||||
commerce/product module — D3): `{adjective} {material} {noun}` from
|
||||
three small baked-in word lists (~20 each) → "Sleek Bamboo Keyboard",
|
||||
"Vintage Leather Backpack". Seedable through the D4 RNG. Always
|
||||
addressable as `set <col> as product`, and auto-selected by D11 for
|
||||
the `name`/`title` family in product-ish tables.
|
||||
|
||||
### D10 — Identifier family → unique by name (fork, user-chosen: "unique sequential")
|
||||
|
||||
A column in the identifier family — `id`, `*_id` **that is not an FK**,
|
||||
`code`, `sku`, `ref`/`reference`, `number`/`no`, `barcode` — that is
|
||||
**not** a serial/shortid autogen column and **not** the PK is treated
|
||||
as an identifier and gets **unique** values: **int → sequential**
|
||||
(`MAX(col)+1` ascending, reads like real ids, never collides);
|
||||
**text → unique short code** (generate-with-retry). Precedence:
|
||||
**FK detection wins** over this rule (an FK `user_id` *should* have
|
||||
duplicates — many children per parent), so `*_id` only triggers
|
||||
uniqueness when the column is not a foreign key.
|
||||
|
||||
**Constraint-driven uniqueness is independent and mandatory:** any
|
||||
column with a `UNIQUE` constraint (or a user-fillable single-column
|
||||
PK) gets guaranteed-unique generation regardless of name — a
|
||||
correctness requirement, not a heuristic. Generation for such columns
|
||||
uses retry/sequence to guarantee no collision within the batch and
|
||||
against existing rows.
|
||||
|
||||
### D11 — Table-context disambiguation for `name`/`title` (fork, user-chosen: "table-context-aware")
|
||||
|
||||
For the `name`/`title` family **only**, the heuristic also reads the
|
||||
**table** name token:
|
||||
|
||||
- `product`/`item`/`goods`/`merchandise`/`catalog`/`inventory` →
|
||||
`product` generator (D9)
|
||||
- `company`/`companies`/`vendor`/`supplier`/`manufacturer`/`brand` →
|
||||
company name
|
||||
- `user`/`customer`/`person`/`people`/`employee`/`member`/`contact`/
|
||||
`author`/`student` → person name
|
||||
- unrecognised table → generic word
|
||||
|
||||
This resolves the real ambiguity (`products.name` → "Sleek Bamboo
|
||||
Keyboard"; `users.name` → "Alice Martinez"; `vendors.name` → "Globex
|
||||
Corp"). It is a deliberately **scoped** use of table context — the only
|
||||
place the table name influences generation.
|
||||
|
||||
### D12 — Enum-ish names → generic + post-seed advisory (fork, user-chosen: "flag enum-ish only")
|
||||
|
||||
Enum-ish names — `role`, `status`, `type`, `state`, `kind`,
|
||||
`category`, `level`, `tier`, `stage`, `priority`, `gender` — have **no
|
||||
sensible generic generator**, so they are **not guessed**: they fall
|
||||
through to generic text (they must still be filled — a `NOT NULL`
|
||||
status cannot be left empty). Seed then emits a **post-seed advisory**
|
||||
(D13) naming them and pointing at the `set ... in (...)` override.
|
||||
|
||||
### D13 — Reporting: post-seed advisory (fork, user-chosen: "flag enum-ish only")
|
||||
|
||||
After a successful seed, in addition to the normal auto-show outcome
|
||||
(row count + the affected rows, per ADR-0014), seed appends a
|
||||
**`OutputStyleClass::Hint`** advisory **only** when one or more
|
||||
enum-ish columns (D12) — **or columns guarded by a CHECK that seed
|
||||
could not derive values from** (D17) — were filled generically.
|
||||
|
||||
The wording is **phase-aware** (DA finding: the advisory must not name
|
||||
features that ship later). In **Phase 1** (no `set` clause yet) it
|
||||
names the columns and explains they were filled generically. From
|
||||
**Phase 2/3** it points at the concrete repair:
|
||||
|
||||
```
|
||||
# Phase 1 wording:
|
||||
✓ Seeded 20 rows into users
|
||||
ℹ status, role were filled with generic text — they look like
|
||||
fixed value sets you may want to choose deliberately.
|
||||
|
||||
# Phase 2/3 wording (set clause + column-fill exist):
|
||||
✓ Seeded 20 rows into users
|
||||
ℹ status, role filled generically. Fix existing rows with
|
||||
seed users.status set status in ('active','inactive'),
|
||||
or pass set … on the next seed.
|
||||
```
|
||||
|
||||
Note the repair for **already-seeded rows** is the **column-fill**
|
||||
form (`seed users.status set …`), not "re-seed" (which would add more
|
||||
rows) — DA correction. This is a **result-time** note (cheap, reusing
|
||||
ADR-0038's hint rendering), not a typing-time warning. The fuller
|
||||
"per-column report" (every column → its generator) was considered and
|
||||
**deferred** (see Alternatives / Out of scope).
|
||||
|
||||
### D14 — Foreign keys (SD1; fork on empty-parent, user-chosen: "friendly error")
|
||||
|
||||
- **Each FK** is filled by sampling **uniformly** from the **existing
|
||||
rows** of the parent table's referenced column(s). Duplicates are
|
||||
expected and correct (many children per parent). For a **compound
|
||||
FK**, the referenced **tuple is sampled jointly** (a whole existing
|
||||
parent key), never per-column independently — independent sampling
|
||||
could fabricate a `(a, b)` pair that exists in no parent row and
|
||||
would fail FK enforcement (DA refinement).
|
||||
- **Empty parent** → seed **refuses with a friendly error** naming the
|
||||
parent and the FK column ("seed `users` first — `orders.user_id`
|
||||
references it"). Safe, predictable, teaches FK dependency order.
|
||||
Recursive parent auto-seed is **deferred** to a future `--recursive`
|
||||
opt-in (Out of scope).
|
||||
- **Junction / compound-PK tables** (SD1's explicit case): sample
|
||||
**distinct combinations** of the parent PK tuples to satisfy the
|
||||
compound PK's uniqueness; if `count` exceeds the number of available
|
||||
distinct combinations, **cap** at the maximum and note it in the
|
||||
outcome.
|
||||
- **Self-referential FK** (`manager_id → id`): if nullable, leave NULL
|
||||
or point at an earlier row in the same batch; if `NOT NULL` on an
|
||||
otherwise-empty table, friendly error. Documented edge case.
|
||||
- **Nullable FKs** are **always filled** in v1 (predictable);
|
||||
occasional-NULL injection is deferred.
|
||||
|
||||
### D15 — Undo: one snapshot per seed (DA finding; ADR-0006)
|
||||
|
||||
Seed is a mutation, so it must participate in undo. The draft omitted
|
||||
this; the DA found the codebase already has the right primitive —
|
||||
`BeginBatch` / `EndBatch` (`db.rs`), used by `replay` so a multi-write
|
||||
run collapses to **one** boundary snapshot. `do_seed` wraps its
|
||||
generated writes in `begin_batch` / `end_batch`, so **`seed users 20`
|
||||
is a single undo step**, not 20 — matching ADR-0006 Amendment 1's
|
||||
batch model. Column-fill's bulk UPDATE is likewise one step. (`import`
|
||||
remains the only data-affecting op outside undo, per ADR-0015 §11;
|
||||
seed is firmly inside it.)
|
||||
|
||||
### D16 — Replay: seed re-runs as a data write (fork, user-chosen)
|
||||
|
||||
`replay` re-executes a recorded `seed` line as a **data-write
|
||||
command** — it is **not** in the app-lifecycle skip-set (see Command
|
||||
classification, above). Consequence, accepted by the user: a **bare**
|
||||
`seed users 20` regenerates **fresh, divergent** data on each replay;
|
||||
a `seed users 20 --seed 42` line (the determinism lever, D4)
|
||||
**reproduces** the original data. This keeps seed faithful to its
|
||||
nature as a data write and puts reproducibility exactly where the
|
||||
`--seed` flag already lives. (Seeded *data* is in any case durable
|
||||
independently of replay, via the ADR-0015 CSV store + `rebuild`;
|
||||
replay is the scripting re-run path, U4.) The DA confirmed the wiring
|
||||
trap: because seed is *not* an `AppCommand`, it is correctly absent
|
||||
from `is_app_lifecycle_entry_word` and replay dispatches it through
|
||||
the normal data path rather than aborting.
|
||||
|
||||
### D17 — CHECK constraints: derive from simple `IN`, else friendly-fail (fork, user-chosen)
|
||||
|
||||
A CHECK on a generically-filled column would otherwise fail the whole
|
||||
batch (DA finding — the block guard only covered `NOT NULL blob`).
|
||||
Two-tier handling, per the user:
|
||||
|
||||
1. **Derive from simple `IN`-CHECKs.** When a column's CHECK is the
|
||||
common enum-as-CHECK shape — `col IN ('a', 'b', …)` (the column's
|
||||
own CHECK, single-column, literal list) — seed **parses out the
|
||||
allowed values and uses them as the generator** (uniform choice).
|
||||
The frequent `CHECK (status IN ('active','closed'))` case then
|
||||
"just works" with no override needed.
|
||||
2. **Best-effort + friendly fail for the rest.** For CHECKs seed
|
||||
cannot interpret (ranges, expressions, multi-column), it generates
|
||||
best-effort; if a generated row violates the CHECK, the insert
|
||||
fails through the existing **H1 friendly-error layer** (ADR-0019)
|
||||
naming the constraint and pointing at `set`. Such CHECK-guarded
|
||||
columns are also **pre-flagged in the advisory** (D13) alongside
|
||||
enum-ish names, so the user is warned before hitting the failure.
|
||||
|
||||
No new CHECK engine — tier 1 is a narrow literal-`IN` parse over the
|
||||
CHECK text already stored in metadata; tier 2 is the existing failure
|
||||
path.
|
||||
|
||||
### D18 — Auto-show is capped for large seeds (DA finding)
|
||||
|
||||
ADR-0014 auto-show renders "the affected rows" — fine for one insert,
|
||||
a wall for a 10 000-row seed. Seed's outcome shows a **capped
|
||||
preview** (proposed first **20** rows) with a `(showing 20 of N)`
|
||||
note, not the full set. The row **count** is always reported in full;
|
||||
only the rendered table is capped.
|
||||
|
||||
## Grammar, AST, and cross-cutting wiring
|
||||
|
||||
Per ADR-0024, `seed` is registered as a `CommandNode` so completion,
|
||||
hints, help, and usage flow from one definition. The wiring, as
|
||||
**explicit acceptance criteria** (a `/runda` pass must verify each —
|
||||
ADR-0045 showed "claimed verified" is not verified):
|
||||
|
||||
- **AST + executor.** A dedicated command variant (`Seed { table,
|
||||
target_column: Option<String>, count: Option<u32>, overrides:
|
||||
Vec<SeedOverride>, rng_seed: Option<u64> }`) and a dedicated
|
||||
`do_seed` worker executor. `do_seed` **reuses shared helpers**
|
||||
(value binding `impl_value_for`, autogen autofill, FK enrichment,
|
||||
the multi-row parameterised-insert pattern of `plan_autogen_autofill`,
|
||||
the UPDATE path for column-fill, per-command persistence, the
|
||||
`begin_batch`/`end_batch` undo primitive of D15) as library
|
||||
functions — it does **not** emit `Command::Insert`/`Command::Update`
|
||||
(X5).
|
||||
- **Replay / undo classification (D15/D16).** `do_seed` brackets its
|
||||
writes in one batch (one undo step). The `seed` entry word is
|
||||
**deliberately absent** from `is_app_lifecycle_entry_word` and
|
||||
completion's `empty_input_offers_app_command_entry_keywords` (the
|
||||
`AppCommand` mirror) so replay re-runs it as a data write — an
|
||||
explicit acceptance check, since the default for an unlisted
|
||||
recognised command must be "replayed", not "abort".
|
||||
- **Completion sources:** table-name (existing tables); `.column` and
|
||||
`set`-clause column slots (columns of the named table); the
|
||||
generator-name vocabulary (D9) after `as`; `count` number; `set` /
|
||||
`=` / `in` / `as` / `between` / `and` keywords; `--seed` flag.
|
||||
- **Syntax highlighting:** `seed` keyword; the generator-name
|
||||
vocabulary highlighted as **`tok_function`** (reuse the existing
|
||||
ADR-0022 Amendment 6 blue — no new theme colour).
|
||||
- **Hints:** ambient per-slot "what's next" and usage hints, both
|
||||
modes.
|
||||
- **Help:** `help seed` topic (`help_id` + per-command block); the
|
||||
general `help` list picks it up automatically via REGISTRY.
|
||||
- **Parse-error pedagogy (ADR-0042):** near-miss matrix rows for `seed`
|
||||
(bare / missing-table / wrong-token / malformed `set`), both modes.
|
||||
- **Validity indicator (ADR-0027):** typing-time `[ERR]`/`[WRN]` for
|
||||
unknown table, unknown column (in `.column` or `set`), unknown
|
||||
generator name after `as`.
|
||||
- **No DSL→SQL teaching echo (ADR-0038).** `seed` is a utility/app
|
||||
command, not a DSL form of a SQL statement, so the echo does not
|
||||
apply. (A future "show the generated INSERTs" is out of scope —
|
||||
it would dump `count` statements.)
|
||||
|
||||
## Implementation phasing
|
||||
|
||||
Design is whole; the **implementation** is phased into reviewable,
|
||||
test-first commits:
|
||||
|
||||
1. **Core whole-row seed** *(done, Phase 1)* — grammar/AST/executor;
|
||||
type-based generation + the `fake`-backed name heuristics
|
||||
(D7/D8/D11); identifier uniqueness (D10) + constraint uniqueness; FK
|
||||
sampling (joint tuples) + empty-parent error + junction
|
||||
distinct-combos (D14); `--seed` determinism (D4); default count + cap +
|
||||
zero-no-op (D6/D1); required-column block guard (D1); **undo batch
|
||||
(D15)**; **replay-as-data-write classification (D16)**; **CHECK
|
||||
derive / friendly-fail (D17)**; **capped auto-show (D18)**; the
|
||||
enum/CHECK advisory in its **Phase-1 wording** (D12/D13); full
|
||||
ambient wiring; both modes.
|
||||
2. **The `set` override clause** (D2) *(done, Phase 2)* — value / list /
|
||||
generator / range, type-aware, with completion + highlight +
|
||||
validity for the generator-name slot.
|
||||
3. **Column-fill mode** (`seed <table>.<column>`, D1 form 2) *(done,
|
||||
Phase 2)* — the UPDATE path.
|
||||
|
||||
Each phase is independently green before the next. (Phases 2 and 3
|
||||
landed together — they share the `set`-override executor machinery, so
|
||||
splitting them risked a state where `set` parsed but column-fill
|
||||
silently no-op'd.)
|
||||
|
||||
## Testing (ADR-0008 tiers 1–3; test-first)
|
||||
|
||||
- **Tier 1 (unit, deterministic via `--seed`):** generator selection
|
||||
(name × type-gate matrix, including every false-positive guard of
|
||||
D7); table-context disambiguation (D11); identifier uniqueness and
|
||||
the FK-wins-over-`*_id` precedence (D10); bounded-date windows (D8);
|
||||
the `product` generator shape; override resolution + precedence (D2);
|
||||
the required-column block guard (D1); the count cap (D6). Exact-value
|
||||
assertions are possible because `--seed` fixes the RNG.
|
||||
- **Tier 2 (insta snapshots):** the seeded data table render and the
|
||||
enum advisory (D13) at representative sizes, light + dark.
|
||||
- **Tier 3 (integration, full event loop):** `seed users 20` end to
|
||||
end (rows land in db + CSV + history, auto-show, persistence);
|
||||
FK sampling against a populated parent (incl. a **compound FK** —
|
||||
every child tuple exists in the parent); **empty-parent friendly
|
||||
error**; **junction** seeding with distinct combinations and the
|
||||
over-cap note; the `set` clause forms (quoted literals);
|
||||
**column-fill** on existing rows (incl. refusal of PK/autogen targets,
|
||||
empty-table no-op); reproducibility (`--seed 42` twice → identical data
|
||||
from a fixed state); both modes. Plus the DA-driven cases:
|
||||
**one-undo-step** (seed then a single `undo` removes all rows);
|
||||
**replay** of a bare `seed` line (divergent) vs a `--seed` line
|
||||
(reproduced); **`IN`-CHECK auto-derivation** ("just works") and a
|
||||
**complex-CHECK friendly failure**; **capped auto-show** on a large
|
||||
seed.
|
||||
|
||||
"All green, no skips" is the only acceptable end state; the Phase-1
|
||||
baseline (2290 passing / 0 failing / 0 skipped / 1 ignored doctest) is
|
||||
the regression floor.
|
||||
|
||||
## Out of scope / deferred (future SD2 work)
|
||||
|
||||
- **Recursive parent auto-seed** (`--recursive`) — D14 errors instead.
|
||||
- **NULL injection** for nullable columns (teaching optional
|
||||
relationships / `IS NULL`) — v1 always fills.
|
||||
- **Multi-locale** generation — English only (X2).
|
||||
- **User-defined custom generators** (true "override hooks" — register
|
||||
a named generator) — the `set ... as <builtin>` surface covers the
|
||||
common need; custom generators are a later SD2 increment.
|
||||
- **Full per-column seed report** — D13 flags enum-ish only.
|
||||
- **Column-restricted insert** (`seed t (a, b)`) — rejected (D1).
|
||||
- **"Show the generated SQL"** teaching echo for seed.
|
||||
|
||||
## Alternatives considered
|
||||
|
||||
- **Hand-rolled generators only (no `fake`):** minimal dependency, but
|
||||
synthetic-looking data (`text_a3f9`) — rejected on pedagogy
|
||||
(pedagogy wins ties).
|
||||
- **Type-only generation (no name awareness):** simpler, but misses
|
||||
the biggest UX win (a `users` table that reads like real people) —
|
||||
rejected.
|
||||
- **Column-name-only `name` (no table context):** leaves
|
||||
`products.name` → person names, requiring a manual override on every
|
||||
product/company table — rejected for the `name`/`title` family
|
||||
(D11).
|
||||
- **No override clause (heuristics + type only):** could not answer
|
||||
"the heuristic guessed wrong, fix it" or enum columns — rejected;
|
||||
the `set` clause (D2) is the answer to the user's Q3.
|
||||
- **Recursive auto-seed of empty parents:** powerful but magical and
|
||||
can seed tables the user did not name — deferred behind a future
|
||||
flag (D14).
|
||||
- **Always-random (no `--seed`):** simplest, but no reproducible
|
||||
datasets and weaker tests — rejected (D4).
|
||||
- **Full per-column report by default:** a nice teaching artifact but
|
||||
verbose on wide tables — deferred; flag-only advisory chosen (D13).
|
||||
- **Reuse `Command::Insert`/`do_insert` directly** from seed: tempting
|
||||
for code reuse, but collapses command identity and violates X5 —
|
||||
rejected in favour of a dedicated `do_seed` that calls shared
|
||||
*helpers*.
|
||||
- **Skip seed on replay** (classify as app-lifecycle, D16): consistent
|
||||
with A1's "app-level" label and avoids divergent data, but seed is a
|
||||
data write and silently skipping it on a scripted re-run is
|
||||
surprising — rejected; `--seed` is the determinism lever instead.
|
||||
- **Bare-word `set` list items** (`in (admin, …)`, D2): matched the
|
||||
early mockups and reads cleaner, but bare words are column
|
||||
references in the reused grammar (would error) and would force a
|
||||
custom list form — rejected for quoted literals (grammar reuse +
|
||||
DSL consistency).
|
||||
- **Pre-flight refuse any CHECK-bearing table** (D17): safest but
|
||||
blocks seeding too many legitimate tables — rejected for the
|
||||
derive-`IN`-else-friendly-fail tier.
|
||||
- **`set`-driven NULL / per-column report / recursive parent seed:**
|
||||
deferred — see Out of scope.
|
||||
@@ -60,3 +60,4 @@ This directory contains the project's ADRs, recorded per
|
||||
- [ADR-0045 — `create m:n relationship` convenience command (C4)](0045-mn-convenience.md) — **Accepted + implemented 2026-06-10** (closes `requirements.md` **C4**; all forks user-confirmed + a `/runda` DA pass that verified the `do_create_table` reuse against code and corrected the "no PK-less tables" assumption — advanced SQL `create table t (a int)` has none, so a parent-PK guard is retained). Implementation corrected a second ADR premise: "the walker already dispatches multiple nodes per entry word" held only in *advanced* mode — two simple-mode spots (dispatcher `decide`, completion continuation-merge) assumed ≤1 DSL form per entry word and were generalized **behaviour-preservingly** (dispatch reduces to the old single-candidate commit; completion merge gated on `simple_count > 1`). Junction echo wired (`render_create_m2n`, round-trips as SQL). `create m:n relationship from <T1> to <T2> [as <name>]` generates a junction table with one FK column per parent PK column, a **compound PK over all the FK columns** (the textbook junction — the pair is unique, no duplicate links), and **two 1:n relationships**, all in **one transaction = one undo step** (built by reusing `do_create_table`, which already takes `foreign_keys` + writes relationship metadata — no batch bracketing). Forks all user-chosen: junction PK = compound-over-FKs (vs surrogate serial / no PK); referential actions = **`CASCADE`** on delete+update (vs NO ACTION / RESTRICT); naming = auto `{T1}_{T2}` + optional `as` (vs auto-only); available in **both modes** (Simple-category DSL, like the sibling relationship commands). FK columns named `{parent_table}_{pk_column}` (disambiguates shared `id`; generalises to compound parents via ADR-0043), typed via `fk_target_type` (ADR-0011). A distinct `Command::CreateM2nRelationship` (not lowered to `CreateTable`) preserves command identity (X5) and lets the teaching echo speak in m:n terms. 
Cross-cutting wiring enumerated: separate `CREATE_M2N` `CommandNode` (own `help_id`/`usage_ids`), `("m","m:n")` completion composite, `HintMode`s, grammar-driven highlighting, `help`/`help create`, `parse_error_pedagogy` near-miss matrix, teaching echo. OOS: **self-referential m:n** (`from T to T`) refused outright (user-confirmed "full stop" — directional column-naming is more than this beginner convenience warrants); per-relationship action overrides; extra junction payload columns; m:n diagram echo; renaming the auto-generated relationships
|
||||
- [ADR-0046 — Schema sidebar focus/navigation mode and responsive input & hint layout (UI #20/#21/#23)](0046-sidebar-navigation-and-responsive-input-hint.md) — **Accepted + implemented 2026-06-10, phased A→B→C** (8 commits `9f5f76b`…`22bec61`; closes Gitea **#20** hint jumpiness, **#21** left-column improvements, **#23** long input — all forks user-confirmed, including the persistent show/hide toggle which is **deferred**: the Ctrl-O peek covers #21's "keystroke to show and hide"). Two decisions landed differently from the draft (recorded inline): relationship data on **`App`** not `SchemaCache` (DB2); the nav overlay clears **only the sidebar strip + a one-column gutter**, panels staying visible behind (DC2). Treats the three UI issues as one coupled decision because they share the terminal's width/height budget. **Phase A (input & hint):** the hint panel's height becomes a function of **terminal geometry, fixed between resizes** (not of hint content), eliminating the #20 jump at its source — measured catalog shows ≥ ~54-col right-column width never needs > 2 hint lines, so 3 lines is a rare narrow-terminal-only case; height buckets `H<40` compact (input 1 row + horizontal scroll / hint 2) vs `H≥40` comfortable (input 2 rows soft-wrap / hint 2), output `Min(5)` honoured first under degradation; input gains horizontal scroll (`input_scroll_offset`, single logical `String` — **not** I1 multi-line) and 2-row soft-wrap display when tall, preserving ADR-0027's 6-col indicator reserve. 
**Phase B (sidebar):** the 26-col Tables column is **kept but made optional and richer** (not deleted — pedagogy wins ties) — **width-derived session-only** visibility (visible iff width > 90 or a Ctrl-O peek is active — no stored field; hides at width ≤ 90 so the 90-col screencasts drop it; ADR-0015 format untouched), plus a **relationships panel** rendered narrow with endpoints broken at the arrow, ellipsized — a **separate sibling panel** that **overrides S2**'s nested-list extension model (relationships are cross-table). The full records live on a new **`App.relationships`** field (revised from the ADR's original `SchemaCache.relationship_details` at implementation — `SchemaCache` is walker-facing and needs only the names, kept in `relationships: Vec<String>`; details are UI-only, so `App` mirrors `app.tables` and avoids ~23 fixture edits), delivered by `Database::read_all_relationships` + an `AppEvent::RelationshipsRefreshed`; the two left panels split vertically with the relationships panel floored at 5 rows ("(none)" when empty) and capped at 50 % of the column (DB4). **Phase C (navigation mode):** **`Ctrl-O`** enters a focus cycle (Input → Tables → Relationships → Input; `Esc` exits) orthogonal to the ADR-0003 input mode — **`Ctrl-B` was rejected on review as the default tmux prefix** (unreachable inside tmux); the focused panel **expands to ~40–50 cols as a `Clear` overlay** (right panels stay unchanged underneath) and scrolls via **Up/Down (line) + PageUp/PageDown (page)** (context-rebind, reusing the output-scroll viewport mechanism), with an accent focus border; all non-nav keys inert in nav mode (and nav keys inert while a modal is open). 
Forks all user-chosen: keep-optional-richer (vs remove/narrow); navigation-mode (vs modeless modifier scroll); `Ctrl-O` (Ctrl-B rejected = tmux prefix); overlay (vs layout re-split); inert-non-nav-keys; geometry-fixed hint height; `H<40/≥40` thresholds; session-only persistence; Up/Down line-scroll; **separate relationships panel overriding S2**; **no hint-area toggle** (S4's stale "keyboard-toggleable" claim struck — never implemented, unwanted). A pre-build `/runda` DA pass drove these corrections: caught the `Ctrl-B`/tmux collision, the `SchemaCache` retype that would have broken completion, the 2-row-input/indicator placement, the missing nav-mode key disposition + modal gate, and three unreferenced requirements (S1 evolved, S2 overridden, S4 corrected); also cross-checked open issue **#22** (overlay/annotation layer — separate ADR, adjacent). OOS: true multi-line input (I1); readline shortcuts (I1b); cross-session sidebar persistence; output as a third nav focus; relationship search/edit from the panel; hint-area toggle; #22's annotation layer. Accepted consequence: the 90-col visibility threshold makes a terminal's output *narrower* when widened across the boundary (sidebar appears)
|
||||
- [ADR-0047 — Demonstration overlay layer (keystroke badges + step captions)](0047-demonstration-overlay-layer.md) — **Accepted 2026-06-10; implemented 2026-06-11, phased A→B→C (closes Gitea #22)** (commits `f879d54`→`2d0f4b2`; no `requirements.md` item — tracked by issue + ADR per convention; all forks user-confirmed + a pre-build `/runda` pass that produced 10 tightening findings and a whole-implementation `/runda` pass that returned PASS, no blockers). An in-app **demonstration mode** (`--demo` flag / `RDBMS_PLAYGROUND_DEMO` env, **off by default, zero footprint when off**) that renders two transient overlays so `autocast` screencasts — and live teaching, and a future guided-lesson system — can show otherwise-invisible interactions. **Keystroke badges** (`[TAB]`, `[ENTER]`, `[UP]`, …): **automatic, app-detected** over a fixed set of glyph-less keys (the app already sees every key, so it re-records for free), label via a pure `demo_badge_label(&KeyEvent)`; the badge **auto-expires on a ~1.5 s timer** that extends the runtime's existing time-boxed-`recv` arm condition (`debounce.is_armed() || badge_pending`; expiry `Instant` in the runtime, `App.demo_badge` the render mirror — mirroring the `input` vs `input_indicator` split). **Step captions**: a **stealth, control-code-delimited input buffer** toggled by **`Ctrl+]`** (byte `0x1D` → arrives as `Char('5')+CONTROL`, verified against crossterm 0.29 `parse.rs:110-113`; chosen over `Ctrl+!`, which is **not a single ASCII byte so autocast cannot send it** — the same wall as arrow keys, R4) — typed characters accumulate **invisibly** (prompt untouched, no echo/history), `Backspace` edits, other keys inert, a second `Ctrl+]` **commits** to the caption box (empty commit dismisses); lives in pure-sync `App::update()`, **intercepted before the modal gate** so captions/badges work **over the load picker** (the `#24` projects cast). 
Both render as **floating flat black-on-yellow rectangles** (solid fill, **no border glyphs** — a one-cell text margin, deliberately unlike the app's bordered panels; user decision post-build, `2d0f4b2`) **at the output panel's inner bottom-right**, drawn **last over modals**, badge **stacked above** the caption, **no layout reflow**; caption **word-wraps to ≤ 3 lines** (3–5 rows), badge fixed 3 rows; clamp/skip guard for tiny terminals; a new **`App.last_output_area: Rect`** (set in `render_output_panel`) gives the top-level draw the anchor. Caption persists **until the next keystroke**; badge suppressed while capturing. Forks all user-chosen: `--demo` activation (vs hidden command / chord); automatic badges (vs scripted); stealth buffer (vs typed-command / preloaded-file); floating bottom-right boxes (vs HUD / banner / subtitle); `Ctrl+]` trigger; wrap-to-3-line captions; ~1.5 s badge / next-keystroke caption timing. Tested test-first across Tier 1 (label fn, capture state machine incl. over-modal + demo-off gate, nearest-deadline helper), Tier 2 (insta snapshots: badge/caption/both-stacked at 90×26 light+dark, short-terminal clamp), Tier 3 (`--demo` plumbing, badge set/suppressed, caption-without-input wiring), CLI (`--demo` parse + env fallback) — with an **honest limit** noted: the `tokio` timer wiring inside `run_loop` is exercised via the pure pieces + Tier-3 plumbing, not a standalone integration test of the timeout (same posture as the existing `IndicatorDebounce`). One intentional, user-acknowledged behaviour: `Ctrl-C` is inert while capturing (every non-`Ctrl+]` key is, by spec). Final tally **2290 passing / 0 failing / 0 skipped** (1 long-standing ignored doctest), clippy clean. 
OOS: scripted/manual badge push; badges for glyph keys; configurable styling/placement; the guided-lesson system itself (own ADR); cross-session/-switch persistence; localised caption content; arrow-only cast interactions (output-pane scroll); wiring the overlays into the website `casts.mjs` scripts (website-branch follow-up). Implementation phased **A** (`--demo` plumbing) → **B** (badges) → **C** (captions) + a flat-rectangle restyle
|
||||
- [ADR-0048 — `seed` fake-data generation command](0048-seed-fake-data-generation.md) — **Accepted 2026-06-11; Phase 1 + Phase 2 implemented 2026-06-11** (Phase 1 commits `202e25a`→`fbd219b`; design settled with the user across an extended fork dialogue, hardened by a pre-build `/runda` pass (six blockers folded in), a post-implementation `/runda` pass (eight gaps closed — FK/shortid determinism so **D4 holds with no exceptions**, plus six untested ADR decisions), a Phase-2 pre-build `/runda` pass (which caught the no-date-literal-token reality → the D2 quoted-dates amendment), and a Phase-2 post-implementation `/runda` pass (which added a friendly error for a bounded override on a UNIQUE column — see D2); **2400 tests pass, clippy clean**). Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1**. **Phase 1 shipped:** whole-row `seed <table> [count] [--seed <n>]` with realistic name-aware generation (the `fake` crate + a type-gated heuristic catalogue, table-context name disambiguation, hand-rolled `product` generator, bounded dates), identifier + constraint uniqueness incl. junction distinct-combos, FK sampling from existing parent rows (empty-parent error), `IN`-CHECK derivation + complex-CHECK advisory, a required-column block guard, `--seed` reproducibility (serial/FK/shortid all deterministic), undo as one batch step, replay as a data write, a capped auto-show preview, the enum/CHECK advisory, and an O(N) single-transaction insert path. 
**Phase 2 shipped (2026-06-11):** the `set` override clause (D2 — fixed value / pick-list / `as <generator>` / `between` range, **quoted** dates per the D2 amendment, type-aware, override drops the column from the advisory) and the `<table>.<column>` column-fill form (D1 form 2 — an UPDATE over existing rows, refusing PK/autogen targets, empty-table no-op, FK/unique-respecting, one undo step), with the new `KNOWN_GENERATORS` vocabulary (D9), a range `Generator`, full completion/highlight (`HighlightClass::Function`)/validity (`IdentSource::Generators`)/help/pedagogy wiring, and the D13 advisory's Phase-2/3 wording. Further SD2 increments (custom generators, NULL injection, multi-locale, recursive auto-seed) out of scope. Closes `requirements.md` **SD1** and the core of **SD2**; closes the `seed` half of **A1** (the other being `hint`/**H2**). A dedicated `seed` command (own AST variant + `do_seed` executor, **both modes**) generating **realistic, name-aware** fake data. Two forms: **`seed <table> [count]`** (new rows, default **20**, capped) and **`seed <table>.<column>`** (fill a column on existing rows, an UPDATE). Generation adds the **`fake` crate** (v5, English) driven by a **type-gated, token-matched name-heuristic catalogue** (~30 patterns, documented false-positive guards), with **table-context** disambiguating the `name`/`title` family (`products.name`→product, `users.name`→person, `vendors.name`→company), a **hand-rolled `product` generator** (`fake` has no commerce module), **bounded dates** (`date`/`timestamp`/`dob`/`*_at` recognised, recent windows — never "all of history"), the **identifier family** (`id`/`code`/`ref`/`number`, non-FK/non-PK) → **unique sequential**, and **enum-ish names** (`role`/`status`/`type`/…) left generic + a **post-seed Hint advisory** pointing at `set … in (…)`. 
A **`set` override clause** — `= value` / `in (a,b,c)` / `as <generator>` / `between a and b` (numeric **and** date), reusing ADR-0026 operators — answers the heuristic-miss case. **`--seed <n>`** makes runs reproducible (and enables exact-value tests). **FK** columns sampled uniformly from existing parent rows (**empty parent → friendly error**, no recursion v1); **junction/compound-PK** tables seeded with **distinct combinations**, capped + noted (SD1). A **required-column block guard** refuses rather than NULL-violate a `NOT NULL` column it can't fill (e.g. `NOT NULL blob`). Full ambient wiring (completion incl. a new generator-name vocabulary highlighted as `tok_function`, hints, `help seed`, ADR-0042 near-miss matrix, ADR-0027 validity); **no DSL→SQL teaching echo** (seed is a utility command, not a SQL twin). Honours **X5** — `do_seed` reuses insert/update *mechanics as helpers*, not by emitting `Command::Insert`. Implementation phased: (1) core whole-row seed → (2) `set` overrides → (3) column-fill. Deferred (future SD2): recursive auto-seed, NULL injection, multi-locale, user-defined custom generators, full per-column report
|
||||
|
||||
@@ -8,9 +8,8 @@ to end across three phases + a restyle).
|
||||
|
||||
## §1. State at handoff
|
||||
|
||||
**Branch:** `main`. **HEAD `2d0f4b2`** plus an **uncommitted docs
|
||||
finalization** (ADR-0047 status → implemented, README index, this
|
||||
handoff — see §6). Push is the user's step.
|
||||
**Branch:** `main`. **HEAD `f0afec3`** — all work committed, nothing
|
||||
pending. Unpushed (push is the user's step; normal working state).
|
||||
|
||||
**Tests: 2290 passing / 0 failing / 0 skipped / 1 ignored** (the 1
|
||||
ignored is the long-standing `friendly` doctest). **Clippy clean**
|
||||
@@ -18,6 +17,7 @@ ignored is the long-standing `friendly` doctest). **Clippy clean**
|
||||
|
||||
**This session's commits:**
|
||||
```
|
||||
f0afec3 docs: session handoff 64 + ADR-0047 implemented (#22/#24)
|
||||
2d0f4b2 feat(ui): flat filled rectangles for demo overlays (#22, ADR-0047 D4)
|
||||
241f60c feat(ui): demo-mode step-caption stealth buffer (#22, ADR-0047 D3/D4)
|
||||
2584e76 feat(ui): demo-mode keystroke badges (#22, ADR-0047 D2/D4/D5)
|
||||
@@ -26,8 +26,9 @@ e9eb1b1 docs: ADR-0047 — demonstration overlay layer for casts/teaching (#22)
|
||||
638b4c9 feat(app): vi-style j/k/g/G navigation in the load picker (#24)
|
||||
```
|
||||
|
||||
**Issues closed:** **#24** (vi nav) and **#22** (demo overlays) — close
|
||||
#22 once the docs finalization commit lands.
|
||||
**Issues closed:** both **#24** (vi nav) and **#22** (demo overlays) are
|
||||
**closed on Gitea** with closing comments — verified via the filtered
|
||||
issue list. Nothing left open from this session's scope.
|
||||
|
||||
## §2. #24 — vi-style load-picker navigation (commit `638b4c9`)
|
||||
|
||||
@@ -107,13 +108,15 @@ existing `IndicatorDebounce` already takes. A future Tier-4 PTY harness
|
||||
|
||||
## §6. How to take over
|
||||
|
||||
**Nothing is pending from this session** — both issues are closed, all
|
||||
docs landed (`f0afec3`), tree is green. The next session **returns to the
|
||||
open requirements backlog** (§7). Suggested start: run `/whatsnext`
|
||||
(it reads this handoff), or pick from §7 below.
|
||||
|
||||
1. Read handoffs 62 → 63 → 64, `CLAUDE.md`, `docs/requirements.md`,
|
||||
`docs/adr/README.md`, and **ADR-0047** (fully landed).
|
||||
2. **Pending:** the docs finalization commit (ADR-0047 status →
|
||||
implemented; README index; this handoff). Commit as
|
||||
`docs: session handoff 64 + ADR-0047 implemented (#22/#24)` (the user
|
||||
confirms commit messages). Then close **#22** on Gitea.
|
||||
3. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
|
||||
`docs/adr/README.md`. ADR-0047 is fully landed; revisit only for
|
||||
demo-overlay follow-ups.
|
||||
2. **For demo-overlay work:** `App` has `demo_mode`, `demo_badge`,
|
||||
`demo_badge_seq`, `demo_caption`, `demo_caption_capturing`,
|
||||
`demo_caption_buffer`, `last_output_area`. Rendering:
|
||||
`render_demo_overlays` / `render_badge_box` / `render_caption_box` /
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
# Session handoff — 2026-06-11 (65)
|
||||
|
||||
Sixty-fifth handover. Continues from handoff-64 (ADR-0047 demo
|
||||
overlays). This session designed and shipped **ADR-0048 — the `seed`
|
||||
fake-data generation command (SD1)**, Phase 1, end to end: an ADR with
|
||||
an extended fork dialogue + two `/runda` passes, then a phased
|
||||
test-first build.
|
||||
|
||||
## §1. State at handoff
|
||||
|
||||
**Branch:** `main`. **HEAD will be the doc-wrap-up commit** (see §6) —
|
||||
all seed work committed, nothing pending. Unpushed (push is the user's
|
||||
step; normal working state).
|
||||
|
||||
**Tests: 2358 passing / 0 failing / 0 skipped / 1 ignored** (the
|
||||
long-standing `friendly` doctest). **Clippy clean** (nursery, all targets).
|
||||
+68 over handoff-64's 2290.
|
||||
|
||||
**`cargo sweep` run** at wrap-up: `target/` 1.6 G → 183 M.
|
||||
|
||||
**This session's commits:**
|
||||
```
|
||||
202e25a feat(seed): fake-data generation library + fake dependency (P1.1)
|
||||
f1e9484 feat(seed): command plumbing + walking skeleton (P1.2)
|
||||
73493fa feat(seed): FK sampling, empty-parent error, block guard (P1.3a)
|
||||
9c13501 feat(seed): uniqueness, junction distinct-combos, IN-CHECK (P1.3b)
|
||||
0b3ab3c feat(seed): SeedResult outcome, capped preview, advisory, count cap (P1.3c)
|
||||
e6ff63d perf(seed): single-transaction multi-row insert path (P1.3d)
|
||||
fbd219b feat(seed): --seed flag, ambient wiring, and /runda hardening (P1.4 + DA)
|
||||
```
|
||||
(plus the earlier `4d0ae77` multi-tab-scope withdrawal and `0af7f56`
|
||||
ADR-0048 doc, and the wrap-up doc commit.)
|
||||
|
||||
## §2. What `seed` does (Phase 1 — read ADR-0048)
|
||||
|
||||
`seed <table> [count] [--seed <n>]` — populate a table with realistic
|
||||
fake data. **Available in both modes** (A1).
|
||||
|
||||
- **Realistic, name-aware generation:** the **`fake` crate** (v5,
|
||||
English) driven by a **type-gated heuristic catalogue**
|
||||
(`src/seed/heuristics.rs`) — `email`→email, `first_name`→first name, `price`→
|
||||
currency, etc., each only firing when the column *type* is
|
||||
compatible. **Table-context** disambiguates `name`/`title`
|
||||
(`products.name`→a hand-rolled **product** name, `users.name`→person,
|
||||
`vendors.name`→company). **Bounded dates** (`dob`/`created_at`/
|
||||
`date`/`timestamp` → recent windows, never "all of history", anchored
|
||||
to a fixed reference epoch for reproducibility). Type-based fallback
|
||||
otherwise.
|
||||
- **Uniqueness (D10):** the user-fillable PK, compound UNIQUE
|
||||
constraints, single-column UNIQUE, and identifier-named columns
|
||||
(`id`/`code`/…) stay distinct across the batch and vs existing rows;
|
||||
**junction tables** get **distinct FK combinations** (capped at the
|
||||
available product, reported). Identifier ints get a monotonic
|
||||
sequence.
|
||||
- **FK (D14):** every FK column samples an existing parent row (compound
|
||||
FK reads one consistent parent row); **empty parent → friendly
|
||||
error**.
|
||||
- **`IN`-CHECK (D17):** a simple `col IN ('a','b')` CHECK becomes the
|
||||
value source (enum-as-CHECK just works); complex CHECKs are flagged in
|
||||
the advisory and best-effort generated (a violation rolls the batch
|
||||
back).
|
||||
- **Reproducibility (D4):** `--seed <n>` → identical data on the same DB
|
||||
state. **Holds with no exceptions** — serial (rowid/MAX+1), FK
|
||||
(`ORDER BY`), **shortid (seeded RNG)**, all generators.
|
||||
- **Output:** the seeded-row count, a **capped preview** (first 20
|
||||
rows), and a **Hint-styled advisory** naming enum-ish /
|
||||
underivable-CHECK columns filled generically. Count cap 10 000; `seed t 0` is a no-op.
|
||||
- **Safety:** one **undo** step (snapshot wraps the whole seed);
|
||||
**replay** re-runs it as a data write; the insert path is a single
|
||||
transaction (O(N), atomic, commit-db-last preserved).
|
||||
|
||||
## §3. Where the code lives
|
||||
|
||||
- **`src/seed/`** — the pure generation library (no DB): `mod.rs`
|
||||
(`ColumnSpec`, `Generator`, `SeedRng`, `make_rng`), `heuristics.rs`
|
||||
(`choose_generator` + the catalogue + `is_enum_ish`), `generators.rs`
|
||||
(`generate_value` + the `product` generator + bounded dates),
|
||||
`check.rs` (`parse_in_check_values`). ~40 Tier-1 tests, deterministic.
|
||||
- **`src/db.rs`** — `do_seed` (+ `SeedColPlan`,
|
||||
`sample_parent_key_tuples`, `seed_value_list_key`, `seed_max_int`, `SeedResult`,
|
||||
`DEFAULT_SEED_COUNT`/`MAX_SEED_COUNT`/`SEED_PREVIEW_CAP`), the new
|
||||
**`insert_one_row`** core extracted from `do_insert` (shared, no
|
||||
tx/persist — so seed runs N rows in one tx), and the `Request::Seed` /
|
||||
`Database::seed` / worker wiring.
|
||||
- **`src/dsl/grammar/data.rs`** — `SEED` `CommandNode`, `build_seed`,
|
||||
the `--seed` flag grammar (`Seq[Flag("seed"), NumberLit]`, the first
|
||||
DSL flag with a value). `Command::Seed` in `command.rs`.
|
||||
- **Runtime/render** — `CommandOutcome::Seed`,
|
||||
`AppEvent::DslSeedSucceeded`, `App::handle_dsl_seed_success`. Catalog keys
|
||||
`ok.rows_seeded` / `seed.capped` / `seed.advisory_generic` /
|
||||
`help.data.seed` / `parse.usage.seed`.
|
||||
- **Tests** — `tests/it/seed.rs` (25 integration tests),
|
||||
`tests/typing_surface/mod.rs` (`seed_completion_and_validity`),
|
||||
`tests/it/parse_error_pedagogy.rs` (bare-`seed` near-miss row),
|
||||
`src/app.rs` (two render tests), `src/dsl/shortid.rs`
|
||||
(`generate_with_rng`).
|
||||
|
||||
## §4. Process notes (the two `/runda` passes)
|
||||
|
||||
- **Pre-build `/runda`** (on the ADR) found six blockers — undo
|
||||
integration (D15), replay semantics (D16), `set`-value quoting (D2),
|
||||
CHECK handling (D17), an advisory phase-ordering bug (D13), auto-show
|
||||
flooding (D18) — all folded into ADR-0048 before any code; the three
|
||||
genuine forks re-escalated and user-resolved.
|
||||
- **Post-implementation `/runda`** (on the whole implementation) found
|
||||
**eight gaps**, all closed: FK-sampling determinism (→ `ORDER BY`),
|
||||
**shortid not reproducible** (→ seeded RNG; fixed rather than documented — the
|
||||
user chose the fix), and six **untested ADR decisions** (D5 advanced
|
||||
mode, D15 undo, D16 replay, D17 complex-CHECK advisory, atomic
|
||||
rollback, zero-count) — tests added for each.
|
||||
|
||||
## §5. Phase 2 (deferred — designed in ADR-0048, NOT built)
|
||||
|
||||
These are the only seed pieces left; both have full designs in
|
||||
ADR-0048:
|
||||
|
||||
1. **The `set` override clause (D2)** — `seed t 20 set role in
|
||||
('a','b'), status = 'x', work_addr as email, price between 10 and
|
||||
100`. Value / pick-from-list / explicit-generator / range, **quoted
|
||||
literals** (grammar-consistent). This is the SD2 "override hooks"
|
||||
core. The `ColumnSpec.check_in_values` → `PickFrom` plumbing and the
|
||||
`Generator` vocabulary already exist; this adds the grammar + a `set`
|
||||
clause that overrides the per-column plan.
|
||||
2. **Column-fill (`seed <table>.<column>`, D1 form 2)** — fill one
|
||||
column across *existing* rows (an UPDATE). Refuses PK/autogen targets;
|
||||
empty-table no-op.
|
||||
|
||||
`requirements.md`: **SD1 `[x]`**, **SD2 `[/]`** (core done; the two
|
||||
above open), **A1 14/15** (only `hint`/**H2** unregistered).
|
||||
|
||||
## §6. How to take over
|
||||
|
||||
1. Read handoffs 63 → 64 → 65, `CLAUDE.md`, `docs/requirements.md`,
|
||||
`docs/adr/0048-seed-fake-data-generation.md` (the whole thing —
|
||||
D1–D18 + the as-built status block).
|
||||
2. **Seed is feature-complete for Phase 1; nothing pending.** Next
|
||||
options (user's call): seed **Phase 2** (`set` clause + column-fill);
|
||||
**H2 `hint`** (closes A1) — own ADR; **TT5 CI**; or the larger
|
||||
**V4 journal** / **tutorial** ADRs.
|
||||
3. Two minor, user-deferred observations (non-blocking): the uniqueness
|
||||
retry cap (`MAX_ATTEMPTS=200`) can cap a *medium* unique domain
|
||||
slightly below its true size (junction/small domains are exact);
|
||||
`literal_to_value` doesn't type-check an IN-CHECK literal vs a numeric
|
||||
column (a malformed `int IN ('a')` CHECK fails cleanly at bind).
|
||||
@@ -0,0 +1,145 @@
|
||||
# Session handoff — 2026-06-11 (66)
|
||||
|
||||
Sixty-sixth handover. Continues from handoff-65 (ADR-0048 `seed`
|
||||
Phase 1). This session built **ADR-0048 Phase 2** end to end: the
|
||||
**`set` override clause** (D2) and the **`<table>.<column>`
|
||||
column-fill** form (D1 form 2) — the two surfaces Phase 1 deliberately
|
||||
deferred. Designed-then-DA-vetted (a `/runda` pass that caught a real
|
||||
ADR-vs-grammar conflict), then built test-first.
|
||||
|
||||
## §1. State at handoff
|
||||
|
||||
**Branch:** `main`. All Phase-2 work is in the working tree;
|
||||
**commits are pending the user's approval** (see §6). Unpushed is the
|
||||
normal working state.
|
||||
|
||||
**Tests: 2400 passing / 0 failing / 0 skipped / 1 ignored** (the
|
||||
long-standing `friendly` doctest). **Clippy clean** (nursery, all
|
||||
targets). +42 over handoff-65's 2358.
|
||||
|
||||
## §2. What landed (read ADR-0048 — Status + D1/D2/D9/D13)
|
||||
|
||||
`seed <T>[.<col>] [count] [set <overrides>] [--seed <n>]`.
|
||||
|
||||
- **`set` override clause (D2):** four forms, comma-separated —
|
||||
`status = 'active'` (fixed), `role in ('a','b')` (pick-list),
|
||||
`work_addr as email` (named generator), `price between 10 and 100`
|
||||
(range; numeric **and quoted dates**). Type-aware; an override
|
||||
**drops its column from the generic-fill advisory** (D13). Value
|
||||
slots reuse `update`'s typed `current_column_value` (quoting
|
||||
enforced structurally — a bare word is rejected).
|
||||
- **Column-fill (D1 form 2):** `seed users.email [set …]` fills one
|
||||
column across **existing** rows (an UPDATE). Refuses PK / autogen
|
||||
(`serial`/`shortid`/`blob`) targets; **empty table → friendly
|
||||
no-op**; FK target samples the parent; UNIQUE/identifier target gets
|
||||
collision-free values; **one undo step**; `set` may only adjust the
|
||||
filled column; a row count is refused.
|
||||
- **Named-generator vocabulary (D9):** `src/seed/vocabulary.rs` —
|
||||
`KNOWN_GENERATORS` + `generator_for_name` + `is_known_generator_prefix`,
|
||||
the single source of truth for completion, validity, and the executor.
|
||||
- **Range generator:** `Generator::Range { low, high }` in
|
||||
`src/seed/generators.rs`, interpreted per destination type;
|
||||
`range_bounds_reason` validates compatibility before generation.
|
||||
- **Ambient wiring:** completion (generator names after `as`, the
|
||||
`set <col>` and `.col` column slots, the `set` keyword); highlight
|
||||
(new `HighlightClass::Function` → existing `tok_function`); validity
|
||||
(new `IdentSource::Generators` — unknown generator flagged `[ERR]`;
|
||||
unknown column in `set`/`.col` flagged via the existing Columns
|
||||
path); help (`help.data.seed`); parse-error pedagogy near-miss rows;
|
||||
the D13 advisory's **Phase-2/3 wording** (points at `set` and the
|
||||
column-fill repair). Both modes (D5).
|
||||
|
||||
## §3. The ADR amendment (a real DA find)
|
||||
|
||||
The pre-build `/runda` pass found that **ADR-0048 D2's "dates stay
|
||||
unquoted" was impossible** — this DSL has **no date-literal token**
|
||||
(`Value` is `Number`/`Text`; dates are quoted strings validated by
|
||||
`bind_date`). Escalated to the user, who chose **quoted dates +
|
||||
amend the ADR** (the grammar-consistent option). D2 now carries a
|
||||
dated amendment; the range form uses `between '2023-01-01' and
|
||||
'2024-12-31'`. This was the only divergence from the ADR text; numbers
|
||||
remain unquoted.
|
||||
|
||||
## §4. Where the code lives
|
||||
|
||||
- **`src/dsl/command.rs`** — `Command::Seed` gains `target_column:
|
||||
Option<String>` + `overrides: Vec<SeedOverride>`; new `SeedOverride`
|
||||
/ `SeedOverrideKind`.
|
||||
- **`src/dsl/grammar/data.rs`** — `SEED_SET_CLAUSE` + `SEED_DOT_COLUMN`
|
||||
grammar; `SEED_GENERATOR` slot (`IdentSource::Generators`,
|
||||
`HighlightClass::Function`); `build_seed` + the override fold
|
||||
(`build_seed_overrides` / `parse_seed_override_tail`).
|
||||
- **`src/dsl/grammar/mod.rs`** — `IdentSource::Generators` +
|
||||
`HighlightClass::Function`.
|
||||
- **`src/db.rs`** — `apply_seed_overrides` / `seed_override_plan` /
|
||||
`seed_override_literal`; `do_seed_column_fill`; `do_seed` +
|
||||
`Database::seed` + worker wiring threaded with the new params.
|
||||
- **`src/seed/`** — `vocabulary.rs` (new); `generators.rs` (range
|
||||
generator + `range_bounds_reason`); `mod.rs` (`Generator::Range`).
|
||||
- **`src/completion.rs`** — generator candidates after `as`; generator
|
||||
validity. **`src/input_render.rs`** — `"generator"` invalid-ident
|
||||
kind. **`src/theme.rs`** — `Function → tok_function`.
|
||||
- **Catalog** — `help.data.seed`, `parse.usage.seed`,
|
||||
`seed.advisory_generic` (Phase-2/3 wording) in `en-US.yaml`;
|
||||
`keys.rs` placeholders updated.
|
||||
- **Tests** — `tests/it/seed.rs` (+~30: builder fold, executor
|
||||
set/column-fill, undo, advanced mode),
|
||||
`src/seed/{vocabulary,generators}.rs` (range + vocabulary units), `src/completion.rs`
|
||||
(generator + column validity), `src/dsl/walker/highlight.rs`,
|
||||
`tests/typing_surface/mod.rs` (completion slots),
|
||||
`tests/it/parse_error_pedagogy.rs` (near-miss rows).
|
||||
|
||||
## §5. Two implementation refinements vs. the ADR (both met the contract)
|
||||
|
||||
- **Quoted dates** (the D2 amendment, §3).
|
||||
- **Value slots reuse `current_column_value`** (the `update … set`
|
||||
typed slot) rather than the raw ADR-0026 expression operand — no
|
||||
spurious column-ref match, typed narrowing, consistent with
|
||||
`update`. The user-facing contract (quoted literals, type-aware) is
|
||||
fully met.
|
||||
|
||||
The `seed_take_value` / `seed_set_error` builder paths are
|
||||
drift-guards (the typed slots only ever match value literals, so a bare
|
||||
word is rejected at the grammar level) — they use the generic
|
||||
`parse.error_wrapper`, mirroring `expr::build_expr`.
|
||||
|
||||
## §6. How to take over / next steps
|
||||
|
||||
1. Read handoffs 64 → 65 → 66, `CLAUDE.md`, `docs/requirements.md`,
|
||||
`docs/adr/0048-…md` (Status block + D1/D2/D9/D13 + the amendment).
|
||||
2. **Seed is feature-complete (SD1 + SD2).** `requirements.md`: **SD1
|
||||
`[x]`, SD2 `[x]`**. The only open A1 gap is `hint`/**H2** (own ADR).
|
||||
3. **Commits pending approval.** Suggested split:
|
||||
- `feat(seed): set override clause + column-fill (ADR-0048 Phase 2)`
|
||||
— all `src/` + `tests/` changes.
|
||||
- `docs: ADR-0048 Phase 2 implemented + handoff 66` — ADR / README /
|
||||
requirements / this file.
|
||||
4. Next options (user's call): **H2 `hint`** (closes A1); **TT5 CI**;
|
||||
the larger **V4 journal** / **tutorial** ADRs; or Tier-4 PTY (TT4).
|
||||
5. Consider a `cargo sweep` at this milestone (`target/` grows).
|
||||
|
||||
## §7. Post-implementation `/runda` pass (done this session)
|
||||
|
||||
A DA pass over the completed code found **no correctness bugs and no
|
||||
dropped requirements**; all D1–D18 acceptance criteria verified met,
|
||||
tests confirmed to catch regressions. One **design fork** was surfaced
|
||||
and **resolved by the user**:
|
||||
|
||||
- **Bounded override × UNIQUE column** — a fixed value / too-short
|
||||
pick-list on a single-column-UNIQUE target used to silently cap the
|
||||
run (e.g. `seed users 100 set email = 'x'` → 1 row). Now a **friendly
|
||||
error** up front (`seed_override_capacity_guard`, `src/db.rs`), for
|
||||
both whole-row and column-fill; generators/ranges stay cap-based
|
||||
(unbounded sources). ADR-0048 D2 documents it; two tests pin it.
|
||||
|
||||
Remaining **non-blocking** edges (noted, not bugs):
|
||||
|
||||
- Overriding an **FK column** with a literal: the override wins (D2); a
|
||||
non-parent value fails safely through the FK-error layer.
|
||||
- **Column-fill of one column of a *compound* FK** samples that column
|
||||
independently → an invalid tuple fails safely (UPDATE rejected,
|
||||
rollback), never corrupts. Single-column FKs / non-FK columns are
|
||||
exact.
|
||||
- The generator slot uses the **default candidate-ladder hint** (offers
|
||||
the vocabulary), not a dedicated prose intro — discoverability is met
|
||||
by completion; a prose intro is optional polish.
|
||||
@@ -0,0 +1,119 @@
|
||||
# Session handoff — 2026-06-12 (67)
|
||||
|
||||
Sixty-seventh handover. Continues directly from handoff-66 (ADR-0048
|
||||
`seed` Phase 2, committed). This was a **manual-testing pass**: the user
|
||||
exercised the app, found several rough edges, and we triaged each into
|
||||
*fix now* vs *file an issue*. Net result: **three bug fixes committed**
|
||||
and **three enhancement issues filed**.
|
||||
|
||||
## §1. State at handoff
|
||||
|
||||
**Branch:** `main`. Working tree **clean**; all work committed. Unpushed
|
||||
(push is the user's step).
|
||||
|
||||
**Tests: 2407 passing / 0 failing / 0 skipped / 1 ignored** (the
|
||||
long-standing `friendly` doctest). **Clippy clean** (nursery, all
|
||||
targets). +7 over handoff-66's 2400.
|
||||
|
||||
**Commits since handoff-65:**
|
||||
```
|
||||
f7155ce fix(input): thread the `:` one-shot escape into live SQL feedback
|
||||
4cacb82 fix(completion): don't flag a table alias used before its FROM clause
|
||||
c3e0103 fix(completion): flag-aware partial so a dash completes flags, not keywords
|
||||
30b2677 docs: ADR-0048 Phase 2 implemented + handoff 66
|
||||
a12facc feat(seed): set override clause + column-fill (ADR-0048 Phase 2)
|
||||
```
|
||||
(`a12facc`/`30b2677` are the Phase-2 work documented in handoff-66.)
|
||||
|
||||
## §2. Bug fixes this session (all committed, all tested)
|
||||
|
||||
1. **`c3e0103` — flag completion ate the dash.** Typing a flag at a
|
||||
flag position (`add 1:n relationship … -`) offered the `on` keyword
|
||||
and, on accept, produced `-on` / `---create-fk`: the partial-token
|
||||
walk stopped at `-`, so the dash was outside the replaced range.
|
||||
Fix: flag-aware partial detection (a dash-prefixed token at a word
|
||||
boundary is a flag-in-progress, **gated on a flag being expected** so
|
||||
`where x = -5` stays a number) + a unified flag matcher
|
||||
(`trim_start_matches('-')`). Affected **all** flags. 4 tests + 2
|
||||
partial-flag snapshots updated (they'd captured the latent bug).
|
||||
|
||||
2. **`4cacb82` — table alias flagged as an unknown column.** In a
|
||||
SELECT, the projection (`sum(ol.count*…)`) can reference an alias
|
||||
whose `FROM … OrderLines ol` sits *after* the cursor. The candidate
|
||||
engine recovers that via the §10.6 full-input lookahead (ADR-0032),
|
||||
but `invalid_ident_at_cursor` only walked text *before* the cursor —
|
||||
so `ol` matched no scope and got a red "ERR" overlay on an otherwise
|
||||
valid query. Fix: give the validity check the same full-input
|
||||
lookahead and bail when the partial prefix-matches a binding's alias
|
||||
or table. 1 test.
|
||||
|
||||
3. **`f7155ce` — the `:` one-shot escape broke live SQL feedback.**
|
||||
Submission strips the `:` (ADR-0003), but the *live* feedback kept it
|
||||
in the buffer handed to the walker, which bailed at the `:`. Effect:
|
||||
under `:`, Tab completed nothing and a valid query could flash `[ERR]`
|
||||
— while the same line in full `mode advanced` worked. (The hint
|
||||
already stripped it, hence "hint shows the name but Tab does
|
||||
nothing".) Fix: one shared `App::feedback_view()` (the `:`-stripped
|
||||
SQL + mapped cursor + stripped offset) routed through completion (with
|
||||
a `replaced_range` offset shift), the validity verdict, and rendering
|
||||
(new `render_input_runs_feedback` highlights/overlays the view shifted
|
||||
by the offset; the `:` renders as plain text); the ambient hint was
|
||||
consolidated onto it (removing the duplicate `strip_one_shot_prefix`).
|
||||
3 tests + the 9 existing colon tests still green.
|
||||
|
||||
## §3. Investigated, **no code change** (working as designed)
|
||||
|
||||
- **Comma-`FROM` implicit join** (`select … from A, B, C`) is
|
||||
**deliberately rejected** — ADR-0032 §11 / OOS-3: *"comma-FROM teaches
|
||||
habits we do not want to encourage; `CROSS JOIN` covers the same shape
|
||||
explicitly."* The explicit equivalent (`CROSS JOIN … WHERE …`) works.
|
||||
- **`sum(…)` returning one row** with no `GROUP BY` is **correct SQL**
|
||||
(the aggregate collapses the result to one row; SQLite/the playground
|
||||
allow the non-aggregated columns where Postgres would error). The
|
||||
user's query needed `group by o.id`. Verified (1 row).
|
||||
|
||||
## §4. Open issues filed this session — **next session's candidates**
|
||||
|
||||
All on `git.lazyeval.net/oli/rdbms-playground`, label `enhancement`:
|
||||
|
||||
- **#26 — `seed <table>` hint omits the optional count.** A complete
|
||||
command's optional positional *number* has no Tab candidate, so it's
|
||||
invisible. `IntroProse` doesn't fit (it only fires for incomplete
|
||||
required slots; the completing Seq match clears the hint). Needs a way
|
||||
to advertise optional positional non-keyword args. *(I attempted +
|
||||
reverted this during Phase 2; see the analysis in the issue.)*
|
||||
- **#27 — Bottom status line: keybindings-only, context- and
|
||||
state-aware.** Per-nav-focus keybindings (Input vs sidebar), **include
|
||||
transient states** (Tab-cycle, history) — user preference — and add
|
||||
`mode advanced` to the empty-input hint. May warrant a small ADR.
|
||||
- **#28 — Reconsider relationship prose in `add column` (incidental DDL)
|
||||
confirmations.** Currently by design (ADR-0044 §1 keeps prose, not
|
||||
diagrams, for incidental DDL). **User preference: do NOT show the
|
||||
`References:` / `Referenced by:` block** in the add-column
|
||||
confirmation at all — focus on the change just made. This revisits a
|
||||
decided area → land as a **new ADR** superseding the relevant part of
|
||||
ADR-0016 §5 / ADR-0044 §1; confirm scope (just `add column`, or all
|
||||
incidental DDL).
|
||||
|
||||
## §5. Other open work (unchanged from handoff-66 §6)
|
||||
|
||||
`seed` is **feature-complete** (`requirements.md` SD1 `[x]`, SD2 `[x]`).
|
||||
Remaining roadmap, user's call:
|
||||
|
||||
- **H2 `hint`** — the last A1 gap (its own ADR).
|
||||
- **TT5 CI** — test infra exists; no CI workflow yet.
|
||||
- **TT4 PTY (Tier-4)** — ADR-0008 specifies it; not wired.
|
||||
- Larger: **V4 journal**, **tutorial/lesson system** (each needs an ADR).
|
||||
|
||||
A possible quick follow-up: a friendlier "use an explicit `JOIN`"
|
||||
parse-error for comma-`FROM` (§3, first bullet) — not filed; mention if wanted.
|
||||
|
||||
## §6. How to take over
|
||||
|
||||
1. Read handoffs 65 → 66 → 67, `CLAUDE.md`, `docs/requirements.md`.
|
||||
2. `seed` Phase 2 is done (ADR-0048 Status block is current). The
|
||||
manual-testing fixes (§2) are committed and green.
|
||||
3. Pick from §4 (filed issues #26/#27/#28) or §5 (roadmap). #28 is a
|
||||
decision/ADR; #27 is UX (maybe ADR); #26 is a hint-system enhancement.
|
||||
4. Consider a `cargo sweep` at this milestone (`target/` grows across
|
||||
sessions).
|
||||
+66
-26
@@ -88,12 +88,16 @@ since ADR-0027.)
|
||||
because relationships are cross-table rather than per-table, they
|
||||
get their own sibling panel stacked below the tables list, not
|
||||
nested items within it — user-confirmed 2026-06-10.)*
|
||||
- [/] **S3** Output panel renders a visualization of the
|
||||
currently selected item and supports multiple tabs.
|
||||
*(Partial, verified 2026-06-07: single-element structure
|
||||
visualisation renders (`output_render.rs:82-180`); **multiple
|
||||
tabs are not implemented** — the output is one line buffer, no
|
||||
tab abstraction. Same multi-tab gap as V2.)*
|
||||
- [x] **S3** Output panel renders a visualization of the
|
||||
currently selected item.
|
||||
*(Satisfied: single-element structure visualisation renders
|
||||
(`output_render.rs:82-180`) — select a table, see its columns /
|
||||
types / keys. **Multi-tab clause withdrawn 2026-06-11** (user
|
||||
decision): the original wording promised "and supports multiple
|
||||
tabs", but the output model is settling on the single scrollable
|
||||
**V4 journal** rather than switchable tabs, so the tab clause is
|
||||
dropped from tracked scope. A future return to tabbed output would
|
||||
be a fresh requirement, not this one. Same withdrawal as V2.)*
|
||||
- [x] **S4** Hint area below the input field, showing hints about
|
||||
the current input or last error.
|
||||
*(Verified 2026-06-07: `ui.rs:1088-1110` `render_hint_panel` /
|
||||
@@ -242,13 +246,12 @@ since ADR-0027.)
|
||||
available in both modes: `save`, `save as`, `load`, `new`,
|
||||
`rebuild`, `export`, `import`, `seed`, `replay`, `undo`,
|
||||
`redo`, `mode`, `help`, `hint`, `quit`.
|
||||
*(Partial, verified 2026-06-07: 13 of 15 implemented and
|
||||
available in both modes — `quit`/`q`, `mode simple|advanced`,
|
||||
`help`, `save`, `save as`, `load`, `new`, `rebuild`, `export`,
|
||||
`import`, `replay`, `undo`, `redo` (REGISTRY in
|
||||
`grammar/app.rs:249-333`). **Missing: `seed`** (tracked as SD1)
|
||||
**and `hint`** (tracked as H2) — neither is registered. A1
|
||||
closes when SD1 + H2 land.)*
|
||||
*(Partial: **14 of 15** implemented and available in both modes —
|
||||
`quit`/`q`, `mode simple|advanced`, `help`, `save`, `save as`,
|
||||
`load`, `new`, `rebuild`, `export`, `import`, `replay`, `undo`,
|
||||
`redo`, and now **`seed`** (ADR-0048 / SD1, done 2026-06-11).
|
||||
**Only `hint`** (tracked as H2) remains unregistered. A1 closes
|
||||
when H2 lands.)*
|
||||
|
||||
## DSL data commands
|
||||
|
||||
@@ -469,15 +472,18 @@ since ADR-0027.)
|
||||
"relationship-relevant" reach). The §3 last-resort helper line was
|
||||
considered and rejected. Two `/runda` passes (design + implementation).
|
||||
Selection-nav and the broader journal direction remain in V4.)*
|
||||
- [/] **V2** SQL query results render as a dynamic table view in
|
||||
the output pane, with multiple result tabs supported.
|
||||
*(Partial, verified 2026-06-07: the **table view** is done —
|
||||
`output_render.rs:38-72` `render_data_table` renders a
|
||||
box-drawing frame with aligned columns (numeric right, text
|
||||
left) and NULL/control-char sanitisation, for `show data` and
|
||||
after every write (ADR-0014). **Missing: multiple result tabs**
|
||||
— the output is a single `VecDeque<OutputLine>` with no tab
|
||||
abstraction (same gap as S3). Multi-tab sits in V4 territory.)*
|
||||
- [x] **V2** SQL query results render as a dynamic table view in
|
||||
the output pane.
|
||||
*(Satisfied: the **table view** is done — `output_render.rs:38-72`
|
||||
`render_data_table` renders a box-drawing frame with aligned
|
||||
columns (numeric right, text left) and NULL/control-char
|
||||
sanitisation, for `show data` and after every write (ADR-0014).
|
||||
**Multi-tab clause withdrawn 2026-06-11** (user decision): the
|
||||
original wording promised "with multiple result tabs supported";
|
||||
retained multi-result output, if ever wanted, now belongs to the
|
||||
single scrollable **V4 journal** direction rather than switchable
|
||||
tabs, so the tab clause is dropped from tracked scope. A future
|
||||
return would be a new requirement. Same withdrawal as S3.)*
|
||||
- [~] **V3** Full ER-diagram export (whole-database graph, viewed
|
||||
outside the TUI) — low priority; design and ADR pending.
|
||||
- [~] **V4** Output panel as a *scrollable per-session log* with
|
||||
@@ -492,7 +498,13 @@ since ADR-0027.)
|
||||
*(Partial: PageUp / PageDown scrolling of the existing line
|
||||
buffer is in, with new output snapping the view to the most
|
||||
recent. The full V4 scope — smart structure rendering, log
|
||||
styling, Markdown export, scroll indicator — remains pending.)*
|
||||
styling, Markdown export, scroll indicator — remains pending.
|
||||
**As of 2026-06-11 this journal model is the sole tracked
|
||||
direction for evolving the output pane:** the competing multi-tab
|
||||
output alternative (the trailing clauses of S3 and V2) was
|
||||
withdrawn from scope by user decision, so retained / multi-result
|
||||
output, if pursued, is folded into this journal rather than into
|
||||
switchable tabs.)*
|
||||
- [x] **V5** `show <kind> [<name>]` family of commands for
|
||||
redisplaying schema info on demand.
|
||||
*(Done 2026-06-07: `show table <name>` + `show data <Table>`
|
||||
@@ -652,11 +664,39 @@ since ADR-0027.)
|
||||
|
||||
## Sample data / seeding
|
||||
|
||||
- [ ] **SD1** `seed <table> [count]` generates plausible fake
|
||||
- [x] **SD1** `seed <table> [count]` generates plausible fake
|
||||
data; junction tables are seeded with valid foreign-key
|
||||
references drawn from existing parent rows.
|
||||
- [~] **SD2** Detailed seeding rules (per-type generators,
|
||||
locale, determinism, override hooks) — design and ADR pending.
|
||||
*(Done 2026-06-11 via **ADR-0048** (commits `202e25a`→`fbd219b`).
|
||||
Whole-row `seed <table> [count] [--seed <n>]` with realistic
|
||||
name-aware generation (`fake` crate + a type-gated heuristic
|
||||
catalogue, table-context name disambiguation, hand-rolled
|
||||
`product` generator, bounded dates), identifier + constraint
|
||||
uniqueness, **junction tables seeded with valid FK references
|
||||
drawn from existing parent rows** (distinct combinations, capped;
|
||||
empty-parent friendly error), `IN`-CHECK derivation, a
|
||||
required-column block guard, undo as one step, replay as a data
|
||||
write, a capped auto-show + enum/CHECK advisory, and an O(N)
|
||||
single-transaction path. The `set` override clause and
|
||||
`<table>.<column>` column-fill landed in SD2 Phase 2, below.)*
|
||||
- [x] **SD2** Detailed seeding rules (per-type generators,
|
||||
locale, determinism, override hooks).
|
||||
*(Done 2026-06-11 via **ADR-0048** (Phase 1 + Phase 2). Phase 1:
|
||||
type-gated name-aware per-type generators with a `fake`-backed
|
||||
catalogue + table-context disambiguation, **`--seed` determinism**
|
||||
(serial/FK/shortid all reproducible — D4 holds with no
|
||||
exceptions), English-only locale (X2). **Phase 2 (the "override
|
||||
hooks" core):** the `set` override clause — fixed value /
|
||||
pick-from-list / `as <generator>` / `between` range (numeric and
|
||||
**quoted** dates, type-aware; an override drops the column from
|
||||
the generic-fill advisory) — and the `<table>.<column>`
|
||||
column-fill form (an UPDATE over existing rows, refusing
|
||||
PK/autogen targets, empty-table no-op, FK/unique-respecting, one
|
||||
undo step). Adds the `KNOWN_GENERATORS` vocabulary (D9), a range
|
||||
`Generator`, and full completion / highlight / validity / help /
|
||||
parse-error-pedagogy wiring. Deferred SD2 increments:
|
||||
user-defined custom generators, NULL injection, multi-locale,
|
||||
recursive parent auto-seed.)*
|
||||
|
||||
## Query analysis
|
||||
|
||||
|
||||
+254
-19
@@ -646,6 +646,44 @@ impl App {
|
||||
}
|
||||
}
|
||||
|
||||
/// The input view the **live-feedback** walkers (completion, ambient
|
||||
/// hint, validity verdict, highlight overlays) should see, plus the
|
||||
/// byte offset stripped from the front and the cursor mapped into the
|
||||
/// view.
|
||||
///
|
||||
/// Under the `:` one-shot escape (ADR-0003) the buffer carries a
|
||||
/// leading `:` (and an auto-inserted space) that is *not* advanced
|
||||
/// SQL — submission already strips it before parsing, but the live
|
||||
/// feedback did not, so the walker bailed at the `:` and resolved
|
||||
/// nothing (no completion / hint, a spurious error overlay). This
|
||||
/// returns the stripped SQL exactly as submission sees it, so the
|
||||
/// feedback matches a real advanced-mode session. `offset` maps any
|
||||
/// walker-returned byte position (completion `replaced_range`,
|
||||
/// overlay spans) back to real-buffer coordinates.
|
||||
///
|
||||
/// For every non-one-shot input this is the identity
|
||||
/// `(&input, cursor, 0)`.
|
||||
#[must_use]
|
||||
pub fn feedback_view(&self) -> (&str, usize, usize) {
|
||||
if matches!(self.effective_mode(), EffectiveMode::AdvancedOneShot) {
|
||||
// The first non-whitespace char is the `:` (per
|
||||
// `effective_mode`); strip up to and including it, then any
|
||||
// following whitespace — mirroring submission's
|
||||
// `trimmed[1..].trim()`.
|
||||
let leading_ws = self.input.len() - self.input.trim_start().len();
|
||||
let mut offset = leading_ws + 1; // past the `:`
|
||||
while offset < self.input.len()
|
||||
&& self.input.as_bytes()[offset].is_ascii_whitespace()
|
||||
{
|
||||
offset += 1;
|
||||
}
|
||||
let view = &self.input[offset..];
|
||||
let cursor = self.input_cursor.saturating_sub(offset).min(view.len());
|
||||
return (view, cursor, offset);
|
||||
}
|
||||
(&self.input, self.input_cursor, 0)
|
||||
}
|
||||
|
||||
/// The validity-indicator verdict for the current input
|
||||
/// (ADR-0027 §3). `None` when the input would run clean.
|
||||
///
|
||||
@@ -667,11 +705,10 @@ impl App {
|
||||
EffectiveMode::AdvancedPersistent
|
||||
| EffectiveMode::AdvancedOneShot => Mode::Advanced,
|
||||
};
|
||||
crate::dsl::walker::input_verdict_in_mode(
|
||||
&self.input,
|
||||
Some(&self.schema_cache),
|
||||
mode,
|
||||
)
|
||||
// Strip the `:` one-shot prefix so the walker verdicts the SQL
|
||||
// itself, not the escape marker (which it can't parse).
|
||||
let (view, _cursor, _offset) = self.feedback_view();
|
||||
crate::dsl::walker::input_verdict_in_mode(view, Some(&self.schema_cache), mode)
|
||||
}
|
||||
|
||||
/// Process one event from the runtime, mutating state and
|
||||
@@ -771,6 +808,10 @@ impl App {
|
||||
self.handle_dsl_insert_success(&command, &result);
|
||||
Vec::new()
|
||||
}
|
||||
AppEvent::DslSeedSucceeded { command, result } => {
|
||||
self.handle_dsl_seed_success(&command, &result);
|
||||
Vec::new()
|
||||
}
|
||||
AppEvent::DslUpdateSucceeded {
|
||||
command,
|
||||
result,
|
||||
@@ -1395,13 +1436,7 @@ impl App {
|
||||
}
|
||||
|
||||
fn start_or_complete_at(&mut self, multi_start_idx: usize) {
|
||||
let cursor = self.input_cursor.min(self.input.len());
|
||||
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
|
||||
&self.input,
|
||||
cursor,
|
||||
&self.schema_cache,
|
||||
self.effective_mode().as_mode(),
|
||||
) else {
|
||||
let Some(comp) = self.completion_for_feedback() else {
|
||||
return;
|
||||
};
|
||||
if comp.candidates.len() == 1 {
|
||||
@@ -1413,13 +1448,7 @@ impl App {
|
||||
}
|
||||
|
||||
fn start_or_complete_last(&mut self) {
|
||||
let cursor = self.input_cursor.min(self.input.len());
|
||||
let Some(comp) = crate::completion::candidates_at_cursor_in_mode(
|
||||
&self.input,
|
||||
cursor,
|
||||
&self.schema_cache,
|
||||
self.effective_mode().as_mode(),
|
||||
) else {
|
||||
let Some(comp) = self.completion_for_feedback() else {
|
||||
return;
|
||||
};
|
||||
if comp.candidates.len() == 1 {
|
||||
@@ -1430,6 +1459,22 @@ impl App {
|
||||
}
|
||||
}
|
||||
|
||||
/// Completion at the cursor, computed against the `:`-stripped
|
||||
/// feedback view (ADR-0003 one-shot) with its `replaced_range`
|
||||
/// mapped back to real-buffer coordinates so `commit_*` edit the
|
||||
/// right span. Identity for non-one-shot input (offset 0).
|
||||
fn completion_for_feedback(&self) -> Option<crate::completion::Completion> {
|
||||
let (view, view_cursor, offset) = self.feedback_view();
|
||||
let mut comp = crate::completion::candidates_at_cursor_in_mode(
|
||||
view,
|
||||
view_cursor.min(view.len()),
|
||||
&self.schema_cache,
|
||||
self.effective_mode().as_mode(),
|
||||
)?;
|
||||
comp.replaced_range = (comp.replaced_range.0 + offset, comp.replaced_range.1 + offset);
|
||||
Some(comp)
|
||||
}
|
||||
|
||||
/// Single-candidate commit: insert "<text> " (with trailing
|
||||
/// space) and DO NOT create a memo. The user can keep
|
||||
/// typing or press Tab again to fresh-complete at the new
|
||||
@@ -2072,6 +2117,39 @@ impl App {
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a successful `seed` (ADR-0048): the ✓ echo, the seeded-row
|
||||
/// count (with a cap note when the unique-value space ran out), the
|
||||
/// capped preview table (D18), and a Hint-styled advisory naming
|
||||
/// columns filled with generic text that look like fixed value sets
|
||||
/// (D12/D13).
|
||||
fn handle_dsl_seed_success(&mut self, command: &Command, result: &crate::db::SeedResult) {
|
||||
self.note_ok_summary(command);
|
||||
let mut summary = crate::t!(
|
||||
"ok.rows_seeded",
|
||||
count = result.produced,
|
||||
table = result.table
|
||||
);
|
||||
if result.produced < result.requested {
|
||||
summary.push(' ');
|
||||
summary.push_str(&crate::t!("seed.capped", requested = result.requested));
|
||||
}
|
||||
self.note_system(summary);
|
||||
for line in crate::output_render::render_data_table(&result.data) {
|
||||
self.note_system(line);
|
||||
}
|
||||
if !result.advisory_columns.is_empty() {
|
||||
// `column` (the first advised column) seeds the concrete
|
||||
// repair examples (D13 Phase 2/3 wording); `columns` lists
|
||||
// them all.
|
||||
self.push_category_three_prose(crate::t!(
|
||||
"seed.advisory_generic",
|
||||
columns = result.advisory_columns.join(", "),
|
||||
column = result.advisory_columns[0],
|
||||
table = result.table
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_dsl_update_success(&mut self, command: &Command, result: &UpdateResult) {
|
||||
self.note_ok_summary(command);
|
||||
self.note_system(crate::t!("ok.rows_updated", count = result.rows_affected));
|
||||
@@ -2390,6 +2468,9 @@ impl App {
|
||||
// the executor), like the named DSL drop.
|
||||
C::SqlDropIndex { .. } => (Operation::DropIndex, None, None),
|
||||
C::Insert { table, .. } => (Operation::Insert, Some(table.as_str()), None),
|
||||
// Seed generates inserts; FK/constraint failures read as
|
||||
// insert errors (ADR-0048).
|
||||
C::Seed { table, .. } => (Operation::Insert, Some(table.as_str()), None),
|
||||
C::Update { table, .. } => (Operation::Update, Some(table.as_str()), None),
|
||||
C::Delete { table, .. } => (Operation::Delete, Some(table.as_str()), None),
|
||||
C::ShowData { name, .. } | C::ShowTable { name } => {
|
||||
@@ -4936,6 +5017,86 @@ mod tests {
|
||||
assert_eq!(app.effective_mode(), EffectiveMode::AdvancedPersistent);
|
||||
}
|
||||
|
||||
/// Build a two-table cache (`Orders(id, customer_id)` +
|
||||
/// `Customers(id, name)`) for the `:` one-shot SQL-feedback tests.
|
||||
fn install_join_schema(app: &mut App) {
|
||||
use crate::completion::TableColumn;
|
||||
use crate::dsl::types::Type;
|
||||
app.schema_cache.tables = vec!["Orders".into(), "Customers".into()];
|
||||
app.schema_cache.table_columns.insert(
|
||||
"Orders".into(),
|
||||
vec![TableColumn::new("id", Type::Serial), TableColumn::new("customer_id", Type::Int)],
|
||||
);
|
||||
app.schema_cache.table_columns.insert(
|
||||
"Customers".into(),
|
||||
vec![TableColumn::new("id", Type::Serial), TableColumn::new("name", Type::Text)],
|
||||
);
|
||||
for t in app.schema_cache.tables.clone() {
|
||||
for c in &app.schema_cache.table_columns[&t] {
|
||||
app.schema_cache.columns.push(c.name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn colon_one_shot_gives_sql_completion_the_stripped_view() {
    // Regression (found in manual testing): under the `:` one-shot
    // escape (ADR-0003) the leading `:` stayed in the buffer handed to
    // the live SQL feedback, the walker bailed on it, and Tab completed
    // nothing — although the very same line completed in full
    // `mode advanced`. With the `:` stripped from the feedback view the
    // two must now agree.
    let sql = "select c.name from Orders o join Customers c on c.id=o.cu";

    // Baseline — persistent advanced mode turns `o.cu` into
    // `o.customer_id`.
    let mut advanced = App::new();
    advanced.mode = Mode::Advanced;
    install_join_schema(&mut advanced);
    type_str(&mut advanced, sql);
    advanced.update(key(KeyCode::Tab));
    assert!(
        advanced.input.ends_with("o.customer_id "),
        "full advanced should complete: {:?}",
        advanced.input
    );

    // Same line behind a `:` typed in simple mode: identical
    // completion, and the `:` prefix survives in the buffer.
    let mut one_shot = App::new();
    one_shot.mode = Mode::Simple;
    install_join_schema(&mut one_shot);
    one_shot.update(key(KeyCode::Char(':')));
    type_str(&mut one_shot, sql);
    assert_eq!(one_shot.effective_mode(), EffectiveMode::AdvancedOneShot);
    one_shot.update(key(KeyCode::Tab));

    let buffer = &one_shot.input;
    assert!(
        buffer.trim_start().starts_with(':'),
        "the `:` prefix is kept: {:?}",
        buffer
    );
    assert!(
        buffer.ends_with("o.customer_id "),
        "`:` one-shot must complete the SQL column too: {:?}",
        buffer
    );
}
|
||||
|
||||
#[test]
fn colon_one_shot_validity_is_clean_for_a_valid_query() {
    // The `[ERR]` indicator must stay dark for a *valid* `:`-prefixed
    // query. Before the prefix was stripped, the walker tripped over
    // the `:` and reported Error every time.
    let mut app = App::new();
    install_join_schema(&mut app);
    app.update(key(KeyCode::Char(':')));
    type_str(&mut app, "select name from Customers");

    let verdict = app.input_validity_verdict();
    assert_eq!(
        verdict, None,
        "a valid one-shot query should verdict clean, got {:?}",
        verdict,
    );
}
|
||||
|
||||
#[test]
|
||||
fn effective_mode_flips_to_one_shot_when_colon_typed_in_simple_mode() {
|
||||
let mut app = App::new();
|
||||
@@ -6223,6 +6384,80 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn seed_success_renders_count_preview_and_advisory() {
    // ADR-0048: a `DslSeedSucceeded` event must surface the seeded-row
    // count, the preview table, and the enum/CHECK advisory.
    let command = Command::Seed {
        table: String::from("users"),
        target_column: None,
        count: Some(20),
        overrides: vec![],
        rng_seed: None,
    };
    let preview = crate::db::DataResult {
        table_name: String::from("users"),
        columns: vec![String::from("name")],
        column_types: vec![None],
        rows: vec![vec![Some(String::from("Alice"))]],
    };
    let result = crate::db::SeedResult {
        table: String::from("users"),
        requested: 20,
        produced: 20,
        data: preview,
        advisory_columns: vec![String::from("status")],
    };

    let mut app = App::new();
    app.output
        .push_back(OutputLine::echo("seed users 20", crate::mode::Mode::Simple));
    app.update(AppEvent::DslSeedSucceeded { command, result });

    let texts: Vec<String> = app.output.iter().map(|line| line.text.clone()).collect();
    let has_count = texts
        .iter()
        .any(|text| text.contains("20 row(s) seeded into users"));
    assert!(has_count, "seeded-row count surfaced: {texts:?}");
    let has_advisory = texts
        .iter()
        .any(|text| text.contains("status") && text.contains("generic text"));
    assert!(has_advisory, "the advisory names the enum-ish column: {texts:?}");
}
|
||||
|
||||
#[test]
fn seed_success_reports_a_cap() {
    // When fewer rows were produced than requested, the cap note must
    // sit on the same line as the count.
    let command = Command::Seed {
        table: String::from("J"),
        target_column: None,
        count: Some(10),
        overrides: vec![],
        rng_seed: None,
    };
    let result = crate::db::SeedResult {
        table: String::from("J"),
        requested: 10,
        produced: 4,
        data: crate::db::DataResult {
            table_name: String::from("J"),
            columns: vec![],
            column_types: vec![],
            rows: vec![],
        },
        advisory_columns: vec![],
    };

    let mut app = App::new();
    app.output
        .push_back(OutputLine::echo("seed J 10", crate::mode::Mode::Simple));
    app.update(AppEvent::DslSeedSucceeded { command, result });

    let texts: Vec<String> = app.output.iter().map(|line| line.text.clone()).collect();
    let capped_line_present = texts.iter().any(|text| {
        text.contains("4 row(s) seeded into J") && text.contains("of 10 requested")
    });
    assert!(
        capped_line_present,
        "the cap note surfaces requested vs produced: {texts:?}",
    );
}
|
||||
|
||||
#[test]
|
||||
fn sql_delete_returning_renders_cascade_and_result_table() {
|
||||
// ADR-0033 3g: a DELETE … RETURNING surfaces BOTH the cascade
|
||||
|
||||
+229
-18
@@ -120,7 +120,13 @@ impl SchemaCache {
|
||||
IdentSource::Columns => &self.columns,
|
||||
IdentSource::Relationships => &self.relationships,
|
||||
IdentSource::Indexes => &self.indexes,
|
||||
IdentSource::NewName | IdentSource::Types | IdentSource::Free => &[],
|
||||
// Curated / invented sources never come from the schema
|
||||
// cache — `Generators` candidates are supplied separately
|
||||
// from the `seed` vocabulary (ADR-0048 D9).
|
||||
IdentSource::NewName
|
||||
| IdentSource::Types
|
||||
| IdentSource::Generators
|
||||
| IdentSource::Free => &[],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -327,6 +333,37 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Flag-aware extension. The plain walk above stops at `-`, so a
|
||||
// flag the user is mid-typing (`-`, `--`, `--all`, `--create-fk`)
|
||||
// leaves an *empty* partial sitting just after the dash(es) — which
|
||||
// made the engine offer every keyword (a `-` prefix-matches nothing,
|
||||
// so the empty-prefix path let `on` through) and, worse, replace an
|
||||
// empty range so accepting produced `-on` / `---create-fk`. When a
|
||||
// dash-prefixed token sits at a word boundary AND a flag is actually
|
||||
// expected here, treat the whole dash-run-plus-body as the partial so
|
||||
// it is matched and replaced wholesale. The "flag is expected" gate
|
||||
// (one cheap probe on the pre-dash prefix) keeps a signed number /
|
||||
// minus (`where x = -5`) from being mis-read as a flag.
|
||||
{
|
||||
let mut run = cursor;
|
||||
while run > 0 {
|
||||
let p = bytes[run - 1];
|
||||
if p.is_ascii_alphanumeric() || p == b'_' || p == b'-' {
|
||||
run -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
let word_boundary = run == 0 || bytes[run - 1].is_ascii_whitespace();
|
||||
if run < cursor && bytes[run] == b'-' && word_boundary && run < start {
|
||||
let pre = crate::dsl::walker::completion_probe_in_mode(&input[..run], cache, mode);
|
||||
if pre.expected.iter().any(|e| matches!(e, Expectation::Flag(_))) {
|
||||
start = run;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let partial_prefix = input[start..cursor].to_string();
|
||||
let leading = &input[..start];
|
||||
|
||||
@@ -623,29 +660,19 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
// Source 1.55: flag candidates (`--name`). Surfaced as a
|
||||
// distinct CandidateKind so the hint panel can colour them
|
||||
// with `tok_flag` (matching how they'll appear after
|
||||
// insertion). The standard prefix matcher walks back over
|
||||
// alphanumeric + underscore, which does NOT cross `-`, so
|
||||
// when the user types `--all` the partial is `all` — match
|
||||
// the flag's body against that. Otherwise match the full
|
||||
// `--name` against the partial (which may be empty or start
|
||||
// with `--`).
|
||||
// insertion). The flag-aware partial detection above captures any
|
||||
// leading dash-run, so the partial is one of: empty, all-dashes
|
||||
// (`-` / `--`), or `[-]+body`. Stripping the leading dashes and
|
||||
// matching the remainder against the flag *body* handles all of
|
||||
// them uniformly (empty / all-dashes → match every flag).
|
||||
let flag_needle = partial_prefix.trim_start_matches('-').to_lowercase();
|
||||
let flags: Vec<String> = expected
|
||||
.iter()
|
||||
.filter_map(|e| match e {
|
||||
Expectation::Flag(name) => Some(*name),
|
||||
_ => None,
|
||||
})
|
||||
.filter(|body| {
|
||||
if partial_prefix.starts_with("--") {
|
||||
format!("--{body}")
|
||||
.to_lowercase()
|
||||
.starts_with(&lowered_prefix)
|
||||
} else if partial_prefix.is_empty() {
|
||||
true
|
||||
} else {
|
||||
body.to_lowercase().starts_with(&lowered_prefix)
|
||||
}
|
||||
})
|
||||
.filter(|body| body.to_lowercase().starts_with(&flag_needle))
|
||||
.map(|body| format!("--{body}"))
|
||||
.collect();
|
||||
|
||||
@@ -709,6 +736,22 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
// Source 1.9: fake-data generator names (ADR-0048 D9). At the
|
||||
// `seed … set <col> as ⟨here⟩` slot (`IdentSource::Generators`) the
|
||||
// curated vocabulary is offered so a learner can discover `email` /
|
||||
// `product` / … by Tab. Same `Function` kind / `tok_function` colour
|
||||
// as SQL functions (no new theme colour — ADR-0048 §Grammar).
|
||||
let has_generator_slot = expected
|
||||
.iter()
|
||||
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
|
||||
if has_generator_slot {
|
||||
functions.extend(
|
||||
crate::seed::KNOWN_GENERATORS
|
||||
.iter()
|
||||
.filter(|g| matches_prefix(g))
|
||||
.map(|g| (*g).to_string()),
|
||||
);
|
||||
}
|
||||
|
||||
// Source 2: schema identifiers — accumulated across every
|
||||
// matching schema-listable `Ident { source }` expectation.
|
||||
@@ -1200,6 +1243,45 @@ pub fn invalid_ident_at_cursor_in_mode(
|
||||
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
|
||||
return None;
|
||||
}
|
||||
// A bare ident at a SQL expression slot may be a **table alias / name**
|
||||
// the user is mid-typing as a qualifier (`ol` in `sum(ol.count)`). The
|
||||
// defining FROM clause can sit *after* the cursor — the projection
|
||||
// references it — so the leading-only walk has an empty from-scope and
|
||||
// would wrongly flag the alias as an unknown column. Recover the scope
|
||||
// from the FULL input (mirrors the §10.6 edit-an-existing-query
|
||||
// lookahead the candidate engine uses for column narrowing) and bail
|
||||
// when the partial prefix-matches a binding's alias or table name.
|
||||
if has_sql_expr_slot {
|
||||
let full = crate::dsl::walker::completion_probe_in_mode(input, cache, mode);
|
||||
let lowered = partial.to_lowercase();
|
||||
let matches_qualifier = full.from_scope.iter().any(|b| {
|
||||
b.alias
|
||||
.as_deref()
|
||||
.is_some_and(|a| a.to_lowercase().starts_with(&lowered))
|
||||
|| b.table.to_lowercase().starts_with(&lowered)
|
||||
});
|
||||
if matches_qualifier {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
// ADR-0048 D9: the `seed … set <col> as <gen>` slot is a curated
|
||||
// vocabulary (`IdentSource::Generators`), not a schema source, so the
|
||||
// schema-column check below would never see it. A partial that
|
||||
// prefix-matches a known generator is an in-progress name; anything
|
||||
// else is an unknown generator → flag it `[ERR]` while typing.
|
||||
let has_generator_slot = expected
|
||||
.iter()
|
||||
.any(|e| matches!(e, Expectation::Ident { source: IdentSource::Generators, .. }));
|
||||
if has_generator_slot {
|
||||
if crate::seed::is_known_generator_prefix(partial) {
|
||||
return None;
|
||||
}
|
||||
return Some(InvalidIdent {
|
||||
range: (start, cursor),
|
||||
found: partial.to_string(),
|
||||
source: IdentSource::Generators,
|
||||
});
|
||||
}
|
||||
// Find every schema-listable source in the expected list.
|
||||
let sources: Vec<IdentSource> = expected
|
||||
.iter()
|
||||
@@ -1488,6 +1570,71 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn single_dash_offers_flags_not_keywords_and_replaces_the_dash() {
    // Regression (manual testing): with a single trailing dash,
    // `add 1:n relationship … -` offered the `on` keyword alongside
    // `--create-fk`, and because the lone `-` sat outside the replaced
    // range, accepting yielded `-on` / `---create-fk`. A dash at a flag
    // position is a flag being typed: only flags are offered, and the
    // dash itself is replaced on accept.
    let input = "add 1:n relationship from X.a to Y.b -";
    let end = input.len();
    let c = candidates_at_cursor(input, end, &SchemaCache::default())
        .expect("a `-` at a flag position offers candidates");

    let texts: Vec<&str> = c.candidates.iter().map(|cand| cand.text.as_str()).collect();
    assert!(texts.contains(&"--create-fk"), "should offer --create-fk: {texts:?}");
    assert!(!texts.contains(&"on"), "must NOT offer `on` after a dash: {texts:?}");
    assert_eq!(
        c.replaced_range,
        (end - 1, end),
        "the `-` must be inside the replaced range so accept yields `--create-fk`",
    );
}
|
||||
|
||||
#[test]
fn double_dash_replaces_both_dashes_on_accept() {
    // A bare `--` is a flag-in-progress: the flag is offered and both
    // dashes fall inside the span that accepting replaces.
    let input = "delete from T --";
    let end = input.len();
    let cache = SchemaCache::default();
    let c = candidates_at_cursor_in_mode(input, end, &cache, Mode::Simple)
        .expect("`--` offers the flag");

    let offers_all_rows = c.candidates.iter().any(|cand| cand.text == "--all-rows");
    assert!(offers_all_rows);
    assert_eq!(
        c.replaced_range,
        (end - 2, end),
        "both dashes are replaced so accept yields `--all-rows`, not `----all-rows`",
    );
}
|
||||
|
||||
#[test]
fn dash_at_a_value_position_is_not_treated_as_a_flag() {
    // In `show data T where x = -5` the `-` is a numeric sign. No flag
    // is expected at that position, so the dash must stay out of the
    // partial: the partial remains `5` (the original value-operand
    // behaviour) and no `--…` candidate is offered.
    let mut cache = SchemaCache::default();
    cache.tables.push("T".into());
    cache.columns.push("x".into());

    let input = "show data T where x = -5";
    let end = input.len();
    // No completion at all is also acceptable here; the assertions
    // only apply when one is returned.
    let Some(c) = candidates_at_cursor_in_mode(input, end, &cache, Mode::Simple) else {
        return;
    };

    let offers_flag = c.candidates.iter().any(|cand| cand.text.starts_with("--"));
    assert!(
        !offers_flag,
        "no flags at a value position: {:?}",
        c.candidates,
    );
    assert_eq!(
        c.replaced_range,
        (end - 1, end),
        "only the `5` is the partial; the `-` (sign) is not captured",
    );
}
|
||||
|
||||
#[test]
|
||||
fn typed_dashes_offer_the_optional_cascade_flag_on_drop_column() {
|
||||
// The same optional-flag class: `drop column … [--cascade]`.
|
||||
@@ -2606,6 +2753,70 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn invalid_ident_does_not_flag_a_table_alias_used_before_its_from_clause() {
    // Regression (manual testing): in
    // `select … sum(ol.count*…) … from … OrderLines ol …` the projection
    // uses alias `ol`, but the FROM clause that binds it comes *after*
    // the cursor. Walking only the leading text gave an empty
    // from-scope, so `ol` was reported as an unknown column — a red
    // "ERR" overlay on a valid query. The full-input lookahead
    // (ADR-0032 §10.6) must recover the scope so `ol` passes.
    use crate::dsl::types::Type;

    let mut cache = SchemaCache::default();
    cache.tables.push("OrderLines".into());
    cache.columns.push("count".into());
    cache
        .table_columns
        .insert("OrderLines".into(), vec![TableColumn::new("count", Type::Int)]);

    let input = "select sum(ol.count) from OrderLines ol";
    // Place the cursor immediately after the `ol` qualifier.
    let cursor = input.find("ol.count").unwrap() + 2;
    let flagged = invalid_ident_at_cursor_in_mode(input, cursor, &cache, Mode::Advanced);
    assert!(
        flagged.is_none(),
        "a table alias used before its FROM clause must not be flagged as a bad column",
    );
}
|
||||
|
||||
#[test]
fn invalid_ident_fires_for_unknown_generator_after_as() {
    // ADR-0048 D9: a name that matches no generator at the
    // `set <col> as <gen>` slot gets the `[ERR]` flag while typing.
    let schema = two_table_schema();
    let input = "seed a set name as bogus";
    let end = input.len();

    let flagged =
        invalid_ident_at_cursor(input, end, &schema).expect("unknown generator must flag");
    assert_eq!(flagged.found, "bogus");
    assert_eq!(flagged.source, IdentSource::Generators);
}
|
||||
|
||||
#[test]
fn invalid_ident_fires_for_unknown_column_in_seed_set_and_column_fill() {
    // ADR-0048: both the `set <col>` slot and the `<table>.<col>`
    // column-fill slot are `IdentSource::Columns`, so an unknown name
    // in either is flagged like any other column slot.
    let schema = two_table_schema(); // table `a`; columns id, name

    let set_input = "seed a set xyz";
    let in_set = invalid_ident_at_cursor(set_input, set_input.len(), &schema)
        .expect("unknown column in `set` must flag");
    assert_eq!(in_set.found, "xyz");
    assert_eq!(in_set.source, IdentSource::Columns);

    let fill_input = "seed a.xyz";
    let in_fill = invalid_ident_at_cursor(fill_input, fill_input.len(), &schema)
        .expect("unknown column in column-fill must flag");
    assert_eq!(in_fill.source, IdentSource::Columns);
}
|
||||
|
||||
#[test]
fn invalid_ident_does_not_fire_for_generator_prefix() {
    // Typing the start of a known generator is work in progress, not a
    // typo; the complete name must pass as well.
    let schema = two_table_schema();

    let partial = "seed a set name as ema";
    let partial_flag = invalid_ident_at_cursor(partial, partial.len(), &schema);
    assert!(
        partial_flag.is_none(),
        "`ema` prefixes `email` — must not flag",
    );

    let complete = "seed a set name as email";
    let complete_flag = invalid_ident_at_cursor(complete, complete.len(), &schema);
    assert!(
        complete_flag.is_none(),
        "`email` is a known generator — must not flag",
    );
}
|
||||
|
||||
fn two_table_schema() -> SchemaCache {
|
||||
use crate::dsl::types::Type;
|
||||
let mut s = SchemaCache::default();
|
||||
|
||||
@@ -402,6 +402,25 @@ pub enum Command {
|
||||
filter: Option<Expr>,
|
||||
limit: Option<u64>,
|
||||
},
|
||||
/// Populate a table with generated fake data (ADR-0048, SD1/SD2).
|
||||
/// `count` defaults to 20 when omitted; `rng_seed` (from the
|
||||
/// `--seed <n>` flag) makes generation reproducible.
|
||||
///
|
||||
/// Phase 2 surfaces (ADR-0048 D1/D2):
|
||||
/// - `target_column` is `Some` for the **column-fill** form
|
||||
/// `seed <table>.<column>` — fill one column across the table's
|
||||
/// *existing* rows (an UPDATE), rather than generating new rows.
|
||||
/// - `overrides` carries the `set <col> …` clause: per-column pins
|
||||
/// that take precedence over the heuristic generator (D2).
|
||||
Seed {
|
||||
table: String,
|
||||
/// `Some(col)` → column-fill mode (UPDATE existing rows);
|
||||
/// `None` → whole-row generation (INSERT new rows).
|
||||
target_column: Option<String>,
|
||||
count: Option<u64>,
|
||||
overrides: Vec<SeedOverride>,
|
||||
rng_seed: Option<u64>,
|
||||
},
|
||||
/// Replay a sequence of DSL commands from a file. Each line
|
||||
/// is parsed and dispatched through the same pipeline as
|
||||
/// interactive input. Blank lines and lines whose first
|
||||
@@ -637,6 +656,38 @@ impl RowFilter {
|
||||
}
|
||||
}
|
||||
|
||||
/// One `set <col> …` override on a `seed` command (ADR-0048 D2, Phase 2).
|
||||
///
|
||||
/// The user can pin a column's generated values to a constant, a
|
||||
/// pick-list, an explicit named generator, or a range — overriding the
|
||||
/// per-column heuristic the executor would otherwise pick. `column` is
|
||||
/// the user-typed column name (validated against the table at execution,
|
||||
/// like every other column slot).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct SeedOverride {
|
||||
/// Column name exactly as the user typed it after `set`.
pub column: String,
|
||||
/// Which override form applies to `column`, with its payload.
pub kind: SeedOverrideKind,
|
||||
}
|
||||
|
||||
/// The four `set` override forms (ADR-0048 D2).
|
||||
///
|
||||
/// Values arrive as the DSL's `Value` (quoted text / unquoted number —
|
||||
/// dates are quoted text per the D2 amendment); the `Generator` name is
|
||||
/// a raw string validated at execution because `src/dsl` cannot depend
|
||||
/// on `src/seed` (the curated vocabulary lives there).
///
/// Each variant corresponds to the token that follows `set <col>`:
/// `=` → `Fixed`, `in` → `PickList`, `as` → `Generator`,
/// `between` → `Range`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum SeedOverrideKind {
|
||||
/// `set status = 'pending'` — every row gets the constant.
|
||||
Fixed(Value),
|
||||
/// `set role in ('admin', 'editor')` — uniform pick from the list.
|
||||
PickList(Vec<Value>),
|
||||
/// `set work_addr as email` — force the named generator (D9).
|
||||
Generator(String),
|
||||
/// `set price between 10 and 100` — uniform in `[low, high]`;
|
||||
/// numeric or (quoted) date bounds per the destination column type.
|
||||
Range { low: Value, high: Value },
|
||||
}
|
||||
|
||||
/// A complex WHERE expression (ADR-0026 §4).
|
||||
///
|
||||
/// Built by `grammar::expr::build_expr` from the flat
|
||||
@@ -949,6 +1000,7 @@ impl Command {
|
||||
} => "show index",
|
||||
Self::ShowList { kind, .. } => kind.command_name(),
|
||||
Self::Insert { .. } => "insert into",
|
||||
Self::Seed { .. } => "seed",
|
||||
Self::Update { .. } => "update",
|
||||
Self::Delete { .. } => "delete from",
|
||||
Self::ShowData { .. } => "show data",
|
||||
@@ -997,6 +1049,7 @@ impl Command {
|
||||
| Self::AddConstraint { table, .. }
|
||||
| Self::DropConstraint { table, .. }
|
||||
| Self::Insert { table, .. }
|
||||
| Self::Seed { table, .. }
|
||||
| Self::Update { table, .. }
|
||||
| Self::Delete { table, .. } => table,
|
||||
// For relationships we focus on the parent (1-side):
|
||||
|
||||
+346
-1
@@ -24,7 +24,9 @@
|
||||
//! later swap that capture for the same typed slots used here, adding
|
||||
//! live hints/highlighting.
|
||||
|
||||
use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
|
||||
use crate::dsl::command::{
|
||||
Command, Expr, RowFilter, SeedOverride, SeedOverrideKind, ShowListKind,
|
||||
};
|
||||
use crate::dsl::grammar::{
|
||||
CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
|
||||
shared::{
|
||||
@@ -425,6 +427,152 @@ const LIMIT_CLAUSE_NODES: &[Node] = &[
|
||||
];
|
||||
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);
|
||||
|
||||
// =================================================================
|
||||
// seed — `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
// (ADR-0048, SD1 whole-row + SD2 Phase 2 set-clause /
|
||||
// column-fill)
|
||||
// =================================================================
|
||||
|
||||
/// Optional positional row count. Reuses `LIMIT_VALIDATOR` (a
|
||||
/// non-negative integer).
|
||||
const SEED_COUNT: Node = Node::NumberLit {
|
||||
validator: Some(LIMIT_VALIDATOR),
|
||||
};
|
||||
/// `--seed <n>` — a reproducible-generation flag carrying a numeric
|
||||
/// seed (ADR-0048 D4). The only flag in the DSL that takes a value;
|
||||
/// `build_seed` reads the number immediately after the flag.
|
||||
const SEED_FLAG_NODES: &[Node] = &[
|
||||
// The flag token itself.
Node::Flag("seed"),
|
||||
// The flag's numeric value, checked by the same validator as the
// positional row count.
Node::NumberLit {
|
||||
validator: Some(LIMIT_VALIDATOR),
|
||||
},
|
||||
];
|
||||
/// `SEED_FLAG_NODES` wrapped as one `Seq` node: the flag followed by
/// its number.
const SEED_FLAG: Node = Node::Seq(SEED_FLAG_NODES);
|
||||
|
||||
// --- column-fill target: the optional `.<column>` (ADR-0048 D1
|
||||
// form 2) ----------------------------------------------------
|
||||
//
|
||||
// `seed users.email …` fills one column across existing rows. The
|
||||
// table ident stops at `.` (idents are alnum/underscore), so an
|
||||
// `Optional(Seq['.', column])` after the table cleanly discriminates:
|
||||
// when the next token is not `.`, the `Punct('.')` first-child
|
||||
// NoMatches and `walk_optional` skips it; once `.` commits, a missing
|
||||
// column propagates as the user mid-typing `seed users.` (driver
|
||||
// `walk_optional` semantics). The column resolves against
|
||||
// `current_table_columns` (populated by `TABLE_NAME_WRITES`).
|
||||
const SEED_TARGET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_target_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
const SEED_DOT_COLUMN_NODES: &[Node] = &[Node::Punct('.'), SEED_TARGET_COLUMN];
|
||||
const SEED_DOT_COLUMN: Node = Node::Optional(&Node::Seq(SEED_DOT_COLUMN_NODES));
|
||||
|
||||
// --- the `set <col> <override>[, …]` clause (ADR-0048 D2) --------
|
||||
//
|
||||
// Each override pins one column's generation. The column slot
|
||||
// `writes_column` so the typed value slots (`PER_COLUMN_VALUE`, the
|
||||
// same `current_column_value` dispatch `update … set` uses) narrow to
|
||||
// the column's type — so list/range/fixed values get the column's
|
||||
// typed slot (quoted text, unquoted number, quoted date) and a
|
||||
// type-mismatched literal is flagged. The four tails each start with a
|
||||
// distinct token (`=` / `in` / `between` / `as`), so the `Choice`
|
||||
// discriminates cleanly (no Optional-first branch).
|
||||
|
||||
/// The `set <col>` column slot. Distinct role from `update`'s
|
||||
/// `update_set_column` and the expression `expr_column`.
|
||||
const SEED_SET_COLUMN: Node = Node::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "seed_set_column",
|
||||
validator: None,
|
||||
highlight_override: None,
|
||||
writes_table: false,
|
||||
writes_column: true,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `as <generator>` — the curated generator-name vocabulary (D9),
|
||||
/// highlighted in the `tok_function` colour. The slot is structural
|
||||
/// (any identifier matches); the name is validated at execution and
|
||||
/// flagged live by the validity indicator.
|
||||
const SEED_GENERATOR: Node = Node::Ident {
|
||||
source: IdentSource::Generators,
|
||||
role: "seed_generator",
|
||||
validator: None,
|
||||
highlight_override: Some(crate::dsl::grammar::HighlightClass::Function),
|
||||
writes_table: false,
|
||||
writes_column: false,
|
||||
writes_user_listed_column: false,
|
||||
writes_table_alias: false,
|
||||
writes_cte_name: false,
|
||||
writes_projection_alias: false,
|
||||
};
|
||||
|
||||
/// `= <value>` — a fixed constant for every row.
|
||||
const SEED_OV_FIXED_NODES: &[Node] = &[Node::Punct('='), PER_COLUMN_VALUE];
|
||||
/// `in ( <value> [, <value>]* )` — uniform pick from the list.
|
||||
const SEED_OV_IN_VALUES: Node = Node::Repeated {
|
||||
inner: &PER_COLUMN_VALUE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_OV_IN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("in")),
|
||||
Node::Punct('('),
|
||||
SEED_OV_IN_VALUES,
|
||||
Node::Punct(')'),
|
||||
];
|
||||
/// `between <value> and <value>` — uniform in the (typed) range.
|
||||
const SEED_OV_BETWEEN_NODES: &[Node] = &[
|
||||
Node::Word(Word::keyword("between")),
|
||||
PER_COLUMN_VALUE,
|
||||
Node::Word(Word::keyword("and")),
|
||||
PER_COLUMN_VALUE,
|
||||
];
|
||||
/// `as <generator>` — force a named generator.
|
||||
const SEED_OV_AS_NODES: &[Node] = &[Node::Word(Word::keyword("as")), SEED_GENERATOR];
|
||||
|
||||
const SEED_OV_TAIL_CHOICES: &[Node] = &[
|
||||
Node::Seq(SEED_OV_FIXED_NODES),
|
||||
Node::Seq(SEED_OV_IN_NODES),
|
||||
Node::Seq(SEED_OV_BETWEEN_NODES),
|
||||
Node::Seq(SEED_OV_AS_NODES),
|
||||
];
|
||||
const SEED_OV_TAIL: Node = Node::Choice(SEED_OV_TAIL_CHOICES);
|
||||
|
||||
const SEED_OVERRIDE_NODES: &[Node] = &[SEED_SET_COLUMN, SEED_OV_TAIL];
|
||||
const SEED_OVERRIDE: Node = Node::Seq(SEED_OVERRIDE_NODES);
|
||||
const SEED_OVERRIDES: Node = Node::Repeated {
|
||||
inner: &SEED_OVERRIDE,
|
||||
separator: Some(&Node::Punct(',')),
|
||||
min: 1,
|
||||
};
|
||||
const SEED_SET_CLAUSE_NODES: &[Node] =
|
||||
&[Node::Word(Word::keyword("set")), SEED_OVERRIDES];
|
||||
const SEED_SET_CLAUSE: Node = Node::Seq(SEED_SET_CLAUSE_NODES);
|
||||
|
||||
const SEED_NODES: &[Node] = &[
|
||||
// `writes_table` so the `.column` target, the `set <col>=…`
|
||||
// clause's column slots, and the typed value slots all resolve
|
||||
// against this table.
|
||||
TABLE_NAME_WRITES,
|
||||
SEED_DOT_COLUMN,
|
||||
Node::Optional(&SEED_COUNT),
|
||||
Node::Optional(&SEED_SET_CLAUSE),
|
||||
Node::Optional(&SEED_FLAG),
|
||||
];
|
||||
const SEED_SHAPE: Node = Node::Seq(SEED_NODES);
|
||||
|
||||
const UPDATE_NODES: &[Node] = &[
|
||||
TABLE_NAME_WRITES,
|
||||
Node::Word(Word::keyword("set")),
|
||||
@@ -708,6 +856,195 @@ fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError>
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a `seed <T>[.<col>] [<count>] [set <overrides>] [--seed <n>]`
|
||||
/// command (ADR-0048, SD1 + SD2 Phase 2).
|
||||
///
|
||||
/// - `target_column` (column-fill, D1 form 2) is the `seed_target_column`
|
||||
/// ident, present only for the `seed <T>.<col>` form.
|
||||
/// - The positional `count` is the `NumberLit` that precedes both the
|
||||
/// `set` keyword and the `--seed` flag — bounding it that way keeps a
|
||||
/// `set age between 18 and 80` value (also a `NumberLit`) from being
|
||||
/// mistaken for the count.
|
||||
/// - `--seed <n>` is the `NumberLit` right after the flag (D4).
|
||||
/// - `overrides` (D2) is folded from the flat `set`-clause terminals.
|
||||
fn build_seed(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||||
let table = require_ident(path, "table_name")?;
|
||||
let target_column = ident_text(path, "seed_target_column").map(str::to_string);
|
||||
|
||||
let flag_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Flag("seed")));
|
||||
let set_idx = path
|
||||
.items
|
||||
.iter()
|
||||
.position(|i| matches!(&i.kind, MatchedKind::Word("set")));
|
||||
|
||||
let rng_seed = flag_idx
|
||||
.and_then(|fi| path.items.get(fi + 1))
|
||||
.filter(|i| matches!(i.kind, MatchedKind::NumberLit))
|
||||
.map(|i| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
// The count is bounded to before the `set` clause and the flag, so a
|
||||
// numeric value inside `set` (e.g. `between 18 and 80`) is never read
|
||||
// as the count.
|
||||
let count_boundary = [set_idx, flag_idx]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.min()
|
||||
.unwrap_or(path.items.len());
|
||||
let count = path
|
||||
.items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.find(|(idx, i)| matches!(i.kind, MatchedKind::NumberLit) && *idx < count_boundary)
|
||||
.map(|(_, i)| parse_seed_u64(&i.text))
|
||||
.transpose()?;
|
||||
|
||||
let overrides = build_seed_overrides(path, set_idx, flag_idx)?;
|
||||
|
||||
Ok(Command::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
})
|
||||
}
|
||||
|
||||
/// Fold the flat `set`-clause terminals into [`SeedOverride`]s
|
||||
/// (ADR-0048 D2). The clause region runs from just after `Word("set")`
|
||||
/// to the `--seed` flag (or the path end). Each override begins at a
|
||||
/// `seed_set_column` ident; the token right after it selects the form
|
||||
/// (`=` / `in` / `between` / `as`). Top-level comma separators between
|
||||
/// overrides are skipped (the `in (...)` form consumes its own inner
|
||||
/// commas up to `)`).
|
||||
fn build_seed_overrides(
|
||||
path: &MatchedPath,
|
||||
set_idx: Option<usize>,
|
||||
flag_idx: Option<usize>,
|
||||
) -> Result<Vec<SeedOverride>, ValidationError> {
|
||||
let Some(set_idx) = set_idx else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
let end = flag_idx.unwrap_or(path.items.len());
|
||||
let region = &path.items[set_idx + 1..end];
|
||||
|
||||
let mut overrides = Vec::new();
|
||||
let mut i = 0;
|
||||
while i < region.len() {
|
||||
// The next override starts at its column ident; skip the
|
||||
// top-level comma separators (and any stray token) between them.
|
||||
let MatchedKind::Ident {
|
||||
role: "seed_set_column",
|
||||
..
|
||||
} = ®ion[i].kind
|
||||
else {
|
||||
i += 1;
|
||||
continue;
|
||||
};
|
||||
let column = region[i].text.clone();
|
||||
i += 1;
|
||||
let kind = parse_seed_override_tail(region, &mut i, &column)?;
|
||||
overrides.push(SeedOverride { column, kind });
|
||||
}
|
||||
Ok(overrides)
|
||||
}
|
||||
|
||||
/// Parse one override tail starting at `region[*i]` (just past the
|
||||
/// column ident), advancing `*i` past the consumed tokens.
|
||||
fn parse_seed_override_tail(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<SeedOverrideKind, ValidationError> {
|
||||
let head = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
match &head.kind {
|
||||
MatchedKind::Punct('=') => {
|
||||
*i += 1;
|
||||
let value = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Fixed(value))
|
||||
}
|
||||
MatchedKind::Word("in") => {
|
||||
*i += 1; // `in`
|
||||
// `(`
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Punct('('))) {
|
||||
*i += 1;
|
||||
}
|
||||
let mut values = Vec::new();
|
||||
while let Some(item) = region.get(*i) {
|
||||
match &item.kind {
|
||||
MatchedKind::Punct(')') => {
|
||||
*i += 1;
|
||||
break;
|
||||
}
|
||||
MatchedKind::Punct(',') => {
|
||||
*i += 1;
|
||||
}
|
||||
_ => values.push(seed_take_value(region, i, column)?),
|
||||
}
|
||||
}
|
||||
Ok(SeedOverrideKind::PickList(values))
|
||||
}
|
||||
MatchedKind::Word("between") => {
|
||||
*i += 1; // `between`
|
||||
let low = seed_take_value(region, i, column)?;
|
||||
if matches!(region.get(*i).map(|t| &t.kind), Some(MatchedKind::Word("and"))) {
|
||||
*i += 1;
|
||||
}
|
||||
let high = seed_take_value(region, i, column)?;
|
||||
Ok(SeedOverrideKind::Range { low, high })
|
||||
}
|
||||
MatchedKind::Word("as") => {
|
||||
*i += 1; // `as`
|
||||
let gen_item = region
|
||||
.get(*i)
|
||||
.filter(|t| matches!(t.kind, MatchedKind::Ident { role: "seed_generator", .. }))
|
||||
.ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(SeedOverrideKind::Generator(gen_item.text.clone()))
|
||||
}
|
||||
_ => Err(seed_set_error(column)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Take one value literal at `region[*i]`, advancing past it.
|
||||
///
|
||||
/// The grammar's typed value slots only ever match value literals (a
|
||||
/// bare unquoted word fails to match the slot and is rejected *before*
|
||||
/// this fold runs — D2's quoting requirement enforced structurally), so
|
||||
/// a non-literal here can only mean a grammar/builder drift bug; the
|
||||
/// `Err` is a drift guard (mirrors `expr::build_expr`).
|
||||
fn seed_take_value(
|
||||
region: &[MatchedItem],
|
||||
i: &mut usize,
|
||||
column: &str,
|
||||
) -> Result<Value, ValidationError> {
|
||||
let item = region.get(*i).ok_or_else(|| seed_set_error(column))?;
|
||||
let value = item_to_value(item).ok_or_else(|| seed_set_error(column))?;
|
||||
*i += 1;
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// Drift-guard error for the `set`-clause fold (see `seed_take_value`).
|
||||
fn seed_set_error(column: &str) -> ValidationError {
|
||||
ValidationError {
|
||||
message_key: "parse.error_wrapper",
|
||||
args: vec![("detail", format!("malformed `set` clause for `{column}`"))],
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_seed_u64(text: &str) -> Result<u64, ValidationError> {
|
||||
text.parse::<u64>().map_err(|_| ValidationError {
|
||||
message_key: "parse.custom.bind_type_mismatch",
|
||||
args: vec![
|
||||
("found", text.to_string()),
|
||||
("expected", "non-negative integer".to_string()),
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
|
||||
let table = require_ident(path, "table_name")?;
|
||||
|
||||
@@ -1452,6 +1789,14 @@ pub static SHOW: CommandNode = CommandNode {
|
||||
"parse.usage.show_index",
|
||||
],};
|
||||
|
||||
pub static SEED: CommandNode = CommandNode {
|
||||
entry: Word::keyword("seed"),
|
||||
shape: SEED_SHAPE,
|
||||
ast_builder: build_seed,
|
||||
help_id: Some("data.seed"),
|
||||
usage_ids: &["parse.usage.seed"],
|
||||
};
|
||||
|
||||
pub static INSERT: CommandNode = CommandNode {
|
||||
entry: Word::keyword("insert"),
|
||||
shape: INSERT_SHAPE,
|
||||
|
||||
@@ -57,6 +57,12 @@ pub enum HighlightClass {
|
||||
String,
|
||||
Punct,
|
||||
Flag,
|
||||
/// A curated function-vocabulary name — the `seed … set <col> as
|
||||
/// <generator>` generator names (ADR-0048 D2/§Grammar). Rendered in
|
||||
/// the existing `tok_function` colour (ADR-0022 Amд6 blue — no new
|
||||
/// theme colour), assigned via a generator slot's
|
||||
/// `highlight_override`, not by byte shape.
|
||||
Function,
|
||||
Error,
|
||||
}
|
||||
|
||||
@@ -86,6 +92,14 @@ pub enum IdentSource {
|
||||
/// content validator on column-type slots; not user-listable
|
||||
/// from the schema.
|
||||
Types,
|
||||
/// Closed, curated set of fake-data generator names (ADR-0048
|
||||
/// D9) — the `seed … set <col> as <generator>` slot. Like
|
||||
/// `Types`, not user-listable from the schema; the vocabulary
|
||||
/// lives in `src/seed` and the completion engine offers it. The
|
||||
/// grammar slot is purely structural (matches any identifier);
|
||||
/// an unknown name is flagged live (validity) and rejected at
|
||||
/// execution.
|
||||
Generators,
|
||||
/// Any identifier shape; used by synthetic catch-all branches
|
||||
/// (e.g., the unknown-value branch of `mode <value>`).
|
||||
Free,
|
||||
@@ -117,6 +131,7 @@ impl IdentSource {
|
||||
Self::Relationships => "relationship name",
|
||||
Self::Indexes => "index name",
|
||||
Self::Types => "type",
|
||||
Self::Generators => "generator name",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,6 +149,7 @@ impl IdentSource {
|
||||
"relationship name" => Some(Self::Relationships),
|
||||
"index name" => Some(Self::Indexes),
|
||||
"type" => Some(Self::Types),
|
||||
"generator name" => Some(Self::Generators),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -714,6 +730,7 @@ pub static REGISTRY: &[(&CommandNode, CommandCategory)] = &[
|
||||
(&ddl::CREATE, CommandCategory::Simple),
|
||||
(&ddl::CREATE_M2N, CommandCategory::Simple),
|
||||
(&data::SHOW, CommandCategory::Simple),
|
||||
(&data::SEED, CommandCategory::Simple),
|
||||
(&data::INSERT, CommandCategory::Simple),
|
||||
(&data::UPDATE, CommandCategory::Simple),
|
||||
(&data::DELETE, CommandCategory::Simple),
|
||||
|
||||
@@ -300,6 +300,7 @@ fn format_expectation(e: &crate::dsl::walker::outcome::Expectation) -> String {
|
||||
IdentSource::Relationships => "relationship name".to_string(),
|
||||
IdentSource::Indexes => "index name".to_string(),
|
||||
IdentSource::Types => "type".to_string(),
|
||||
IdentSource::Generators => "generator name".to_string(),
|
||||
IdentSource::NewName | IdentSource::Free => "identifier".to_string(),
|
||||
},
|
||||
Expectation::Punct(c) => format!("`{c}`"),
|
||||
|
||||
+10
-6
@@ -18,17 +18,21 @@ const DEFAULT_LEN: usize = 10;
|
||||
pub const MIN_LEN: usize = 10;
|
||||
pub const MAX_LEN: usize = 12;
|
||||
|
||||
/// Generate a fresh shortid using thread-local RNG.
|
||||
/// Generate a fresh shortid using the thread-local RNG.
|
||||
#[must_use]
|
||||
pub fn generate() -> String {
|
||||
generate_len(DEFAULT_LEN)
|
||||
generate_with_rng(&mut rand::rng())
|
||||
}
|
||||
|
||||
/// Generate a shortid from a caller-supplied RNG.
|
||||
///
|
||||
/// Lets `seed --seed <n>` produce **reproducible** shortid values
|
||||
/// (ADR-0048 D4) by threading its seeded RNG through, while the default
|
||||
/// [`generate`] keeps its thread-RNG behaviour for ordinary inserts.
|
||||
#[must_use]
|
||||
fn generate_len(len: usize) -> String {
|
||||
let mut rng = rand::rng();
|
||||
let mut out = String::with_capacity(len);
|
||||
for _ in 0..len {
|
||||
pub fn generate_with_rng<R: RngExt + ?Sized>(rng: &mut R) -> String {
|
||||
let mut out = String::with_capacity(DEFAULT_LEN);
|
||||
for _ in 0..DEFAULT_LEN {
|
||||
let idx = rng.random_range(0..ALPHABET.len());
|
||||
out.push(ALPHABET[idx] as char);
|
||||
}
|
||||
|
||||
@@ -240,6 +240,18 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn seed_generator_name_highlighted_as_function() {
    // ADR-0048 D9: the generator name in `set <col> as <gen>` gets the
    // `Function` highlight class through the slot's
    // `highlight_override`, and is drawn in the shared `tok_function`
    // colour.
    let runs = run("seed Members set role as email");
    let has_function_run = runs
        .iter()
        .any(|(_, _, class)| *class == HighlightClass::Function);
    assert!(
        has_function_run,
        "generator name `email` should be Function-highlighted: {runs:?}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn unknown_command_word_classified_by_byte_shape() {
|
||||
// Walker doesn't engage; fallback classifies as Identifier.
|
||||
|
||||
@@ -1236,6 +1236,10 @@ fn schema_existence_diagnostics(
|
||||
IdentSource::Relationships
|
||||
| IdentSource::Indexes
|
||||
| IdentSource::Types
|
||||
// `Generators` (the `set … as <gen>` slot, ADR-0048 D9) is a
|
||||
// curated vocabulary; its unknown-name validity is handled by
|
||||
// the completion-layer indicator, not this walker diagnostic.
|
||||
| IdentSource::Generators
|
||||
| IdentSource::Free => {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,6 +87,10 @@ pub enum AppEvent {
|
||||
command: Command,
|
||||
result: InsertResult,
|
||||
},
|
||||
DslSeedSucceeded {
|
||||
command: Command,
|
||||
result: crate::db::SeedResult,
|
||||
},
|
||||
DslUpdateSucceeded {
|
||||
command: Command,
|
||||
result: UpdateResult,
|
||||
|
||||
@@ -207,6 +207,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
||||
("help.ddl.rename", &[]),
|
||||
("help.ddl.change", &[]),
|
||||
("help.data.show", &[]),
|
||||
("help.data.seed", &[]),
|
||||
("help.data.insert", &[]),
|
||||
("help.data.update", &[]),
|
||||
("help.data.delete", &[]),
|
||||
@@ -308,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
||||
("parse.usage.undo", &[]),
|
||||
("parse.usage.save", &[]),
|
||||
("parse.usage.select", &[]),
|
||||
("parse.usage.seed", &[]),
|
||||
("parse.usage.show_data", &[]),
|
||||
("parse.usage.show_table", &[]),
|
||||
("parse.usage.show_tables", &[]),
|
||||
@@ -548,7 +550,10 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[
|
||||
("ok.index_dropped_with_column", &["index"]),
|
||||
("ok.rows_deleted", &["count"]),
|
||||
("ok.rows_inserted", &["count"]),
|
||||
("ok.rows_seeded", &["count", "table"]),
|
||||
("ok.rows_updated", &["count"]),
|
||||
("seed.capped", &["requested"]),
|
||||
("seed.advisory_generic", &["columns", "column", "table"]),
|
||||
// ---- Client-side success notes (ADR-0017 §6, ADR-0018 §9) ----
|
||||
("client_side.auto_fill_add_serial", &["count"]),
|
||||
("client_side.auto_fill_add_shortid", &["count"]),
|
||||
|
||||
@@ -333,6 +333,17 @@ help:
|
||||
show indexes — list all indexes
|
||||
show relationship <name> — show one relationship's detail
|
||||
show index <name> — show one index's detail
|
||||
seed: |-
|
||||
seed <T> [<count>] — fill a table with generated sample rows
|
||||
(default 20). Existing rows are kept;
|
||||
foreign keys draw from existing parent rows.
|
||||
seed <T> ... set <c> = 'v' | in ('a','b') | as <gen> | between x and y
|
||||
— pin how a column is generated: a fixed
|
||||
value, a pick-list, a named generator
|
||||
(email, name, product, ...), or a range.
|
||||
seed <T>.<col> [set ...] — fill one column across the EXISTING rows
|
||||
(the follow-up to `add column`).
|
||||
seed <T> ... --seed <n> — reproducible: same data for the same n.
|
||||
insert: |-
|
||||
insert into <T> [(cols)] [values] (vals) — add a row
|
||||
update: |-
|
||||
@@ -569,6 +580,7 @@ parse:
|
||||
change_column: |-
|
||||
change column [in] [table] <Table>: <Name> (<Type>)
|
||||
[--force-conversion | --dont-convert]
|
||||
seed: "seed <Table> [count] [set <col> = ... | in (...) | as <gen> | between x and y] | seed <Table>.<col>"
|
||||
show_data: "show data <Table>"
|
||||
show_table: "show table <Table>"
|
||||
show_tables: "show tables"
|
||||
@@ -978,6 +990,17 @@ db:
|
||||
# template couldn't provide. Re-introduce a key here if a non-English
|
||||
# locale lands.)
|
||||
|
||||
# Seed-command notes (ADR-0048): the cap note when the unique-value
|
||||
# space is exhausted, and the advisory that flags columns filled with
|
||||
# generic text that look like fixed value sets.
|
||||
seed:
|
||||
capped: "(of {requested} requested — ran out of distinct value combinations)"
|
||||
# ADR-0048 D13 (Phase 2/3 wording): name the generically-filled
|
||||
# enum-ish / CHECK columns and point at the concrete repairs — the
|
||||
# `set` clause on a fresh seed, or the column-fill form for the rows
|
||||
# just created.
|
||||
advisory_generic: "{columns} filled with generic text — they look like fixed value sets. Pin them next time with `set {column} in ('…', '…')`, or fix these rows with `seed {table}.{column} set {column} in ('…', '…')`."
|
||||
|
||||
ok:
|
||||
# ADR-0040: the generic `[ok] <verb> <subject>` summary line was
|
||||
# retired — a successful command's echo line now carries a ✓
|
||||
@@ -985,6 +1008,7 @@ ok:
|
||||
# per-operation row-count footers below still convey real payload
|
||||
# and are unchanged.
|
||||
rows_inserted: " {count} row(s) inserted"
|
||||
rows_seeded: " {count} row(s) seeded into {table}"
|
||||
rows_updated: " {count} row(s) updated"
|
||||
rows_deleted: " {count} row(s) deleted"
|
||||
# Shown beneath a `drop column --cascade` summary, once per
|
||||
|
||||
+98
-7
@@ -84,16 +84,60 @@ pub fn render_input_runs_in_mode(
|
||||
cache: &crate::completion::SchemaCache,
|
||||
mode: Mode,
|
||||
) -> Vec<StyledRun> {
|
||||
let mut runs = lex_to_runs_in_mode(input, theme, mode);
|
||||
// Identity feedback view — highlight/overlay the whole input.
|
||||
render_input_runs_feedback(input, cursor_byte, theme, cache, mode, input, cursor_byte, 0)
|
||||
}
|
||||
|
||||
/// [`render_input_runs_in_mode`] with a separate **feedback view** for
|
||||
/// the walker-driven highlighting and overlays.
|
||||
///
|
||||
/// Under the `:` one-shot escape (ADR-0003) the buffer carries a leading
|
||||
/// `:` that is not advanced SQL; `view` is the stripped SQL (and
|
||||
/// `view_cursor` the cursor within it) so the walker highlights and
|
||||
/// diagnoses the SQL itself, while the `:` prefix renders as plain text.
|
||||
/// `offset` is the byte length stripped from the front — base runs and
|
||||
/// overlay positions are shifted by it back into `input` coordinates.
|
||||
/// Callers without a one-shot escape pass `(input, cursor, 0)` (what
|
||||
/// [`render_input_runs_in_mode`] does).
|
||||
#[must_use]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn render_input_runs_feedback(
|
||||
input: &str,
|
||||
cursor_byte: usize,
|
||||
theme: &Theme,
|
||||
cache: &crate::completion::SchemaCache,
|
||||
mode: Mode,
|
||||
view: &str,
|
||||
view_cursor: usize,
|
||||
offset: usize,
|
||||
) -> Vec<StyledRun> {
|
||||
// Base highlighting runs over the SQL view, shifted into buffer
|
||||
// coordinates; the stripped prefix (the `:` + space) renders as
|
||||
// plain foreground text.
|
||||
let mut runs: Vec<StyledRun> = if offset == 0 {
|
||||
lex_to_runs_in_mode(input, theme, mode)
|
||||
} else {
|
||||
let mut r = vec![StyledRun {
|
||||
byte_range: (0, offset),
|
||||
style: ratatui::style::Style::default().fg(theme.fg),
|
||||
}];
|
||||
r.extend(lex_to_runs_in_mode(view, theme, mode).into_iter().map(|run| {
|
||||
StyledRun {
|
||||
byte_range: (run.byte_range.0 + offset, run.byte_range.1 + offset),
|
||||
..run
|
||||
}
|
||||
}));
|
||||
r
|
||||
};
|
||||
if let InputState::DefiniteErrorAt(pos) =
|
||||
classify_parse_result(parse_command_with_schema_in_mode(input, cache, mode))
|
||||
classify_parse_result(parse_command_with_schema_in_mode(view, cache, mode))
|
||||
{
|
||||
overlay_error(&mut runs, pos, theme);
|
||||
overlay_error(&mut runs, pos + offset, theme);
|
||||
}
|
||||
if let Some(inv) =
|
||||
crate::completion::invalid_ident_at_cursor_in_mode(input, cursor_byte, cache, mode)
|
||||
crate::completion::invalid_ident_at_cursor_in_mode(view, view_cursor, cache, mode)
|
||||
{
|
||||
overlay_error(&mut runs, inv.range.0, theme);
|
||||
overlay_error(&mut runs, inv.range.0 + offset, theme);
|
||||
}
|
||||
// Schema-aware diagnostics (ADR-0027 §2): unknown table /
|
||||
// column (ERROR), or a dubious comparison (WARNING), is
|
||||
@@ -101,12 +145,12 @@ pub fn render_input_runs_in_mode(
|
||||
// so a problem the user has typed past stays visible. The
|
||||
// mode-aware walk picks up the SQL-specific diagnostics from
|
||||
// ADR-0032 in advanced mode.
|
||||
for diag in walker::input_diagnostics_in_mode(input, Some(cache), mode) {
|
||||
for diag in walker::input_diagnostics_in_mode(view, Some(cache), mode) {
|
||||
let colour = match diag.severity {
|
||||
walker::Severity::Error => theme.tok_error,
|
||||
walker::Severity::Warning => theme.warning,
|
||||
};
|
||||
overlay_span(&mut runs, diag.span, colour);
|
||||
overlay_span(&mut runs, (diag.span.0 + offset, diag.span.1 + offset), colour);
|
||||
}
|
||||
inject_cursor(&mut runs, input, cursor_byte, theme);
|
||||
runs
|
||||
@@ -817,6 +861,9 @@ fn ambient_hint_core_in_mode(
|
||||
crate::dsl::grammar::IdentSource::Tables => "table",
|
||||
crate::dsl::grammar::IdentSource::Columns => "column",
|
||||
crate::dsl::grammar::IdentSource::Relationships => "relationship",
|
||||
// The `seed … set <col> as <gen>` curated vocabulary
|
||||
// (ADR-0048 D9) flags an unknown name here.
|
||||
crate::dsl::grammar::IdentSource::Generators => "generator",
|
||||
// `NewName`, `Types`, `Free` are filtered out by
|
||||
// `invalid_ident_at_cursor` (it only fires for
|
||||
// known-set sources via `completes_from_schema`), so
|
||||
@@ -1105,6 +1152,50 @@ mod tests {
|
||||
assert!(reversed(&runs[0]));
|
||||
}
|
||||
|
||||
#[test]
fn one_shot_colon_highlights_the_sql_and_overlays_no_error() {
    // ADR-0003 `:` one-shot: whatever follows the `:` has to be
    // highlighted and diagnosed exactly like real advanced mode. The
    // `:` prefix itself is plain text, and a valid query must not pick
    // up an error overlay (previously the walker choked on the `:` and
    // painted it red).
    use crate::completion::{SchemaCache, TableColumn};
    use crate::dsl::types::Type;

    let theme = dark();

    // Minimal schema: one table with one text column.
    let mut cache = SchemaCache::default();
    cache.tables.push("Customers".into());
    cache.columns.push("name".into());
    cache
        .table_columns
        .insert("Customers".into(), vec![TableColumn::new("name", Type::Text)]);

    let view = "select name from Customers";
    let input = ": select name from Customers";
    let offset = input.len() - view.len(); // ": "

    let runs = render_input_runs_feedback(
        input,
        input.len(),
        &theme,
        &cache,
        Mode::Advanced,
        view,
        view.len(),
        offset,
    );

    let any_error_overlay = runs.iter().any(|r| r.style.fg == Some(theme.tok_error));
    assert!(
        !any_error_overlay,
        "a valid one-shot query must carry no error overlay: {runs:?}",
    );

    let select_is_keyword = runs
        .iter()
        .any(|r| r.byte_range.0 == offset && r.style.fg == Some(theme.tok_keyword));
    assert!(
        select_is_keyword,
        "the `select` keyword (past the `: ` prefix) is keyword-coloured: {runs:?}",
    );

    assert_eq!(
        runs.first().unwrap().byte_range.0,
        0,
        "the `:` prefix is rendered from byte 0",
    );
}
|
||||
|
||||
#[test]
|
||||
fn keyword_token_takes_keyword_colour() {
|
||||
let theme = dark();
|
||||
|
||||
@@ -23,6 +23,7 @@ pub mod output_render;
|
||||
pub mod persistence;
|
||||
pub mod project;
|
||||
pub mod runtime;
|
||||
pub mod seed;
|
||||
pub mod theme;
|
||||
pub mod type_change;
|
||||
pub mod ui;
|
||||
|
||||
@@ -1492,6 +1492,10 @@ fn spawn_dsl_dispatch(
|
||||
command: command.clone(),
|
||||
result,
|
||||
},
|
||||
Ok(CommandOutcome::Seed(result)) => AppEvent::DslSeedSucceeded {
|
||||
command: command.clone(),
|
||||
result,
|
||||
},
|
||||
Ok(CommandOutcome::Update(result)) => AppEvent::DslUpdateSucceeded {
|
||||
command: command.clone(),
|
||||
result,
|
||||
@@ -2364,6 +2368,7 @@ enum CommandOutcome {
|
||||
ShowRelationship(Option<Box<crate::db::RelationshipDiagramData>>),
|
||||
QueryPlan(QueryPlan),
|
||||
Insert(InsertResult),
|
||||
Seed(crate::db::SeedResult),
|
||||
Update(UpdateResult),
|
||||
Delete(DeleteResult),
|
||||
ChangeColumn(ChangeColumnTypeResult),
|
||||
@@ -2911,6 +2916,17 @@ async fn execute_command_typed(
|
||||
.insert(table, columns, values, src)
|
||||
.await
|
||||
.map(CommandOutcome::Insert),
|
||||
// ADR-0048 (SD1/SD2 Phase 2).
|
||||
Command::Seed {
|
||||
table,
|
||||
target_column,
|
||||
count,
|
||||
overrides,
|
||||
rng_seed,
|
||||
} => database
|
||||
.seed(table, target_column, count, overrides, rng_seed, src)
|
||||
.await
|
||||
.map(CommandOutcome::Seed),
|
||||
Command::Update {
|
||||
table,
|
||||
assignments,
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
//! Parse a simple `<column> IN ('a', 'b', …)` CHECK into its allowed
|
||||
//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds
|
||||
//! from the permitted values instead of generic text. Anything more
|
||||
//! complex (ranges, expressions, multi-column, non-literal items)
|
||||
//! returns `None`; the executor then best-effort generates and lets a
|
||||
//! violation surface through the friendly-error layer.
|
||||
|
||||
/// Extract the string-literal values of a `<column> IN ( … )` CHECK.
|
||||
///
|
||||
/// Case-insensitive on the `IN` keyword and the column name; tolerates a
|
||||
/// quoted column (`"status"`). Every list item must be a single-quoted
|
||||
/// string literal (`''` is an embedded quote). Returns `None` for any
|
||||
/// other shape.
|
||||
#[must_use]
|
||||
pub fn parse_in_check_values(check: &str, column: &str) -> Option<Vec<String>> {
|
||||
let (in_idx, paren_open) = find_in_paren(check)?;
|
||||
if !lhs_is_column(check[..in_idx].trim(), column) {
|
||||
return None;
|
||||
}
|
||||
let values = extract_quoted_list(&check[paren_open..])?;
|
||||
if values.is_empty() { None } else { Some(values) }
|
||||
}
|
||||
|
||||
/// `true` for the bytes that may appear in an identifier: ASCII
/// letters, ASCII digits and `_`.
const fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
|
||||
|
||||
/// Find the `IN` keyword (as a word, outside string literals) that is
|
||||
/// followed by `(`. Returns `(byte index of `IN`, byte index of `(`)`.
|
||||
fn find_in_paren(check: &str) -> Option<(usize, usize)> {
|
||||
let bytes = check.as_bytes();
|
||||
let mut i = 0;
|
||||
let mut in_quote = false;
|
||||
while i < bytes.len() {
|
||||
let b = bytes[i];
|
||||
if in_quote {
|
||||
if b == b'\'' {
|
||||
in_quote = false;
|
||||
}
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if b == b'\'' {
|
||||
in_quote = true;
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
let is_in = (b == b'i' || b == b'I')
|
||||
&& bytes.get(i + 1).is_some_and(|n| *n == b'n' || *n == b'N');
|
||||
if is_in {
|
||||
let before_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
|
||||
let after = i + 2;
|
||||
let after_ok = bytes.get(after).is_none_or(|n| !is_ident_byte(*n));
|
||||
if before_ok && after_ok {
|
||||
let mut k = after;
|
||||
while bytes.get(k).is_some_and(u8::is_ascii_whitespace) {
|
||||
k += 1;
|
||||
}
|
||||
if bytes.get(k) == Some(&b'(') {
|
||||
return Some((i, k));
|
||||
}
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Whether `lhs` names `column`, ignoring surrounding whitespace, ASCII
/// case, and one pair of enclosing double quotes.
///
/// The quotes are only removed when both the opening and the closing
/// quote are present; otherwise the text is compared as written.
fn lhs_is_column(lhs: &str, column: &str) -> bool {
    let trimmed = lhs.trim();
    let bare = match trimmed.strip_prefix('"') {
        Some(rest) => rest.strip_suffix('"').unwrap_or(trimmed),
        None => trimmed,
    };
    bare.eq_ignore_ascii_case(column)
}
|
||||
|
||||
/// Parse `( 'a', 'b', … )` from a string starting at `(` into the
|
||||
/// unescaped literals. `None` if any item is not a pure quoted literal.
|
||||
fn extract_quoted_list(s: &str) -> Option<Vec<String>> {
|
||||
let mut chars = s.chars().peekable();
|
||||
if chars.next()? != '(' {
|
||||
return None;
|
||||
}
|
||||
let mut values = Vec::new();
|
||||
loop {
|
||||
while chars.peek().is_some_and(|c| c.is_whitespace()) {
|
||||
chars.next();
|
||||
}
|
||||
match chars.peek()? {
|
||||
')' => {
|
||||
chars.next();
|
||||
break;
|
||||
}
|
||||
'\'' => {
|
||||
let v = read_quoted(&mut chars)?;
|
||||
values.push(v);
|
||||
while chars.peek().is_some_and(|c| c.is_whitespace()) {
|
||||
chars.next();
|
||||
}
|
||||
match chars.next()? {
|
||||
',' => {}
|
||||
')' => break,
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(values)
|
||||
}
|
||||
|
||||
/// Consume one single-quoted string literal, starting at its opening `'`,
/// and return its contents with each `''` collapsed to a single `'`.
///
/// Returns `None` if the next character is not `'` or if the input ends
/// before the closing quote.
fn read_quoted(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<String> {
    if chars.next() != Some('\'') {
        return None;
    }
    let mut literal = String::new();
    while let Some(ch) = chars.next() {
        if ch != '\'' {
            literal.push(ch);
            continue;
        }
        // A quote followed by another quote is an escaped quote;
        // a lone quote terminates the literal.
        if chars.next_if_eq(&'\'').is_none() {
            return Some(literal);
        }
        literal.push('\'');
    }
    None
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Build the `Some(vec![…])` a successful parse is expected to return.
    fn listed(items: &[&str]) -> Option<Vec<String>> {
        Some(items.iter().map(|item| (*item).to_string()).collect())
    }

    #[test]
    fn parses_a_simple_in_check() {
        let parsed = parse_in_check_values("status IN ('active', 'closed')", "status");
        assert_eq!(parsed, listed(&["active", "closed"]));
    }

    #[test]
    fn tolerates_a_quoted_column_and_lowercase_in() {
        let parsed = parse_in_check_values("\"status\" in ('a','b','c')", "status");
        assert_eq!(parsed, listed(&["a", "b", "c"]));
    }

    #[test]
    fn unescapes_embedded_quotes() {
        let parsed = parse_in_check_values("note IN ('it''s', 'ok')", "note");
        assert_eq!(parsed, listed(&["it's", "ok"]));
    }

    #[test]
    fn handles_commas_and_parens_inside_literals() {
        let parsed = parse_in_check_values("label IN ('a, b', 'c)d')", "label");
        assert_eq!(parsed, listed(&["a, b", "c)d"]));
    }

    #[test]
    fn rejects_non_literal_lists() {
        assert_eq!(parse_in_check_values("n IN (1, 2, 3)", "n"), None);
    }

    #[test]
    fn rejects_non_in_checks() {
        for (check, column) in [("age >= 0", "age"), ("length(name) > 0", "name")] {
            assert_eq!(parse_in_check_values(check, column), None);
        }
    }

    #[test]
    fn rejects_when_lhs_is_a_different_column() {
        assert_eq!(parse_in_check_values("status IN ('a')", "role"), None);
    }

    #[test]
    fn does_not_trip_on_in_inside_a_word_or_literal() {
        // `min` ends in "in", yet it is a function name rather than the IN operator.
        assert_eq!(parse_in_check_values("min(x) > 0", "x"), None);
    }
}
|
||||
@@ -0,0 +1,584 @@
|
||||
//! Value production: turn a [`Generator`] + a seeded RNG into a
|
||||
//! [`Value`] (ADR-0048 D8/D9). Realistic generators come from the
|
||||
//! `fake` crate (English locale); `product` is hand-rolled (D9, no
|
||||
//! commerce module exists); dates are generated against a **fixed
|
||||
//! reference epoch** so a `--seed` run is fully reproducible without
|
||||
//! depending on the wall clock (D8 bounded windows).
|
||||
//!
|
||||
//! The stateful markers ([`Generator::IdentitySequential`],
|
||||
//! [`Generator::ForeignKeySample`]) are resolved by the executor with
|
||||
//! database context; if one reaches here un-intercepted it falls back
|
||||
//! to type-based generation rather than panicking.
|
||||
|
||||
use chrono::{Datelike, NaiveDate};
|
||||
use fake::Fake;
|
||||
use rand::RngExt;
|
||||
|
||||
use crate::dsl::types::Type;
|
||||
use crate::dsl::value::Value;
|
||||
use crate::seed::{Generator, SeedRng};
|
||||
|
||||
/// Year of the fixed reference date that bounded date/datetime windows
/// are measured back from. A constant is used instead of `now()` so that
/// `--seed` output is reproducible across days and tests are
/// deterministic. It advances with releases.
const REF_YEAR: i32 = 2025;
/// Month of the fixed reference date.
const REF_MONTH: u32 = 6;
/// Day of month of the fixed reference date.
const REF_DAY: u32 = 1;

/// Width of the "recent" date window in days (three 365-day years).
const RECENT_WINDOW_DAYS: i64 = 365 * 3;
/// Near edge of the adult birth window in days (eighteen 365-day years).
const ADULT_MIN_DAYS: i64 = 365 * 18;
/// Far edge of the adult birth window in days (eighty 365-day years).
const ADULT_MAX_DAYS: i64 = 365 * 80;
|
||||
|
||||
/// Produce one value for `generator` against destination type `ty`.
|
||||
#[must_use]
|
||||
pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Value {
|
||||
use fake::faker::address::en as addr;
|
||||
use fake::faker::company::en as company;
|
||||
use fake::faker::internet::en as net;
|
||||
use fake::faker::job::en as job;
|
||||
use fake::faker::lorem::en as lorem;
|
||||
use fake::faker::name::en as name;
|
||||
use fake::faker::phone_number::en as phone;
|
||||
|
||||
match generator {
|
||||
Generator::FirstName => Value::Text(name::FirstName().fake_with_rng(rng)),
|
||||
Generator::LastName => Value::Text(name::LastName().fake_with_rng(rng)),
|
||||
Generator::FullName => Value::Text(name::Name().fake_with_rng(rng)),
|
||||
Generator::Email => Value::Text(net::FreeEmail().fake_with_rng(rng)),
|
||||
Generator::Username => Value::Text(net::Username().fake_with_rng(rng)),
|
||||
Generator::Password => Value::Text(net::Password(8..16).fake_with_rng(rng)),
|
||||
Generator::Phone => Value::Text(phone::PhoneNumber().fake_with_rng(rng)),
|
||||
Generator::City => Value::Text(addr::CityName().fake_with_rng(rng)),
|
||||
Generator::Country => Value::Text(addr::CountryName().fake_with_rng(rng)),
|
||||
Generator::StateName => Value::Text(addr::StateName().fake_with_rng(rng)),
|
||||
Generator::Street => Value::Text(addr::StreetName().fake_with_rng(rng)),
|
||||
Generator::ZipCode => Value::Text(addr::ZipCode().fake_with_rng(rng)),
|
||||
Generator::Company => Value::Text(company::CompanyName().fake_with_rng(rng)),
|
||||
Generator::JobTitle => Value::Text(job::Title().fake_with_rng(rng)),
|
||||
Generator::ProductName => Value::Text(product_name(rng)),
|
||||
Generator::Sentence => Value::Text(lorem::Sentence(5..12).fake_with_rng(rng)),
|
||||
Generator::Paragraph => Value::Text(lorem::Paragraph(2..4).fake_with_rng(rng)),
|
||||
Generator::Url => {
|
||||
let word: String = lorem::Word().fake_with_rng(rng);
|
||||
let suffix: String = net::DomainSuffix().fake_with_rng(rng);
|
||||
Value::Text(format!("https://{word}.{suffix}"))
|
||||
}
|
||||
// Hand-rolled — `fake`'s color module is feature-gated (it pulls
|
||||
// an extra crate); a hex colour is trivial from the RNG.
|
||||
Generator::HexColor => Value::Text(format!("#{:06X}", rng.random_range(0..0x0100_0000))),
|
||||
Generator::CurrencyAmount => currency_amount(ty, rng),
|
||||
Generator::Age => Value::Number(rng.random_range(18..=80).to_string()),
|
||||
Generator::SmallInt => Value::Number(rng.random_range(1..=100).to_string()),
|
||||
Generator::DateRecent => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
|
||||
Generator::DateAdult => {
|
||||
Value::Text(format_date(random_past_date(rng, ADULT_MIN_DAYS, ADULT_MAX_DAYS)))
|
||||
}
|
||||
Generator::DateTimeRecent => Value::Text(random_recent_datetime(rng)),
|
||||
Generator::Boolean => Value::Bool(rng.random_range(0..2) == 1),
|
||||
Generator::PickFrom(values) if !values.is_empty() => {
|
||||
let chosen: &String = pick(rng, values);
|
||||
literal_to_value(chosen, ty)
|
||||
}
|
||||
// The `set <col> between low and high` override (D2). Bounds are
|
||||
// interpreted per the destination type; the executor has already
|
||||
// validated they parse, so a defensive parse failure here falls
|
||||
// back to type-based generation rather than producing junk.
|
||||
Generator::Range { low, high } => range_value(low, high, ty, rng),
|
||||
// Un-intercepted markers + an empty pick list → type-based.
|
||||
Generator::PickFrom(_)
|
||||
| Generator::IdentitySequential
|
||||
| Generator::ForeignKeySample
|
||||
| Generator::Generic => generic_for_type(ty, rng),
|
||||
}
|
||||
}
|
||||
|
||||
/// Uniform value in `[low, high]` for the `between` override (D2).
|
||||
///
|
||||
/// Bounds are interpreted by destination type. Returns the type-based
|
||||
/// fallback for a bound that does not parse or a type that has no range
|
||||
/// meaning — the executor pre-validates, so this is defensive only.
|
||||
fn range_value(low: &str, high: &str, ty: Type, rng: &mut SeedRng) -> Value {
|
||||
match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high)
|
||||
.map(|(lo, hi)| Value::Number(rng.random_range(lo..=hi).to_string()))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high)
|
||||
.map(|(lo, hi)| {
|
||||
let v = rng.random::<f64>().mul_add(hi - lo, lo);
|
||||
Value::Number(format!("{v:.2}"))
|
||||
})
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::Date => parse_date_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(format_date(random_date_between(rng, lo, hi))))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
Type::DateTime => parse_datetime_range(low, high)
|
||||
.map(|(lo, hi)| Value::Text(random_datetime_between(rng, lo, hi)))
|
||||
.unwrap_or_else(|| generic_for_type(ty, rng)),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
_ => generic_for_type(ty, rng),
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate that `low`/`high` parse as bounds for `ty`.
|
||||
///
|
||||
/// The `between` override (D2) is checked by the executor *before*
|
||||
/// generation. Returns a short human reason on failure (the executor
|
||||
/// wraps it in a friendly error naming the column), `None` when valid.
|
||||
#[must_use]
|
||||
pub fn range_bounds_reason(ty: Type, low: &str, high: &str) -> Option<String> {
|
||||
let ok = match ty {
|
||||
Type::Int | Type::Serial => parse_int_range(low, high).is_some(),
|
||||
Type::Real | Type::Decimal => parse_real_range(low, high).is_some(),
|
||||
Type::Date => parse_date_range(low, high).is_some(),
|
||||
Type::DateTime => parse_datetime_range(low, high).is_some(),
|
||||
// text / bool / blob / shortid have no range meaning.
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => false,
|
||||
};
|
||||
if ok {
|
||||
return None;
|
||||
}
|
||||
Some(match ty {
|
||||
Type::Int | Type::Serial => "expected two whole numbers, e.g. `between 1 and 100`".to_string(),
|
||||
Type::Real | Type::Decimal => "expected two numbers, e.g. `between 1.0 and 9.99`".to_string(),
|
||||
Type::Date => "expected two quoted dates, e.g. `between '2023-01-01' and '2024-12-31'`".to_string(),
|
||||
Type::DateTime => {
|
||||
"expected two quoted datetimes, e.g. `between '2023-01-01T00:00:00' and '2024-12-31T23:59:59'`"
|
||||
.to_string()
|
||||
}
|
||||
Type::Text | Type::Bool | Type::Blob | Type::ShortId => {
|
||||
"a `between` range only applies to numeric and date/datetime columns".to_string()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse both bounds as `i64` (surrounding whitespace ignored) and
/// return them as `(smaller, larger)`; `None` if either is not an integer.
fn parse_int_range(low: &str, high: &str) -> Option<(i64, i64)> {
    let first = low.trim().parse::<i64>().ok()?;
    let second = high.trim().parse::<i64>().ok()?;
    Some((first.min(second), first.max(second)))
}
|
||||
|
||||
/// Parse both bounds as finite `f64` values (surrounding whitespace
/// ignored) and return them as `(smaller, larger)`.
///
/// `None` if either bound fails to parse or is infinite / NaN.
fn parse_real_range(low: &str, high: &str) -> Option<(f64, f64)> {
    let parse_finite = |raw: &str| raw.trim().parse::<f64>().ok().filter(|v| v.is_finite());
    let (first, second) = (parse_finite(low)?, parse_finite(high)?);
    Some(if second < first { (second, first) } else { (first, second) })
}
|
||||
|
||||
fn parse_date_range(low: &str, high: &str) -> Option<(NaiveDate, NaiveDate)> {
|
||||
let lo = NaiveDate::parse_from_str(low.trim(), "%Y-%m-%d").ok()?;
|
||||
let hi = NaiveDate::parse_from_str(high.trim(), "%Y-%m-%d").ok()?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Accept both the `T`-separated and space-separated datetime spellings
|
||||
/// the app validates (`bind_datetime` / `validate_datetime`).
|
||||
fn parse_one_datetime(s: &str) -> Option<chrono::NaiveDateTime> {
|
||||
let t = s.trim();
|
||||
chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%dT%H:%M:%S")
|
||||
.or_else(|_| chrono::NaiveDateTime::parse_from_str(t, "%Y-%m-%d %H:%M:%S"))
|
||||
.ok()
|
||||
}
|
||||
|
||||
fn parse_datetime_range(
|
||||
low: &str,
|
||||
high: &str,
|
||||
) -> Option<(chrono::NaiveDateTime, chrono::NaiveDateTime)> {
|
||||
let lo = parse_one_datetime(low)?;
|
||||
let hi = parse_one_datetime(high)?;
|
||||
Some(if lo <= hi { (lo, hi) } else { (hi, lo) })
|
||||
}
|
||||
|
||||
/// Uniform date in `[lo, hi]` (inclusive).
|
||||
fn random_date_between(rng: &mut SeedRng, lo: NaiveDate, hi: NaiveDate) -> NaiveDate {
|
||||
let lo_ce = lo.num_days_from_ce();
|
||||
let hi_ce = hi.num_days_from_ce();
|
||||
let day = rng.random_range(lo_ce..=hi_ce);
|
||||
NaiveDate::from_num_days_from_ce_opt(day).unwrap_or(lo)
|
||||
}
|
||||
|
||||
/// Uniform datetime in `[lo, hi]`, rendered `YYYY-MM-DDTHH:MM:SS`.
|
||||
fn random_datetime_between(
|
||||
rng: &mut SeedRng,
|
||||
lo: chrono::NaiveDateTime,
|
||||
hi: chrono::NaiveDateTime,
|
||||
) -> String {
|
||||
let lo_s = lo.and_utc().timestamp();
|
||||
let hi_s = hi.and_utc().timestamp();
|
||||
let secs = if lo_s <= hi_s {
|
||||
rng.random_range(lo_s..=hi_s)
|
||||
} else {
|
||||
rng.random_range(hi_s..=lo_s)
|
||||
};
|
||||
let dt = chrono::DateTime::from_timestamp(secs, 0)
|
||||
.map_or(lo, |d| d.naive_utc());
|
||||
dt.format("%Y-%m-%dT%H:%M:%S").to_string()
|
||||
}
|
||||
|
||||
/// Type-based fallback generation (D8). Never produces NULL for a
|
||||
/// generatable type; `blob`/`serial`/`shortid` are handled by the
|
||||
/// executor (autogen / block guard) and yield NULL here only as a
|
||||
/// last resort.
|
||||
fn generic_for_type(ty: Type, rng: &mut SeedRng) -> Value {
|
||||
use fake::faker::lorem::en as lorem;
|
||||
match ty {
|
||||
Type::Text => {
|
||||
let words: Vec<String> = lorem::Words(2..4).fake_with_rng(rng);
|
||||
Value::Text(words.join(" "))
|
||||
}
|
||||
Type::ShortId => Value::Text(crate::dsl::shortid::generate_with_rng(rng)),
|
||||
Type::Int => Value::Number(rng.random_range(1..=10_000).to_string()),
|
||||
Type::Serial => Value::Number(rng.random_range(1..=10_000).to_string()),
|
||||
Type::Real => {
|
||||
let n: f64 = rng.random_range(0..100_000) as f64 / 100.0;
|
||||
Value::Number(format!("{n:.2}"))
|
||||
}
|
||||
Type::Decimal => {
|
||||
let dollars = rng.random_range(0..10_000);
|
||||
let cents = rng.random_range(0..100);
|
||||
Value::Number(format!("{dollars}.{cents:02}"))
|
||||
}
|
||||
Type::Bool => Value::Bool(rng.random_range(0..2) == 1),
|
||||
Type::Date => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
|
||||
Type::DateTime => Value::Text(random_recent_datetime(rng)),
|
||||
Type::Blob => Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrap a fixed-list literal as the right `Value` shape for `ty` (used
|
||||
/// by `PickFrom` — enum / `IN`-CHECK values).
|
||||
fn literal_to_value(s: &str, ty: Type) -> Value {
|
||||
match ty {
|
||||
Type::Int | Type::Serial | Type::Real | Type::Decimal => Value::Number(s.to_string()),
|
||||
Type::Bool => Value::Bool(matches!(s.to_ascii_lowercase().as_str(), "true" | "1")),
|
||||
_ => Value::Text(s.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
/// A money-shaped amount: whole for `int`/`serial`, two-decimal for the
|
||||
/// fractional numeric types.
|
||||
fn currency_amount(ty: Type, rng: &mut SeedRng) -> Value {
|
||||
match ty {
|
||||
Type::Real | Type::Decimal => {
|
||||
let dollars = rng.random_range(1..=1_000);
|
||||
let cents = rng.random_range(0..100);
|
||||
Value::Number(format!("{dollars}.{cents:02}"))
|
||||
}
|
||||
// int / serial / anything else numeric → whole amount.
|
||||
_ => Value::Number(rng.random_range(1..=1_000).to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
// — word lists for the hand-rolled `product` generator (D9) —

/// First word of a generated product name.
const PRODUCT_ADJECTIVES: &[&str] = &[
    "Sleek", "Rustic", "Ergonomic", "Handcrafted", "Refined",
    "Modern", "Vintage", "Compact", "Premium", "Lightweight",
    "Durable", "Elegant", "Sturdy", "Smooth", "Gorgeous",
    "Intelligent", "Practical", "Awesome", "Incredible", "Recycled",
];

/// Second word of a generated product name.
const PRODUCT_MATERIALS: &[&str] = &[
    "Wooden", "Copper", "Granite", "Cotton", "Steel",
    "Leather", "Bamboo", "Plastic", "Ceramic", "Glass",
    "Concrete", "Rubber", "Bronze", "Marble", "Linen",
    "Silk", "Aluminum", "Wool", "Gold", "Carbon",
];

/// Third word of a generated product name.
const PRODUCT_NOUNS: &[&str] = &[
    "Chair", "Lamp", "Table", "Bottle", "Backpack",
    "Keyboard", "Mug", "Shoes", "Jacket", "Watch",
    "Wallet", "Bench", "Hat", "Gloves", "Towel",
    "Ball", "Bike", "Knife", "Pillow", "Blanket",
];
|
||||
|
||||
fn product_name(rng: &mut SeedRng) -> String {
|
||||
format!(
|
||||
"{} {} {}",
|
||||
pick(rng, PRODUCT_ADJECTIVES),
|
||||
pick(rng, PRODUCT_MATERIALS),
|
||||
pick(rng, PRODUCT_NOUNS),
|
||||
)
|
||||
}
|
||||
|
||||
// — bounded dates (D8) —
|
||||
|
||||
const fn reference_date() -> NaiveDate {
|
||||
match NaiveDate::from_ymd_opt(REF_YEAR, REF_MONTH, REF_DAY) {
|
||||
Some(d) => d,
|
||||
None => panic!("reference date constants must be valid"),
|
||||
}
|
||||
}
|
||||
|
||||
/// A date between `min_days_ago` and `max_days_ago` before the
|
||||
/// reference epoch (inclusive).
|
||||
fn random_past_date(rng: &mut SeedRng, min_days_ago: i64, max_days_ago: i64) -> NaiveDate {
|
||||
let days_ago = rng.random_range(min_days_ago..=max_days_ago);
|
||||
let ce = reference_date().num_days_from_ce();
|
||||
let target = ce - i32::try_from(days_ago).unwrap_or(0);
|
||||
NaiveDate::from_num_days_from_ce_opt(target).unwrap_or_else(reference_date)
|
||||
}
|
||||
|
||||
fn format_date(date: NaiveDate) -> String {
|
||||
date.format("%Y-%m-%d").to_string()
|
||||
}
|
||||
|
||||
/// A recent datetime: a recent date plus a random time-of-day, rendered
|
||||
/// as `YYYY-MM-DDTHH:MM:SS`.
|
||||
fn random_recent_datetime(rng: &mut SeedRng) -> String {
|
||||
let date = random_past_date(rng, 0, RECENT_WINDOW_DAYS);
|
||||
let h = rng.random_range(0..24);
|
||||
let m = rng.random_range(0..60);
|
||||
let s = rng.random_range(0..60);
|
||||
format!("{}T{h:02}:{m:02}:{s:02}", format_date(date))
|
||||
}
|
||||
|
||||
/// Pick a uniformly random element from a non-empty slice.
|
||||
fn pick<'a, T>(rng: &mut SeedRng, items: &'a [T]) -> &'a T {
|
||||
&items[rng.random_range(0..items.len())]
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::seed::make_rng;
    use pretty_assertions::assert_eq;

    /// Generate one value from a fresh RNG seeded with `seed`.
    fn gen_once(generator: &Generator, ty: Type, seed: u64) -> Value {
        generate_value(generator, ty, &mut make_rng(Some(seed)))
    }

    /// Unwrap a `Value::Text`, panicking with `msg` for any other shape.
    fn text_or_panic(value: Value, msg: &str) -> String {
        match value {
            Value::Text(text) => text,
            _ => panic!("{msg}"),
        }
    }

    /// Unwrap a `Value::Number`, panicking with `msg` for any other shape.
    fn number_or_panic(value: Value, msg: &str) -> String {
        match value {
            Value::Number(digits) => digits,
            _ => panic!("{msg}"),
        }
    }

    #[test]
    fn generation_is_deterministic_for_a_fixed_seed() {
        let generators = [
            Generator::FullName,
            Generator::Email,
            Generator::ProductName,
            Generator::DateRecent,
            Generator::CurrencyAmount,
        ];
        for generator in &generators {
            let first = gen_once(generator, Type::Text, 7);
            let second = gen_once(generator, Type::Text, 7);
            assert_eq!(first, second, "{generator:?} must reproduce for a fixed seed");
        }
    }

    #[test]
    fn text_generators_produce_nonempty_text() {
        let generators = [
            Generator::FirstName,
            Generator::LastName,
            Generator::FullName,
            Generator::Email,
            Generator::Username,
            Generator::Company,
            Generator::City,
            Generator::ProductName,
        ];
        for generator in &generators {
            match gen_once(generator, Type::Text, 3) {
                Value::Text(text) => {
                    assert!(!text.trim().is_empty(), "{generator:?} produced empty text");
                }
                other => panic!("{generator:?} produced non-text {other:?}"),
            }
        }
    }

    #[test]
    fn email_looks_like_an_email() {
        let email = text_or_panic(gen_once(&Generator::Email, Type::Text, 11), "not text");
        assert!(email.contains('@'), "email should contain @: {email}");
    }

    #[test]
    fn product_name_is_three_capitalised_words() {
        let name = text_or_panic(gen_once(&Generator::ProductName, Type::Text, 99), "not text");
        let words: Vec<&str> = name.split(' ').collect();
        assert_eq!(words.len(), 3, "product name should be 3 words: {name}");
        for word in words {
            let initial = word.chars().next().unwrap();
            assert!(initial.is_ascii_uppercase(), "word `{word}` not capitalised");
        }
    }

    #[test]
    fn recent_dates_fall_within_the_bounded_window() {
        let latest = reference_date();
        let earliest = latest
            .checked_sub_days(chrono::Days::new(RECENT_WINDOW_DAYS as u64))
            .unwrap();
        let mut rng = make_rng(Some(1));
        for _ in 0..200 {
            let drawn = generate_value(&Generator::DateRecent, Type::Date, &mut rng);
            let text = text_or_panic(drawn, "date not text");
            let d = NaiveDate::parse_from_str(&text, "%Y-%m-%d").expect("valid ISO date");
            assert!((earliest..=latest).contains(&d), "date {d} outside recent window");
        }
    }

    #[test]
    fn dob_dates_fall_within_the_adult_window() {
        let anchor = reference_date();
        let earliest = anchor
            .checked_sub_days(chrono::Days::new(ADULT_MAX_DAYS as u64))
            .unwrap();
        let latest = anchor
            .checked_sub_days(chrono::Days::new(ADULT_MIN_DAYS as u64))
            .unwrap();
        let mut rng = make_rng(Some(2));
        for _ in 0..200 {
            let drawn = generate_value(&Generator::DateAdult, Type::Date, &mut rng);
            let text = text_or_panic(drawn, "date not text");
            let d = NaiveDate::parse_from_str(&text, "%Y-%m-%d").expect("valid ISO date");
            assert!((earliest..=latest).contains(&d), "dob {d} outside adult window");
        }
    }

    #[test]
    fn datetime_is_iso_shaped() {
        let drawn = gen_once(&Generator::DateTimeRecent, Type::DateTime, 5);
        let text = text_or_panic(drawn, "not text");
        assert!(text.contains('T'), "datetime needs a T separator: {text}");
        // It must also parse back as a naive datetime.
        if let Err(e) = chrono::NaiveDateTime::parse_from_str(&text, "%Y-%m-%dT%H:%M:%S") {
            panic!("invalid datetime {text}: {e}");
        }
    }

    #[test]
    fn currency_is_whole_for_int_and_fractional_for_decimal() {
        let int_amt = number_or_panic(
            gen_once(&Generator::CurrencyAmount, Type::Int, 4),
            "not a number",
        );
        assert!(!int_amt.contains('.'), "int currency should be whole: {int_amt}");

        let dec_amt = number_or_panic(
            gen_once(&Generator::CurrencyAmount, Type::Decimal, 4),
            "not a number",
        );
        assert!(dec_amt.contains('.'), "decimal currency should have cents: {dec_amt}");
    }

    #[test]
    fn age_is_in_human_range() {
        let mut rng = make_rng(Some(8));
        for _ in 0..100 {
            let drawn = generate_value(&Generator::Age, Type::Int, &mut rng);
            let n: i64 = number_or_panic(drawn, "age not a number").parse().unwrap();
            assert!((18..=80).contains(&n), "age {n} out of range");
        }
    }

    #[test]
    fn pick_from_chooses_a_listed_value() {
        let generator = Generator::PickFrom(vec!["active".into(), "closed".into()]);
        let mut rng = make_rng(Some(6));
        for _ in 0..50 {
            let drawn = generate_value(&generator, Type::Text, &mut rng);
            let choice = text_or_panic(drawn, "not text");
            assert!(
                matches!(choice.as_str(), "active" | "closed"),
                "unexpected pick {choice}"
            );
        }
    }

    #[test]
    fn pick_from_wraps_numeric_values_as_numbers() {
        let generator = Generator::PickFrom(vec!["1".into(), "2".into(), "3".into()]);
        let mut rng = make_rng(Some(6));
        let v = generate_value(&generator, Type::Int, &mut rng);
        assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
    }

    #[test]
    fn int_range_stays_within_inclusive_bounds() {
        let generator = Generator::Range { low: "10".into(), high: "20".into() };
        let mut rng = make_rng(Some(5));
        for _ in 0..200 {
            let drawn = generate_value(&generator, Type::Int, &mut rng);
            let n: i64 = number_or_panic(drawn, "int range should be a number")
                .parse()
                .unwrap();
            assert!((10..=20).contains(&n), "int {n} out of [10,20]");
        }
    }

    #[test]
    fn real_range_stays_within_bounds_and_has_cents() {
        let generator = Generator::Range { low: "1.0".into(), high: "9.0".into() };
        let mut rng = make_rng(Some(5));
        for _ in 0..200 {
            let drawn = generate_value(&generator, Type::Real, &mut rng);
            let rendered = number_or_panic(drawn, "real range should be a number");
            let n: f64 = rendered.parse().unwrap();
            assert!((1.0..=9.0).contains(&n), "real {n} out of [1,9]");
            assert!(rendered.contains('.'), "real should be formatted with cents: {rendered}");
        }
    }

    #[test]
    fn date_range_stays_within_quoted_bounds() {
        let generator = Generator::Range {
            low: "2023-01-01".into(),
            high: "2023-12-31".into(),
        };
        let lo = NaiveDate::parse_from_str("2023-01-01", "%Y-%m-%d").unwrap();
        let hi = NaiveDate::parse_from_str("2023-12-31", "%Y-%m-%d").unwrap();
        let mut rng = make_rng(Some(9));
        for _ in 0..200 {
            let drawn = generate_value(&generator, Type::Date, &mut rng);
            let text = text_or_panic(drawn, "date range should be text");
            let d = NaiveDate::parse_from_str(&text, "%Y-%m-%d").expect("valid date");
            assert!((lo..=hi).contains(&d), "date {d} out of range");
        }
    }

    #[test]
    fn reversed_bounds_are_tolerated() {
        let generator = Generator::Range { low: "20".into(), high: "10".into() };
        let mut rng = make_rng(Some(1));
        let drawn = generate_value(&generator, Type::Int, &mut rng);
        let n: i64 = number_or_panic(drawn, "number").parse().unwrap();
        assert!((10..=20).contains(&n), "reversed bounds still produce in-range: {n}");
    }

    #[test]
    fn range_bounds_reason_accepts_compatible_and_rejects_incompatible() {
        // Numeric / date / datetime bounds of the right shape are accepted.
        let accepted = [
            (Type::Int, "1", "10"),
            (Type::Real, "1.5", "9.9"),
            (Type::Date, "2023-01-01", "2024-01-01"),
            (Type::DateTime, "2023-01-01T00:00:00", "2024-01-01T00:00:00"),
        ];
        for (ty, low, high) in accepted {
            assert!(range_bounds_reason(ty, low, high).is_none());
        }
        // A non-numeric bound on a numeric column, and any range on a
        // text or bool column, are rejected.
        let rejected = [
            (Type::Int, "abc", "10"),
            (Type::Text, "a", "z"),
            (Type::Bool, "0", "1"),
        ];
        for (ty, low, high) in rejected {
            assert!(range_bounds_reason(ty, low, high).is_some());
        }
    }

    #[test]
    fn markers_fall_back_to_type_based_generation() {
        // A marker that was not intercepted generates by type instead of panicking.
        let as_text = gen_once(&Generator::IdentitySequential, Type::Text, 1);
        assert!(matches!(as_text, Value::Text(_)));
        let as_int = gen_once(&Generator::ForeignKeySample, Type::Int, 1);
        assert!(matches!(as_int, Value::Number(_)));
    }

    #[test]
    fn generic_fallback_matches_each_type() {
        let mut rng = make_rng(Some(0));
        let generic = Generator::Generic;
        assert!(matches!(generate_value(&generic, Type::Text, &mut rng), Value::Text(_)));
        assert!(matches!(generate_value(&generic, Type::Int, &mut rng), Value::Number(_)));
        assert!(matches!(generate_value(&generic, Type::Bool, &mut rng), Value::Bool(_)));
        assert!(matches!(generate_value(&generic, Type::Blob, &mut rng), Value::Null));
        // The shortid fallback must pass shortid validation.
        let sid = text_or_panic(
            generate_value(&generic, Type::ShortId, &mut rng),
            "shortid not text",
        );
        assert!(crate::dsl::shortid::validate(&sid).is_ok(), "invalid shortid {sid}");
    }
}
|
||||
@@ -0,0 +1,440 @@
|
||||
//! Generator selection: the name-aware, type-gated catalogue (ADR-0048
|
||||
//! D7), table-context disambiguation for `name`/`title` (D11), the
|
||||
//! identifier-family rule (D10), and enum-ish detection (D12).
|
||||
//!
|
||||
//! Selection is **token-based**: a column name is split on `_`, `-` and
|
||||
//! camelCase boundaries, lowercased, and matched against an
|
||||
//! ordered, most-specific-first list. Each rule is **type-gated** — a
|
||||
//! name match only fires when the column's type is compatible, so a
|
||||
//! column called `email` typed `int` falls through to type-based
|
||||
//! generation rather than producing a string. Documented false-positive
|
||||
//! guards keep `username`/`filename` away from the bare person-name
|
||||
//! rule.
|
||||
|
||||
use tracing::trace;
|
||||
|
||||
use crate::dsl::types::Type;
|
||||
use crate::seed::{ColumnSpec, Generator};
|
||||
|
||||
/// Choose the generator for a column (ADR-0048 D7/D10/D11/D12).
|
||||
///
|
||||
/// Precedence: foreign keys and `IN`-CHECK columns are resolved first
|
||||
/// (the executor / a fixed list), then the ordered name catalogue, then
|
||||
/// the type-based fallback.
|
||||
#[must_use]
|
||||
pub fn choose_generator(table: &str, col: &ColumnSpec) -> Generator {
|
||||
let generator = choose_generator_inner(table, col);
|
||||
trace!(
|
||||
table = table,
|
||||
column = %col.name,
|
||||
ty = %col.ty,
|
||||
chosen = ?generator,
|
||||
"seed: chose generator for column"
|
||||
);
|
||||
generator
|
||||
}
|
||||
|
||||
fn choose_generator_inner(table: &str, col: &ColumnSpec) -> Generator {
|
||||
// FK columns are filled by sampling existing parent rows (D14) —
|
||||
// the executor owns that; generation here would be wrong.
|
||||
if col.is_foreign_key {
|
||||
return Generator::ForeignKeySample;
|
||||
}
|
||||
// A simple `col IN (…)` CHECK becomes the value source (D17), so the
|
||||
// common enum-as-CHECK pattern just works.
|
||||
if let Some(values) = &col.check_in_values
|
||||
&& !values.is_empty()
|
||||
{
|
||||
return Generator::PickFrom(values.clone());
|
||||
}
|
||||
|
||||
let toks = tokens(&col.name);
|
||||
match_name_generator(table, &toks, col.ty).unwrap_or(Generator::Generic)
|
||||
}
|
||||
|
||||
/// Whether a column name looks like an enum / fixed-value set that has
|
||||
/// no sensible generic generator (D12). Used by the executor to drive
|
||||
/// the post-seed advisory; such columns still receive generic text.
|
||||
#[must_use]
|
||||
pub fn is_enum_ish(name: &str) -> bool {
|
||||
const ENUM_TOKENS: &[&str] = &[
|
||||
"role", "status", "state", "type", "kind", "category", "level",
|
||||
"tier", "stage", "priority", "gender",
|
||||
];
|
||||
let toks = tokens(name);
|
||||
toks.iter().any(|t| ENUM_TOKENS.contains(&t.as_str()))
|
||||
}
|
||||
|
||||
/// The ordered, most-specific-first name catalogue. Returns `None` when
|
||||
/// nothing matches (→ type-based fallback) or when a name matches but
|
||||
/// its type gate fails.
|
||||
fn match_name_generator(table: &str, toks: &[String], ty: Type) -> Option<Generator> {
|
||||
let text = type_is_text(ty);
|
||||
let numeric = ty.is_numeric();
|
||||
|
||||
// — Person —
|
||||
if text && (has_any(toks, &["fname", "firstname"]) || has_seq(toks, "first", "name")) {
|
||||
return Some(Generator::FirstName);
|
||||
}
|
||||
if text
|
||||
&& (has_any(toks, &["lname", "lastname", "surname"]) || has_seq(toks, "last", "name"))
|
||||
{
|
||||
return Some(Generator::LastName);
|
||||
}
|
||||
if text && (has_any(toks, &["username", "login", "handle"]) || has_seq(toks, "user", "name")) {
|
||||
return Some(Generator::Username);
|
||||
}
|
||||
if text && has_any(toks, &["email", "emails"]) {
|
||||
return Some(Generator::Email);
|
||||
}
|
||||
if text && has_any(toks, &["password", "passwd", "pwd"]) {
|
||||
return Some(Generator::Password);
|
||||
}
|
||||
if text && has_any(toks, &["phone", "mobile", "cell", "tel", "telephone"]) {
|
||||
return Some(Generator::Phone);
|
||||
}
|
||||
|
||||
// — bare `name` / `title` → table-context (D11) —
|
||||
// Guarded against the `*_name` false positives handled above (those
|
||||
// returned already) plus structural names like `filename`/`table_name`.
|
||||
if text && has_any(toks, &["name", "title"]) && !is_name_false_positive(toks) {
|
||||
return Some(name_by_table_context(table));
|
||||
}
|
||||
|
||||
// — Address —
|
||||
if text && has_any(toks, &["city", "town"]) {
|
||||
return Some(Generator::City);
|
||||
}
|
||||
if text && has_token(toks, "country") {
|
||||
return Some(Generator::Country);
|
||||
}
|
||||
// `province` / explicit `state_name`/`state_abbr` → a real state name.
|
||||
// Bare `state` is left to enum-ish (it usually means status), so we
|
||||
// require `province` or a `state` token paired with name/abbr.
|
||||
if text && (has_token(toks, "province") || (has_token(toks, "state") && has_any(toks, &["name", "abbr"]))) {
|
||||
return Some(Generator::StateName);
|
||||
}
|
||||
if text && has_any(toks, &["street", "address", "addr"]) {
|
||||
return Some(Generator::Street);
|
||||
}
|
||||
if text && has_any(toks, &["zip", "zipcode", "postcode", "postal"]) {
|
||||
return Some(Generator::ZipCode);
|
||||
}
|
||||
|
||||
// — Organisation / job —
|
||||
if text && has_any(toks, &["company", "employer", "org", "organization", "organisation"]) {
|
||||
return Some(Generator::Company);
|
||||
}
|
||||
if text && has_any(toks, &["job", "position", "profession", "occupation"]) {
|
||||
return Some(Generator::JobTitle);
|
||||
}
|
||||
|
||||
// — Free text —
|
||||
if text && has_any(toks, &["description", "bio", "notes", "note", "summary", "comment", "comments", "about"]) {
|
||||
return Some(Generator::Sentence);
|
||||
}
|
||||
if text && has_any(toks, &["url", "website", "homepage", "link"]) {
|
||||
return Some(Generator::Url);
|
||||
}
|
||||
if text && has_any(toks, &["color", "colour"]) {
|
||||
return Some(Generator::HexColor);
|
||||
}
|
||||
|
||||
// — Numeric —
|
||||
if numeric && has_any(toks, &["price", "amount", "cost", "salary", "balance", "total", "fee", "revenue"]) {
|
||||
return Some(Generator::CurrencyAmount);
|
||||
}
|
||||
if numeric && has_token(toks, "age") {
|
||||
return Some(Generator::Age);
|
||||
}
|
||||
if numeric && has_any(toks, &["quantity", "qty", "stock", "count"]) {
|
||||
return Some(Generator::SmallInt);
|
||||
}
|
||||
|
||||
// — Temporal (bounded, D8) —
|
||||
if matches!(ty, Type::Date) && has_any(toks, &["dob", "birthday", "birthdate"]) {
|
||||
return Some(Generator::DateAdult);
|
||||
}
|
||||
if matches!(ty, Type::Date) && has_token(toks, "date") {
|
||||
return Some(Generator::DateRecent);
|
||||
}
|
||||
if matches!(ty, Type::DateTime) && has_any(toks, &["timestamp", "datetime", "at"]) {
|
||||
return Some(Generator::DateTimeRecent);
|
||||
}
|
||||
|
||||
// — Boolean —
|
||||
if matches!(ty, Type::Bool)
|
||||
&& (toks.first().map(String::as_str) == Some("is")
|
||||
|| toks.first().map(String::as_str) == Some("has")
|
||||
|| has_any(toks, &["active", "enabled", "verified", "deleted"]))
|
||||
{
|
||||
return Some(Generator::Boolean);
|
||||
}
|
||||
|
||||
// — Identifier family (D10) — late so phone/email/etc. win first.
|
||||
if matches!(ty, Type::Int | Type::Text) && is_identifier_name(toks) {
|
||||
return Some(Generator::IdentitySequential);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Resolve a bare `name`/`title` column by the **table** it lives in
|
||||
/// (D11): product-ish → a product name, company-ish → a company name,
|
||||
/// person-ish → a person name, otherwise a generic person name.
|
||||
fn name_by_table_context(table: &str) -> Generator {
|
||||
let toks = tokens(table);
|
||||
const PRODUCTY: &[&str] = &[
|
||||
"product", "products", "item", "items", "good", "goods",
|
||||
"merchandise", "catalog", "catalogue", "inventory", "sku", "skus",
|
||||
];
|
||||
const COMPANYISH: &[&str] = &[
|
||||
"company", "companies", "vendor", "vendors", "supplier",
|
||||
"suppliers", "manufacturer", "manufacturers", "brand", "brands",
|
||||
"organization", "organisation",
|
||||
];
|
||||
const PERSONISH: &[&str] = &[
|
||||
"user", "users", "customer", "customers", "person", "people",
|
||||
"employee", "employees", "member", "members", "contact",
|
||||
"contacts", "author", "authors", "student", "students",
|
||||
];
|
||||
if has_any(&toks, PRODUCTY) {
|
||||
Generator::ProductName
|
||||
} else if has_any(&toks, COMPANYISH) {
|
||||
Generator::Company
|
||||
} else if has_any(&toks, PERSONISH) {
|
||||
Generator::FullName
|
||||
} else {
|
||||
// Unknown table: a person name is the most generally useful
|
||||
// default for a bare `name` column.
|
||||
Generator::FullName
|
||||
}
|
||||
}
|
||||
|
||||
/// Names ending in `name`/`title` that are NOT person names. The
|
||||
/// specific `first`/`last`/`user` cases are matched earlier and return
|
||||
/// before this guard; this catches structural names.
|
||||
fn is_name_false_positive(toks: &[String]) -> bool {
|
||||
const NON_PERSON: &[&str] = &[
|
||||
"file", "table", "host", "domain", "field", "class", "tag",
|
||||
"event", "path", "col", "column", "db", "schema", "index", "key",
|
||||
"page", "node", "type",
|
||||
];
|
||||
has_any(toks, NON_PERSON) && has_any(toks, &["name", "title"])
|
||||
}
|
||||
|
||||
/// Identifier-family names (D10): treated as unique identifiers. FK
|
||||
/// columns never reach here (handled in [`choose_generator`]).
|
||||
fn is_identifier_name(toks: &[String]) -> bool {
|
||||
const ID_TOKENS: &[&str] = &["id", "code", "sku", "ref", "reference", "barcode"];
|
||||
if has_any(toks, ID_TOKENS) {
|
||||
return true;
|
||||
}
|
||||
// `*_number` / `*_no` as an identifier, but only when qualified
|
||||
// (a bare `number`/`no` is too ambiguous, and `phone_number` already
|
||||
// matched the phone rule earlier).
|
||||
toks.len() >= 2 && has_any(toks, &["number", "no"])
|
||||
}
|
||||
|
||||
// — token utilities —
|
||||
|
||||
/// Break a column/table name into lowercase tokens.
///
/// Token boundaries are `_`, `-`, a space, and camelCase transitions
/// (an ASCII uppercase letter directly after an ASCII lowercase letter
/// or digit). Runs of uppercase letters stay together. Examples:
/// `created_at` → [`created`, `at`]; `firstName` → [`first`, `name`];
/// `DOB` → [`dob`]; `HTTPStatus` → [`httpstatus`].
fn tokens(name: &str) -> Vec<String> {
    /// Move the token under construction (if any) into `done`.
    fn flush(done: &mut Vec<String>, pending: &mut String) {
        if !pending.is_empty() {
            done.push(std::mem::take(pending));
        }
    }

    let mut done = Vec::new();
    let mut pending = String::new();
    // Whether the previous character was an ASCII lowercase letter or
    // digit — the only state a camelCase boundary depends on.
    let mut after_lower_or_digit = false;

    for c in name.chars() {
        match c {
            '_' | '-' | ' ' => {
                flush(&mut done, &mut pending);
                after_lower_or_digit = false;
            }
            _ => {
                if after_lower_or_digit && c.is_ascii_uppercase() {
                    flush(&mut done, &mut pending);
                }
                pending.push(c.to_ascii_lowercase());
                after_lower_or_digit = c.is_ascii_lowercase() || c.is_ascii_digit();
            }
        }
    }
    flush(&mut done, &mut pending);
    done
}
|
||||
|
||||
/// Whether `t` appears in `toks` as an exact, case-sensitive match.
fn has_token(toks: &[String], t: &str) -> bool {
    toks.iter().map(String::as_str).any(|tok| tok == t)
}
|
||||
|
||||
/// Whether at least one of `candidates` appears in `toks` as an exact
/// token. An empty `toks` or an empty `candidates` yields `false`.
fn has_any(toks: &[String], candidates: &[&str]) -> bool {
    toks.iter().any(|tok| candidates.contains(&tok.as_str()))
}
|
||||
|
||||
/// Whether token `a` is directly followed by token `b` somewhere in
/// `toks` — used for compound names that were split in two, such as
/// `first name` / `user name`.
fn has_seq(toks: &[String], a: &str, b: &str) -> bool {
    // Pair every token with its successor; fewer than two tokens
    // produce no pairs at all.
    toks.iter()
        .zip(toks.iter().skip(1))
        .any(|(left, right)| left == a && right == b)
}
|
||||
|
||||
/// Text-typed for heuristic purposes — `text`, `shortid`, plus the
|
||||
/// text-backed `decimal`/`date`/`datetime` are excluded here because
|
||||
/// those have their own dedicated gates; only `text`/`shortid` accept
|
||||
/// free-text generators.
|
||||
const fn type_is_text(ty: Type) -> bool {
|
||||
matches!(ty, Type::Text | Type::ShortId)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::seed::ColumnSpec;
    use pretty_assertions::assert_eq;

    /// Run the public chooser on a plain (unconstrained) column.
    fn choose(table: &str, name: &str, ty: Type) -> Generator {
        let spec = ColumnSpec::plain(name, ty);
        choose_generator(table, &spec)
    }

    /// Check every `(table, column, type, expected generator)` row.
    fn expect_all(cases: &[(&str, &str, Type, Generator)]) {
        for (table, column, ty, expected) in cases {
            assert_eq!(&choose(table, column, *ty), expected, "{table}.{column}");
        }
    }

    #[test]
    fn person_name_fields_map_to_name_generators() {
        expect_all(&[
            ("users", "first_name", Type::Text, Generator::FirstName),
            ("users", "firstName", Type::Text, Generator::FirstName),
            ("users", "last_name", Type::Text, Generator::LastName),
            ("users", "surname", Type::Text, Generator::LastName),
        ]);
    }

    #[test]
    fn contact_fields_map_correctly() {
        expect_all(&[
            ("users", "email", Type::Text, Generator::Email),
            ("users", "work_email", Type::Text, Generator::Email),
            ("users", "username", Type::Text, Generator::Username),
            ("users", "user_name", Type::Text, Generator::Username),
            ("users", "phone", Type::Text, Generator::Phone),
            ("accounts", "password", Type::Text, Generator::Password),
        ]);
    }

    #[test]
    fn address_fields_map_correctly() {
        expect_all(&[
            ("a", "city", Type::Text, Generator::City),
            ("a", "country", Type::Text, Generator::Country),
            ("a", "street", Type::Text, Generator::Street),
            ("a", "zip", Type::Text, Generator::ZipCode),
            ("a", "postcode", Type::Text, Generator::ZipCode),
            ("a", "province", Type::Text, Generator::StateName),
        ]);
    }

    #[test]
    fn bare_name_uses_table_context() {
        // D11 — the same column name resolves differently by table;
        // an unknown table (`widgets`) defaults to a person name.
        expect_all(&[
            ("products", "name", Type::Text, Generator::ProductName),
            ("items", "title", Type::Text, Generator::ProductName),
            ("users", "name", Type::Text, Generator::FullName),
            ("customers", "name", Type::Text, Generator::FullName),
            ("vendors", "name", Type::Text, Generator::Company),
            ("widgets", "name", Type::Text, Generator::FullName),
        ]);
    }

    #[test]
    fn name_false_positives_do_not_become_person_names() {
        // Structural names must NOT resolve to a person/product name…
        for (table, column) in [("files", "filename"), ("meta", "table_name")] {
            assert_ne!(choose(table, column, Type::Text), Generator::FullName);
        }
        // …they fall through to a generic / non-person generator.
        assert_eq!(choose("files", "filename", Type::Text), Generator::Generic);
    }

    #[test]
    fn numeric_name_heuristics_are_type_gated() {
        // `price` on a numeric column → currency; on text → falls through.
        expect_all(&[
            ("p", "price", Type::Int, Generator::CurrencyAmount),
            ("p", "price", Type::Decimal, Generator::CurrencyAmount),
            ("p", "price", Type::Text, Generator::Generic),
            ("u", "age", Type::Int, Generator::Age),
            ("o", "quantity", Type::Int, Generator::SmallInt),
        ]);
    }

    #[test]
    fn email_on_wrong_type_falls_through() {
        // The type gate: an `email` int column does NOT get a string —
        // it falls through to type-based generation.
        expect_all(&[("u", "email", Type::Int, Generator::Generic)]);
    }

    #[test]
    fn temporal_fields_are_bounded_and_type_gated() {
        expect_all(&[
            ("u", "dob", Type::Date, Generator::DateAdult),
            ("o", "order_date", Type::Date, Generator::DateRecent),
            ("o", "created_at", Type::DateTime, Generator::DateTimeRecent),
            ("o", "timestamp", Type::DateTime, Generator::DateTimeRecent),
            // Wrong type → not a date generator.
            ("o", "order_date", Type::Int, Generator::Generic),
        ]);
    }

    #[test]
    fn boolean_fields_map_to_boolean() {
        for column in ["is_active", "has_paid", "enabled"] {
            assert_eq!(choose("u", column, Type::Bool), Generator::Boolean, "{column}");
        }
    }

    #[test]
    fn identifier_family_is_unique_sequential() {
        expect_all(&[
            ("t", "code", Type::Text, Generator::IdentitySequential),
            ("t", "sku", Type::Text, Generator::IdentitySequential),
            ("t", "order_number", Type::Int, Generator::IdentitySequential),
            ("t", "external_id", Type::Int, Generator::IdentitySequential),
        ]);
    }

    #[test]
    fn foreign_key_columns_defer_to_executor() {
        let spec = ColumnSpec {
            is_foreign_key: true,
            ..ColumnSpec::plain("user_id", Type::Int)
        };
        assert_eq!(choose_generator("orders", &spec), Generator::ForeignKeySample);
    }

    #[test]
    fn check_in_values_become_pick_from() {
        let allowed: Vec<String> = vec!["active".into(), "closed".into()];
        let spec = ColumnSpec {
            check_in_values: Some(allowed.clone()),
            ..ColumnSpec::plain("status", Type::Text)
        };
        assert_eq!(choose_generator("orders", &spec), Generator::PickFrom(allowed));
    }

    #[test]
    fn enum_ish_names_are_detected_for_the_advisory() {
        for name in ["status", "role", "order_state", "priority"] {
            assert!(is_enum_ish(name), "{name} should be enum-ish");
        }
        for name in ["email", "first_name"] {
            assert!(!is_enum_ish(name), "{name} should not be enum-ish");
        }
    }

    #[test]
    fn enum_ish_columns_fall_through_to_generic() {
        // No special generator — generic text + the advisory flags them.
        expect_all(&[
            ("orders", "status", Type::Text, Generator::Generic),
            ("users", "role", Type::Text, Generator::Generic),
        ]);
    }

    #[test]
    fn unmatched_columns_use_type_based_fallback() {
        expect_all(&[("t", "some_freeform_field", Type::Text, Generator::Generic)]);
    }

    #[test]
    fn tokenizer_splits_on_all_boundaries() {
        let cases: [(&str, &[&str]); 5] = [
            ("created_at", &["created", "at"]),
            ("firstName", &["first", "name"]),
            ("DOB", &["dob"]),
            ("user-email", &["user", "email"]),
            ("HTTPStatus", &["httpstatus"]),
        ];
        for (input, expected) in cases {
            assert_eq!(tokens(input), expected.to_vec(), "{input}");
        }
    }
}
|
||||
+213
@@ -0,0 +1,213 @@
|
||||
//! Pure fake-data generation library for the `seed` command (ADR-0048).
|
||||
//!
|
||||
//! This module is the **generation half** of `seed`: given a column's
|
||||
//! shape (name, type, constraints), it chooses a *generator* and turns
|
||||
//! a seeded RNG into plausible [`Value`]s. It is deliberately decoupled
|
||||
//! from `db.rs` — it knows nothing about SQLite, the worker thread, or
|
||||
//! persistence — so it stays pure and unit-testable, with exact-value
|
||||
//! assertions made possible by the seedable RNG (ADR-0048 D4).
|
||||
//!
|
||||
//! The executor (`db.rs::do_seed`) adapts the real schema into
|
||||
//! [`ColumnSpec`]s, calls [`choose_generator`] per column, and then
|
||||
//! [`generate_value`] per row — except for the *stateful* markers
|
||||
//! ([`Generator::IdentitySequential`], [`Generator::ForeignKeySample`])
|
||||
//! which need database context (existing rows, the running sequence)
|
||||
//! and so are resolved by the executor, not here.
|
||||
//!
|
||||
//! Layout:
|
||||
//! - this file — the public types ([`ColumnSpec`], [`Generator`],
|
||||
//! [`SeedRng`]) and the RNG constructor.
|
||||
//! - [`heuristics`] — [`choose_generator`] + the name-aware catalogue
|
||||
//! (D7), table-context disambiguation (D11), identifier (D10) and
|
||||
//! enum-ish (D12) detection.
|
||||
//! - [`generators`] — [`generate_value`]: per-generator value
|
||||
//! production, the hand-rolled `product` generator (D9) and the
|
||||
//! bounded date windows (D8).
|
||||
|
||||
mod check;
|
||||
mod generators;
|
||||
mod heuristics;
|
||||
mod vocabulary;
|
||||
|
||||
pub use check::parse_in_check_values;
|
||||
pub use generators::{generate_value, range_bounds_reason};
|
||||
pub use heuristics::{choose_generator, is_enum_ish};
|
||||
pub use vocabulary::{generator_for_name, is_known_generator_prefix, KNOWN_GENERATORS};
|
||||
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{RngExt, SeedableRng};
|
||||
|
||||
use crate::dsl::types::Type;
|
||||
|
||||
/// The RNG that drives all seed generation.
|
||||
///
|
||||
/// A single seeded `StdRng` feeds both `fake`'s `fake_with_rng` and the
|
||||
/// hand-rolled generators, so a `--seed` value fully determines the
|
||||
/// output (ADR-0048 D4). `rand 0.10`'s `StdRng` satisfies `fake`'s
|
||||
/// `RngExt` bound (it re-exports `rand::RngExt`), so the same handle
|
||||
/// works on both sides.
|
||||
pub type SeedRng = StdRng;
|
||||
|
||||
/// Build the seed RNG.
|
||||
///
|
||||
/// With `Some(seed)` the stream is reproducible; with `None` it is
|
||||
/// seeded from entropy (via the thread RNG) so each run differs.
|
||||
/// Seeding `StdRng` from a single `u64` in both cases keeps
|
||||
/// construction uniform and avoids `rand`'s churn-prone from-entropy
|
||||
/// constructors.
|
||||
#[must_use]
|
||||
pub fn make_rng(seed: Option<u64>) -> SeedRng {
|
||||
let seed = seed.unwrap_or_else(|| rand::rng().random::<u64>());
|
||||
StdRng::seed_from_u64(seed)
|
||||
}
|
||||
|
||||
/// A column described in just enough detail to choose and run a
|
||||
/// generator. Built by the executor from the real schema; kept
|
||||
/// independent of `db.rs`'s `ReadColumn` so this library stays pure.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ColumnSpec {
|
||||
/// The column's name — the primary signal for generator choice.
|
||||
pub name: String,
|
||||
/// The user-facing playground type — gates every name heuristic.
|
||||
pub ty: Type,
|
||||
/// `NOT NULL` — the executor uses this for the block guard (D1);
|
||||
/// generation always produces a value, so it is informational here.
|
||||
pub not_null: bool,
|
||||
/// Part of the table's primary key.
|
||||
pub primary_key: bool,
|
||||
/// Carries a `UNIQUE` constraint (or is a single-column PK).
|
||||
pub unique: bool,
|
||||
/// A foreign-key column — generation is the executor's job
|
||||
/// (sample an existing parent row, D14), so [`choose_generator`]
|
||||
/// returns [`Generator::ForeignKeySample`].
|
||||
pub is_foreign_key: bool,
|
||||
/// Values parsed from a simple `col IN ('a', 'b', …)` CHECK
|
||||
/// constraint (D17). When present, generation draws from them so
|
||||
/// the common enum-as-CHECK pattern "just works".
|
||||
pub check_in_values: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl ColumnSpec {
|
||||
/// Convenience constructor for a plain, unconstrained column —
|
||||
/// used heavily in tests.
|
||||
#[cfg(test)]
|
||||
#[must_use]
|
||||
pub fn plain(name: &str, ty: Type) -> Self {
|
||||
Self {
|
||||
name: name.to_string(),
|
||||
ty,
|
||||
not_null: false,
|
||||
primary_key: false,
|
||||
unique: false,
|
||||
is_foreign_key: false,
|
||||
check_in_values: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// How the values of one column are produced.
///
/// Most variants are *stateless*: [`generate_value`] turns them into a
/// [`Value`] using nothing but the RNG. Two are *stateful markers* that
/// the executor must intercept because they need database context:
/// [`Self::IdentitySequential`] (the running `MAX+offset` sequence,
/// D10) and [`Self::ForeignKeySample`] (draw from existing parent rows,
/// D14). For safety [`generate_value`] treats an un-intercepted marker
/// as [`Self::Generic`] rather than panicking.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Generator {
    // — Person —
    /// A person's first name.
    FirstName,
    /// A person's last name.
    LastName,
    /// A full person name (table-context default for `name`/`title`).
    FullName,
    /// An email address.
    Email,
    /// A login / user name.
    Username,
    /// A password value.
    Password,
    /// A phone number.
    Phone,

    // — Address —
    /// A city name.
    City,
    /// A country name.
    Country,
    /// A state / province name.
    StateName,
    /// A street value for `street`/`address` columns.
    Street,
    /// A zip / postal code.
    ZipCode,

    // — Organisation / commerce —
    /// A company name.
    Company,
    /// A job title.
    JobTitle,
    /// Hand-rolled `{adjective} {material} {noun}` (D9) — `fake` has no
    /// commerce module.
    ProductName,

    // — Free text —
    /// A sentence of free text.
    Sentence,
    /// A paragraph of free text.
    Paragraph,
    /// A URL.
    Url,
    /// A colour written as a hex code.
    HexColor,

    // — Numeric —
    /// A money-shaped amount (whole for `int`, two-decimal otherwise).
    CurrencyAmount,
    /// A plausible human age (18–80).
    Age,
    /// A small positive integer (quantities, counts).
    SmallInt,

    // — Temporal (bounded windows, D8) —
    /// A date within the last few years.
    DateRecent,
    /// A date in an adult birth window (≈18–80 years ago) — for `dob`.
    DateAdult,
    /// A datetime within the last few years.
    DateTimeRecent,

    // — Boolean —
    /// A boolean value.
    Boolean,

    // — Stateful markers (executor-resolved) —
    /// Unique sequential identifier (D10): the executor supplies
    /// `MAX(col)+offset`. Chosen for identifier-named non-FK columns.
    IdentitySequential,
    /// FK column (D14): the executor samples an existing parent key.
    ForeignKeySample,

    // — List / range (the `set` override clause, D2) —
    /// Uniform pick from a fixed list — a simple `IN`-CHECK (D17), an
    /// enum, or a `set <col> in (…)` / `= <value>` override (D2).
    PickFrom(Vec<String>),
    /// Uniform value in `[low, high]` — the `set <col> between low and
    /// high` override (D2).
    ///
    /// The bounds are the raw literal strings; how they are read (int /
    /// real / date / datetime) follows the destination column type at
    /// generation time. The executor validates type-compatibility
    /// *before* generation (a bound that does not parse for the column
    /// type is a friendly error), so [`generate_value`] only ever sees
    /// parseable bounds; a defensive parse failure falls back to
    /// type-based generation.
    Range { low: String, high: String },

    /// Type-based fallback (D8) when no name heuristic matches.
    Generic,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Build an RNG from `seed` and collect its first `count` `u64`s.
    fn draw(seed: Option<u64>, count: usize) -> Vec<u64> {
        let mut rng = make_rng(seed);
        std::iter::repeat_with(|| rng.random::<u64>())
            .take(count)
            .collect()
    }

    #[test]
    fn same_seed_yields_identical_rng_streams() {
        assert_eq!(
            draw(Some(42), 8),
            draw(Some(42), 8),
            "a fixed seed must reproduce the stream"
        );
    }

    #[test]
    fn different_seeds_yield_different_streams() {
        assert_ne!(draw(Some(1), 8), draw(Some(2), 8));
    }

    #[test]
    fn unseeded_rng_constructs_without_panicking() {
        // Entropy-seeded path: just exercise it.
        let _ = draw(None, 1);
    }
}
|
||||
@@ -0,0 +1,149 @@
|
||||
//! The curated named-generator vocabulary (ADR-0048 D9).
|
||||
//!
|
||||
//! This is the **single source of truth** for "what generator names can
|
||||
//! a learner write after `set <col> as …`", shared by three consumers
|
||||
//! (mirroring `KNOWN_SQL_FUNCTIONS`, ADR-0022 Amendment 6):
|
||||
//!
|
||||
//! - **Tab completion** — the `seed … set <col> as ⟨here⟩` slot offers
|
||||
//! these names (`src/completion.rs`).
|
||||
//! - **The typing-time validity indicator (ADR-0027)** — an unknown
|
||||
//! name after `as` is flagged `[ERR]` while typing.
|
||||
//! - **The executor** — `db.rs::do_seed` maps a name to a [`Generator`]
|
||||
//! via [`generator_for_name`]; an unknown name is a friendly error.
|
||||
//!
|
||||
//! The list is a deliberately *curated pedagogical set* — the generators
|
||||
//! a learner reaches for, not every internal [`Generator`] variant
|
||||
//! (stateful markers like `ForeignKeySample` are executor-only and have
|
||||
//! no name). It is lowercase + sorted (pinned by a unit test).
|
||||
|
||||
use crate::seed::Generator;
|
||||
|
||||
/// Canonical spellings of the curated generator names.
///
/// All entries are lowercase and the list is **sorted** — an invariant
/// pinned by a unit test, because completion relies on a stable order
/// and on a case-insensitive prefix match against these spellings.
pub const KNOWN_GENERATORS: &[&str] = &[
    "age", "bool", "city", "color", "company", "country", "date", "datetime", "email",
    "first_name", "job", "last_name", "name", "paragraph", "password", "phone", "price",
    "product", "sentence", "state", "street", "url", "username", "zip",
];
|
||||
|
||||
/// Map a generator name (case-insensitive) to its [`Generator`].
|
||||
///
|
||||
/// `None` for an unrecognised name — the executor turns that into a
|
||||
/// friendly "unknown generator" error naming the curated set. A couple
|
||||
/// of common spelling variants (`firstname`, `lastname`, `colour`,
|
||||
/// `full_name`) are accepted as aliases even though only the canonical
|
||||
/// spelling is offered for completion.
|
||||
#[must_use]
|
||||
pub fn generator_for_name(name: &str) -> Option<Generator> {
|
||||
let n = name.to_ascii_lowercase();
|
||||
let g = match n.as_str() {
|
||||
"name" | "full_name" => Generator::FullName,
|
||||
"first_name" | "firstname" => Generator::FirstName,
|
||||
"last_name" | "lastname" | "surname" => Generator::LastName,
|
||||
"email" => Generator::Email,
|
||||
"username" => Generator::Username,
|
||||
"password" => Generator::Password,
|
||||
"phone" => Generator::Phone,
|
||||
"city" => Generator::City,
|
||||
"country" => Generator::Country,
|
||||
"state" => Generator::StateName,
|
||||
"street" => Generator::Street,
|
||||
"zip" => Generator::ZipCode,
|
||||
"company" => Generator::Company,
|
||||
"job" => Generator::JobTitle,
|
||||
"product" => Generator::ProductName,
|
||||
"sentence" => Generator::Sentence,
|
||||
"paragraph" => Generator::Paragraph,
|
||||
"url" => Generator::Url,
|
||||
"color" | "colour" => Generator::HexColor,
|
||||
"price" => Generator::CurrencyAmount,
|
||||
"age" => Generator::Age,
|
||||
"date" => Generator::DateRecent,
|
||||
"datetime" => Generator::DateTimeRecent,
|
||||
"bool" => Generator::Boolean,
|
||||
_ => return None,
|
||||
};
|
||||
Some(g)
|
||||
}
|
||||
|
||||
/// Whether `partial` is a case-insensitive prefix of at least one known
|
||||
/// generator name.
|
||||
///
|
||||
/// An empty `partial` matches every generator (it is a prefix of all) —
|
||||
/// mirrors `is_known_function_prefix`. Used by the validity indicator to
|
||||
/// avoid flagging a still-being-typed name.
|
||||
#[must_use]
|
||||
pub fn is_known_generator_prefix(partial: &str) -> bool {
|
||||
let lowered = partial.to_ascii_lowercase();
|
||||
KNOWN_GENERATORS.iter().any(|g| g.starts_with(&lowered))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn known_generators_is_sorted_and_lowercase() {
        let expected_order = {
            let mut names = KNOWN_GENERATORS.to_vec();
            names.sort_unstable();
            names
        };
        assert_eq!(KNOWN_GENERATORS, expected_order.as_slice(), "must be sorted");

        for name in KNOWN_GENERATORS {
            assert_eq!(*name, name.to_ascii_lowercase(), "must be lowercase: {name}");
        }
    }

    #[test]
    fn every_listed_name_maps_to_a_generator() {
        for name in KNOWN_GENERATORS {
            assert!(
                generator_for_name(name).is_some(),
                "listed generator name `{name}` has no mapping"
            );
        }
    }

    #[test]
    fn mapping_is_case_insensitive_and_has_aliases() {
        let cases = [
            ("EMAIL", Generator::Email),
            ("FirstName", Generator::FirstName),
            ("colour", Generator::HexColor),
            ("full_name", Generator::FullName),
        ];
        for (spelling, expected) in cases {
            assert_eq!(generator_for_name(spelling), Some(expected), "{spelling}");
        }
    }

    #[test]
    fn unknown_name_has_no_mapping() {
        for spelling in ["bogus", ""] {
            assert_eq!(generator_for_name(spelling), None, "{spelling:?}");
        }
    }

    #[test]
    fn prefix_check_matches_known_and_rejects_unknown() {
        // The empty string is a prefix of every name.
        for partial in ["ema", "EMA", ""] {
            assert!(is_known_generator_prefix(partial), "{partial:?}");
        }
        assert!(!is_known_generator_prefix("zzz"));
    }
}
|
||||
@@ -163,6 +163,7 @@ impl Theme {
|
||||
HighlightClass::String => self.tok_string,
|
||||
HighlightClass::Punct => self.tok_punct,
|
||||
HighlightClass::Flag => self.tok_flag,
|
||||
HighlightClass::Function => self.tok_function,
|
||||
HighlightClass::Error => self.tok_error,
|
||||
}
|
||||
}
|
||||
@@ -228,6 +229,7 @@ mod tests {
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::String), t.tok_string);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Punct), t.tok_punct);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Flag), t.tok_flag);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Function), t.tok_function);
|
||||
assert_eq!(t.highlight_class_color(HighlightClass::Error), t.tok_error);
|
||||
}
|
||||
|
||||
|
||||
@@ -1438,12 +1438,19 @@ fn render_input_one_row(
|
||||
let offset = input_scroll_offset(line_cols, cursor_col, tw, app.input_scroll_offset);
|
||||
app.input_scroll_offset = offset;
|
||||
|
||||
let runs = crate::input_render::render_input_runs_in_mode(
|
||||
// Strip the `:` one-shot prefix for the SQL highlighting/overlays
|
||||
// (ADR-0003); the `:` itself renders as plain text. Identity for
|
||||
// non-one-shot input.
|
||||
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
|
||||
let runs = crate::input_render::render_input_runs_feedback(
|
||||
&app.input,
|
||||
cursor,
|
||||
theme,
|
||||
&app.schema_cache,
|
||||
mode_for_render,
|
||||
fb_view,
|
||||
fb_cursor,
|
||||
fb_off,
|
||||
);
|
||||
let spans = runs_to_spans(&app.input, &runs);
|
||||
|
||||
@@ -1507,12 +1514,19 @@ fn render_input_two_rows(
|
||||
let offset = input_scroll_offset(line_cols, cursor_col, capacity, app.input_scroll_offset);
|
||||
app.input_scroll_offset = offset;
|
||||
|
||||
let runs = crate::input_render::render_input_runs_in_mode(
|
||||
// Strip the `:` one-shot prefix for the SQL highlighting/overlays
|
||||
// (ADR-0003); the `:` itself renders as plain text. Identity for
|
||||
// non-one-shot input.
|
||||
let (fb_view, fb_cursor, fb_off) = app.feedback_view();
|
||||
let runs = crate::input_render::render_input_runs_feedback(
|
||||
&app.input,
|
||||
cursor,
|
||||
theme,
|
||||
&app.schema_cache,
|
||||
mode_for_render,
|
||||
fb_view,
|
||||
fb_cursor,
|
||||
fb_off,
|
||||
);
|
||||
let cells = expand_runs_to_cells(&app.input, &runs);
|
||||
let len = cells.len();
|
||||
@@ -1621,23 +1635,6 @@ fn runs_to_spans<'a>(
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Strip a leading one-shot `:` sigil (and the whitespace after
/// it) from `input`, returning the advanced command slice and the
/// cursor remapped into it. Mirrors `App::submit`'s `:` handling
/// so the hint panel hints at the command, not the sigil
/// (ADR-0022 Amendment 1).
///
/// Intended for the `AdvancedOneShot` mode, where `input` starts
/// (after any leading whitespace) with `:`. If that does not hold —
/// empty input, whitespace only, or a different first character —
/// the input is returned unchanged instead of panicking or dropping
/// an arbitrary character.
///
/// `cursor` is a byte offset into `input`. The returned cursor is a
/// byte offset into the returned slice: a cursor inside the stripped
/// prefix maps to `0`, and the result is clamped to the slice length.
fn strip_one_shot_prefix(input: &str, cursor: usize) -> (&str, usize) {
    match input.trim_start().strip_prefix(':') {
        Some(after_sigil) => {
            let effective = after_sigil.trim_start();
            // `effective` is a suffix of `input`, so the length difference is
            // exactly the stripped prefix: leading whitespace, `:`, and the
            // whitespace after it.
            let prefix_len = input.len() - effective.len();
            let effective_cursor = cursor.saturating_sub(prefix_len).min(effective.len());
            (effective, effective_cursor)
        }
        // No sigil present: identity, with the cursor kept within bounds.
        None => (input, cursor.min(input.len())),
    }
}
|
||||
|
||||
/// Resolve the Hint panel body into its rendered lines, pre-wrapped
|
||||
/// to the panel's inner width and clamped to `max_rows` with an
|
||||
/// ellipsis backstop (issue #12). `max_rows` is the geometry-fixed row
|
||||
@@ -1679,14 +1676,9 @@ fn resolve_hint_lines(
|
||||
|
||||
// In one-shot advanced mode (`:` prefix in simple mode) the
|
||||
// raw input carries the `:` sigil, which is not part of the
|
||||
// grammar. Strip it for the ambient computation so the hint
|
||||
// reflects the advanced command — mirroring `App::submit`.
|
||||
let (hint_input, hint_cursor) = match app.effective_mode() {
|
||||
EffectiveMode::AdvancedOneShot => {
|
||||
strip_one_shot_prefix(&app.input, app.input_cursor)
|
||||
}
|
||||
_ => (app.input.as_str(), app.input_cursor),
|
||||
};
|
||||
// grammar. The shared feedback view strips it so the hint reflects
|
||||
// the advanced command — mirroring `App::submit` (ADR-0003).
|
||||
let (hint_input, hint_cursor, _off) = app.feedback_view();
|
||||
let ambient = crate::input_render::ambient_hint_in_mode(
|
||||
hint_input,
|
||||
hint_cursor,
|
||||
|
||||
@@ -23,6 +23,7 @@ mod m2n;
|
||||
mod parse_error_pedagogy;
|
||||
mod project_lifecycle;
|
||||
mod replay_command;
|
||||
mod seed;
|
||||
mod sql_alter_table;
|
||||
mod sql_create_index;
|
||||
mod sql_create_table;
|
||||
|
||||
@@ -109,6 +109,14 @@ fn near_miss_matrix_simple_mode() {
|
||||
("delete", &["after `delete`, expected `from`", "delete from <Table>"]),
|
||||
("delete from", &["after `delete from`, expected table name", "delete from <Table>"]),
|
||||
("delete from T", &["expected `where` or `--all-rows`", "delete from <Table>"]),
|
||||
("seed", &["after `seed`, expected table name", "seed <Table> [count]"]),
|
||||
// Phase 2 (ADR-0048 D2/D1): malformed `set` clause + column-fill.
|
||||
("seed T set", &["after `seed T set`, expected column name", "seed <Table>.<col>"]),
|
||||
(
|
||||
"seed T set role",
|
||||
&["after `seed T set role`, expected `=`, `in`, `between`, or `as`", "seed <Table>.<col>"],
|
||||
),
|
||||
("seed T.", &["after `seed T.`, expected column name", "seed <Table>.<col>"]),
|
||||
("replay", &["after `replay`, expected string literal or path", "replay <path>"]),
|
||||
("explain", &["after `explain`, expected `show`, `update`, or `delete`", "explain show data"]),
|
||||
// advanced-only entry word typed in simple mode → "this is SQL" rail
|
||||
@@ -539,3 +547,4 @@ fn caret_aligns_under_offending_token() {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+1277
File diff suppressed because it is too large
Load Diff
@@ -237,6 +237,7 @@ fn command_kind_label(cmd: &rdbms_playground::dsl::Command) -> String {
|
||||
ShowTable { .. } => "ShowTable".into(),
|
||||
ShowList { kind, name } => format!("ShowList({kind:?}, {})", name.is_some()),
|
||||
Insert { .. } => "Insert".into(),
|
||||
Seed { .. } => "Seed".into(),
|
||||
Update { .. } => "Update".into(),
|
||||
Delete { .. } => "Delete".into(),
|
||||
ShowData { .. } => "ShowData".into(),
|
||||
@@ -440,3 +441,68 @@ fn smoke_assess_parse_label_round_trips() {
|
||||
assert_eq!(a.parse_result.as_deref(), Ok("Insert"));
|
||||
assert!(matches!(a.state, InputState::Valid));
|
||||
}
|
||||
|
||||
/// Ambient-surface checks for the `seed` command (ADR-0048): table-name
/// completion, the validity indicator for known vs. unknown tables,
/// the `--seed` flag and `set` clause candidates after the count,
/// column completion after `set ` and after `Table.`, and generator
/// names after `as `.
#[test]
fn seed_completion_and_validity() {
    use std::mem::discriminant;

    let schema = schema_serial_pk(); // Customers(id serial, Name, Email)

    // Candidate texts offered with the cursor at the end of `input`.
    let candidates_at_end =
        |input: &str| completion_candidate_texts(&assess_at_end(input, &schema));

    // Completion: `seed ` offers existing table names.
    let cands = candidates_at_end("seed ");
    let offers_table = cands.iter().any(|c| c == "Customers");
    assert!(offers_table, "`seed ` should complete table names, got {cands:?}");

    // Validity (ADR-0027): seeding a known table assesses as valid.
    let ok = assess_at_end("seed Customers 5", &schema);
    assert!(matches!(ok.state, InputState::Valid), "known table: {:?}", ok.state);

    // An unknown table must land in the same state variant as it does for
    // the sibling `show data` (same table-only slot), whatever that is —
    // hence the discriminant comparison rather than a fixed expectation.
    let seed_ghost = assess_at_end("seed Ghost 5", &schema).state;
    let show_ghost = assess_at_end("show data Ghost", &schema).state;
    assert_eq!(
        discriminant(&seed_ghost),
        discriminant(&show_ghost),
        "seed should treat an unknown table like `show data`: seed={seed_ghost:?}, show={show_ghost:?}"
    );

    // After the count, both the `--seed` reproducibility flag and the
    // Phase 2 `set` clause (ADR-0048 D2) are offered.
    let flag_cands = candidates_at_end("seed Customers 5 ");
    let offers_seed_flag = flag_cands.iter().any(|c| c.contains("seed"));
    assert!(
        offers_seed_flag,
        "`--seed` should be offered as a candidate, got {flag_cands:?}"
    );
    let offers_set = flag_cands.iter().any(|c| c == "set");
    assert!(offers_set, "`set` should be offered after the count, got {flag_cands:?}");

    // `set ` offers the active table's columns (narrowed to Customers).
    let set_cands = candidates_at_end("seed Customers set ");
    let offers_columns = ["Name", "Email"]
        .iter()
        .all(|&column| set_cands.iter().any(|c| c == column));
    assert!(
        offers_columns,
        "`set ` should complete this table's columns, got {set_cands:?}"
    );

    // `set <col> as ` offers the curated generator vocabulary (D9).
    let gen_cands = candidates_at_end("seed Customers set Email as ");
    let offers_generators = ["email", "product"]
        .iter()
        .all(|&generator| gen_cands.iter().any(|c| c == generator));
    assert!(
        offers_generators,
        "`as ` should complete generator names, got {gen_cands:?}"
    );

    // Column-fill (D1 form 2): `seed Customers.` offers the columns.
    let fill_cands = candidates_at_end("seed Customers.");
    let offers_fill_column = fill_cands.iter().any(|c| c == "Name");
    assert!(
        offers_fill_column,
        "`seed Customers.` should complete column names, got {fill_cands:?}"
    );
}
|
||||
|
||||
+2
-2
@@ -24,10 +24,10 @@ Assessment {
|
||||
completion: Some(
|
||||
Completion {
|
||||
replaced_range: (
|
||||
24,
|
||||
22,
|
||||
27,
|
||||
),
|
||||
partial_prefix: "all",
|
||||
partial_prefix: "--all",
|
||||
candidates: [
|
||||
Candidate {
|
||||
text: "--all-rows",
|
||||
|
||||
+2
-2
@@ -24,10 +24,10 @@ Assessment {
|
||||
completion: Some(
|
||||
Completion {
|
||||
replaced_range: (
|
||||
33,
|
||||
31,
|
||||
36,
|
||||
),
|
||||
partial_prefix: "all",
|
||||
partial_prefix: "--all",
|
||||
candidates: [
|
||||
Candidate {
|
||||
text: "--all-rows",
|
||||
|
||||
Reference in New Issue
Block a user