From 6d8c9eea363281cbdd8db8fed62d477e16abbce1 Mon Sep 17 00:00:00 2001 From: "claude@clouddev1" Date: Sun, 31 May 2026 11:49:10 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20curated=20SQL=20function=20list=20?= =?UTF-8?q?=E2=80=94=20Tab=20completion=20(#15)=20+=20typing-time=20typo?= =?UTF-8?q?=20hint=20(#16)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add src/dsl/sql_functions.rs (KNOWN_SQL_FUNCTIONS) as the shared source of truth at sql_expr_ident slots: - #15: offer the functions as Tab candidates under a new CandidateKind::Function + ninth Theme colour tok_function (blue, distinct from keyword/identifier/type). - #16: restore the column-typo flag the #6 fix had dropped wholesale — invalid_ident_at_cursor now bails only when the partial prefix-matches a known function, else falls through to the schema-column check. A column named like a function (e.g. `count`) is deduped (column wins). `cast` is excluded — CAST(x AS type) is not a plain-call shape. The no-validation-allowlist posture stands: the list drives completion + the typo hint only, never parse-time acceptance. Docs: ADR-0022 Amendment 6, ADR-0031 status note, README index, requirements I3/I4 + refreshed test baseline. --- docs/adr/0022-ambient-typing-assistance.md | 86 ++++++++ docs/adr/0031-sql-expression-grammar.md | 21 ++ docs/adr/README.md | 4 +- docs/requirements.md | 38 ++-- src/completion.rs | 237 ++++++++++++++++++++- src/dsl/mod.rs | 1 + src/dsl/sql_functions.rs | 151 +++++++++++++ src/input_render.rs | 33 +++ src/theme.rs | 23 ++ src/ui.rs | 1 + 10 files changed, 570 insertions(+), 25 deletions(-) create mode 100644 src/dsl/sql_functions.rs diff --git a/docs/adr/0022-ambient-typing-assistance.md b/docs/adr/0022-ambient-typing-assistance.md index ee433e2..254b45e 100644 --- a/docs/adr/0022-ambient-typing-assistance.md +++ b/docs/adr/0022-ambient-typing-assistance.md @@ -686,6 +686,92 @@ can be revisited if hints routinely need more. full-screen snapshots (the empty-state placeholder and the `insert` usage hint now wrap to their full text instead of being clipped). +## Amendment 6 — Curated SQL function names: completion + typing-time typo hint (2026-05-30) + +The advanced-mode SQL expression grammar (ADR-0031) accepts a +function-call *shape* — `name(args)` — at its `sql_expr_ident` slot +but, by design, does **not** know which names are real functions +(ADR-0031 §1: the slot is `IdentSource::Columns`, optimised for the +common case of a column reference; the walker is a structural matcher, +not an evaluator). That left two gaps at this slot, raised as issues +#15 and #16: + +- **#16 (regression).** The earlier issue-#6 function-call validator + fix dropped `invalid_ident_at_cursor`'s "no such column" flag at + every `sql_expr_ident` position — necessary to stop a false positive + on a function name like `sum`, but it also silenced the typing-time + signal for a *genuine* column typo in an incomplete expression + (`select Agx` before a `FROM` brings the schema-existence diagnostic + into scope). Typing-time became *less* eager than submit-time. +- **#15 (discovery).** Tab at a `sql_expr_ident` slot offered schema + columns + a few expression keywords (`null`, `distinct`, `case`, …) + but no function names, so a learner had to already know `sum` / `avg` + / `upper` to type them. + +Both want the same thing: a single source of truth for *"what SQL +function names does this playground recognise"*. + +**Change:** + +1. **Curated list.** New `src/dsl/sql_functions.rs` with + `KNOWN_SQL_FUNCTIONS` (sorted, lowercase — a pinned invariant) and + an `is_known_function_prefix()` helper. A deliberately *curated + pedagogical set*, not "every SQLite built-in": the aggregates a + learner meets first (`count`/`sum`/`avg`/`min`/`max`), the common + scalars (`length`/`upper`/`lower`/`trim`/`substr`/`coalesce`/`abs`/ + `round`), and a broader scalar tier (`date`/`datetime`/`strftime`/ + `hex`/`ifnull`/`nullif`/`replace`/`instr`/`typeof`/`random`). + **`cast` is deliberately excluded** — SQLite's `CAST(expr AS type)` + is not a plain-call shape the expression grammar parses, so + offering it would surface a candidate that does not complete; it + stays out until the grammar grows a dedicated `CAST` form. +2. **#16 — restore the typo flag, narrowly.** `invalid_ident_at_cursor` + no longer bails wholesale at a `sql_expr_ident` slot. It bails only + when the partial prefix-matches a known function name; otherwise it + falls through to the existing schema-column check, which flags "no + such column" unless the partial prefix-matches a real column. So + `select Agx` warns again at typing time while `select sum` does not. + The submit-time `unknown_column` diagnostic path is untouched; the + issue-#6 lockdown tests (`genuine_column_typo_in_complete_select_…`, + `advanced_select_partial_function_name_not_flagged_…`) still pass. +3. **#15 — offer functions as candidates.** A new completion source + (Source 1.8) contributes `KNOWN_SQL_FUNCTIONS` (prefix-filtered like + every other source) whenever the expected set contains a + `sql_expr_ident` slot, ordered after keywords/types (a learner + reads clause keywords first, then discovers callables). +4. **New `CandidateKind::Function` + `tok_function` colour.** Like + Amendment 4 gave types their own class, function candidates get a + dedicated kind and a ninth `Theme` colour field (`tok_function`, + a blue distinct from keyword purple, identifier teal, and type + pink/magenta in both `dark()` and `light()`) so a callable reads + apart from a clause keyword, a column reference, and a column type. + +**No-validation-allowlist posture stands (ADR-0031 §6/§7).** The list +drives *completion* and the *typo hint* only — never parse-time +acceptance. An unknown or engine-specific function still parses (the +grammar admits the call shape generically) and surfaces an +engine-neutral *execution* error, exactly as before. + +**Pedagogy:** the same dedicated-colour rationale as Amendment 4 — a +learner can tell *"this is a function"* at a glance, and Tab now +*teaches* the function vocabulary instead of assuming it. + +**Coverage:** `sql_functions::{list_is_sorted_and_lowercase, +list_has_no_duplicates, cast_is_excluded, prefix_match_is_case_insensitive, +empty_prefix_matches_all, unknown_prefix_does_not_match}`; +`completion::{sql_expr_slot_offers_known_function_candidates, +projection_slot_offers_known_function_candidates, +sql_function_candidates_filter_by_prefix, +sql_function_candidates_carry_function_kind, +function_candidates_absent_at_non_expression_slots, +cast_is_not_offered_as_a_function_candidate, +invalid_ident_fires_for_genuine_typo_at_sql_expr_slot, +invalid_ident_does_not_fire_for_function_prefix_at_sql_expr_slot, +invalid_ident_does_not_fire_for_column_prefix_at_sql_expr_slot}`; +`input_render::advanced_select_genuine_column_typo_before_from_warns_at_typing_time`; +`theme::function_colour_is_distinct_from_keyword_identifier_and_type`. +See ADR-0031's status note for the grammar-side anchor. + ## Out of scope Deliberately deferred to keep this ADR shippable as a single diff --git a/docs/adr/0031-sql-expression-grammar.md b/docs/adr/0031-sql-expression-grammar.md index 083025e..d0eba95 100644 --- a/docs/adr/0031-sql-expression-grammar.md +++ b/docs/adr/0031-sql-expression-grammar.md @@ -409,3 +409,24 @@ Later phases extend the same fragment: set the engine-neutrality posture and the no-allowlist rule. - `docs/simple-mode-limitations.md` — the DSL limits this grammar lifts for advanced mode (§1, §4). + +## Status note — known-function list layered on the slot (2026-05-30) + +The `sql_expr_ident` slot is `IdentSource::Columns` and, per §1 / §5, +does **not** itself know which identifiers are function names — it +optimises for the common case (a column reference) and admits the +function-call shape structurally; §5 explicitly noted "function names +are not completed … a typed function name simply is not a candidate". +**ADR-0022 Amendment 6** layers a curated known-function list +(`src/dsl/sql_functions.rs`) on top of this slot, consumed two ways: +as Tab-completion candidates so a learner can discover `sum` / `upper` +/ … (issue #15 — softening §5's "not completed" line to "completed +from a curated pedagogical list, not an allowlist for validation"), +and as the allow-list that lets the typing-time column-typo hint stay +strict at this slot — flag a partial as "no such column" only when it +matches neither a schema column nor a known function name (issue #16). +The grammar here is unchanged, and §6/§7's no-validation-allowlist +posture stands: the list drives completion + the typo hint, **not** +parse-time acceptance (an unknown function still parses and surfaces an +engine-neutral execution error). The list sits in the completion / +hint layer above the grammar. diff --git a/docs/adr/README.md b/docs/adr/README.md index 9a1b07c..b2c66d1 100644 --- a/docs/adr/README.md +++ b/docs/adr/README.md @@ -27,7 +27,7 @@ This directory contains the project's ADRs, recorded per - [ADR-0019 — Friendly error layer (H1) and i18n message catalog](0019-friendly-error-layer-and-i18n.md) - [ADR-0020 — Tokenization layer for the DSL parser](0020-tokenization-layer-for-the-dsl-parser.md) - [ADR-0021 — Parser-as-source-of-truth for H1a (per-command usage in parse errors)](0021-parser-as-source-of-truth-for-h1a.md) -- [ADR-0022 — Ambient typing assistance: colour, hint panel, completion (I3 + I4)](0022-ambient-typing-assistance.md) — **Amendment 1 supersedes §12's simple-mode-only carve-out**: the unified mode-aware walker (ADR-0030/0031/0032) now speaks SQL, so advanced-mode ambient assistance is re-enabled. `ambient_hint_in_mode` + `hint_resolution_at_input_in_mode` + `expected_for_hint_snapshot` thread `Mode`; `render_hint_panel` calls ambient for all modes (no more advanced-mode `None`); the one-shot `:` sigil is stripped before the ambient walk. Fixes a live bug where advanced-mode SQL hinting/completion-preview were dead despite Phase 2 marking them green (validated at the engine layer, not the UI). Simple-mode gating, highlighting, and the §13 performance posture are unchanged; covered by an app-level render test plus ambient-layer regression locks; **Amendment 2 reverses the handoff-14 keywords-first candidate ordering** — schema identifiers (table/column/relationship names) now sort *before* keywords so a name the user would have to look up stays visible in the single-row, window-scrolled candidate line (keywords are learned over time; the `tok_identifier`/`tok_keyword` colour split marks the boundary); shipped with a `walk_repeated` fix that surfaces a list item's trailing optionals at a clean boundary (`order by Name ` → `asc`/`desc`, `select Name ` → `as`, `create table … Code(text) ` → `not`/`unique`/`default`/`check`; the `,` separator deliberately not surfaced); records a deferred two-line hint box for growing lists; **Amendment 3 makes the ambient-hint fallback rung schema-aware** — Amendment 1's bottom-rung `parse_command_in_mode` was schemaless while every earlier rung was not, so between-values insert hints pointed at `)` (type-blind close) instead of `,` and wrong-arity closed tuples read "submit with Enter" for an input the schema-aware parse rejects (issue #2); now uses `parse_command_with_schema_in_mode`, no extra walk, with the friendly arity diagnostic still winning at its higher rung; **Amendment 4 gives column types a dedicated highlight class** — both `Node::Ident.highlight_override` *and* the `Word.highlight_override` field were dead (driver destructured the former to `_`, `walk_word` hardcoded `Keyword`); now both wired through, with a new `HighlightClass::Type` + eighth `Theme` field `tok_type` (a pink/deep-magenta distinct from both keyword purple and identifier teal) so types no longer render identically to identifiers (issue #8); the three `IdentSource::Types` slots opt in via `Some(Type)` (advanced-mode single-word SQL aliases — `float`, `varchar`, … per ADR-0035 §3 — ride along for free), and the two-word `double precision` alias opts in via the new `Word::type_keyword` constructor so it matches its synonyms; **Amendment 5 lets the hint panel grow for long prose hints** — a fixed one-row panel clipped long field-value/usage hints past the first line (issue #12); `resolve_hint_lines` now pre-wraps prose and `render_right_column` sizes the panel to the line count (1 row default, up to `MAX_HINT_ROWS`=3, reclaimed when short) with a `clamp_wrapped` ellipsis backstop; the candidate list still scrolls horizontally on one row (Amendment 2's deferred two-line candidate box stays deferred); also shortens the 299-char `parse.usage.sql_create_table` synopsis to a terse one-liner (full grammar remains in `help.ddl.sql_create_table`) +- [ADR-0022 — Ambient typing assistance: colour, hint panel, completion (I3 + I4)](0022-ambient-typing-assistance.md) — **Amendment 1 supersedes §12's simple-mode-only carve-out**: the unified mode-aware walker (ADR-0030/0031/0032) now speaks SQL, so advanced-mode ambient assistance is re-enabled. `ambient_hint_in_mode` + `hint_resolution_at_input_in_mode` + `expected_for_hint_snapshot` thread `Mode`; `render_hint_panel` calls ambient for all modes (no more advanced-mode `None`); the one-shot `:` sigil is stripped before the ambient walk. Fixes a live bug where advanced-mode SQL hinting/completion-preview were dead despite Phase 2 marking them green (validated at the engine layer, not the UI). Simple-mode gating, highlighting, and the §13 performance posture are unchanged; covered by an app-level render test plus ambient-layer regression locks; **Amendment 2 reverses the handoff-14 keywords-first candidate ordering** — schema identifiers (table/column/relationship names) now sort *before* keywords so a name the user would have to look up stays visible in the single-row, window-scrolled candidate line (keywords are learned over time; the `tok_identifier`/`tok_keyword` colour split marks the boundary); shipped with a `walk_repeated` fix that surfaces a list item's trailing optionals at a clean boundary (`order by Name ` → `asc`/`desc`, `select Name ` → `as`, `create table … Code(text) ` → `not`/`unique`/`default`/`check`; the `,` separator deliberately not surfaced); records a deferred two-line hint box for growing lists; **Amendment 3 makes the ambient-hint fallback rung schema-aware** — Amendment 1's bottom-rung `parse_command_in_mode` was schemaless while every earlier rung was not, so between-values insert hints pointed at `)` (type-blind close) instead of `,` and wrong-arity closed tuples read "submit with Enter" for an input the schema-aware parse rejects (issue #2); now uses `parse_command_with_schema_in_mode`, no extra walk, with the friendly arity diagnostic still winning at its higher rung; **Amendment 4 gives column types a dedicated highlight class** — both `Node::Ident.highlight_override` *and* the `Word.highlight_override` field were dead (driver destructured the former to `_`, `walk_word` hardcoded `Keyword`); now both wired through, with a new `HighlightClass::Type` + eighth `Theme` field `tok_type` (a pink/deep-magenta distinct from both keyword purple and identifier teal) so types no longer render identically to identifiers (issue #8); the three `IdentSource::Types` slots opt in via `Some(Type)` (advanced-mode single-word SQL aliases — `float`, `varchar`, … per ADR-0035 §3 — ride along for free), and the two-word `double precision` alias opts in via the new `Word::type_keyword` constructor so it matches its synonyms; **Amendment 5 lets the hint panel grow for long prose hints** — a fixed one-row panel clipped long field-value/usage hints past the first line (issue #12); `resolve_hint_lines` now pre-wraps prose and `render_right_column` sizes the panel to the line count (1 row default, up to `MAX_HINT_ROWS`=3, reclaimed when short) with a `clamp_wrapped` ellipsis backstop; the candidate list still scrolls horizontally on one row (Amendment 2's deferred two-line candidate box stays deferred); also shortens the 299-char `parse.usage.sql_create_table` synopsis to a terse one-liner (full grammar remains in `help.ddl.sql_create_table`); **Amendment 6 adds a curated SQL function-name list** (`src/dsl/sql_functions.rs`, `KNOWN_SQL_FUNCTIONS` — aggregates + common + broader scalars; `cast` deliberately excluded as its `CAST(x AS type)` syntax isn't a plain-call shape) as the single source of truth shared by two consumers at the `sql_expr_ident` slot (ADR-0031 §1): **issue #15** offers the functions as Tab candidates under a new `CandidateKind::Function` + ninth `Theme` colour `tok_function` (a blue distinct from keyword/identifier/type, parallel to Amendment 4's `tok_type`) so a learner discovers `sum`/`upper`/…; **issue #16** restores the typing-time column-typo flag the issue-#6 fix had dropped wholesale at this slot — `invalid_ident_at_cursor` now bails only when the partial prefix-matches a known function, else falls through to the schema-column check, so `select Agx` warns again at typing time while `select sum` does not (the issue-#6 lockdown tests + the submit-time `unknown_column` diagnostic path are untouched, and the no-validation-allowlist posture stands); see ADR-0031's status note for the grammar-side anchor - [ADR-0023 — Unified declarative grammar tree](0023-unified-grammar-tree.md) — direction (superseded for execution detail by ADR-0024) - [ADR-0024 — Unified grammar tree: execution plan](0024-unified-grammar-tree-execution-plan.md) — **Accepted**, the executable spec — implemented (Phases A–F; Phase F shipped "minimal", `parser.rs` retained as the router — see the ADR's Phase F implementation note) - [ADR-0025 — Indexes](0025-indexes.md) — **Accepted** (**Amendment 1, 2026-05-25**: UNIQUE indexes admitted on the **advanced-mode** surface via `CREATE UNIQUE INDEX` — ADR-0035 §4d; the `IndexSchema.unique` flag round-trips through `project.yaml` with no new metadata table since the engine reports uniqueness natively; simple-mode `add unique index` stays deferred), `add index` / `drop index`, persistence, rebuild-table preservation, and items-list display (`C3` index portion + `S2`) @@ -36,7 +36,7 @@ This directory contains the project's ADRs, recorded per - [ADR-0028 — Query plans (`EXPLAIN QUERY PLAN`)](0028-query-plans.md) — **Accepted**, an `explain` prefix command over `show data` / `update` / `delete`; an annotated, span-styled plan tree; introduces the `OutputLine` styled-runs mechanism (ADR-0016's deferred per-span styling) (`QA1` / `QA2`) - [ADR-0029 — Column constraints (NOT NULL / UNIQUE / CHECK / DEFAULT)](0029-column-constraints.md) — **Accepted**, the four column-level constraints declared in the column-spec suffix (`create table` / `add column`) and modified on existing columns via `add constraint …` / `drop constraint …`; a pre-flight dry-run guards populated columns; `CHECK` reuses the ADR-0026 expression grammar via `Subgrammar` (`C3`) - [ADR-0030 — Advanced mode: the standard-SQL surface](0030-advanced-mode-sql-surface.md) — **Accepted**, SQL added as grammar *within the unified grammar tree* (ADR-0024), not a separate batch parser — so SQL gets the same completion / highlighting / hints / parse-errors as the DSL; mode gates the SQL forms; DDL routes through the typed `Command` executor (metadata + type vocabulary preserved), DML and `SELECT` execute as validated SQL; engine-neutral posture, the DSL→SQL teaching echo; supersedes ADR-0001's `sqlparser-rs` reservation; phased plan (`Q1` / `Q2` / `Q4`); **§13 OOS-2 (EXPLAIN of advanced SQL) superseded by ADR-0039** -- [ADR-0031 — The SQL expression grammar](0031-sql-expression-grammar.md) — **Accepted**, the stratified SQL expression grammar fragment commissioned by ADR-0030 §3: a single precedence ladder (`OR`/`AND`/`NOT`, the comparison/`LIKE`/`IN`/`BETWEEN`/`IS NULL` predicate set, arithmetic incl. `||`, function calls, `CASE`) — the superset of ADR-0026's DSL `WHERE` grammar, authored as a parallel fragment so simple mode is untouched; pure validation, builds **no** AST (consumers run/store SQL as text per ADR-0030 §4/§6); reuses ADR-0026's `Subgrammar` recursion + depth cap unchanged; subquery expressions and qualified column refs deferred to ADR-0030 Phase 2 +- [ADR-0031 — The SQL expression grammar](0031-sql-expression-grammar.md) — **Accepted**, the stratified SQL expression grammar fragment commissioned by ADR-0030 §3: a single precedence ladder (`OR`/`AND`/`NOT`, the comparison/`LIKE`/`IN`/`BETWEEN`/`IS NULL` predicate set, arithmetic incl. `||`, function calls, `CASE`) — the superset of ADR-0026's DSL `WHERE` grammar, authored as a parallel fragment so simple mode is untouched; pure validation, builds **no** AST (consumers run/store SQL as text per ADR-0030 §4/§6); reuses ADR-0026's `Subgrammar` recursion + depth cap unchanged; subquery expressions and qualified column refs deferred to ADR-0030 Phase 2; **status note (2026-05-30)** records that ADR-0022 Amendment 6 layers a curated known-function list on the `sql_expr_ident` slot (§1) for completion + the typing-time typo hint (issues #15/#16) — the grammar itself is unchanged, and the no-validation-allowlist posture stands - [ADR-0032 — The full SQL `SELECT` grammar](0032-sql-select-grammar.md) — **Accepted**, the Phase-2 grammar commissioned by ADR-0030 §3: full `SELECT` with `INNER`/`LEFT`/`RIGHT`/`FULL OUTER`/`CROSS` joins, `GROUP BY`/`HAVING`, all four set ops (`UNION`/`UNION ALL`/`INTERSECT`/`EXCEPT`), `WITH` and `WITH RECURSIVE` CTEs, `LIMIT … OFFSET`, `DISTINCT`, `t.*`, and bare-alias projection (lifting Phase-1 §4.2); additive extensions to ADR-0031's `sql_expr` for scalar subqueries, `IN (SELECT …)`, `[NOT] EXISTS`, and qualified column refs (redeeming ADR-0031 §7 OOS-1/OOS-2); grammar-recursion via `Subgrammar(&SQL_SELECT_COMPOUND)` reuses ADR-0026's `MAX_SUBGRAMMAR_DEPTH = 64` cap unchanged; **softens ADR-0030 §8's "ambient assistance comes for free" claim**: completion scope needs new `WalkContext` accumulators (a `from_scope_stack` of `ScopeFrame`s holding `from_scope` / `cte_bindings` / `projection_aliases`), a **new walker node variant `Node::ScopedSubgrammar(&Node)`** as the push/pop trigger (existing `Node::Subgrammar` unchanged so DSL `Expr` and `sql_expr` recursion are unaffected), qualified-prefix completion narrowing, body-projection-derived CTE column resolution (so `SELECT *` and explicit-projection CTE bodies both yield real column completion past `cte_alias.|`), and a **post-walk fixup pass** that re-resolves projection-list identifier highlighting/validity once `FROM` is parsed (the projection-before-FROM problem); classifies every Phase-2 validation case against ADR-0027's ERROR/WARNING guideline (§11): five new `diagnostic.*` keys for parse-time-detectable cases (unknown qualifier, ambiguous column, projection-alias misplaced, CTE/compound arity mismatch) plus eight `engine.*` translation keys; a MatchedPath-walking predicate-warnings variant that closes the Phase-1 gap where SQL `WHERE` expressions emitted no `LIKE`-on-numeric / `= NULL` / type-mismatch warnings (ADR-0027 Amendment 1 finally extends to the SQL surface); adds a worker-side post-prepare type-resolution pass via engine column-origin metadata so bare column refs recover their playground type (partially lifting Phase-1 §4.5, the bool→0/1 case) — `Cargo.toml` gains `column_metadata` to rusqlite features (verified against pinned 0.39.0); `__rdbms_*` rejection extended to every new table-source slot; Amendment 1 narrows §12's resolution rule from a grammar-side structural classification to "trust the engine's column-origin metadata verbatim" after an empirical probe showed origin metadata follows through non-recursive CTEs, scalar subqueries, derived tables, set ops, and joins — the one structural exception is recursive CTE result columns, which return None and stay typeless; Amendment 2 records that §10.6's "rewrite the highlight class" prescription is realised via the two-pass schema-existence diagnostic + the renderer's diagnostic-overlay path (no separate per-byte rewrite step needed; no new HighlightClass variant), and that the projection-before-FROM completion narrowing has been improved by an `src/completion.rs` look-ahead probe when the leading walk's `from_scope` is empty but the full input parses - [ADR-0033 — The full SQL DML grammar (`INSERT` / `UPDATE` / `DELETE`)](0033-sql-dml-grammar.md) — **Accepted** (implemented + verified through sub-phase 3k, 2026-05-23; phase-exit report `docs/handoff/20260523-phase-3-verification.md`), the Phase-3 grammar commissioned by ADR-0030 §3: single- and multi-row `INSERT` (incl. `INSERT … SELECT` recursing through ADR-0032's `SQL_SELECT_COMPOUND`), `UPDATE` with `SET` assignment list, `DELETE`, all three optionally followed by `RETURNING projection_list`, plus full `ON CONFLICT … DO NOTHING / DO UPDATE` UPSERT on INSERT; **fixes the DSL-vs-SQL dispatch architecture for shared entry words (`insert`/`update`/`delete`)**: SQL-first / DSL-fallback in Advanced mode via a `Choice(SQL_shape, DSL_shape)` per shape, gated by a new walker capability `Node::Guard(fn)` — a zero-byte-consumption gating node that fails the enclosing Seq with a `ValidationError`; carries `Command::SqlInsert` / `SqlUpdate` / `SqlDelete` variants and `do_sql_*` worker handlers each of which knows the target table (for re-persistence) and the `returning: bool` flag (for DataResult routing); `shortid` auto-fill mirrors the DSL `do_insert` mechanism via worker post-fill; SQL DELETE produces the same per-relationship cascade summary the DSL DELETE does (ADR-0014 parity); three new walker diagnostics (`insert_arity_mismatch` ERROR, `auto_column_overridden` WARNING, `not_null_missing` WARNING) with positive + negative tests each; OOS list explicitly carves out `DEFAULT VALUES` (the project's planned seed feature), SQLite-specific `OR REPLACE` / `OR IGNORE` / `OR ABORT` / `OR FAIL` / `OR ROLLBACK` prefixes, `UPDATE FROM` multi-table updates, and WITH-prefixed DML; the `excluded` keyword inside `ON CONFLICT DO UPDATE` is a deliberate carve-out from ADR-0030 §7's engine-neutral posture (no standard-SQL UPSERT spelling exists that SQLite and PostgreSQL share); eleven phased sub-phases each with explicit exit gates + written DA gate, opening with the dispatch mechanism before any DML grammar lands; initial DA review recorded seven critiques that were resolved before status moved to Proposed; **Amendment 1 supersedes §2's dispatch mechanism**: the originally-chosen `Node::Guard(fn)` + `Choice(SQL_shape, DSL_shape)` was found during 3a to be unworkable as framed (any guard-in-`Choice` mechanism forces a `walk_choice` change — `walk_choice` only falls through on `NoMatch`, so Simple-mode valid-DSL would wrongly surface "this is SQL", and `walk_seq` treats a `NoMatch` past `idx 0` as a hard `Failed`, breaking Advanced-mode DSL fall-through); replaced by **category-grouped, mode-aware dispatch** in `walker::walk` (each `REGISTRY` entry tagged `CommandCategory::{Simple, Advanced}`, generalising the existing whole-command `is_advanced_only` gate), shared entry words carrying a node in both groups, no `Node::Guard` and no `walk_choice`/`walk_seq` change, advanced-mode completion SQL-first with DSL as a full-line fallback; **Amendment 2 (sub-phase 3f) supersedes §7's cascade mechanism**: the WHERE-injected per-child pre-count rested on a premise that was factually wrong about the DSL handler (which detects cascades by before/after row-count diffing inside a transaction, not by `Expr`-derived pre-count subqueries) and would have broken the §2 parity promise by reporting `SET NULL` the DSL path doesn't; replaced by mirroring `do_delete`'s count-diff exactly (verbatim DELETE executes, child-count diff observes the cascade — `ON DELETE CASCADE` row removals only, SET NULL deferred for both paths to preserve parity), which shares the render-layer formatter for free via `CommandOutcome::Delete` and **withdraws risk R2** (no WHERE-byte extraction, no N+1 subquery); **Amendment 3 (sub-phase 3j) records the command-identity model and defers the execution-mode side-channel**: a command is the typed outcome of a *mode-rooted* grammar path and its identity is intrinsic (Advanced mode tries SQL first, falls back to the *Simple* DSL command when no SQL branch matches a token, e.g. `delete … --all-rows`; note `update … --all-rows` does *not* fall back — the SQL `SET` expression eats `--all-rows`, harmless since the engine treats it as a comment); **Simple mode commits the DSL candidate for shared words** so the *real* DSL error surfaces, and when that line would also run in advanced mode the rendering layer **combines** them — DSL error **plus** an `advanced_mode.also_valid_sql` pointer ("… (valid as SQL in advanced mode)") — keeping the actionable DSL fix while pointing at advanced mode; bare "this is SQL" is reserved for entry words with no DSL form (`select`/`with`); a fully-overlapping input (`insert … values …`) legitimately yields *two distinct commands* (`Command::Insert` typed-AST vs `Command::SqlInsert` validated-text) that do the same thing but execute differently (ADR-0030 §4), so each is tested in the mode that produces it; **corrects the plan's 3j exit-gate premise** that the DSL DML tests run in Simple mode (they call `parse_command`, which defaults to Advanced) — the real invariant is "Simple-mode behaviour unchanged, Advanced mode SQL-first, DSL grammar tested in Simple mode, both variants tested in their producing mode", with §6/§7 parity keeping the paths observably equivalent; and **defers to its own future ADR** the execution-time mode side-channel (three-way `Mode`: simple/advanced/advanced-one-shot threaded through `Action`→worker, for mode-dependent *output* like echoing generated SQL) — today only the *rendering* side-channel `OutputLine.mode_at_submission` exists, and the three-way distinction is not required for Phase 3 dispatch correctness; **Amendment 4, 2026-05-27** (design agreed, pending impl): **reverses Amendment 3's `update … --all-rows` counter-example as a bug** — surfaced by the ADR-0038 echo design. The walker has no `--` comment support (it lexes two minus operators) while the engine treats `--` as a comment, so `update T set x=42 --all-rows` was silently parsed as the expression `42 - -all - rows` over **non-existent columns** `all`/`rows` (an ADR-0027 "flag-if-it-will-fail" case) and matched `SqlUpdate`. Decision: the `--all-rows` sequence makes the SQL `UPDATE` shape **fail**, so dispatch **falls back to the DSL `Update { AllRows }`** — symmetry with `delete … --all-rows`; no `--` comment feature introduced (trailing comments stay unsupported). Inverts `sql_dml_e2e.rs::e2e_update_all_rows_in_advanced_does_not_fall_back_to_dsl`; mechanism settled test-first in the build; folded into the ADR-0038 effort (makes `update … --all-rows` echoable); **Amendment 5, 2026-05-28** (implemented + verified, user-confirmed): `advanced_mode.also_valid_sql` (the cross-mode pointer from Amendment 3) fires on **validity**, not just **parse** — *"valid"* meaning `input_verdict_in_mode(input, schema, Mode::Advanced) == None` in the ADR-0027 sense (parse succeeds *and* no Warning/Error diagnostic from any pass). Surfaced by **issue #1**: a positional `INSERT INTO T VALUES (…)` (no column list) with a value count that didn't match the target's column count parsed in advanced but failed at the engine, so the syntactic-only Amendment-3 gate promised a mode switch that wouldn't help. Closes the gap by (a) extending `dml_insert_arity_diagnostics` (§8.1, previously Form A only — its own doc-comment deferred Form B) to also check the no-column-list form against the schema's column count, emitting a new `diagnostic.insert_arity_mismatch_form_b` ERROR per offending tuple, and (b) refactoring `advanced_alternative_note` to read the validity verdict instead of running its own bespoke check — any static diagnostic added to the pipeline in the future automatically participates in the pointer gate. Side benefit: the `[ERR]` validity indicator now lights up at typing time for the reported scenario, no longer needing a submit to learn the line is wrong. Tests pinned: `insert_form_b_arity_mismatch_under_supply_fires` / `_over_supply_fires` / `_match_is_silent` / `_unknown_table_is_silent` (walker); `ambient_hint_omits_advanced_pointer_when_form_b_value_count_wouldnt_match` (gate); `simple_mode_submit_of_sql_construct_appends_advanced_pointer` (pointer still fires for genuine SQL-only constructs against a known schema). Amendment 3's "would parse in advanced mode" should henceforth be read as a synonym for "valid in advanced mode" in this stricter sense; the user-confirmed behavioural change is exactly the issue #1 bug case (no other input flips its pointer state) - [ADR-0034 — `history.log` as a complete command journal; replay reads success-only](0034-history-journal-and-replay-filter.md) — **Accepted**, resolves a three-way tension in `history.log`'s roles found while implementing ADR-0033 3f: (1) the persistent log is success-only while the in-memory Up/Down recall ring records *every* submission (success or failure, "so users can recall and edit typo'd commands"), and the ring is re-seeded from the log on project open — so **failed commands are recallable within a session but silently lost across sessions**; (2) replay wants the state-building (successful) commands while recall wants everything typed, which one success-only file cannot serve; (3) `replay history.log` never actually worked — `run_replay` parses each whole line through the DSL parser with no understanding of the `||` record shape, so a real log fails on line 1, and **no test ever fed the pipe format to replay** (the `replay_history_log_records_subcommands_only` test only checks what replay *writes*, never replays the log as input). Decision: `history.log` becomes a **complete journal** — every submission recorded, tagged `ok`/`err` via the status field the format already reserved (ADR-0015 §5) — and **each consumer filters**: hydration reads all records (cross-session recall matches in-session), replay reads `ok` only (and learns the journal format, while still accepting bare-command `.commands` scripts; detection by the leading timestamp+status prefix so a `|` inside a bare command isn't misread). Successful commands stay journalled transactionally by the worker; failed commands are journalled `err` best-effort from the runtime/app error path (a parse failure never reaches the worker). Amends ADR-0006's "successfully executed" wording and ADR-0015 §5 ("status always `ok`") / §12 (hydration). Code deferred to two tracked test-first sub-tasks (journal-failures+filtering; replay-parses-journal-format); existing all-`ok` logs need no migration; **implemented 2026-05-24** (plan `docs/plans/20260524-adr-0034-history-journal.md`); **Amendment 1 (2026-05-24): replay filters out app-lifecycle commands** — a working `replay history.log` (the §3 fix) exposed that the journal also records `save as`/`load`/`new`/`export`/`import`/`rebuild`/`mode` (which would panic the worker dispatch or abort the replay), so replay now re-applies **only** schema/data write commands and **skips** every `Command::App` + nested `Command::Replay`; **all skips continue** (never abort — reversing the prior nested-`replay` refusal, so a journal containing a once-run `replay` needs no hand-editing, and the infinite-loop footgun is closed by construction), with a `[skip]` **warning** on `import` and nested-`replay` skips (their omission can leave replayed state incomplete) and silent skips for the rest; `replay.error_nested` removed, `replay.skipped_import`/`replay.skipped_replay` added, `ReplayCompleted` carries `warnings` diff --git a/docs/requirements.md b/docs/requirements.md index 52f49fe..9cecd19 100644 --- a/docs/requirements.md +++ b/docs/requirements.md @@ -26,18 +26,21 @@ repo is pushed). ## Test baseline -After the ADR-0027 highlight / hint follow-up (precise WARNING -spans, the diagnostic overlay + hint wiring, the -`LIKE`-on-numeric WARNING, the debounce state machine) plus -two manual-testing bug fixes (optional trailing-flag -completion; the `--resume` temp-project pointer): -**1131 passing, 0 failing, 1 ignored** (`cargo test` — the one -ignored test is a long-standing `` ```ignore `` doc-test in -`src/friendly/mod.rs`). Clippy clean with the nursery lint -group enabled. (Earlier reference points: 1100 after ADR-0027's -initial ship; 1079 after ADR-0026 (complex WHERE expressions); -1039 after ADR-0025 (indexes); 1006 after ADR-0024 + the -handoff-14 cleanup; 449 after B2/C2.) +After ADR-0022 Amendment 6 (the curated SQL function-name list — +issues #15 tab-completion + #16 typing-time typo hint): +**1538 lib unit tests passing, 0 failing, 1 ignored** +(`cargo test --lib`; the full `cargo test` across every binary is +2107 passing, 0 failing, 1 ignored — the one ignored is a +long-standing `` ```ignore `` doc-test in `src/friendly/mod.rs`). +Clippy clean with the nursery lint group enabled. (Earlier +reference points — lib counts: 1131 after the ADR-0027 highlight / +hint follow-up + the optional-trailing-flag / `--resume` +manual-testing fixes; 1100 after ADR-0027's initial ship; 1079 +after ADR-0026 (complex WHERE expressions); 1039 after ADR-0025 +(indexes); 1006 after ADR-0024 + the handoff-14 cleanup; 449 +after B2/C2. Note the intervening issue fixes #8/#13/#12/#7/#9 +landed tests without a baseline bump; this is the first refresh +since ADR-0027.) --- @@ -105,11 +108,18 @@ handoff-14 cleanup; 449 after B2/C2.) rolling history is out of scope per OOS-6 / N4.)* - [ ] **I3** Tab completion for app commands, DSL keywords, table names, column names, and SQL keywords. + *(Refinement 2026-05-30, issue #15: SQL expression slots + (`sql_expr_ident`) now also offer a curated set of SQL function + names — `KNOWN_SQL_FUNCTIONS` in `src/dsl/sql_functions.rs`, + surfaced as a new `CandidateKind::Function` — ADR-0022 Amendment 6. + The broad tab-completion goal stays open.)* - [ ] **I4** Syntax highlighting for both the DSL and SQL. *(Refinement 2026-05-29, issue #8: column data types now carry a dedicated `HighlightClass::Type` / `tok_type` colour, distinct from - identifiers and clause keywords — ADR-0022 Amendment 4. The broad - highlighting goal stays open.)* + identifiers and clause keywords — ADR-0022 Amendment 4; a further + refinement 2026-05-30, issue #15: SQL function-name candidates carry + a dedicated `tok_function` colour (the ninth `Theme` token colour, + ADR-0022 Amendment 6). The broad highlighting goal stays open.)* - [ ] **I5** In-flight query/command cancellation (Ctrl-C in the output area or input field). diff --git a/src/completion.rs b/src/completion.rs index 28447f7..719c859 100644 --- a/src/completion.rs +++ b/src/completion.rs @@ -228,6 +228,12 @@ pub enum CandidateKind { /// expects either `values` or `(`, and surfacing both makes /// the Form A path discoverable. Punct, + /// A curated SQL function name (ADR-0022 Amendment 6, issue + /// #15) offered at a `sql_expr_ident` slot. Coloured with + /// `tok_function` so a learner can tell `sum` / `upper` apart + /// from a column reference or a clause keyword. The set is + /// `crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS`. + Function, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -682,6 +688,27 @@ pub fn candidates_at_cursor_with_in_mode( Vec::new() }; + // Source 1.8: SQL function-name candidates (ADR-0022 Amendment 6, + // issue #15). At a `sql_expr_ident` slot the grammar accepts a + // column reference *or* the leading name of a function call + // (ADR-0031 §1); the column candidates come from the schema + // (Source 2), and these surface the curated known-function set so + // a learner can discover `sum` / `upper` / … by Tab. Prefix- + // filtered like every other source; empty prefix offers the whole + // set. Tagged `CandidateKind::Function` for its own colour. + let has_sql_expr_slot = expected.iter().any(|e| { + matches!(e, Expectation::Ident { role: "sql_expr_ident", .. }) + }); + let mut functions: Vec = if has_sql_expr_slot { + crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS + .iter() + .filter(|f| matches_prefix(f)) + .map(|f| (*f).to_string()) + .collect() + } else { + Vec::new() + }; + // Source 2: schema identifiers — accumulated across every // matching schema-listable `Ident { source }` expectation. // `NewName` / `Types` / `Free` sources don't query the @@ -725,6 +752,16 @@ pub fn candidates_at_cursor_with_in_mode( // resolving collisions in the user's favour would create // ambiguity in the live render. identifiers.retain(|name| !keywords.contains(name)); + // If a known function name collides with one of the user's own + // columns (a column literally named `count` / `date` / …), the + // column wins and the function candidate is dropped — otherwise the + // same text would appear twice in the candidate line, distinguished + // only by colour. Same spirit as the keyword-wins rule above: a + // schema name is the user's content and the more relevant + // completion; a bare `count` accepted as the column still becomes a + // call the moment the user types `(`. Case-insensitive so `Count` + // (column) also suppresses `count` (function). (DA review, #15.) + functions.retain(|f| !identifiers.iter().any(|i| i.eq_ignore_ascii_case(f))); // Schema identifiers first: a column / table name the user // would otherwise have to look up is the highest-value @@ -738,6 +775,7 @@ pub fn candidates_at_cursor_with_in_mode( identifiers.len() + keywords.len() + type_names.len() + + functions.len() + composites.len() + punct_candidates.len() + flags.len(), @@ -762,6 +800,14 @@ pub fn candidates_at_cursor_with_in_mode( kind: CandidateKind::Keyword, mode: ModeClass::Both, })); + // Function names sit after keywords/types: a learner reads the + // clause keywords first, then discovers callables. Their own + // `Function` kind drives the `tok_function` colour (issue #15). + candidates.extend(functions.into_iter().map(|text| Candidate { + text, + kind: CandidateKind::Function, + mode: ModeClass::Both, + })); candidates.extend(composites.into_iter().map(|text| Candidate { text, kind: CandidateKind::Keyword, @@ -1134,19 +1180,23 @@ pub fn invalid_ident_at_cursor_in_mode( if expected.is_empty() { return None; } - // Issue #6 follow-on: at a SQL expression position the partial - // could resolve to either a column reference *or* a function-call - // name (the grammar admits the call shape structurally — see + // Issue #6 / #16: at a SQL expression position the partial could + // resolve to either a column reference *or* a function-call name + // (the grammar admits the call shape structurally — see // sql_expr.rs's `CALL_ARGS` comment, "it does not know which - // names are aggregates"). Without lookahead for a trailing `(`, - // we can't tell here, so we don't flag — submit-time validation - // still catches a genuine column typo via the `unknown_column` - // diagnostic, which only skips when the ident *is* followed by - // `(` (i.e. it really is a function call). + // names are aggregates"). Issue #6 dropped the flag here wholesale + // to stop the false positive on names like `sum`; issue #16 + // restores it for genuine typos using the curated known-function + // list. When the partial prefix-matches a known function name we + // still bail (it may yet become a function call); otherwise we + // fall through to the schema-column check below, which flags the + // partial as "no such column" unless it prefix-matches a real + // column. So `select Agx` warns at typing time again, while + // `select sum` does not. let has_sql_expr_slot = expected.iter().any(|e| { matches!(e, Expectation::Ident { role: "sql_expr_ident", .. }) }); - if has_sql_expr_slot { + if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) { return None; } // Find every schema-listable source in the expected list. @@ -2365,6 +2415,175 @@ mod tests { // ---- ADR-0032 §10.5 qualified-prefix completion ---- + // ---- SQL function-name completion (issue #15) ---- + + #[test] + fn sql_expr_slot_offers_known_function_candidates() { + // Issue #15: at a `sql_expr_ident` slot Tab offers the curated + // function names so a learner can discover them. + let cache = two_table_schema(); + let cs = cands_with("select * from a where ", 22, &cache); + for f in ["sum", "avg", "count", "upper", "coalesce"] { + assert!( + cs.contains(&f.to_string()), + "expected function `{f}` offered at WHERE expr slot; got {cs:?}", + ); + } + } + + #[test] + fn projection_slot_offers_known_function_candidates() { + // Issue #15's headline example: at `select ` (the projection + // slot, empty prefix) the functions must surface alongside + // columns — the projection slot is also `sql_expr_ident`, and + // it must not be swallowed by the value-literal suppression + // (it carries a schema-ident expectation, so it isn't). + let cache = two_table_schema(); + let cs = cands_with("select ", 7, &cache); + for f in ["sum", "count", "upper"] { + assert!( + cs.contains(&f.to_string()), + "expected function `{f}` offered at the projection slot; got {cs:?}", + ); + } + } + + #[test] + fn sql_function_candidates_filter_by_prefix() { + // `su` narrows to the functions starting `su` — `sum`, + // `substr` — and excludes non-matches. + let cache = two_table_schema(); + let input = "select * from a where su"; + let cs = cands_with(input, input.len(), &cache); + assert!(cs.contains(&"sum".to_string()), "got {cs:?}"); + assert!(cs.contains(&"substr".to_string()), "got {cs:?}"); + assert!( + !cs.contains(&"avg".to_string()), + "`avg` does not prefix-match `su`; got {cs:?}", + ); + } + + #[test] + fn sql_function_candidates_carry_function_kind() { + // The hint panel colours functions via their own kind. + let cache = two_table_schema(); + let input = "select * from a where su"; + let cands = candidates_at_cursor(input, input.len(), &cache) + .expect("some completion") + .candidates; + let sum = cands + .iter() + .find(|c| c.text == "sum") + .expect("`sum` present"); + assert_eq!(sum.kind, CandidateKind::Function); + } + + #[test] + fn function_candidates_absent_at_non_expression_slots() { + // A non-`sql_expr_ident` slot (the table slot of `show data `) + // must not surface function names. + let cache = SchemaCache { + tables: vec!["Customers".to_string()], + ..SchemaCache::default() + }; + let cs = cands_with("show data ", 10, &cache); + for f in ["sum", "count", "upper"] { + assert!( + !cs.contains(&f.to_string()), + "function `{f}` must not appear at the table slot; got {cs:?}", + ); + } + } + + #[test] + fn function_candidate_deduped_against_a_like_named_column() { + // DA review (#15): a column literally named like a function + // (`count`) must appear exactly once in the candidate line — + // the column wins, the redundant function candidate is dropped. + let mut cache = SchemaCache { + tables: vec!["a".to_string()], + columns: vec!["count".to_string()], + ..SchemaCache::default() + }; + cache + .table_columns + .insert("a".to_string(), vec![TableColumn::new("count", Type::Int)]); + let input = "select * from a where co"; + let cands = candidates_at_cursor(input, input.len(), &cache) + .expect("some completion") + .candidates; + let count_entries: Vec<_> = + cands.iter().filter(|c| c.text.eq_ignore_ascii_case("count")).collect(); + assert_eq!( + count_entries.len(), + 1, + "`count` must appear once, not duplicated as column + function; got {count_entries:?}", + ); + assert_eq!( + count_entries[0].kind, + CandidateKind::Identifier, + "the surviving `count` candidate must be the user's column", + ); + // A non-colliding function at the same slot is unaffected. + assert!( + cands.iter().any(|c| c.text == "coalesce" && c.kind == CandidateKind::Function), + "non-colliding functions still surface; got {cands:?}", + ); + } + + #[test] + fn cast_is_not_offered_as_a_function_candidate() { + // `cast` uses `CAST(x AS type)`, which the call-shape grammar + // does not parse — it must never be offered (ADR-0031 call + // shape; sql_functions.rs exclusion). + let cache = two_table_schema(); + let input = "select * from a where ca"; + let cs = cands_with(input, input.len(), &cache); + assert!( + !cs.contains(&"cast".to_string()), + "`cast` must not be offered as a function candidate; got {cs:?}", + ); + } + + // ---- typing-time column-typo hint restored (issue #16) ---- + + #[test] + fn invalid_ident_fires_for_genuine_typo_at_sql_expr_slot() { + // Issue #16: a genuine column typo at a `sql_expr_ident` slot + // (before FROM is in scope) warns at typing time again — it + // matches neither a schema column nor a known function name. + let cache = two_table_schema(); + let inv = invalid_ident_at_cursor("select Agx", 10, &cache) + .expect("genuine typo at an expr slot must flag"); + assert_eq!(inv.found, "Agx"); + assert_eq!(inv.source, IdentSource::Columns); + } + + #[test] + fn invalid_ident_does_not_fire_for_function_prefix_at_sql_expr_slot() { + // The reason issue #6 dropped the flag: a known function name + // (or its prefix) must NOT be mis-flagged as an unknown column. + let cache = two_table_schema(); + assert!( + invalid_ident_at_cursor("select su", 9, &cache).is_none(), + "`su` prefixes `sum`/`substr` — must not flag", + ); + assert!( + invalid_ident_at_cursor("select sum", 10, &cache).is_none(), + "`sum` is a known function — must not flag", + ); + } + + #[test] + fn invalid_ident_does_not_fire_for_column_prefix_at_sql_expr_slot() { + // A real column prefix is an in-progress lookup, not a typo. + let cache = two_table_schema(); + assert!( + invalid_ident_at_cursor("select na", 9, &cache).is_none(), + "`na` prefixes the `name` column — must not flag", + ); + } + fn two_table_schema() -> SchemaCache { use crate::dsl::types::Type; let mut s = SchemaCache::default(); diff --git a/src/dsl/mod.rs b/src/dsl/mod.rs index a5caff7..d53ff08 100644 --- a/src/dsl/mod.rs +++ b/src/dsl/mod.rs @@ -14,6 +14,7 @@ pub mod command; pub mod grammar; pub mod parser; pub mod shortid; +pub mod sql_functions; pub mod types; pub mod value; pub mod walker; diff --git a/src/dsl/sql_functions.rs b/src/dsl/sql_functions.rs new file mode 100644 index 0000000..7daa93d --- /dev/null +++ b/src/dsl/sql_functions.rs @@ -0,0 +1,151 @@ +//! Curated set of SQL function names the playground recognises +//! (ADR-0022 Amendment 6, issues #15 / #16). +//! +//! This is the single source of truth for "what SQL function names +//! does this playground know about", shared by two consumers: +//! +//! - **Tab completion** (issue #15): at a `sql_expr_ident` slot the +//! completion engine offers these as `CandidateKind::Function` +//! candidates, so a learner can discover `sum` / `upper` / … without +//! already knowing them. +//! - **The typing-time column-typo hint** (issue #16): at the same +//! slot, `invalid_ident_at_cursor` flags a partial as "no such +//! column" only when it matches neither a schema column *nor* a +//! known function name — so a genuine typo still warns early while +//! a real function name like `sum` does not get mis-flagged. +//! +//! Scope (ADR-0031 §1): the SQL expression grammar admits any +//! function-call *shape* without knowing which names are real — the +//! walker is a structural matcher, not an evaluator. This list is a +//! deliberately *curated pedagogical set*, not "every SQLite +//! built-in". It covers the aggregates a learner meets first plus the +//! common scalar functions, and is the allow-list / discovery source +//! layered on top of that shape-only grammar. +//! +//! Deliberately excluded: `cast`. SQLite's `CAST` uses the +//! `CAST(expr AS type)` syntax, which the expression grammar does +//! **not** parse as a function call (function args are expressions, +//! and `expr AS type` is not one). Offering `cast` as a completion +//! candidate would surface a name that does not parse in the call +//! shape, so it stays out of the set until the grammar grows a +//! dedicated `CAST` form. + +/// The curated SQL function names, lowercase and sorted. +/// +/// Sorted + lowercase is an invariant (pinned by a unit test): the +/// completion engine relies on a stable order, and the prefix match +/// is case-insensitive against these canonical lowercase spellings. +/// +/// Grouping (all plain `name(args)` call shapes): +/// - **Aggregates:** `avg`, `count`, `max`, `min`, `sum`. +/// - **Common scalars:** `abs`, `coalesce`, `length`, `lower`, +/// `round`, `substr`, `trim`, `upper`. +/// - **Broader scalars:** `date`, `datetime`, `hex`, `ifnull`, +/// `instr`, `nullif`, `random`, `replace`, `strftime`, `typeof`. +pub const KNOWN_SQL_FUNCTIONS: &[&str] = &[ + "abs", + "avg", + "coalesce", + "count", + "date", + "datetime", + "hex", + "ifnull", + "instr", + "length", + "lower", + "max", + "min", + "nullif", + "random", + "replace", + "round", + "strftime", + "substr", + "sum", + "trim", + "typeof", + "upper", +]; + +/// Whether `partial` is a case-insensitive prefix of at least one +/// known function name. +/// +/// An empty `partial` matches every function (it is a prefix of +/// all), mirroring the empty-prefix behaviour of the keyword / +/// identifier completion sources. Used by `invalid_ident_at_cursor` +/// to decide whether a partial at a `sql_expr_ident` slot might still +/// resolve to a function name (and so must not be flagged as an +/// unknown column). +#[must_use] +pub fn is_known_function_prefix(partial: &str) -> bool { + let lowered = partial.to_lowercase(); + KNOWN_SQL_FUNCTIONS + .iter() + .any(|f| f.starts_with(&lowered)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn list_is_sorted_and_lowercase() { + // The completion engine and prefix matcher both rely on the + // canonical sorted-lowercase invariant. + for f in KNOWN_SQL_FUNCTIONS { + assert_eq!( + *f, + f.to_lowercase(), + "function name `{f}` must be lowercase", + ); + } + let mut sorted = KNOWN_SQL_FUNCTIONS.to_vec(); + sorted.sort_unstable(); + assert_eq!( + sorted.as_slice(), + KNOWN_SQL_FUNCTIONS, + "KNOWN_SQL_FUNCTIONS must be declared in sorted order", + ); + } + + #[test] + fn list_has_no_duplicates() { + let mut seen = std::collections::HashSet::new(); + for f in KNOWN_SQL_FUNCTIONS { + assert!(seen.insert(*f), "duplicate function name `{f}`"); + } + } + + #[test] + fn cast_is_excluded() { + // `CAST(expr AS type)` is not a plain call shape the grammar + // parses — it must not be offered as a candidate (faithfulness + // to ADR-0031's call-shape grammar). + assert!( + !KNOWN_SQL_FUNCTIONS.contains(&"cast"), + "`cast` is not a plain-call function and must stay out of the set", + ); + } + + #[test] + fn prefix_match_is_case_insensitive() { + assert!(is_known_function_prefix("su")); // sum, substr + assert!(is_known_function_prefix("SU")); // case-folded + assert!(is_known_function_prefix("sum")); // exact + assert!(is_known_function_prefix("UPP")); // upper + } + + #[test] + fn empty_prefix_matches_all() { + assert!(is_known_function_prefix("")); + } + + #[test] + fn unknown_prefix_does_not_match() { + assert!(!is_known_function_prefix("zzz")); + assert!(!is_known_function_prefix("xqz")); + // A genuine column-typo shape that prefixes no function. + assert!(!is_known_function_prefix("Agx")); + } +} diff --git a/src/input_render.rs b/src/input_render.rs index ac49d1d..6f87742 100644 --- a/src/input_render.rs +++ b/src/input_render.rs @@ -1344,6 +1344,39 @@ mod tests { } } + #[test] + fn advanced_select_genuine_column_typo_before_from_warns_at_typing_time() { + // Issue #16: the gap the issue-#6 trade-off opened. While the + // user types `select Agx` (no FROM yet, so the schema-existence + // diagnostic stays silent), a genuine column typo must warn at + // typing time via the restored `invalid_ident` path — `Agx` + // matches neither a schema column nor a known function name. + use crate::completion::{SchemaCache, TableColumn}; + use crate::dsl::types::Type; + let mut cache = SchemaCache::default(); + cache.tables.push("Customers".to_string()); + let tc = vec![TableColumn { + name: "Age".to_string(), + user_type: Type::Int, + not_null: false, + has_default: false, + }]; + for c in &tc { + cache.columns.push(c.name.clone()); + } + cache.table_columns.insert("Customers".to_string(), tc); + let input = "select Agx"; + match ambient_hint_in_mode(input, input.len(), None, &cache, Mode::Advanced) { + Some(AmbientHint::Prose(p)) => assert!( + p.contains("No such") && p.contains("Agx"), + "a genuine column typo before FROM must warn at typing time; got: {p:?}", + ), + other => panic!( + "`select Agx` must surface a typing-time typo hint; got: {other:?}", + ), + } + } + #[test] fn advanced_partial_typing_does_not_leak_bare_double_in_prose() { // Issue #5 (prose half): at `create table Orders (count` (no diff --git a/src/theme.rs b/src/theme.rs index 15a161a..925e48d 100644 --- a/src/theme.rs +++ b/src/theme.rs @@ -61,6 +61,13 @@ pub struct Theme { pub tok_punct: Color, pub tok_flag: Color, pub tok_error: Color, + /// SQL function-name candidate colour (ADR-0022 Amendment 6, + /// issue #15) — a dedicated tone distinct from `tok_keyword`, + /// `tok_identifier`, and `tok_type` so a learner can tell a + /// callable (`sum`, `upper`) apart from a clause keyword, a + /// column reference, and a column type. Drives the + /// `CandidateKind::Function` colour in the hint panel. + pub tok_function: Color, } impl Theme { @@ -95,6 +102,7 @@ impl Theme { tok_punct: Color::Rgb(0x8B, 0x90, 0x9A), // == muted tok_flag: Color::Rgb(0xFF, 0xCB, 0x6B), // amber tok_error: Color::Rgb(0xFF, 0x6B, 0x6B), // == error + tok_function: Color::Rgb(0x82, 0xCF, 0xFD), // sky blue — cool like keyword but bluer, clearly apart from purple keyword + teal identifier + pink type } } @@ -125,6 +133,7 @@ impl Theme { tok_punct: Color::Rgb(0x60, 0x66, 0x73), // == muted tok_flag: Color::Rgb(0xB0, 0x88, 0x00), // mustard tok_error: Color::Rgb(0xC0, 0x39, 0x2B), // == error + tok_function: Color::Rgb(0x1A, 0x5F, 0xB0), // strong blue — cool like keyword but bluer, apart from royal-purple keyword + teal identifier + magenta type } } @@ -169,6 +178,7 @@ mod tests { ("tok_string", t.tok_string), ("tok_flag", t.tok_flag), ("tok_error", t.tok_error), + ("tok_function", t.tok_function), ("warning", t.warning), ] { assert_ne!( @@ -188,6 +198,7 @@ mod tests { ("tok_string", t.tok_string), ("tok_flag", t.tok_flag), ("tok_error", t.tok_error), + ("tok_function", t.tok_function), ("warning", t.warning), ] { assert_ne!( @@ -220,4 +231,16 @@ mod tests { assert_ne!(t.tok_type, t.tok_identifier); } } + + #[test] + fn function_colour_is_distinct_from_keyword_identifier_and_type() { + // ADR-0022 Amendment 6 / issue #15: function-name candidates + // get their own tone so a callable reads apart from a clause + // keyword, a column reference, and a column type. + for t in [Theme::dark(), Theme::light()] { + assert_ne!(t.tok_function, t.tok_keyword); + assert_ne!(t.tok_function, t.tok_identifier); + assert_ne!(t.tok_function, t.tok_type); + } + } } diff --git a/src/ui.rs b/src/ui.rs index 1429ae3..e006633 100644 --- a/src/ui.rs +++ b/src/ui.rs @@ -1136,6 +1136,7 @@ fn render_candidate_line( crate::completion::CandidateKind::Identifier => theme.tok_identifier, crate::completion::CandidateKind::Flag => theme.tok_flag, crate::completion::CandidateKind::Punct => theme.tok_punct, + crate::completion::CandidateKind::Function => theme.tok_function, }; let base_fg = if mixed { match items[i].mode {