feat: curated SQL function list — Tab completion (#15) + typing-time typo hint (#16)

Add src/dsl/sql_functions.rs (KNOWN_SQL_FUNCTIONS) as the shared source
of truth at sql_expr_ident slots:

- #15: offer the functions as Tab candidates under a new
  CandidateKind::Function + ninth Theme colour tok_function (blue,
  distinct from keyword/identifier/type).
- #16: restore the column-typo flag the #6 fix had dropped wholesale —
  invalid_ident_at_cursor now bails only when the partial prefix-matches
  a known function, else falls through to the schema-column check.

A column named like a function (e.g. `count`) is deduped (column wins).
`cast` is excluded — CAST(x AS type) is not a plain-call shape.
The no-validation-allowlist posture stands: the list drives completion +
the typo hint only, never parse-time acceptance.

Docs: ADR-0022 Amendment 6, ADR-0031 status note, README index,
requirements I3/I4 + refreshed test baseline.
This commit is contained in:
claude@clouddev1
2026-05-31 11:49:10 +00:00
parent 01ec926ec8
commit 6d8c9eea36
10 changed files with 570 additions and 25 deletions
+228 -9
View File
@@ -228,6 +228,12 @@ pub enum CandidateKind {
/// expects either `values` or `(`, and surfacing both makes
/// the Form A path discoverable.
Punct,
/// A curated SQL function name (ADR-0022 Amendment 6, issue
/// #15) offered at a `sql_expr_ident` slot. Coloured with
/// `tok_function` so a learner can tell `sum` / `upper` apart
/// from a column reference or a clause keyword. The set is
/// `crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS`.
Function,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -682,6 +688,27 @@ pub fn candidates_at_cursor_with_in_mode(
Vec::new()
};
// Source 1.8: SQL function-name candidates (ADR-0022 Amendment 6,
// issue #15). At a `sql_expr_ident` slot the grammar accepts a
// column reference *or* the leading name of a function call
// (ADR-0031 §1); the column candidates come from the schema
// (Source 2), and these surface the curated known-function set so
// a learner can discover `sum` / `upper` / … by Tab. Prefix-
// filtered like every other source; empty prefix offers the whole
// set. Tagged `CandidateKind::Function` for its own colour.
let has_sql_expr_slot = expected.iter().any(|e| {
matches!(e, Expectation::Ident { role: "sql_expr_ident", .. })
});
let mut functions: Vec<String> = if has_sql_expr_slot {
crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS
.iter()
.filter(|f| matches_prefix(f))
.map(|f| (*f).to_string())
.collect()
} else {
Vec::new()
};
// Source 2: schema identifiers — accumulated across every
// matching schema-listable `Ident { source }` expectation.
// `NewName` / `Types` / `Free` sources don't query the
@@ -725,6 +752,16 @@ pub fn candidates_at_cursor_with_in_mode(
// resolving collisions in the user's favour would create
// ambiguity in the live render.
identifiers.retain(|name| !keywords.contains(name));
// If a known function name collides with one of the user's own
// columns (a column literally named `count` / `date` / …), the
// column wins and the function candidate is dropped — otherwise the
// same text would appear twice in the candidate line, distinguished
// only by colour. Same spirit as the keyword-wins rule above: a
// schema name is the user's content and the more relevant
// completion; a bare `count` accepted as the column still becomes a
// call the moment the user types `(`. Case-insensitive so `Count`
// (column) also suppresses `count` (function). (DA review, #15.)
functions.retain(|f| !identifiers.iter().any(|i| i.eq_ignore_ascii_case(f)));
// Schema identifiers first: a column / table name the user
// would otherwise have to look up is the highest-value
@@ -738,6 +775,7 @@ pub fn candidates_at_cursor_with_in_mode(
identifiers.len()
+ keywords.len()
+ type_names.len()
+ functions.len()
+ composites.len()
+ punct_candidates.len()
+ flags.len(),
@@ -762,6 +800,14 @@ pub fn candidates_at_cursor_with_in_mode(
kind: CandidateKind::Keyword,
mode: ModeClass::Both,
}));
// Function names sit after keywords/types: a learner reads the
// clause keywords first, then discovers callables. Their own
// `Function` kind drives the `tok_function` colour (issue #15).
candidates.extend(functions.into_iter().map(|text| Candidate {
text,
kind: CandidateKind::Function,
mode: ModeClass::Both,
}));
candidates.extend(composites.into_iter().map(|text| Candidate {
text,
kind: CandidateKind::Keyword,
@@ -1134,19 +1180,23 @@ pub fn invalid_ident_at_cursor_in_mode(
if expected.is_empty() {
return None;
}
// Issue #6 follow-on: at a SQL expression position the partial
// could resolve to either a column reference *or* a function-call
// name (the grammar admits the call shape structurally — see
// Issue #6 / #16: at a SQL expression position the partial could
// resolve to either a column reference *or* a function-call name
// (the grammar admits the call shape structurally — see
// sql_expr.rs's `CALL_ARGS` comment, "it does not know which
// names are aggregates"). Without lookahead for a trailing `(`,
// we can't tell here, so we don't flag — submit-time validation
// still catches a genuine column typo via the `unknown_column`
// diagnostic, which only skips when the ident *is* followed by
// `(` (i.e. it really is a function call).
// names are aggregates"). Issue #6 dropped the flag here wholesale
// to stop the false positive on names like `sum`; issue #16
// restores it for genuine typos using the curated known-function
// list. When the partial prefix-matches a known function name we
// still bail (it may yet become a function call); otherwise we
// fall through to the schema-column check below, which flags the
// partial as "no such column" unless it prefix-matches a real
// column. So `select Agx` warns at typing time again, while
// `select sum` does not.
let has_sql_expr_slot = expected.iter().any(|e| {
matches!(e, Expectation::Ident { role: "sql_expr_ident", .. })
});
if has_sql_expr_slot {
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
return None;
}
// Find every schema-listable source in the expected list.
@@ -2365,6 +2415,175 @@ mod tests {
// ---- ADR-0032 §10.5 qualified-prefix completion ----
// ---- SQL function-name completion (issue #15) ----
#[test]
fn sql_expr_slot_offers_known_function_candidates() {
// Issue #15: at a `sql_expr_ident` slot Tab offers the curated
// function names so a learner can discover them.
let cache = two_table_schema();
let cs = cands_with("select * from a where ", 22, &cache);
for f in ["sum", "avg", "count", "upper", "coalesce"] {
assert!(
cs.contains(&f.to_string()),
"expected function `{f}` offered at WHERE expr slot; got {cs:?}",
);
}
}
#[test]
fn projection_slot_offers_known_function_candidates() {
// Issue #15's headline example: at `select ` (the projection
// slot, empty prefix) the functions must surface alongside
// columns — the projection slot is also `sql_expr_ident`, and
// it must not be swallowed by the value-literal suppression
// (it carries a schema-ident expectation, so it isn't).
let cache = two_table_schema();
let cs = cands_with("select ", 7, &cache);
for f in ["sum", "count", "upper"] {
assert!(
cs.contains(&f.to_string()),
"expected function `{f}` offered at the projection slot; got {cs:?}",
);
}
}
#[test]
fn sql_function_candidates_filter_by_prefix() {
// `su` narrows to the functions starting `su` — `sum`,
// `substr` — and excludes non-matches.
let cache = two_table_schema();
let input = "select * from a where su";
let cs = cands_with(input, input.len(), &cache);
assert!(cs.contains(&"sum".to_string()), "got {cs:?}");
assert!(cs.contains(&"substr".to_string()), "got {cs:?}");
assert!(
!cs.contains(&"avg".to_string()),
"`avg` does not prefix-match `su`; got {cs:?}",
);
}
#[test]
fn sql_function_candidates_carry_function_kind() {
// The hint panel colours functions via their own kind.
let cache = two_table_schema();
let input = "select * from a where su";
let cands = candidates_at_cursor(input, input.len(), &cache)
.expect("some completion")
.candidates;
let sum = cands
.iter()
.find(|c| c.text == "sum")
.expect("`sum` present");
assert_eq!(sum.kind, CandidateKind::Function);
}
#[test]
fn function_candidates_absent_at_non_expression_slots() {
// A non-`sql_expr_ident` slot (the table slot of `show data `)
// must not surface function names.
let cache = SchemaCache {
tables: vec!["Customers".to_string()],
..SchemaCache::default()
};
let cs = cands_with("show data ", 10, &cache);
for f in ["sum", "count", "upper"] {
assert!(
!cs.contains(&f.to_string()),
"function `{f}` must not appear at the table slot; got {cs:?}",
);
}
}
#[test]
fn function_candidate_deduped_against_a_like_named_column() {
// DA review (#15): a column literally named like a function
// (`count`) must appear exactly once in the candidate line —
// the column wins, the redundant function candidate is dropped.
let mut cache = SchemaCache {
tables: vec!["a".to_string()],
columns: vec!["count".to_string()],
..SchemaCache::default()
};
cache
.table_columns
.insert("a".to_string(), vec![TableColumn::new("count", Type::Int)]);
let input = "select * from a where co";
let cands = candidates_at_cursor(input, input.len(), &cache)
.expect("some completion")
.candidates;
let count_entries: Vec<_> =
cands.iter().filter(|c| c.text.eq_ignore_ascii_case("count")).collect();
assert_eq!(
count_entries.len(),
1,
"`count` must appear once, not duplicated as column + function; got {count_entries:?}",
);
assert_eq!(
count_entries[0].kind,
CandidateKind::Identifier,
"the surviving `count` candidate must be the user's column",
);
// A non-colliding function at the same slot is unaffected.
assert!(
cands.iter().any(|c| c.text == "coalesce" && c.kind == CandidateKind::Function),
"non-colliding functions still surface; got {cands:?}",
);
}
#[test]
fn cast_is_not_offered_as_a_function_candidate() {
// `cast` uses `CAST(x AS type)`, which the call-shape grammar
// does not parse — it must never be offered (ADR-0031 call
// shape; sql_functions.rs exclusion).
let cache = two_table_schema();
let input = "select * from a where ca";
let cs = cands_with(input, input.len(), &cache);
assert!(
!cs.contains(&"cast".to_string()),
"`cast` must not be offered as a function candidate; got {cs:?}",
);
}
// ---- typing-time column-typo hint restored (issue #16) ----
#[test]
fn invalid_ident_fires_for_genuine_typo_at_sql_expr_slot() {
// Issue #16: a genuine column typo at a `sql_expr_ident` slot
// (before FROM is in scope) warns at typing time again — it
// matches neither a schema column nor a known function name.
let cache = two_table_schema();
let inv = invalid_ident_at_cursor("select Agx", 10, &cache)
.expect("genuine typo at an expr slot must flag");
assert_eq!(inv.found, "Agx");
assert_eq!(inv.source, IdentSource::Columns);
}
#[test]
fn invalid_ident_does_not_fire_for_function_prefix_at_sql_expr_slot() {
// The reason issue #6 dropped the flag: a known function name
// (or its prefix) must NOT be mis-flagged as an unknown column.
let cache = two_table_schema();
assert!(
invalid_ident_at_cursor("select su", 9, &cache).is_none(),
"`su` prefixes `sum`/`substr` — must not flag",
);
assert!(
invalid_ident_at_cursor("select sum", 10, &cache).is_none(),
"`sum` is a known function — must not flag",
);
}
#[test]
fn invalid_ident_does_not_fire_for_column_prefix_at_sql_expr_slot() {
// A real column prefix is an in-progress lookup, not a typo.
let cache = two_table_schema();
assert!(
invalid_ident_at_cursor("select na", 9, &cache).is_none(),
"`na` prefixes the `name` column — must not flag",
);
}
fn two_table_schema() -> SchemaCache {
use crate::dsl::types::Type;
let mut s = SchemaCache::default();