Add src/dsl/sql_functions.rs (KNOWN_SQL_FUNCTIONS) as the shared source of truth at sql_expr_ident slots: - #15: offer the functions as Tab candidates under a new CandidateKind::Function + ninth Theme colour tok_function (blue, distinct from keyword/identifier/type). - #16: restore the column-typo flag the #6 fix had dropped wholesale — invalid_ident_at_cursor now bails only when the partial prefix-matches a known function, else falls through to the schema-column check. A column named like a function (e.g. `count`) is deduped (column wins). `cast` is excluded — CAST(x AS type) is not a plain-call shape. The no-validation-allowlist posture stands: the list drives completion + the typo hint only, never parse-time acceptance. Docs: ADR-0022 Amendment 6, ADR-0031 status note, README index, requirements I3/I4 + refreshed test baseline.
This commit is contained in:
+228
-9
@@ -228,6 +228,12 @@ pub enum CandidateKind {
|
||||
/// expects either `values` or `(`, and surfacing both makes
|
||||
/// the Form A path discoverable.
|
||||
Punct,
|
||||
/// A curated SQL function name (ADR-0022 Amendment 6, issue
|
||||
/// #15) offered at a `sql_expr_ident` slot. Coloured with
|
||||
/// `tok_function` so a learner can tell `sum` / `upper` apart
|
||||
/// from a column reference or a clause keyword. The set is
|
||||
/// `crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS`.
|
||||
Function,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -682,6 +688,27 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
// Source 1.8: SQL function-name candidates (ADR-0022 Amendment 6,
|
||||
// issue #15). At a `sql_expr_ident` slot the grammar accepts a
|
||||
// column reference *or* the leading name of a function call
|
||||
// (ADR-0031 §1); the column candidates come from the schema
|
||||
// (Source 2), and these surface the curated known-function set so
|
||||
// a learner can discover `sum` / `upper` / … by Tab. Prefix-
|
||||
// filtered like every other source; empty prefix offers the whole
|
||||
// set. Tagged `CandidateKind::Function` for its own colour.
|
||||
let has_sql_expr_slot = expected.iter().any(|e| {
|
||||
matches!(e, Expectation::Ident { role: "sql_expr_ident", .. })
|
||||
});
|
||||
let mut functions: Vec<String> = if has_sql_expr_slot {
|
||||
crate::dsl::sql_functions::KNOWN_SQL_FUNCTIONS
|
||||
.iter()
|
||||
.filter(|f| matches_prefix(f))
|
||||
.map(|f| (*f).to_string())
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
// Source 2: schema identifiers — accumulated across every
|
||||
// matching schema-listable `Ident { source }` expectation.
|
||||
// `NewName` / `Types` / `Free` sources don't query the
|
||||
@@ -725,6 +752,16 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
// resolving collisions in the user's favour would create
|
||||
// ambiguity in the live render.
|
||||
identifiers.retain(|name| !keywords.contains(name));
|
||||
// If a known function name collides with one of the user's own
|
||||
// columns (a column literally named `count` / `date` / …), the
|
||||
// column wins and the function candidate is dropped — otherwise the
|
||||
// same text would appear twice in the candidate line, distinguished
|
||||
// only by colour. Same spirit as the keyword-wins rule above: a
|
||||
// schema name is the user's content and the more relevant
|
||||
// completion; a bare `count` accepted as the column still becomes a
|
||||
// call the moment the user types `(`. Case-insensitive so `Count`
|
||||
// (column) also suppresses `count` (function). (DA review, #15.)
|
||||
functions.retain(|f| !identifiers.iter().any(|i| i.eq_ignore_ascii_case(f)));
|
||||
|
||||
// Schema identifiers first: a column / table name the user
|
||||
// would otherwise have to look up is the highest-value
|
||||
@@ -738,6 +775,7 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
identifiers.len()
|
||||
+ keywords.len()
|
||||
+ type_names.len()
|
||||
+ functions.len()
|
||||
+ composites.len()
|
||||
+ punct_candidates.len()
|
||||
+ flags.len(),
|
||||
@@ -762,6 +800,14 @@ pub fn candidates_at_cursor_with_in_mode(
|
||||
kind: CandidateKind::Keyword,
|
||||
mode: ModeClass::Both,
|
||||
}));
|
||||
// Function names sit after keywords/types: a learner reads the
|
||||
// clause keywords first, then discovers callables. Their own
|
||||
// `Function` kind drives the `tok_function` colour (issue #15).
|
||||
candidates.extend(functions.into_iter().map(|text| Candidate {
|
||||
text,
|
||||
kind: CandidateKind::Function,
|
||||
mode: ModeClass::Both,
|
||||
}));
|
||||
candidates.extend(composites.into_iter().map(|text| Candidate {
|
||||
text,
|
||||
kind: CandidateKind::Keyword,
|
||||
@@ -1134,19 +1180,23 @@ pub fn invalid_ident_at_cursor_in_mode(
|
||||
if expected.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// Issue #6 follow-on: at a SQL expression position the partial
|
||||
// could resolve to either a column reference *or* a function-call
|
||||
// name (the grammar admits the call shape structurally — see
|
||||
// Issue #6 / #16: at a SQL expression position the partial could
|
||||
// resolve to either a column reference *or* a function-call name
|
||||
// (the grammar admits the call shape structurally — see
|
||||
// sql_expr.rs's `CALL_ARGS` comment, "it does not know which
|
||||
// names are aggregates"). Without lookahead for a trailing `(`,
|
||||
// we can't tell here, so we don't flag — submit-time validation
|
||||
// still catches a genuine column typo via the `unknown_column`
|
||||
// diagnostic, which only skips when the ident *is* followed by
|
||||
// `(` (i.e. it really is a function call).
|
||||
// names are aggregates"). Issue #6 dropped the flag here wholesale
|
||||
// to stop the false positive on names like `sum`; issue #16
|
||||
// restores it for genuine typos using the curated known-function
|
||||
// list. When the partial prefix-matches a known function name we
|
||||
// still bail (it may yet become a function call); otherwise we
|
||||
// fall through to the schema-column check below, which flags the
|
||||
// partial as "no such column" unless it prefix-matches a real
|
||||
// column. So `select Agx` warns at typing time again, while
|
||||
// `select sum` does not.
|
||||
let has_sql_expr_slot = expected.iter().any(|e| {
|
||||
matches!(e, Expectation::Ident { role: "sql_expr_ident", .. })
|
||||
});
|
||||
if has_sql_expr_slot {
|
||||
if has_sql_expr_slot && crate::dsl::sql_functions::is_known_function_prefix(partial) {
|
||||
return None;
|
||||
}
|
||||
// Find every schema-listable source in the expected list.
|
||||
@@ -2365,6 +2415,175 @@ mod tests {
|
||||
|
||||
// ---- ADR-0032 §10.5 qualified-prefix completion ----
|
||||
|
||||
// ---- SQL function-name completion (issue #15) ----
|
||||
|
||||
#[test]
|
||||
fn sql_expr_slot_offers_known_function_candidates() {
|
||||
// Issue #15: at a `sql_expr_ident` slot Tab offers the curated
|
||||
// function names so a learner can discover them.
|
||||
let cache = two_table_schema();
|
||||
let cs = cands_with("select * from a where ", 22, &cache);
|
||||
for f in ["sum", "avg", "count", "upper", "coalesce"] {
|
||||
assert!(
|
||||
cs.contains(&f.to_string()),
|
||||
"expected function `{f}` offered at WHERE expr slot; got {cs:?}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn projection_slot_offers_known_function_candidates() {
|
||||
// Issue #15's headline example: at `select ` (the projection
|
||||
// slot, empty prefix) the functions must surface alongside
|
||||
// columns — the projection slot is also `sql_expr_ident`, and
|
||||
// it must not be swallowed by the value-literal suppression
|
||||
// (it carries a schema-ident expectation, so it isn't).
|
||||
let cache = two_table_schema();
|
||||
let cs = cands_with("select ", 7, &cache);
|
||||
for f in ["sum", "count", "upper"] {
|
||||
assert!(
|
||||
cs.contains(&f.to_string()),
|
||||
"expected function `{f}` offered at the projection slot; got {cs:?}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_function_candidates_filter_by_prefix() {
|
||||
// `su` narrows to the functions starting `su` — `sum`,
|
||||
// `substr` — and excludes non-matches.
|
||||
let cache = two_table_schema();
|
||||
let input = "select * from a where su";
|
||||
let cs = cands_with(input, input.len(), &cache);
|
||||
assert!(cs.contains(&"sum".to_string()), "got {cs:?}");
|
||||
assert!(cs.contains(&"substr".to_string()), "got {cs:?}");
|
||||
assert!(
|
||||
!cs.contains(&"avg".to_string()),
|
||||
"`avg` does not prefix-match `su`; got {cs:?}",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sql_function_candidates_carry_function_kind() {
|
||||
// The hint panel colours functions via their own kind.
|
||||
let cache = two_table_schema();
|
||||
let input = "select * from a where su";
|
||||
let cands = candidates_at_cursor(input, input.len(), &cache)
|
||||
.expect("some completion")
|
||||
.candidates;
|
||||
let sum = cands
|
||||
.iter()
|
||||
.find(|c| c.text == "sum")
|
||||
.expect("`sum` present");
|
||||
assert_eq!(sum.kind, CandidateKind::Function);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_candidates_absent_at_non_expression_slots() {
|
||||
// A non-`sql_expr_ident` slot (the table slot of `show data `)
|
||||
// must not surface function names.
|
||||
let cache = SchemaCache {
|
||||
tables: vec!["Customers".to_string()],
|
||||
..SchemaCache::default()
|
||||
};
|
||||
let cs = cands_with("show data ", 10, &cache);
|
||||
for f in ["sum", "count", "upper"] {
|
||||
assert!(
|
||||
!cs.contains(&f.to_string()),
|
||||
"function `{f}` must not appear at the table slot; got {cs:?}",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_candidate_deduped_against_a_like_named_column() {
|
||||
// DA review (#15): a column literally named like a function
|
||||
// (`count`) must appear exactly once in the candidate line —
|
||||
// the column wins, the redundant function candidate is dropped.
|
||||
let mut cache = SchemaCache {
|
||||
tables: vec!["a".to_string()],
|
||||
columns: vec!["count".to_string()],
|
||||
..SchemaCache::default()
|
||||
};
|
||||
cache
|
||||
.table_columns
|
||||
.insert("a".to_string(), vec![TableColumn::new("count", Type::Int)]);
|
||||
let input = "select * from a where co";
|
||||
let cands = candidates_at_cursor(input, input.len(), &cache)
|
||||
.expect("some completion")
|
||||
.candidates;
|
||||
let count_entries: Vec<_> =
|
||||
cands.iter().filter(|c| c.text.eq_ignore_ascii_case("count")).collect();
|
||||
assert_eq!(
|
||||
count_entries.len(),
|
||||
1,
|
||||
"`count` must appear once, not duplicated as column + function; got {count_entries:?}",
|
||||
);
|
||||
assert_eq!(
|
||||
count_entries[0].kind,
|
||||
CandidateKind::Identifier,
|
||||
"the surviving `count` candidate must be the user's column",
|
||||
);
|
||||
// A non-colliding function at the same slot is unaffected.
|
||||
assert!(
|
||||
cands.iter().any(|c| c.text == "coalesce" && c.kind == CandidateKind::Function),
|
||||
"non-colliding functions still surface; got {cands:?}",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cast_is_not_offered_as_a_function_candidate() {
|
||||
// `cast` uses `CAST(x AS type)`, which the call-shape grammar
|
||||
// does not parse — it must never be offered (ADR-0031 call
|
||||
// shape; sql_functions.rs exclusion).
|
||||
let cache = two_table_schema();
|
||||
let input = "select * from a where ca";
|
||||
let cs = cands_with(input, input.len(), &cache);
|
||||
assert!(
|
||||
!cs.contains(&"cast".to_string()),
|
||||
"`cast` must not be offered as a function candidate; got {cs:?}",
|
||||
);
|
||||
}
|
||||
|
||||
// ---- typing-time column-typo hint restored (issue #16) ----
|
||||
|
||||
#[test]
|
||||
fn invalid_ident_fires_for_genuine_typo_at_sql_expr_slot() {
|
||||
// Issue #16: a genuine column typo at a `sql_expr_ident` slot
|
||||
// (before FROM is in scope) warns at typing time again — it
|
||||
// matches neither a schema column nor a known function name.
|
||||
let cache = two_table_schema();
|
||||
let inv = invalid_ident_at_cursor("select Agx", 10, &cache)
|
||||
.expect("genuine typo at an expr slot must flag");
|
||||
assert_eq!(inv.found, "Agx");
|
||||
assert_eq!(inv.source, IdentSource::Columns);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_ident_does_not_fire_for_function_prefix_at_sql_expr_slot() {
|
||||
// The reason issue #6 dropped the flag: a known function name
|
||||
// (or its prefix) must NOT be mis-flagged as an unknown column.
|
||||
let cache = two_table_schema();
|
||||
assert!(
|
||||
invalid_ident_at_cursor("select su", 9, &cache).is_none(),
|
||||
"`su` prefixes `sum`/`substr` — must not flag",
|
||||
);
|
||||
assert!(
|
||||
invalid_ident_at_cursor("select sum", 10, &cache).is_none(),
|
||||
"`sum` is a known function — must not flag",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_ident_does_not_fire_for_column_prefix_at_sql_expr_slot() {
|
||||
// A real column prefix is an in-progress lookup, not a typo.
|
||||
let cache = two_table_schema();
|
||||
assert!(
|
||||
invalid_ident_at_cursor("select na", 9, &cache).is_none(),
|
||||
"`na` prefixes the `name` column — must not flag",
|
||||
);
|
||||
}
|
||||
|
||||
fn two_table_schema() -> SchemaCache {
|
||||
use crate::dsl::types::Type;
|
||||
let mut s = SchemaCache::default();
|
||||
|
||||
Reference in New Issue
Block a user