fix: SQL function-call names not flagged as columns

Two layered fixes for the same bug class: `select sum(Age) from
Customers` runs cleanly at the engine but the validator was treating
`sum` as a column reference. The grammar already admits function
calls structurally (ADR-0031 §1: "it does not know which names are
aggregates"); the validator needed to match.

1. Walker (schema_existence_diagnostics): the bare-column check on
   `sql_expr_ident` items now skips when the ident is immediately
   followed by `(` — it's a function-call name, not a column. New
   helper `is_followed_by_call_args` mirrors the existing
   `is_followed_by_qualified_ref` guard. Args inside the call are
   ordinary expressions and their idents still flow through the
   normal bare-column check on subsequent iterations.

   Cascades to: the [ERR] validity indicator (verdict derived from
   diagnostics), the red highlight overlay (renderer overlays
   diagnostic spans), and the ambient hint at complete inputs (the
   diagnostic-driven `pick_hint_diagnostic` path).

2. Typing-time (invalid_ident_at_cursor_in_mode): at any
   `sql_expr_ident` position the partial could resolve to either a
   column reference or a function-call name; without lookahead for a
   trailing `(` we can't tell. The check now returns early at
   `sql_expr_ident` positions. Submit-time still catches genuine
   column typos: the schema-existence diagnostic only skips when the
   ident *is* followed by `(`, so a bare unknown ident still trips
   and the hint surfaces it via `pick_hint_diagnostic`.

Trade-off worth flagging: typing `select Agx` (no FROM yet) is now
silent until FROM is added; previously the typing-time path flagged
it as "No such column". This makes typing-time consistent with
submit-time — the schema-existence pass already silently skips
no-FROM expressions ("no FROM in scope — engine catches"). For any
expression-with-scope (SELECT with FROM, WHERE, etc.) the
diagnostic-driven hint still fires for column typos; new test pins
this.

Tests added (5): walker positive (every standard aggregate plus
count(*), count(distinct …), nested calls, WHERE-clause functions,
and non-aggregate functions), walker negative (unknown column inside
call args), walker negative for DISTINCT-shielded arg, typing-time
positive (no false flag on partial function name), typing-time
trade-off lockdown (genuine column typo still hints when FROM is in
scope).

No grammar change; no ADR amendment (the fix matches ADR-0031 §1's
existing posture). Full suite 2040 passed / 0 failed / 0 unexpected
skips. Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-28 22:03:31 +00:00
parent 6f87ad1842
commit 24c268541e
3 changed files with 206 additions and 0 deletions
+15
View File
@@ -1123,6 +1123,21 @@ pub fn invalid_ident_at_cursor_in_mode(
if expected.is_empty() { if expected.is_empty() {
return None; return None;
} }
// Issue #6 follow-on: at a SQL expression position the partial
// could resolve to either a column reference *or* a function-call
// name (the grammar admits the call shape structurally — see
// sql_expr.rs's `CALL_ARGS` comment, "it does not know which
// names are aggregates"). Without lookahead for a trailing `(`,
// we can't tell here, so we don't flag — submit-time validation
// still catches a genuine column typo via the `unknown_column`
// diagnostic, which only skips when the ident *is* followed by
// `(` (i.e. it really is a function call).
let has_sql_expr_slot = expected.iter().any(|e| {
matches!(e, Expectation::Ident { role: "sql_expr_ident", .. })
});
if has_sql_expr_slot {
return None;
}
// Find every schema-listable source in the expected list. // Find every schema-listable source in the expected list.
let sources: Vec<IdentSource> = expected let sources: Vec<IdentSource> = expected
.iter() .iter()
+110
View File
@@ -1034,6 +1034,18 @@ fn schema_existence_diagnostics(
// check on the next iteration. // check on the next iteration.
pending_qualifier = pending_qualifier =
Some((item.text.clone(), item.span)); Some((item.text.clone(), item.span));
} else if role == "sql_expr_ident"
&& is_followed_by_call_args(&path.items, i)
{
// Issue #6: this ident is the *name* of a function
// call (immediately followed by `(`), not a column
// reference. The grammar admits the call shape
// structurally (sql_expr.rs's `CALL_ARGS` comment:
// "it does not know which names are aggregates"),
// so the validator skips here. The arguments
// *inside* the call are ordinary expressions and
// their idents flow through the bare-column check
// on subsequent iterations.
} else if let Some(target) = (role == "sql_expr_ident") } else if let Some(target) = (role == "sql_expr_ident")
.then(|| { .then(|| {
bare_ref_insert_target( bare_ref_insert_target(
@@ -2015,6 +2027,25 @@ fn is_followed_by_qualified_ref(
) )
} }
/// True when the ident at `items[i]` is the *name* of a function call —
/// the immediately following item is `(`. Mirrors the qualified-ref
/// guard and lets the bare-column check in
/// `schema_existence_diagnostics` skip validating a function-call name
/// against the schema (issue #6). The grammar admits the call shape
/// structurally (sql_expr.rs's CALL_ARGS comment: "it does not know
/// which names are aggregates"), so this validator must match: an
/// ident followed by `(` is a function-call name, not a column ref.
fn is_followed_by_call_args(
items: &[outcome::MatchedItem],
i: usize,
) -> bool {
use outcome::MatchedKind;
matches!(
items.get(i + 1).map(|it| &it.kind),
Some(MatchedKind::Punct('('))
)
}
fn schema_has_table(schema: &crate::completion::SchemaCache, name: &str) -> bool { fn schema_has_table(schema: &crate::completion::SchemaCache, name: &str) -> bool {
schema.tables.iter().any(|t| t.eq_ignore_ascii_case(name)) schema.tables.iter().any(|t| t.eq_ignore_ascii_case(name))
} }
@@ -5399,6 +5430,85 @@ mod tests {
); );
} }
#[test]
fn select_aggregate_function_name_not_flagged_as_unknown_column() {
// Issue #6: `select sum(Age) from Customers` runs cleanly at
// the engine but the schema-existence pass was treating `sum`
// as a bare column reference and flagging "no such column
// `sum` on table `Customers`". The grammar admits the
// function-call shape structurally (sql_expr.rs comment at
// CALL_ARGS, ADR-0031 §1: "it does not know which names are
// aggregates"), so the validator must match: an ident
// followed by `(` is a function-call name, not a column
// reference.
let schema = schema_with("Customers", &[("Age", Type::Int)]);
for input in [
// The five standard SQL aggregates …
"select sum(Age) from Customers",
"select avg(Age) from Customers",
"select count(*) from Customers",
"select count(Age) from Customers",
"select min(Age) from Customers",
"select max(Age) from Customers",
// … `count(distinct …)` (ADR-0031 §1 — the one place
// DISTINCT leads the argument list) …
"select count(distinct Age) from Customers",
// … and non-aggregate functions, which get the same
// treatment by the same fix because the grammar already
// accepts them and the engine evaluates them.
"select length(Age) from Customers",
"select coalesce(Age, 0) from Customers",
// Nested calls — the outer name is followed by `(` (skip),
// the inner name is followed by `(` (skip), the innermost
// bare ident is followed by `)` and gets validated.
"select upper(lower(Age)) from Customers",
// Functions in WHERE: same `sql_expr_ident` role, same
// skip guard applies.
"select Age from Customers where length(Age) > 0",
] {
let diags = diag_keys(input, &schema);
assert!(
!diags.iter().any(|d| d.contains("no such column")),
"function-call name must not trip `no such column` for {input:?}; got {diags:?}",
);
}
}
#[test]
fn select_unknown_column_inside_distinct_arg_still_flagged() {
// ADR-0031 §1 calls out `count(distinct col)` as a special
// argument shape. The argument is still an ordinary column
// reference and a genuinely unknown one must still flag —
// the DISTINCT keyword doesn't shield the column-ref
// validation downstream.
let schema = schema_with("Customers", &[("Age", Type::Int)]);
let diags = diag_keys(
"select count(distinct nonexistent) from Customers",
&schema,
);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"unknown column inside `count(distinct …)` must still fire; got {diags:?}",
);
}
#[test]
fn select_unknown_argument_inside_function_call_still_flagged() {
// Symmetric guard: the fix only skips validation of the
// function-call *name*; the argument expressions are still
// ordinary expressions and a genuinely unknown column inside
// a call must still fire `no such column`.
let schema = schema_with("Customers", &[("Age", Type::Int)]);
let diags = diag_keys(
"select sum(nonexistent) from Customers",
&schema,
);
assert!(
diags.iter().any(|d| d.contains("no such column")),
"unknown column inside the call args must still fire; got {diags:?}",
);
}
#[test] #[test]
fn insert_select_unknown_projection_column_is_error() { fn insert_select_unknown_projection_column_is_error() {
// ADR-0033 sub-phase 3c cross-cut: the Phase-2 // ADR-0033 sub-phase 3c cross-cut: the Phase-2
+81
View File
@@ -1169,6 +1169,87 @@ mod tests {
} }
} }
#[test]
fn genuine_column_typo_in_complete_select_still_hints_via_diagnostic() {
// Issue #6 trade-off lockdown: dropping the typing-time
// `invalid_ident_at_cursor` flag at `sql_expr_ident` positions
// (to avoid the false positive on function names like `sum`)
// must not silently kill the typo signal for *genuine* column
// typos. Once the SELECT is structurally complete (FROM is in
// scope), the schema-existence pass fires `unknown_column`
// and the ambient hint surfaces that diagnostic via
// `pick_hint_diagnostic`. The user still gets the typing-time
// warning, just through a different path.
use crate::completion::{SchemaCache, TableColumn};
use crate::dsl::types::Type;
let mut cache = SchemaCache::default();
cache.tables.push("Customers".to_string());
let tc = vec![TableColumn {
name: "Age".to_string(),
user_type: Type::Int,
not_null: false,
has_default: false,
}];
for c in &tc {
cache.columns.push(c.name.clone());
}
cache.table_columns.insert("Customers".to_string(), tc);
for input in [
"select Agx from Customers",
"select * from Customers where Agx = 5",
"select * from Customers where Agx",
] {
match ambient_hint_in_mode(input, input.len(), None, &cache, Mode::Advanced) {
Some(AmbientHint::Prose(p)) => assert!(
p.contains("no such column") && p.contains("Agx"),
"complete SELECT with column typo must surface the diagnostic hint for {input:?}; got: {p:?}",
),
other => panic!(
"complete SELECT with column typo must produce a Prose hint for {input:?}; got: {other:?}",
),
}
}
}
#[test]
fn advanced_select_partial_function_name_not_flagged_as_invalid_column() {
// Issue #6 follow-on: while the user is typing
// `select sum` (no `(` yet), the ambient hint must not
// pre-emptively show "No such column: `sum`". At a SQL
// expression position the partial could resolve to either a
// column reference *or* a function-call name; the typing-time
// `invalid_ident_at_cursor` check would otherwise mislead
// because it only knows about schema columns. Submit-time
// validation still flags a genuine column typo (the
// `unknown_column` diagnostic only skips function-call names,
// which require a trailing `(` — so a bare unknown ident
// still trips at submit).
use crate::completion::{SchemaCache, TableColumn};
use crate::dsl::types::Type;
let mut cache = SchemaCache::default();
cache.tables.push("Customers".to_string());
let tc = vec![
TableColumn {
name: "Age".to_string(),
user_type: Type::Int,
not_null: false,
has_default: false,
},
];
for c in &tc {
cache.columns.push(c.name.clone());
}
cache.table_columns.insert("Customers".to_string(), tc);
let input = "select sum";
let hint = ambient_hint_in_mode(input, input.len(), None, &cache, Mode::Advanced);
if let Some(AmbientHint::Prose(p)) = &hint {
assert!(
!p.contains("No such column"),
"`select sum` mid-typing must not pre-emptively flag `sum` as an invalid column; got: {p:?}",
);
}
}
#[test] #[test]
fn advanced_partial_typing_does_not_leak_bare_double_in_prose() { fn advanced_partial_typing_does_not_leak_bare_double_in_prose() {
// Issue #5 (prose half): at `create table Orders (count` (no // Issue #5 (prose half): at `create table Orders (count` (no