walker: 2e prereq — §10.3 stage-2 CTE harvest + cte_arity_mismatch
Implements the six ADR-0032 §10.3 output-column derivation rules at CTE body-frame exit, populating the placeholder CteBinding's columns. Unblocks `diagnostic.cte_arity_mismatch` (which compares declared col-list arity vs derived projection arity) and the upcoming qualified-prefix completion in 2e proper. - `WalkContext::pending_cte_harvest`: bookkeeping for an in-progress CTE harvest, armed by writes_cte_name + extended by cte_column idents, consumed by the next walk_scoped_subgrammar invocation (CTE syntax has no intervening ScopedSubgrammar, so timing is deterministic). Cleared on every walk_scoped_subgrammar entry to prevent stale state surviving a speculative walk rollback. - `run_cte_harvest`: post-walk path-scan classifier that reconstructs the body's first leg's projection-list and applies the six derivation rules. Compound bodies take columns from the first leg per spec; recursive CTE bodies take the non-recursive (first) leg. An optional (col-list) renames the derived columns positionally, preserving their types. - `expand_binding`: bridges a TableBinding to a CteColumn list, resolving CTE-source bindings (empty columns + table-name matches an in-scope CteBinding) through to the CTE's harvested columns. Enables sibling CTEs to project correctly: in `WITH a AS (...), b AS (SELECT * FROM a) ...`, b's harvest sees a's derived columns through the body's from_scope binding. - `WalkContext::pending_diagnostics`: accumulator for diagnostics emitted DURING the walk by node handlers with context the post-walk passes can't reconstruct. Drained by the top-level walk function on both match and non-match paths so a re-used context can't leak entries between walks. Test totals: 1399 → 1414 passing (+15: 10 derivation rules + 1 sibling CTE + 4 arity match/mismatch tests). Clippy clean.
This commit is contained in:
@@ -164,6 +164,56 @@ pub struct WalkContext<'a> {
|
||||
/// on top. Always non-empty: the bottom frame is created at
|
||||
/// `WalkContext::new` / `with_schema` time and never popped.
|
||||
pub from_scope_stack: Vec<ScopeFrame>,
|
||||
/// Diagnostics emitted *during* the walk by node handlers
|
||||
/// that have context the post-walk path scanners can no
|
||||
/// longer reconstruct (notably the §10.3 CTE harvest, which
|
||||
/// runs at body-frame exit and has direct access to both
|
||||
/// the declared col-list and the derived columns). The
|
||||
/// walker's top-level `walk` function drains this on
|
||||
/// successful parses and folds the entries into the final
|
||||
/// diagnostic vector.
|
||||
pub pending_diagnostics: Vec<crate::dsl::walker::outcome::Diagnostic>,
|
||||
/// Set by the `writes_cte_name` ident path right after the
|
||||
/// placeholder `CteBinding` is pushed onto the outer frame.
|
||||
/// Tells the very next `walk_scoped_subgrammar` invocation
|
||||
/// that the body it's about to walk is a CTE body and that,
|
||||
/// on `Matched` exit, it should run the §10.3 harvest into
|
||||
/// the recorded placeholder. `cte_column` idents (the
|
||||
/// optional `(c1, c2)` list between the cte name and `AS`)
|
||||
/// append to `col_list` as they're seen.
|
||||
///
|
||||
/// CTE syntax has no intervening `ScopedSubgrammar` between
|
||||
/// the cte-name ident and the body, so the timing is
|
||||
/// deterministic. Cleared by `walk_scoped_subgrammar` whether
|
||||
/// or not the inner walk matched (a speculatively-walked
|
||||
/// then-rolled-back body must not leave a stale request).
|
||||
pub pending_cte_harvest: Option<PendingCteHarvest>,
|
||||
}
|
||||
|
||||
/// Bookkeeping for an in-progress CTE harvest (ADR-0032 §10.3
/// stage 2).
///
/// The `writes_cte_name` ident sets one of these after pushing
/// the placeholder `CteBinding`; the next
/// `walk_scoped_subgrammar` invocation takes it and runs the
/// harvest after the body matches.
#[derive(Debug, Clone)]
pub struct PendingCteHarvest {
    /// Index of the placeholder `CteBinding` in the *outer*
    /// frame's `cte_bindings`. The outer frame is
    /// `from_scope_stack[len() - 2]` at the moment the body's
    /// frame is on top.
    pub placeholder_index: usize,
    /// Explicit `(c1, c2, …)` rename list — empty when the CTE
    /// declared no column list. The harvest's derived column
    /// names are overridden positionally by this list per
    /// ADR-0032 §10.3.
    pub col_list: Vec<String>,
    /// The CTE's name as written at the `cte_name` ident; passed
    /// as the `cte` argument of the `cte_arity_mismatch`
    /// diagnostic message.
    pub cte_name: String,
    /// Span of the `cte_name` ident — the diagnostic anchor for
    /// `cte_arity_mismatch` if the col-list arity disagrees with
    /// the body's derived arity.
    pub cte_name_span: (usize, usize),
}
|
||||
|
||||
impl<'a> WalkContext<'a> {
|
||||
@@ -185,6 +235,8 @@ impl<'a> WalkContext<'a> {
|
||||
user_listed_columns: None,
|
||||
subgrammar_depth: 0,
|
||||
from_scope_stack: vec![ScopeFrame::default()],
|
||||
pending_diagnostics: Vec::new(),
|
||||
pending_cte_harvest: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -205,6 +257,8 @@ impl<'a> WalkContext<'a> {
|
||||
user_listed_columns: None,
|
||||
subgrammar_depth: 0,
|
||||
from_scope_stack: vec![ScopeFrame::default()],
|
||||
pending_diagnostics: Vec::new(),
|
||||
pending_cte_harvest: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+727
-15
@@ -435,17 +435,15 @@ fn walk_ident(
|
||||
{
|
||||
binding.alias = Some(text.clone());
|
||||
}
|
||||
// ADR-0032 §10.3 stage 1: push a placeholder CteBinding into
|
||||
// the top (outer) frame before the body's ScopedSubgrammar
|
||||
// pushes its own frame. The body can self-reference the CTE
|
||||
// name as a table source (WITH RECURSIVE), and downstream
|
||||
// CTE-name validators see the binding. The body-frame-exit
|
||||
// harvest (§10.3 stage 2) is structurally hooked but the six
|
||||
// derivation rules for output columns are pending — the
|
||||
// placeholder's `columns` stays empty until a later sub-phase
|
||||
// wires the harvest. Diagnostic / completion machinery in 2d
|
||||
// and 2e can already use the name-presence to resolve "is
|
||||
// this an in-scope CTE?".
|
||||
// ADR-0032 §10.3 stage 1 + stage 2: push a placeholder
|
||||
// CteBinding into the top (outer) frame before the body's
|
||||
// ScopedSubgrammar pushes its own frame. The body can
|
||||
// self-reference the CTE name as a table source (WITH
|
||||
// RECURSIVE), and downstream CTE-name validators see the
|
||||
// binding. Then arm `pending_cte_harvest` so the next
|
||||
// ScopedSubgrammar (which is structurally guaranteed to be
|
||||
// the CTE body — no intervening scoped subgrammar in CTE
|
||||
// syntax) runs the harvest at body-frame exit.
|
||||
if writes_cte_name
|
||||
&& let Some(frame) = ctx.from_scope_stack.last_mut()
|
||||
{
|
||||
@@ -455,6 +453,24 @@ fn walk_ident(
|
||||
name: text.clone(),
|
||||
columns: Vec::new(),
|
||||
});
|
||||
let placeholder_index = frame.cte_bindings.len() - 1;
|
||||
ctx.pending_cte_harvest =
|
||||
Some(crate::dsl::walker::context::PendingCteHarvest {
|
||||
placeholder_index,
|
||||
col_list: Vec::new(),
|
||||
cte_name: text.clone(),
|
||||
cte_name_span: (start, end),
|
||||
});
|
||||
}
|
||||
// ADR-0032 §10.3: the optional `(c1, c2, …)` rename list
|
||||
// between the cte name and `AS`. Each `cte_column` ident
|
||||
// appends to the pending harvest's col_list; the harvest
|
||||
// applies them as positional renames on the derived
|
||||
// columns.
|
||||
if role == "cte_column"
|
||||
&& let Some(pending) = ctx.pending_cte_harvest.as_mut()
|
||||
{
|
||||
pending.col_list.push(text.clone());
|
||||
}
|
||||
// ADR-0032 §10.4: projection-list alias accumulator for
|
||||
// ORDER BY completion candidates.
|
||||
@@ -1038,14 +1054,465 @@ fn walk_scoped_subgrammar(
|
||||
}),
|
||||
};
|
||||
}
|
||||
// ADR-0032 §10.3 stage 2 — pick up a pending CTE harvest
|
||||
// request armed by the immediately-preceding cte_name ident.
|
||||
// Clear unconditionally: a non-matching body must not leave
|
||||
// stale state for a later unrelated ScopedSubgrammar.
|
||||
let pending_cte = ctx.pending_cte_harvest.take();
|
||||
ctx.from_scope_stack
|
||||
.push(crate::dsl::walker::context::ScopeFrame::default());
|
||||
let result = walk_node(source, pos, inner, ctx, path, per_byte);
|
||||
|
||||
// Harvest happens only on a fully-matched body. Speculative
|
||||
// walks that NoMatch / Incomplete / Fail leave the placeholder
|
||||
// empty (the outer-frame state is also discarded in the
|
||||
// speculative path, so this is correct).
|
||||
if let (Some(req), NodeWalkResult::Matched { end, .. }) =
|
||||
(pending_cte, &result)
|
||||
{
|
||||
run_cte_harvest(ctx, path, source, pos, *end, &req);
|
||||
}
|
||||
|
||||
ctx.from_scope_stack.pop();
|
||||
ctx.subgrammar_depth = saved_depth;
|
||||
result
|
||||
}
|
||||
|
||||
/// Run the §10.3 stage-2 harvest after a CTE body's
|
||||
/// `ScopedSubgrammar` matched, while the body's frame is still
|
||||
/// on top of `from_scope_stack`.
|
||||
///
|
||||
/// Reads the body's projection items out of the matched path's
|
||||
/// byte range, classifies each via the six derivation rules,
|
||||
/// applies any `(col-list)` positional rename, and writes the
|
||||
/// derived columns into the placeholder `CteBinding` in the
|
||||
/// outer (now `len - 2`) frame.
|
||||
fn run_cte_harvest(
|
||||
ctx: &mut WalkContext,
|
||||
path: &MatchedPath,
|
||||
_source: &str,
|
||||
body_start: usize,
|
||||
body_end: usize,
|
||||
req: &crate::dsl::walker::context::PendingCteHarvest,
|
||||
) {
|
||||
use crate::dsl::walker::context::{CteColumn, ScopeFrame};
|
||||
use crate::dsl::walker::outcome::{MatchedItem, MatchedKind};
|
||||
|
||||
// The body's frame is at the top of the stack while the
|
||||
// harvest runs. Need this for from_scope lookups in the
|
||||
// derivation rules.
|
||||
let body_frame: &ScopeFrame = match ctx.from_scope_stack.last() {
|
||||
Some(f) => f,
|
||||
None => return,
|
||||
};
|
||||
|
||||
// Compute body_depth = paren-balance over path items strictly
|
||||
// before body_start. The `(` immediately preceding the body
|
||||
// is at the outer depth and increments to the body's depth;
|
||||
// body_start is INSIDE that paren.
|
||||
let mut prefix_depth: i32 = 0;
|
||||
for item in &path.items {
|
||||
if item.span.0 >= body_start {
|
||||
break;
|
||||
}
|
||||
match item.kind {
|
||||
MatchedKind::Punct('(') => prefix_depth += 1,
|
||||
MatchedKind::Punct(')') => prefix_depth -= 1,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let body_depth = prefix_depth;
|
||||
|
||||
// The path items strictly inside the body byte range.
|
||||
let body_items: Vec<&MatchedItem> = path
|
||||
.items
|
||||
.iter()
|
||||
.filter(|i| i.span.0 >= body_start && i.span.1 <= body_end)
|
||||
.collect();
|
||||
|
||||
// Track depth within the body. First leg's projection list
|
||||
// begins at the first body-depth SELECT and ends at the
|
||||
// first body-depth FROM/WHERE/etc OR set-op keyword OR end.
|
||||
let mut depth = body_depth;
|
||||
let mut select_idx: Option<usize> = None;
|
||||
let mut end_idx: usize = body_items.len();
|
||||
for (i, item) in body_items.iter().enumerate() {
|
||||
let cur = depth;
|
||||
match item.kind {
|
||||
MatchedKind::Punct('(') => depth += 1,
|
||||
MatchedKind::Punct(')') => depth -= 1,
|
||||
_ => {}
|
||||
}
|
||||
if cur != body_depth {
|
||||
continue;
|
||||
}
|
||||
match item.kind {
|
||||
MatchedKind::Word("select") if select_idx.is_none() => {
|
||||
select_idx = Some(i + 1); // start of projection list
|
||||
}
|
||||
MatchedKind::Word(
|
||||
"from" | "where" | "group" | "having" | "order"
|
||||
| "limit" | "offset" | "union" | "intersect"
|
||||
| "except",
|
||||
) if select_idx.is_some() => {
|
||||
end_idx = i;
|
||||
break;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let Some(start_idx) = select_idx else {
|
||||
return;
|
||||
};
|
||||
if start_idx >= end_idx {
|
||||
return;
|
||||
}
|
||||
|
||||
// Split the projection-list slice into individual items by
|
||||
// commas at body_depth.
|
||||
let mut item_slices: Vec<&[&MatchedItem]> = Vec::new();
|
||||
let mut depth_scan = body_depth;
|
||||
let mut slice_start = start_idx;
|
||||
for i in start_idx..end_idx {
|
||||
let cur = depth_scan;
|
||||
match body_items[i].kind {
|
||||
MatchedKind::Punct('(') => depth_scan += 1,
|
||||
MatchedKind::Punct(')') => depth_scan -= 1,
|
||||
MatchedKind::Punct(',') if cur == body_depth => {
|
||||
item_slices.push(&body_items[slice_start..i]);
|
||||
slice_start = i + 1;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if slice_start < end_idx {
|
||||
item_slices.push(&body_items[slice_start..end_idx]);
|
||||
}
|
||||
|
||||
// Classify each projection item per ADR-0032 §10.3.
|
||||
let mut derived: Vec<CteColumn> = Vec::new();
|
||||
for slice in item_slices {
|
||||
classify_projection_item(
|
||||
slice,
|
||||
body_frame,
|
||||
&ctx.from_scope_stack,
|
||||
&mut derived,
|
||||
);
|
||||
}
|
||||
|
||||
// Apply (c1, c2, …) positional rename if provided. Types
|
||||
// are preserved; names overridden by the col_list. Arity
|
||||
// mismatch is emitted as `diagnostic.cte_arity_mismatch`
|
||||
// on the cte_name span before any padding/truncation so
|
||||
// the diagnostic carries the *true* derived count.
|
||||
if !req.col_list.is_empty() {
|
||||
let declared = req.col_list.len();
|
||||
let actual = derived.len();
|
||||
if declared != actual {
|
||||
use crate::dsl::walker::outcome::{Diagnostic, Severity};
|
||||
ctx.pending_diagnostics.push(Diagnostic {
|
||||
severity: Severity::Error,
|
||||
span: req.cte_name_span,
|
||||
message: crate::friendly::translate(
|
||||
"diagnostic.cte_arity_mismatch",
|
||||
&[
|
||||
("cte", &req.cte_name as &dyn std::fmt::Display),
|
||||
("declared", &declared as &dyn std::fmt::Display),
|
||||
("actual", &actual as &dyn std::fmt::Display),
|
||||
],
|
||||
),
|
||||
});
|
||||
}
|
||||
for (i, name) in req.col_list.iter().enumerate() {
|
||||
if let Some(col) = derived.get_mut(i) {
|
||||
col.name = Some(name.clone());
|
||||
} else {
|
||||
// col_list has MORE entries than derived items —
|
||||
// synthesize a typeless slot with the declared
|
||||
// name so qualified-prefix completion still
|
||||
// surfaces it.
|
||||
derived.push(CteColumn {
|
||||
name: Some(name.clone()),
|
||||
type_: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Truncate any extras when derived > declared, so the
|
||||
// CTE's externally visible arity matches the col-list
|
||||
// declaration. (The diagnostic above already captured
|
||||
// the original derived count.)
|
||||
if derived.len() > declared {
|
||||
derived.truncate(declared);
|
||||
}
|
||||
}
|
||||
|
||||
// Write into the outer frame's placeholder.
|
||||
let stack_len = ctx.from_scope_stack.len();
|
||||
if stack_len >= 2
|
||||
&& let Some(outer) = ctx.from_scope_stack.get_mut(stack_len - 2)
|
||||
&& let Some(placeholder) =
|
||||
outer.cte_bindings.get_mut(req.placeholder_index)
|
||||
{
|
||||
placeholder.columns = derived;
|
||||
}
|
||||
}
|
||||
|
||||
/// Classify one projection item by examining its leading
|
||||
/// terminals and append its derived CteColumn(s) to `out`. The
|
||||
/// six rules of ADR-0032 §10.3.
|
||||
fn classify_projection_item(
|
||||
slice: &[&crate::dsl::walker::outcome::MatchedItem],
|
||||
body_frame: &crate::dsl::walker::context::ScopeFrame,
|
||||
scope_stack: &[crate::dsl::walker::context::ScopeFrame],
|
||||
out: &mut Vec<crate::dsl::walker::context::CteColumn>,
|
||||
) {
|
||||
use crate::dsl::grammar::IdentSource;
|
||||
use crate::dsl::walker::context::CteColumn;
|
||||
use crate::dsl::walker::outcome::MatchedKind;
|
||||
|
||||
// Strip an optional trailing `[AS] alias` from the slice so
|
||||
// shape detection can examine just the expression part.
|
||||
let (expr_slice, alias) = strip_trailing_alias(slice);
|
||||
|
||||
// Rule 1: `*` — every column from body_frame.from_scope.
|
||||
// When a binding represents a CTE reference (its columns are
|
||||
// empty because it wasn't a base-table lookup), resolve
|
||||
// through to the in-scope CteBinding so nested CTEs project
|
||||
// correctly.
|
||||
if expr_slice.len() == 1
|
||||
&& matches!(expr_slice[0].kind, MatchedKind::Punct('*'))
|
||||
{
|
||||
for binding in &body_frame.from_scope {
|
||||
for col in expand_binding(binding, scope_stack) {
|
||||
out.push(col);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 2: `t.*` — every column from binding `t`.
|
||||
if expr_slice.len() == 3
|
||||
&& matches!(
|
||||
expr_slice[0].kind,
|
||||
MatchedKind::Ident { role: "qualified_star_qualifier", .. }
|
||||
)
|
||||
&& matches!(expr_slice[1].kind, MatchedKind::Punct('.'))
|
||||
&& matches!(expr_slice[2].kind, MatchedKind::Punct('*'))
|
||||
{
|
||||
let qual = &expr_slice[0].text;
|
||||
if let Some(binding) = body_frame.from_scope.iter().find(|b| {
|
||||
b.alias
|
||||
.as_deref()
|
||||
.is_some_and(|a| a.eq_ignore_ascii_case(qual))
|
||||
|| b.table.eq_ignore_ascii_case(qual)
|
||||
}) {
|
||||
for col in expand_binding(binding, scope_stack) {
|
||||
out.push(col);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 3: bare `col` — a single sql_expr_ident terminal.
|
||||
if expr_slice.len() == 1
|
||||
&& matches!(
|
||||
expr_slice[0].kind,
|
||||
MatchedKind::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "sql_expr_ident",
|
||||
}
|
||||
)
|
||||
{
|
||||
let col_text = &expr_slice[0].text;
|
||||
let resolved_type = resolve_bare_column_type_in_frame(
|
||||
body_frame,
|
||||
scope_stack,
|
||||
col_text,
|
||||
);
|
||||
let name = alias.unwrap_or_else(|| col_text.clone());
|
||||
out.push(CteColumn {
|
||||
name: Some(name),
|
||||
type_: resolved_type,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 4: qualified `t.col` — three-token shape with the
|
||||
// sql_expr_qualified_ref role on the tail ident.
|
||||
if expr_slice.len() == 3
|
||||
&& matches!(
|
||||
expr_slice[0].kind,
|
||||
MatchedKind::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "sql_expr_ident",
|
||||
}
|
||||
)
|
||||
&& matches!(expr_slice[1].kind, MatchedKind::Punct('.'))
|
||||
&& matches!(
|
||||
expr_slice[2].kind,
|
||||
MatchedKind::Ident {
|
||||
source: IdentSource::Columns,
|
||||
role: "sql_expr_qualified_ref",
|
||||
}
|
||||
)
|
||||
{
|
||||
let qual = &expr_slice[0].text;
|
||||
let col_text = &expr_slice[2].text;
|
||||
let resolved_type = resolve_qualified_column_type(
|
||||
body_frame,
|
||||
scope_stack,
|
||||
qual,
|
||||
col_text,
|
||||
);
|
||||
let name = alias.unwrap_or_else(|| col_text.clone());
|
||||
out.push(CteColumn {
|
||||
name: Some(name),
|
||||
type_: resolved_type,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// Rule 5 / 6: computed expression — name = alias if present,
|
||||
// else None. Type = None either way (ADR-0032 Amendment 1).
|
||||
out.push(CteColumn {
|
||||
name: alias,
|
||||
type_: None,
|
||||
});
|
||||
}
|
||||
|
||||
/// Peel a trailing `[AS] <ident>` off the projection-item slice
|
||||
/// if present. Returns (expr_slice_without_alias, Some(alias))
|
||||
/// or (slice, None) if no alias is detected.
|
||||
fn strip_trailing_alias<'a>(
|
||||
slice: &'a [&'a crate::dsl::walker::outcome::MatchedItem],
|
||||
) -> (
|
||||
&'a [&'a crate::dsl::walker::outcome::MatchedItem],
|
||||
Option<String>,
|
||||
) {
|
||||
use crate::dsl::grammar::IdentSource;
|
||||
use crate::dsl::walker::outcome::MatchedKind;
|
||||
|
||||
if slice.is_empty() {
|
||||
return (slice, None);
|
||||
}
|
||||
let last = slice[slice.len() - 1];
|
||||
if matches!(
|
||||
last.kind,
|
||||
MatchedKind::Ident {
|
||||
source: IdentSource::NewName,
|
||||
role: "projection_alias",
|
||||
}
|
||||
) {
|
||||
// Optional preceding `AS` keyword.
|
||||
if slice.len() >= 2
|
||||
&& matches!(
|
||||
slice[slice.len() - 2].kind,
|
||||
MatchedKind::Word("as")
|
||||
)
|
||||
{
|
||||
return (
|
||||
&slice[..slice.len() - 2],
|
||||
Some(last.text.clone()),
|
||||
);
|
||||
}
|
||||
return (&slice[..slice.len() - 1], Some(last.text.clone()));
|
||||
}
|
||||
(slice, None)
|
||||
}
|
||||
|
||||
fn resolve_bare_column_type_in_frame(
|
||||
frame: &crate::dsl::walker::context::ScopeFrame,
|
||||
scope_stack: &[crate::dsl::walker::context::ScopeFrame],
|
||||
column: &str,
|
||||
) -> Option<crate::dsl::types::Type> {
|
||||
let mut found = None;
|
||||
for binding in &frame.from_scope {
|
||||
for col in expand_binding(binding, scope_stack) {
|
||||
if col
|
||||
.name
|
||||
.as_deref()
|
||||
.is_some_and(|n| n.eq_ignore_ascii_case(column))
|
||||
{
|
||||
if found.is_some() {
|
||||
return None; // ambiguous — no type
|
||||
}
|
||||
found = col.type_;
|
||||
}
|
||||
}
|
||||
}
|
||||
found
|
||||
}
|
||||
|
||||
fn resolve_qualified_column_type(
|
||||
frame: &crate::dsl::walker::context::ScopeFrame,
|
||||
scope_stack: &[crate::dsl::walker::context::ScopeFrame],
|
||||
qualifier: &str,
|
||||
column: &str,
|
||||
) -> Option<crate::dsl::types::Type> {
|
||||
let binding = frame.from_scope.iter().find(|b| {
|
||||
b.alias
|
||||
.as_deref()
|
||||
.is_some_and(|a| a.eq_ignore_ascii_case(qualifier))
|
||||
|| b.table.eq_ignore_ascii_case(qualifier)
|
||||
})?;
|
||||
expand_binding(binding, scope_stack)
|
||||
.into_iter()
|
||||
.find(|c| {
|
||||
c.name
|
||||
.as_deref()
|
||||
.is_some_and(|n| n.eq_ignore_ascii_case(column))
|
||||
})
|
||||
.and_then(|c| c.type_)
|
||||
}
|
||||
|
||||
/// Resolve a `TableBinding` to its column list as `CteColumn`s.
|
||||
///
|
||||
/// Base-table bindings carry typed `TableColumn`s populated from
|
||||
/// the schema cache — convert them directly. CTE-source bindings
|
||||
/// (the binding's `columns` is empty because the FROM name
|
||||
/// didn't match a base table) look up the matching `CteBinding`
|
||||
/// in any in-scope frame and return its `columns` verbatim.
|
||||
///
|
||||
/// This is the bridge that lets a nested CTE's outer harvest see
|
||||
/// the inner CTE's derived columns: the body's `FROM inner`
|
||||
/// produces an empty-columns binding, but `expand_binding`
|
||||
/// resolves it through the inner CteBinding (which has its
|
||||
/// derived columns by the time the outer harvest runs, because
|
||||
/// the inner body's harvest fires on inner-body exit, before the
|
||||
/// outer body exits).
|
||||
///
|
||||
/// A self-reference inside a `WITH RECURSIVE` body sees the
|
||||
/// placeholder (empty columns) and the resolution returns empty
|
||||
/// — that's correct, since the harvest only fires on the
|
||||
/// non-recursive (first) leg per §10.3.
|
||||
fn expand_binding(
|
||||
binding: &crate::dsl::walker::context::TableBinding,
|
||||
scope_stack: &[crate::dsl::walker::context::ScopeFrame],
|
||||
) -> Vec<crate::dsl::walker::context::CteColumn> {
|
||||
use crate::dsl::walker::context::CteColumn;
|
||||
|
||||
if !binding.columns.is_empty() {
|
||||
return binding
|
||||
.columns
|
||||
.iter()
|
||||
.map(|c| CteColumn {
|
||||
name: Some(c.name.clone()),
|
||||
type_: Some(c.user_type),
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
for frame in scope_stack.iter().rev() {
|
||||
if let Some(cte) = frame
|
||||
.cte_bindings
|
||||
.iter()
|
||||
.find(|c| c.name.eq_ignore_ascii_case(&binding.table))
|
||||
{
|
||||
return cte.columns.clone();
|
||||
}
|
||||
}
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
|
||||
for e in src {
|
||||
if !dst.contains(&e) {
|
||||
@@ -1417,10 +1884,13 @@ mod tests {
|
||||
);
|
||||
assert_eq!(ctes.len(), 1);
|
||||
assert_eq!(ctes[0].name, "cte_x");
|
||||
// Output column derivation pending — placeholder's
|
||||
// columns stays empty until the §10.3 stage-2 harvest
|
||||
// is implemented.
|
||||
assert!(ctes[0].columns.is_empty());
|
||||
// §10.3 stage-2 harvest produces one CteColumn per
|
||||
// projection item. `SELECT 1` is a computed expression
|
||||
// without an alias → `CteColumn { name: None, type_:
|
||||
// None }`.
|
||||
assert_eq!(ctes[0].columns.len(), 1);
|
||||
assert!(ctes[0].columns[0].name.is_none());
|
||||
assert!(ctes[0].columns[0].type_.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1490,4 +1960,246 @@ mod tests {
|
||||
);
|
||||
assert_eq!(aliases, vec!["outer_b".to_string()]);
|
||||
}
|
||||
|
||||
// ---- §10.3 stage-2 CTE column-derivation harvest ----
|
||||
|
||||
/// Schema-aware walk variant — returns the outer frame's
|
||||
/// `cte_bindings` after walking the input.
|
||||
fn cte_bindings_after_walk_with_schema(
|
||||
input: &str,
|
||||
schema: &crate::completion::SchemaCache,
|
||||
) -> Vec<crate::dsl::walker::context::CteBinding> {
|
||||
let mut ctx = WalkContext::with_schema(schema);
|
||||
ctx.mode = crate::mode::Mode::Advanced;
|
||||
let mut path = MatchedPath::new();
|
||||
let mut per_byte = Vec::new();
|
||||
let result = walk_node(
|
||||
input,
|
||||
0,
|
||||
&crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT,
|
||||
&mut ctx,
|
||||
&mut path,
|
||||
&mut per_byte,
|
||||
);
|
||||
assert!(
|
||||
matches!(result, NodeWalkResult::Matched { .. }),
|
||||
"{input:?} should match: got {result:?}"
|
||||
);
|
||||
ctx.from_scope_stack[0].cte_bindings.clone()
|
||||
}
|
||||
|
||||
fn schema_users() -> crate::completion::SchemaCache {
|
||||
use crate::completion::{SchemaCache, TableColumn};
|
||||
use crate::dsl::types::Type;
|
||||
let mut s = SchemaCache::default();
|
||||
s.tables.push("users".to_string());
|
||||
s.columns.push("id".to_string());
|
||||
s.columns.push("name".to_string());
|
||||
s.columns.push("age".to_string());
|
||||
s.table_columns.insert(
|
||||
"users".to_string(),
|
||||
vec![
|
||||
TableColumn { name: "id".to_string(), user_type: Type::Int },
|
||||
TableColumn { name: "name".to_string(), user_type: Type::Text },
|
||||
TableColumn { name: "age".to_string(), user_type: Type::Int },
|
||||
],
|
||||
);
|
||||
s
|
||||
}
|
||||
|
||||
#[test]
fn cte_harvest_star_expands_from_scope() {
    use crate::dsl::types::Type;

    // Rule 1: a `SELECT *` body derives every column of the body
    // frame's from_scope, types included.
    let schema = schema_users();
    let ctes = cte_bindings_after_walk_with_schema(
        "with x as (select * from users) select * from x",
        &schema,
    );
    assert_eq!(ctes.len(), 1);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 3);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
    assert_eq!(cols[0].type_, Some(Type::Int));
    assert_eq!(cols[1].name.as_deref(), Some("name"));
    assert_eq!(cols[1].type_, Some(Type::Text));
    assert_eq!(cols[2].name.as_deref(), Some("age"));
}
|
||||
|
||||
#[test]
fn cte_harvest_qualified_star_expands_one_binding() {
    // Rule 2: `u.*` derives every column of the binding aliased `u`.
    let schema = schema_users();
    let input = "with x as (select u.* from users u) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);
    assert_eq!(ctes.len(), 1);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 3);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
|
||||
|
||||
#[test]
fn cte_harvest_bare_ref_with_alias() {
    use crate::dsl::types::Type;

    // `col AS alias`: the alias becomes the name and the source
    // column's type is kept.
    let schema = schema_users();
    let input = "with x as (select name as label from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("label"));
    assert_eq!(cols[0].type_, Some(Type::Text));
}
|
||||
|
||||
#[test]
fn cte_harvest_bare_ref_without_alias_uses_column_name() {
    use crate::dsl::types::Type;

    // Rule 3: a bare `col` is named after the column and typed from
    // the source column.
    let schema = schema_users();
    let input = "with x as (select age from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("age"));
    assert_eq!(cols[0].type_, Some(Type::Int));
}
|
||||
|
||||
#[test]
fn cte_harvest_qualified_ref() {
    use crate::dsl::types::Type;

    // Rule 4: `t.col` is named after the column and typed from the
    // qualifying binding.
    let schema = schema_users();
    let input = "with x as (select u.name from users u) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("name"));
    assert_eq!(cols[0].type_, Some(Type::Text));
}
|
||||
|
||||
#[test]
fn cte_harvest_computed_no_alias_is_unnamed() {
    // Rule 6: a computed expression without an alias has neither a
    // name nor a type.
    let schema = schema_users();
    let input = "with x as (select age + 1 from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert!(cols[0].name.is_none());
    assert!(cols[0].type_.is_none());
}
|
||||
|
||||
#[test]
fn cte_harvest_computed_with_alias() {
    // Rule 5: a computed expression with an alias takes the alias
    // as its name and stays untyped (Amendment 1).
    let schema = schema_users();
    let input =
        "with x as (select age + 1 as years from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("years"));
    assert!(cols[0].type_.is_none());
}
|
||||
|
||||
#[test]
fn cte_harvest_compound_takes_first_leg() {
    // UNION / INTERSECT / EXCEPT bodies take their columns from the
    // first leg per ADR-0032 §10.3.
    let schema = schema_users();
    let input = "with x as (select id from users union select age from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    // The first leg, `select id from users`, yields the single
    // column `id`; the second leg is not consulted.
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
|
||||
|
||||
#[test]
fn cte_harvest_recursive_uses_non_recursive_leg() {
    // WITH RECURSIVE: the first (non-recursive) leg decides the
    // columns; the self-referencing recursive leg is not inspected.
    let schema = schema_users();
    let input = "with recursive r as (select id from users union all select id from r) select * from r";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
|
||||
|
||||
#[test]
fn cte_harvest_sibling_b_sees_a_columns() {
    use crate::dsl::types::Type;

    // Sibling CTEs at one level. By the time `b`'s body is walked
    // the outer scope already holds `a` with its harvested columns
    // plus `b`'s placeholder. `FROM a` inside `b` yields a
    // TableBinding without columns, which `expand_binding` resolves
    // through the in-scope `a` CteBinding — so `*` in `b` expands
    // to `a`'s columns.
    let schema = schema_users();
    let input = "with a as (select id, name from users), b as (select * from a) select * from b";
    let ctes = cte_bindings_after_walk_with_schema(input, &schema);

    let b = ctes.iter().find(|c| c.name == "b").expect("b binding");
    let cols = &b.columns;
    assert_eq!(cols.len(), 2);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
    assert_eq!(cols[0].type_, Some(Type::Int));
    assert_eq!(cols[1].name.as_deref(), Some("name"));
    assert_eq!(cols[1].type_, Some(Type::Text));
}
|
||||
|
||||
#[test]
fn cte_harvest_col_list_renames_positionally() {
    // An explicit column list on the CTE (`x (a, b, c)`) renames
    // the body's derived columns by position. The names come from
    // the list; the types of the underlying columns are kept.
    let users_schema = schema_users();
    let sql = "with x (a, b, c) as (select * from users) select * from x";
    let bindings = cte_bindings_after_walk_with_schema(sql, &users_schema);

    let cols = &bindings[0].columns;
    assert_eq!(cols.len(), 3);

    // First column: renamed to `a`, still Int.
    assert_eq!(cols[0].name.as_deref(), Some("a"));
    assert_eq!(cols[0].type_, Some(crate::dsl::types::Type::Int));

    // Second column: renamed to `b`, still Text.
    assert_eq!(cols[1].name.as_deref(), Some("b"));
    assert_eq!(cols[1].type_, Some(crate::dsl::types::Type::Text));

    // Third column: only the rename is checked here.
    assert_eq!(cols[2].name.as_deref(), Some("c"));
}
|
||||
}
|
||||
|
||||
@@ -1797,8 +1797,16 @@ pub fn walk<'a>(
|
||||
// operator slot is highlighted rather than the engine
|
||||
// wording shown at execution time.
|
||||
d.extend(compound_arity_diagnostics(&path));
|
||||
// ADR-0032 §10.3 / §11.2 — diagnostics emitted during
|
||||
// the walk by node handlers with direct context the
|
||||
// post-walk passes can't reconstruct (primarily the
|
||||
// CTE harvest's arity-check at body-frame exit). Drain
|
||||
// unconditionally so accumulated entries don't leak
|
||||
// into a subsequent walk via a re-used WalkContext.
|
||||
d.extend(std::mem::take(&mut ctx.pending_diagnostics));
|
||||
d
|
||||
} else {
|
||||
ctx.pending_diagnostics.clear();
|
||||
Vec::new()
|
||||
};
|
||||
// Expression WARNING diagnostics — type-mismatched
|
||||
@@ -4038,6 +4046,76 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
// ---- ADR-0032 §11.2 — cte_arity_mismatch ----
|
||||
|
||||
#[test]
fn cte_arity_mismatch_when_col_list_shorter() {
    // The column list declares 2 names while the body projects
    // 3 values, so the arity-mismatch diagnostic must be present.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b) as (select 1, 2, 3) select * from x";
    let diags = diag_keys(sql, &schema);

    // A single diagnostic has to mention the CTE name and both counts.
    let fragments = ["CTE `x`", "declares 2 columns", "body has 3"];
    let fired = diags
        .iter()
        .any(|msg| fragments.iter().all(|piece| msg.contains(*piece)));
    assert!(
        fired,
        "expected cte_arity_mismatch (declared 2, actual 3); got {diags:?}",
    );
}
|
||||
|
||||
#[test]
fn cte_arity_mismatch_when_col_list_longer() {
    // The column list declares 3 names while the body projects
    // a single value, so the arity-mismatch diagnostic must be
    // present.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b, c) as (select 1) select * from x";
    let diags = diag_keys(sql, &schema);

    // A single diagnostic has to mention the CTE name and both counts.
    let fragments = ["CTE `x`", "declares 3 columns", "body has 1"];
    let fired = diags
        .iter()
        .any(|msg| fragments.iter().all(|piece| msg.contains(*piece)));
    assert!(
        fired,
        "expected cte_arity_mismatch (declared 3, actual 1); got {diags:?}",
    );
}
|
||||
|
||||
#[test]
fn cte_arity_match_no_diagnostic() {
    // Two declared names and two projected values: the counts
    // agree, so no diagnostic containing "declares" may appear.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b) as (select 1, 2) select * from x";
    let diags = diag_keys(sql, &schema);

    let silent = diags.iter().all(|msg| !msg.contains("declares"));
    assert!(silent, "matched arity should not fire; got {diags:?}");
}
|
||||
|
||||
#[test]
fn cte_arity_no_col_list_no_diagnostic() {
    // Without an explicit column list there is nothing to compare
    // the body's projection against, so no diagnostic containing
    // "declares" may appear.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x as (select 1, 2, 3) select * from x";
    let diags = diag_keys(sql, &schema);

    let silent = diags.iter().all(|msg| !msg.contains("declares"));
    assert!(
        silent,
        "no col-list should suppress arity check; got {diags:?}",
    );
}
|
||||
|
||||
#[test]
|
||||
fn alias_in_inner_subquery_does_not_affect_outer_aliases() {
|
||||
// The inner `AS y` is inside parens (depth > 0) and
|
||||
|
||||
Reference in New Issue
Block a user