walker: 2e prereq — §10.3 stage-2 CTE harvest + cte_arity_mismatch

Implements the six ADR-0032 §10.3 output-column derivation rules
at CTE body-frame exit, populating the placeholder CteBinding's
columns. Unblocks `diagnostic.cte_arity_mismatch` (which compares
declared col-list arity vs derived projection arity) and the
upcoming qualified-prefix completion in 2e proper.

- `WalkContext::pending_cte_harvest`: bookkeeping for an in-progress
  CTE harvest, armed by writes_cte_name + extended by cte_column
  idents, consumed by the next walk_scoped_subgrammar invocation
  (CTE syntax has no intervening ScopedSubgrammar, so timing is
  deterministic). Cleared on every walk_scoped_subgrammar entry
  to prevent stale state surviving a speculative walk rollback.

- `run_cte_harvest`: post-walk path-scan classifier that
  reconstructs the body's first leg's projection-list and applies
  the six derivation rules. Compound bodies take columns from the
  first leg per spec; recursive CTE bodies take the non-recursive
  (first) leg. Optional (col-list) renames positionally with
  preserved types.

- `expand_binding`: bridges a TableBinding to a CteColumn list,
  resolving CTE-source bindings (empty columns + table-name
  matches an in-scope CteBinding) through to the CTE's harvested
  columns. Enables sibling CTEs to project correctly: in
  `WITH a AS (...), b AS (SELECT * FROM a) ...`, b's harvest sees
  a's derived columns through the body's from_scope binding.

- `WalkContext::pending_diagnostics`: accumulator for diagnostics
  emitted DURING the walk by node handlers with context the
  post-walk passes can't reconstruct. Drained by the top-level
  walk function on both match and non-match paths so a re-used
  context can't leak entries between walks.

Test totals: 1399 → 1414 passing (+15: 10 derivation rules + 1
sibling CTE + 4 arity match/mismatch tests). Clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-20 17:42:17 +00:00
parent c20c6e05ca
commit dd37a1cbfc
3 changed files with 859 additions and 15 deletions
+54
View File
@@ -164,6 +164,56 @@ pub struct WalkContext<'a> {
/// on top. Always non-empty: the bottom frame is created at /// on top. Always non-empty: the bottom frame is created at
/// `WalkContext::new` / `with_schema` time and never popped. /// `WalkContext::new` / `with_schema` time and never popped.
pub from_scope_stack: Vec<ScopeFrame>, pub from_scope_stack: Vec<ScopeFrame>,
/// Diagnostics emitted *during* the walk by node handlers
/// that have context the post-walk path scanners can no
/// longer reconstruct (notably the §10.3 CTE harvest, which
/// runs at body-frame exit and has direct access to both
/// the declared col-list and the derived columns). The
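/// walker's top-level `walk` function drains this after every
/// walk: on a successful parse the entries are folded into the
/// final diagnostic vector; otherwise they are discarded so a
/// re-used context cannot leak them into the next walk.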
pub pending_diagnostics: Vec<crate::dsl::walker::outcome::Diagnostic>,
/// Set by the `writes_cte_name` ident path right after the
/// placeholder `CteBinding` is pushed onto the outer frame.
/// Tells the very next `walk_scoped_subgrammar` invocation
/// that the body it's about to walk is a CTE body and that,
/// on `Matched` exit, it should run the §10.3 harvest into
/// the recorded placeholder. `cte_column` idents (the
/// optional `(c1, c2)` list between the cte name and `AS`)
/// append to `col_list` as they're seen.
///
/// CTE syntax has no intervening `ScopedSubgrammar` between
/// the cte-name ident and the body, so the timing is
/// deterministic. Cleared by `walk_scoped_subgrammar` whether
/// or not the inner walk matched (a speculatively-walked
/// then-rolled-back body must not leave a stale request).
pub pending_cte_harvest: Option<PendingCteHarvest>,
}
/// Bookkeeping for an in-progress CTE harvest (ADR-0032 §10.3
/// stage 2).
///
/// The `writes_cte_name` ident sets one of these after pushing
/// the placeholder `CteBinding`; the next
/// `walk_scoped_subgrammar` invocation takes it and runs the
/// harvest after the body matches.
#[derive(Debug, Clone)]
pub struct PendingCteHarvest {
/// Index of the placeholder `CteBinding` in the *outer*
/// frame's `cte_bindings` (recorded as `len() - 1` right after
/// the placeholder is pushed). The outer frame is
/// `from_scope_stack[len() - 2]` at the moment the body's
/// frame is on top.
pub placeholder_index: usize,
/// Explicit `(c1, c2, …)` rename list — empty when the CTE
/// declared no column list. The harvest's derived column
/// names are overridden positionally by this list per ADR-
/// 0032 §10.3.
pub col_list: Vec<String>,
/// Text of the cte_name ident. Passed as the `cte` argument
/// when the `cte_arity_mismatch` message is rendered.
pub cte_name: String,
/// `(start, end)` span of the cte_name ident — the diagnostic
/// anchor for `cte_arity_mismatch` if the col-list arity
/// disagrees with the body's derived arity.
pub cte_name_span: (usize, usize),
} }
impl<'a> WalkContext<'a> { impl<'a> WalkContext<'a> {
@@ -185,6 +235,8 @@ impl<'a> WalkContext<'a> {
user_listed_columns: None, user_listed_columns: None,
subgrammar_depth: 0, subgrammar_depth: 0,
from_scope_stack: vec![ScopeFrame::default()], from_scope_stack: vec![ScopeFrame::default()],
pending_diagnostics: Vec::new(),
pending_cte_harvest: None,
} }
} }
@@ -205,6 +257,8 @@ impl<'a> WalkContext<'a> {
user_listed_columns: None, user_listed_columns: None,
subgrammar_depth: 0, subgrammar_depth: 0,
from_scope_stack: vec![ScopeFrame::default()], from_scope_stack: vec![ScopeFrame::default()],
pending_diagnostics: Vec::new(),
pending_cte_harvest: None,
} }
} }
} }
+727 -15
View File
@@ -435,17 +435,15 @@ fn walk_ident(
{ {
binding.alias = Some(text.clone()); binding.alias = Some(text.clone());
} }
// ADR-0032 §10.3 stage 1: push a placeholder CteBinding into // ADR-0032 §10.3 stage 1 + stage 2: push a placeholder
// the top (outer) frame before the body's ScopedSubgrammar // CteBinding into the top (outer) frame before the body's
// pushes its own frame. The body can self-reference the CTE // ScopedSubgrammar pushes its own frame. The body can
// name as a table source (WITH RECURSIVE), and downstream // self-reference the CTE name as a table source (WITH
// CTE-name validators see the binding. The body-frame-exit // RECURSIVE), and downstream CTE-name validators see the
// harvest (§10.3 stage 2) is structurally hooked but the six // binding. Then arm `pending_cte_harvest` so the next
// derivation rules for output columns are pending — the // ScopedSubgrammar (which is structurally guaranteed to be
// placeholder's `columns` stays empty until a later sub-phase // the CTE body — no intervening scoped subgrammar in CTE
// wires the harvest. Diagnostic / completion machinery in 2d // syntax) runs the harvest at body-frame exit.
// and 2e can already use the name-presence to resolve "is
// this an in-scope CTE?".
if writes_cte_name if writes_cte_name
&& let Some(frame) = ctx.from_scope_stack.last_mut() && let Some(frame) = ctx.from_scope_stack.last_mut()
{ {
@@ -455,6 +453,24 @@ fn walk_ident(
name: text.clone(), name: text.clone(),
columns: Vec::new(), columns: Vec::new(),
}); });
let placeholder_index = frame.cte_bindings.len() - 1;
ctx.pending_cte_harvest =
Some(crate::dsl::walker::context::PendingCteHarvest {
placeholder_index,
col_list: Vec::new(),
cte_name: text.clone(),
cte_name_span: (start, end),
});
}
// ADR-0032 §10.3: the optional `(c1, c2, …)` rename list
// between the cte name and `AS`. Each `cte_column` ident
// appends to the pending harvest's col_list; the harvest
// applies them as positional renames on the derived
// columns.
if role == "cte_column"
&& let Some(pending) = ctx.pending_cte_harvest.as_mut()
{
pending.col_list.push(text.clone());
} }
// ADR-0032 §10.4: projection-list alias accumulator for // ADR-0032 §10.4: projection-list alias accumulator for
// ORDER BY completion candidates. // ORDER BY completion candidates.
@@ -1038,14 +1054,465 @@ fn walk_scoped_subgrammar(
}), }),
}; };
} }
// ADR-0032 §10.3 stage 2 — pick up a pending CTE harvest
// request armed by the immediately-preceding cte_name ident.
// Clear unconditionally: a non-matching body must not leave
// stale state for a later unrelated ScopedSubgrammar.
let pending_cte = ctx.pending_cte_harvest.take();
ctx.from_scope_stack ctx.from_scope_stack
.push(crate::dsl::walker::context::ScopeFrame::default()); .push(crate::dsl::walker::context::ScopeFrame::default());
let result = walk_node(source, pos, inner, ctx, path, per_byte); let result = walk_node(source, pos, inner, ctx, path, per_byte);
// Harvest happens only on a fully-matched body. Speculative
// walks that NoMatch / Incomplete / Fail leave the placeholder
// empty (the outer-frame state is also discarded in the
// speculative path, so this is correct).
if let (Some(req), NodeWalkResult::Matched { end, .. }) =
(pending_cte, &result)
{
run_cte_harvest(ctx, path, source, pos, *end, &req);
}
ctx.from_scope_stack.pop(); ctx.from_scope_stack.pop();
ctx.subgrammar_depth = saved_depth; ctx.subgrammar_depth = saved_depth;
result result
} }
/// Derive the output columns of a just-matched CTE body (ADR-0032
/// §10.3 stage 2) and store them on the CTE's placeholder binding.
///
/// Must run while the body's own frame is still the top of
/// `ctx.from_scope_stack`; the placeholder lives in the frame
/// directly beneath it.
///
/// Steps:
/// 1. Work out the paren depth the body sits at from the path items
///    that start before `body_start`.
/// 2. Locate the first leg's projection list: everything between the
///    first body-depth `select` and the first body-depth clause or
///    set-operation keyword (or the end of the body).
/// 3. Split that list on body-depth commas and classify each piece.
/// 4. If the CTE declared a column list, report an arity mismatch
///    (using the count derived *before* any adjustment), rename
///    positionally, pad missing slots with typeless columns and drop
///    surplus ones.
/// 5. Write the result into the placeholder `CteBinding`.
///
/// `_source` is accepted for signature parity but not read.
///
/// NOTE(review): a `*` over a binding whose columns cannot be
/// resolved contributes zero columns, so the arity comparison may
/// see an undercount in that case — confirm this is intended.
fn run_cte_harvest(
    ctx: &mut WalkContext,
    path: &MatchedPath,
    _source: &str,
    body_start: usize,
    body_end: usize,
    req: &crate::dsl::walker::context::PendingCteHarvest,
) {
    use crate::dsl::walker::context::CteColumn;
    use crate::dsl::walker::outcome::{Diagnostic, MatchedItem, MatchedKind, Severity};

    // The body's frame (top of stack) supplies the FROM bindings the
    // derivation rules resolve against.
    let Some(body_frame) = ctx.from_scope_stack.last() else {
        return;
    };

    // +1 for an opening paren, -1 for a closing one, 0 otherwise.
    let paren_delta = |item: &MatchedItem| -> i32 {
        match item.kind {
            MatchedKind::Punct('(') => 1,
            MatchedKind::Punct(')') => -1,
            _ => 0,
        }
    };

    // Paren balance of everything that starts before the body; the
    // `(` that opens the body is part of this prefix.
    let body_depth: i32 = path
        .items
        .iter()
        .take_while(|item| item.span.0 < body_start)
        .map(|item| paren_delta(item))
        .sum();

    // Path items lying entirely within the body's range.
    let body_items: Vec<&MatchedItem> = path
        .items
        .iter()
        .filter(|item| item.span.0 >= body_start && item.span.1 <= body_end)
        .collect();

    // Find [projection_start, projection_end) for the first leg.
    let mut depth = body_depth;
    let mut projection_start: Option<usize> = None;
    let mut projection_end = body_items.len();
    for (idx, item) in body_items.iter().copied().enumerate() {
        // Keywords are judged at the depth in effect *before* this
        // item adjusts it.
        let depth_before = depth;
        depth += paren_delta(item);
        if depth_before != body_depth {
            continue;
        }
        match item.kind {
            MatchedKind::Word("select") if projection_start.is_none() => {
                projection_start = Some(idx + 1);
            }
            MatchedKind::Word(
                "from" | "where" | "group" | "having" | "order"
                | "limit" | "offset" | "union" | "intersect"
                | "except",
            ) if projection_start.is_some() => {
                projection_end = idx;
                break;
            }
            _ => {}
        }
    }
    let first = match projection_start {
        Some(start) if start < projection_end => start,
        _ => return,
    };

    // Cut the projection list into items at body-depth commas.
    let projection = &body_items[first..projection_end];
    let mut pieces: Vec<&[&MatchedItem]> = Vec::new();
    let mut level = body_depth;
    let mut piece_start = 0usize;
    for (offset, item) in projection.iter().copied().enumerate() {
        let top_level_comma =
            level == body_depth && matches!(item.kind, MatchedKind::Punct(','));
        if top_level_comma {
            pieces.push(&projection[piece_start..offset]);
            piece_start = offset + 1;
        } else {
            level += paren_delta(item);
        }
    }
    if piece_start < projection.len() {
        pieces.push(&projection[piece_start..]);
    }

    // Apply the §10.3 derivation rules to every projection item.
    let mut derived: Vec<CteColumn> = Vec::new();
    for piece in pieces {
        classify_projection_item(piece, body_frame, &ctx.from_scope_stack, &mut derived);
    }

    if !req.col_list.is_empty() {
        let declared = req.col_list.len();
        let actual = derived.len();

        // Report first, so the message carries the true derived count.
        if declared != actual {
            ctx.pending_diagnostics.push(Diagnostic {
                severity: Severity::Error,
                span: req.cte_name_span,
                message: crate::friendly::translate(
                    "diagnostic.cte_arity_mismatch",
                    &[
                        ("cte", &req.cte_name as &dyn std::fmt::Display),
                        ("declared", &declared as &dyn std::fmt::Display),
                        ("actual", &actual as &dyn std::fmt::Display),
                    ],
                ),
            });
        }

        // Positional rename of the columns that exist; types are kept.
        for (col, name) in derived.iter_mut().zip(&req.col_list) {
            col.name = Some(name.clone());
        }
        // Declared names beyond the derived arity become typeless
        // slots so they are still visible downstream.
        derived.extend(req.col_list.iter().skip(actual).map(|name| CteColumn {
            name: Some(name.clone()),
            type_: None,
        }));
        // Surplus derived columns are dropped so the visible arity
        // equals the declaration.
        derived.truncate(declared);
    }

    // Publish into the placeholder one frame below the body's frame.
    let Some(outer_idx) = ctx.from_scope_stack.len().checked_sub(2) else {
        return;
    };
    let slot = ctx
        .from_scope_stack
        .get_mut(outer_idx)
        .and_then(|outer| outer.cte_bindings.get_mut(req.placeholder_index));
    if let Some(placeholder) = slot {
        placeholder.columns = derived;
    }
}
/// Turn one projection item into its derived `CteColumn`(s),
/// appending them to `out` (the six rules of ADR-0032 §10.3).
///
/// A trailing `[AS] alias` is peeled off first; the remaining
/// terminals decide the shape:
///
/// * `*`      — every column of every binding in the body frame.
/// * `t.*`    — every column of the binding named/aliased `t`
///   (nothing is emitted when no such binding exists).
/// * `col`    — name is the alias or the column text; type is looked
///   up across the body frame's bindings.
/// * `t.col`  — name is the alias or the column text; type is looked
///   up in binding `t`.
/// * anything else — a computed expression: name is the alias (if
///   any) and the type is always `None`.
fn classify_projection_item(
    slice: &[&crate::dsl::walker::outcome::MatchedItem],
    body_frame: &crate::dsl::walker::context::ScopeFrame,
    scope_stack: &[crate::dsl::walker::context::ScopeFrame],
    out: &mut Vec<crate::dsl::walker::context::CteColumn>,
) {
    use crate::dsl::grammar::IdentSource;
    use crate::dsl::walker::context::CteColumn;
    use crate::dsl::walker::outcome::MatchedKind;

    // Shape detection looks only at the expression part.
    let (expr, alias) = strip_trailing_alias(slice);

    match expr {
        // `*`: bindings that reference a CTE are resolved through to
        // that CTE's columns by `expand_binding`.
        [star] if matches!(star.kind, MatchedKind::Punct('*')) => {
            for binding in &body_frame.from_scope {
                out.extend(expand_binding(binding, scope_stack));
            }
        }

        // `t.*`
        [qualifier, dot, star]
            if matches!(
                qualifier.kind,
                MatchedKind::Ident { role: "qualified_star_qualifier", .. }
            ) && matches!(dot.kind, MatchedKind::Punct('.'))
                && matches!(star.kind, MatchedKind::Punct('*')) =>
        {
            let wanted = &qualifier.text;
            let target = body_frame.from_scope.iter().find(|b| {
                b.alias
                    .as_deref()
                    .is_some_and(|a| a.eq_ignore_ascii_case(wanted))
                    || b.table.eq_ignore_ascii_case(wanted)
            });
            if let Some(binding) = target {
                out.extend(expand_binding(binding, scope_stack));
            }
        }

        // Bare `col`
        [column]
            if matches!(
                column.kind,
                MatchedKind::Ident {
                    source: IdentSource::Columns,
                    role: "sql_expr_ident",
                }
            ) =>
        {
            let type_ =
                resolve_bare_column_type_in_frame(body_frame, scope_stack, &column.text);
            let name = alias.unwrap_or_else(|| column.text.clone());
            out.push(CteColumn {
                name: Some(name),
                type_,
            });
        }

        // Qualified `t.col`
        [qualifier, dot, column]
            if matches!(
                qualifier.kind,
                MatchedKind::Ident {
                    source: IdentSource::Columns,
                    role: "sql_expr_ident",
                }
            ) && matches!(dot.kind, MatchedKind::Punct('.'))
                && matches!(
                    column.kind,
                    MatchedKind::Ident {
                        source: IdentSource::Columns,
                        role: "sql_expr_qualified_ref",
                    }
                ) =>
        {
            let type_ = resolve_qualified_column_type(
                body_frame,
                scope_stack,
                &qualifier.text,
                &column.text,
            );
            let name = alias.unwrap_or_else(|| column.text.clone());
            out.push(CteColumn {
                name: Some(name),
                type_,
            });
        }

        // Computed expression: aliased or anonymous, never typed
        // (ADR-0032 Amendment 1).
        _ => out.push(CteColumn {
            name: alias,
            type_: None,
        }),
    }
}
/// Separate a projection item into its expression terminals and an
/// optional trailing alias.
///
/// When the final terminal is a `projection_alias` new-name ident it
/// is removed, together with an immediately preceding `as` keyword
/// if there is one, and its text is returned as the alias. Otherwise
/// the slice is returned untouched with `None`.
fn strip_trailing_alias<'a>(
    slice: &'a [&'a crate::dsl::walker::outcome::MatchedItem],
) -> (
    &'a [&'a crate::dsl::walker::outcome::MatchedItem],
    Option<String>,
) {
    use crate::dsl::grammar::IdentSource;
    use crate::dsl::walker::outcome::MatchedKind;

    let Some((last, head)) = slice.split_last() else {
        return (slice, None);
    };

    let ends_with_alias = matches!(
        last.kind,
        MatchedKind::Ident {
            source: IdentSource::NewName,
            role: "projection_alias",
        }
    );
    if !ends_with_alias {
        return (slice, None);
    }

    // Drop the optional `AS` that introduces the alias.
    let expr = match head.split_last() {
        Some((prev, rest)) if matches!(prev.kind, MatchedKind::Word("as")) => rest,
        _ => head,
    };
    (expr, Some(last.text.clone()))
}
/// Resolve the type of an unqualified column reference against every
/// binding in `frame.from_scope`.
///
/// Each binding is expanded via `expand_binding` (so CTE-source
/// bindings contribute their harvested columns) and column names are
/// compared ASCII-case-insensitively.
///
/// Returns:
/// * `None` when no column matches,
/// * `None` when more than one column matches (ambiguous reference),
/// * otherwise the single match's type — which may itself be `None`
///   for an untyped column.
///
/// Ambiguity is tracked with a dedicated flag rather than by
/// inspecting the stored type: an untyped first match must still
/// make a later match ambiguous instead of letting the later match's
/// type win.
fn resolve_bare_column_type_in_frame(
    frame: &crate::dsl::walker::context::ScopeFrame,
    scope_stack: &[crate::dsl::walker::context::ScopeFrame],
    column: &str,
) -> Option<crate::dsl::types::Type> {
    let mut resolved = None;
    let mut seen_match = false;
    for binding in &frame.from_scope {
        for col in expand_binding(binding, scope_stack) {
            let is_match = col
                .name
                .as_deref()
                .is_some_and(|n| n.eq_ignore_ascii_case(column));
            if !is_match {
                continue;
            }
            if seen_match {
                return None; // ambiguous — no type
            }
            seen_match = true;
            resolved = col.type_;
        }
    }
    resolved
}
/// Resolve the type of a `qualifier.column` reference.
///
/// The first binding in `frame.from_scope` whose alias or table name
/// equals `qualifier` (ASCII-case-insensitive) is selected; within
/// its expanded column list the first column named `column` supplies
/// the type. Yields `None` if no binding matches, if the binding has
/// no such column, or if that column is untyped.
fn resolve_qualified_column_type(
    frame: &crate::dsl::walker::context::ScopeFrame,
    scope_stack: &[crate::dsl::walker::context::ScopeFrame],
    qualifier: &str,
    column: &str,
) -> Option<crate::dsl::types::Type> {
    for binding in &frame.from_scope {
        let alias_hit = binding
            .alias
            .as_deref()
            .is_some_and(|a| a.eq_ignore_ascii_case(qualifier));
        if !alias_hit && !binding.table.eq_ignore_ascii_case(qualifier) {
            continue;
        }
        // Only the first matching binding is consulted.
        for col in expand_binding(binding, scope_stack) {
            let name_hit = col
                .name
                .as_deref()
                .is_some_and(|n| n.eq_ignore_ascii_case(column));
            if name_hit {
                return col.type_;
            }
        }
        return None;
    }
    None
}
/// Produce the column list of a `TableBinding` as `CteColumn`s.
///
/// * A binding that already carries columns is converted directly:
///   each column keeps its name and gets `Some(user_type)` as type.
/// * A binding with no columns is treated as a possible CTE
///   reference: the scope stack is searched from the innermost frame
///   outwards for a `CteBinding` whose name equals the binding's
///   table name (ASCII-case-insensitive), and that CTE's columns are
///   returned as-is.
/// * If neither applies the result is empty.
///
/// This is what lets one CTE's harvest see the columns of another
/// CTE it selects from. A binding that resolves to a placeholder
/// whose columns have not been harvested yet (e.g. a self-reference
/// in a recursive body) simply yields an empty list.
fn expand_binding(
    binding: &crate::dsl::walker::context::TableBinding,
    scope_stack: &[crate::dsl::walker::context::ScopeFrame],
) -> Vec<crate::dsl::walker::context::CteColumn> {
    use crate::dsl::walker::context::CteColumn;

    if binding.columns.is_empty() {
        // Innermost frame wins when several frames define the name.
        return scope_stack
            .iter()
            .rev()
            .find_map(|frame| {
                frame
                    .cte_bindings
                    .iter()
                    .find(|cte| cte.name.eq_ignore_ascii_case(&binding.table))
            })
            .map(|cte| cte.columns.clone())
            .unwrap_or_default();
    }

    let mut cols = Vec::with_capacity(binding.columns.len());
    for c in binding.columns.iter() {
        cols.push(CteColumn {
            name: Some(c.name.clone()),
            type_: Some(c.user_type),
        });
    }
    cols
}
fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) { fn merge_expected(dst: &mut Vec<Expectation>, src: Vec<Expectation>) {
for e in src { for e in src {
if !dst.contains(&e) { if !dst.contains(&e) {
@@ -1417,10 +1884,13 @@ mod tests {
); );
assert_eq!(ctes.len(), 1); assert_eq!(ctes.len(), 1);
assert_eq!(ctes[0].name, "cte_x"); assert_eq!(ctes[0].name, "cte_x");
// Output column derivation pending — placeholder's // §10.3 stage-2 harvest produces one CteColumn per
// columns stays empty until the §10.3 stage-2 harvest // projection item. `SELECT 1` is a computed expression
// is implemented. // without an alias → `CteColumn { name: None, type_:
assert!(ctes[0].columns.is_empty()); // None }`.
assert_eq!(ctes[0].columns.len(), 1);
assert!(ctes[0].columns[0].name.is_none());
assert!(ctes[0].columns[0].type_.is_none());
} }
#[test] #[test]
@@ -1490,4 +1960,246 @@ mod tests {
); );
assert_eq!(aliases, vec!["outer_b".to_string()]); assert_eq!(aliases, vec!["outer_b".to_string()]);
} }
// ---- §10.3 stage-2 CTE column-derivation harvest ----
/// Walk `input` as a SQL SELECT statement in Advanced mode against
/// `schema`, require a full match, and hand back a copy of the
/// bottom scope frame's `cte_bindings`.
fn cte_bindings_after_walk_with_schema(
    input: &str,
    schema: &crate::completion::SchemaCache,
) -> Vec<crate::dsl::walker::context::CteBinding> {
    let grammar = &crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT;
    let mut path = MatchedPath::new();
    let mut per_byte = Vec::new();
    let mut ctx = WalkContext::with_schema(schema);
    ctx.mode = crate::mode::Mode::Advanced;

    let result = walk_node(input, 0, grammar, &mut ctx, &mut path, &mut per_byte);
    let matched = matches!(result, NodeWalkResult::Matched { .. });
    assert!(matched, "{input:?} should match: got {result:?}");

    let bottom_frame = &ctx.from_scope_stack[0];
    bottom_frame.cte_bindings.clone()
}
/// Build a schema cache holding a single `users` table with the
/// columns `id: Int`, `name: Text` and `age: Int`.
fn schema_users() -> crate::completion::SchemaCache {
    use crate::completion::{SchemaCache, TableColumn};
    use crate::dsl::types::Type;

    let layout = [("id", Type::Int), ("name", Type::Text), ("age", Type::Int)];

    let mut cache = SchemaCache::default();
    cache.tables.push("users".to_string());
    for (column, _) in &layout {
        cache.columns.push(column.to_string());
    }
    let typed_columns = layout
        .into_iter()
        .map(|(column, user_type)| TableColumn {
            name: column.to_string(),
            user_type,
        })
        .collect();
    cache.table_columns.insert("users".to_string(), typed_columns);
    cache
}
#[test]
fn cte_harvest_star_expands_from_scope() {
    // `SELECT *` in the body: the CTE exposes every column of the
    // body's FROM bindings, in order, with their types.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let ctes = cte_bindings_after_walk_with_schema(
        "with x as (select * from users) select * from x",
        &schema,
    );
    assert_eq!(ctes.len(), 1);
    let cols = &ctes[0].columns;
    let names: Vec<Option<&str>> = cols.iter().map(|c| c.name.as_deref()).collect();
    assert_eq!(names, [Some("id"), Some("name"), Some("age")]);
    assert_eq!(cols[0].type_, Some(Type::Int));
    assert_eq!(cols[1].type_, Some(Type::Text));
}
#[test]
fn cte_harvest_qualified_star_expands_one_binding() {
    // `u.*` in the body: the CTE exposes every column of the
    // binding aliased `u`.
    let schema = schema_users();
    let sql = "with x as (select u.* from users u) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    assert_eq!(ctes.len(), 1);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 3);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
#[test]
fn cte_harvest_bare_ref_with_alias() {
    // `col AS alias`: the alias becomes the column name while the
    // type still comes from the source column.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let sql = "with x as (select name as label from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    let only = &cols[0];
    assert_eq!(only.name.as_deref(), Some("label"));
    assert_eq!(only.type_, Some(Type::Text));
}
#[test]
fn cte_harvest_bare_ref_without_alias_uses_column_name() {
    // Bare `col` without alias: the column keeps its own name and
    // the source column's type.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let sql = "with x as (select age from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    let only = &cols[0];
    assert_eq!(only.name.as_deref(), Some("age"));
    assert_eq!(only.type_, Some(Type::Int));
}
#[test]
fn cte_harvest_qualified_ref() {
    // `t.col`: named after the column, typed from binding `t`.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let sql = "with x as (select u.name from users u) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    let only = &cols[0];
    assert_eq!(only.name.as_deref(), Some("name"));
    assert_eq!(only.type_, Some(Type::Text));
}
#[test]
fn cte_harvest_computed_no_alias_is_unnamed() {
    // A computed expression with no alias yields a column that has
    // neither a name nor a type.
    let schema = schema_users();
    let sql = "with x as (select age + 1 from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    let only = &cols[0];
    assert!(only.name.is_none());
    assert!(only.type_.is_none());
}
#[test]
fn cte_harvest_computed_with_alias() {
    // A computed expression with an alias is named by the alias but
    // stays untyped (Amendment 1).
    let schema = schema_users();
    let sql = "with x as (select age + 1 as years from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    let only = &cols[0];
    assert_eq!(only.name.as_deref(), Some("years"));
    assert!(only.type_.is_none());
}
#[test]
fn cte_harvest_compound_takes_first_leg() {
    // In a UNION / INTERSECT / EXCEPT body only the first leg
    // determines the CTE's columns (ADR-0032 §10.3); here that is
    // `select id from users`, so the second leg's `age` is ignored.
    let schema = schema_users();
    let sql =
        "with x as (select id from users union select age from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
#[test]
fn cte_harvest_recursive_uses_non_recursive_leg() {
    // WITH RECURSIVE: the columns come from the first
    // (non-recursive) leg; the self-referencing leg is not
    // inspected.
    let schema = schema_users();
    let sql = "with recursive r as (select id from users union all select id from r) select * from r";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    assert_eq!(cols.len(), 1);
    assert_eq!(cols[0].name.as_deref(), Some("id"));
}
#[test]
fn cte_harvest_sibling_b_sees_a_columns() {
    // Two CTEs declared side by side. By the time `b`'s body is
    // harvested, `a` already carries its derived columns, so the
    // column-less binding produced by `FROM a` resolves through to
    // them and `*` in `b` expands to `a`'s columns with types.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let sql =
        "with a as (select id, name from users), b as (select * from a) select * from b";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let b = ctes.iter().find(|c| c.name == "b").expect("b binding");
    let summary: Vec<_> = b
        .columns
        .iter()
        .map(|c| (c.name.as_deref(), c.type_))
        .collect();
    assert_eq!(
        summary,
        [
            (Some("id"), Some(Type::Int)),
            (Some("name"), Some(Type::Text)),
        ],
    );
}
#[test]
fn cte_harvest_col_list_renames_positionally() {
    // `WITH x (a, b, c) AS (SELECT * FROM users)`: the declared
    // names replace the derived ones position by position while the
    // derived types stay in place.
    use crate::dsl::types::Type;
    let schema = schema_users();
    let sql = "with x (a, b, c) as (select * from users) select * from x";
    let ctes = cte_bindings_after_walk_with_schema(sql, &schema);
    let cols = &ctes[0].columns;
    let names: Vec<Option<&str>> = cols.iter().map(|c| c.name.as_deref()).collect();
    assert_eq!(names, [Some("a"), Some("b"), Some("c")]);
    assert_eq!(cols[0].type_, Some(Type::Int));
    assert_eq!(cols[1].type_, Some(Type::Text));
}
} }
+78
View File
@@ -1797,8 +1797,16 @@ pub fn walk<'a>(
// operator slot is highlighted rather than the engine // operator slot is highlighted rather than the engine
// wording shown at execution time. // wording shown at execution time.
d.extend(compound_arity_diagnostics(&path)); d.extend(compound_arity_diagnostics(&path));
// ADR-0032 §10.3 / §11.2 — diagnostics emitted during
// the walk by node handlers with direct context the
// post-walk passes can't reconstruct (primarily the
// CTE harvest's arity-check at body-frame exit). Drain
// unconditionally so accumulated entries don't leak
// into a subsequent walk via a re-used WalkContext.
d.extend(std::mem::take(&mut ctx.pending_diagnostics));
d d
} else { } else {
ctx.pending_diagnostics.clear();
Vec::new() Vec::new()
}; };
// Expression WARNING diagnostics — type-mismatched // Expression WARNING diagnostics — type-mismatched
@@ -4038,6 +4046,76 @@ mod tests {
); );
} }
// ---- ADR-0032 §11.2 — cte_arity_mismatch ----
#[test]
fn cte_arity_mismatch_when_col_list_shorter() {
    // Two declared names against a three-item projection must
    // raise the arity diagnostic.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b) as (select 1, 2, 3) select * from x";
    let diags = diag_keys(sql, &schema);
    let fired = diags.iter().any(|d| {
        d.contains("CTE `x`")
            && d.contains("declares 2 columns")
            && d.contains("body has 3")
    });
    assert!(
        fired,
        "expected cte_arity_mismatch (declared 2, actual 3); got {diags:?}",
    );
}
#[test]
fn cte_arity_mismatch_when_col_list_longer() {
    // Three declared names against a one-item projection must
    // raise the arity diagnostic.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b, c) as (select 1) select * from x";
    let diags = diag_keys(sql, &schema);
    let fired = diags.iter().any(|d| {
        d.contains("CTE `x`")
            && d.contains("declares 3 columns")
            && d.contains("body has 1")
    });
    assert!(
        fired,
        "expected cte_arity_mismatch (declared 3, actual 1); got {diags:?}",
    );
}
#[test]
fn cte_arity_match_no_diagnostic() {
    // Declared and derived arity agree (2 vs 2), so nothing should
    // be reported.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x (a, b) as (select 1, 2) select * from x";
    let diags = diag_keys(sql, &schema);
    let fired = diags.iter().any(|d| d.contains("declares"));
    assert!(!fired, "matched arity should not fire; got {diags:?}");
}
#[test]
fn cte_arity_no_col_list_no_diagnostic() {
    // Without an explicit column list there is nothing to compare
    // against, so the arity check must stay silent.
    let schema = schema_with("base", &[("id", Type::Int)]);
    let sql = "with x as (select 1, 2, 3) select * from x";
    let diags = diag_keys(sql, &schema);
    let fired = diags.iter().any(|d| d.contains("declares"));
    assert!(
        !fired,
        "no col-list should suppress arity check; got {diags:?}",
    );
}
#[test] #[test]
fn alias_in_inner_subquery_does_not_affect_outer_aliases() { fn alias_in_inner_subquery_does_not_affect_outer_aliases() {
// The inner `AS y` is inside parens (depth > 0) and // The inner `AS y` is inside parens (depth > 0) and