diff --git a/src/dsl/walker/context.rs b/src/dsl/walker/context.rs index 130e147..807f41e 100644 --- a/src/dsl/walker/context.rs +++ b/src/dsl/walker/context.rs @@ -164,6 +164,56 @@ pub struct WalkContext<'a> { /// on top. Always non-empty: the bottom frame is created at /// `WalkContext::new` / `with_schema` time and never popped. pub from_scope_stack: Vec, + /// Diagnostics emitted *during* the walk by node handlers + /// that have context the post-walk path scanners can no + /// longer reconstruct (notably the §10.3 CTE harvest, which + /// runs at body-frame exit and has direct access to both + /// the declared col-list and the derived columns). The + /// walker's top-level `walk` function drains this on + /// successful parses and folds the entries into the final + /// diagnostic vector. + pub pending_diagnostics: Vec, + /// Set by the `writes_cte_name` ident path right after the + /// placeholder `CteBinding` is pushed onto the outer frame. + /// Tells the very next `walk_scoped_subgrammar` invocation + /// that the body it's about to walk is a CTE body and that, + /// on `Matched` exit, it should run the §10.3 harvest into + /// the recorded placeholder. `cte_column` idents (the + /// optional `(c1, c2)` list between the cte name and `AS`) + /// append to `col_list` as they're seen. + /// + /// CTE syntax has no intervening `ScopedSubgrammar` between + /// the cte-name ident and the body, so the timing is + /// deterministic. Cleared by `walk_scoped_subgrammar` whether + /// or not the inner walk matched (a speculatively-walked + /// then-rolled-back body must not leave a stale request). + pub pending_cte_harvest: Option, +} + +/// Bookkeeping for an in-progress CTE harvest (ADR-0032 §10.3 +/// stage 2). +/// +/// The `writes_cte_name` ident sets one of these after pushing +/// the placeholder `CteBinding`; the next +/// `walk_scoped_subgrammar` invocation takes it and runs the +/// harvest after the body matches. 
+#[derive(Debug, Clone)] +pub struct PendingCteHarvest { + /// Index of the placeholder `CteBinding` in the *outer* + /// frame's `cte_bindings`. The outer frame is + /// `from_scope_stack[len() - 2]` at the moment the body's + /// frame is on top. + pub placeholder_index: usize, + /// Explicit `(c1, c2, …)` rename list — empty when the CTE + /// declared no column list. The harvest's derived column + /// names are overridden positionally by this list per + /// ADR-0032 §10.3. + pub col_list: Vec, + /// CTE name, interpolated into `cte_arity_mismatch`. + pub cte_name: String, + /// Span of the cte_name ident — the anchor for that diagnostic + /// when the col-list arity disagrees with the body's derived arity. + pub cte_name_span: (usize, usize), } impl<'a> WalkContext<'a> { @@ -185,6 +235,8 @@ impl<'a> WalkContext<'a> { user_listed_columns: None, subgrammar_depth: 0, from_scope_stack: vec![ScopeFrame::default()], + pending_diagnostics: Vec::new(), + pending_cte_harvest: None, } } @@ -205,6 +257,8 @@ impl<'a> WalkContext<'a> { user_listed_columns: None, subgrammar_depth: 0, from_scope_stack: vec![ScopeFrame::default()], + pending_diagnostics: Vec::new(), + pending_cte_harvest: None, } } } diff --git a/src/dsl/walker/driver.rs b/src/dsl/walker/driver.rs index 5575c48..1de6c7f 100644 --- a/src/dsl/walker/driver.rs +++ b/src/dsl/walker/driver.rs @@ -435,17 +435,15 @@ fn walk_ident( { binding.alias = Some(text.clone()); } - // ADR-0032 §10.3 stage 1: push a placeholder CteBinding into - // the top (outer) frame before the body's ScopedSubgrammar - // pushes its own frame. The body can self-reference the CTE - // name as a table source (WITH RECURSIVE), and downstream - // CTE-name validators see the binding. The body-frame-exit - // harvest (§10.3 stage 2) is structurally hooked but the six - // derivation rules for output columns are pending — the - // placeholder's `columns` stays empty until a later sub-phase - // wires the harvest. 
Diagnostic / completion machinery in 2d - // and 2e can already use the name-presence to resolve "is - // this an in-scope CTE?". + // ADR-0032 §10.3 stage 1 + stage 2: push a placeholder + // CteBinding into the top (outer) frame before the body's + // ScopedSubgrammar pushes its own frame. The body can + // self-reference the CTE name as a table source (WITH + // RECURSIVE), and downstream CTE-name validators see the + // binding. Then arm `pending_cte_harvest` so the next + // ScopedSubgrammar (which is structurally guaranteed to be + // the CTE body — no intervening scoped subgrammar in CTE + // syntax) runs the harvest at body-frame exit. if writes_cte_name && let Some(frame) = ctx.from_scope_stack.last_mut() { @@ -455,6 +453,24 @@ fn walk_ident( name: text.clone(), columns: Vec::new(), }); + let placeholder_index = frame.cte_bindings.len() - 1; + ctx.pending_cte_harvest = + Some(crate::dsl::walker::context::PendingCteHarvest { + placeholder_index, + col_list: Vec::new(), + cte_name: text.clone(), + cte_name_span: (start, end), + }); + } + // ADR-0032 §10.3: the optional `(c1, c2, …)` rename list + // between the cte name and `AS`. Each `cte_column` ident + // appends to the pending harvest's col_list; the harvest + // applies them as positional renames on the derived + // columns. + if role == "cte_column" + && let Some(pending) = ctx.pending_cte_harvest.as_mut() + { + pending.col_list.push(text.clone()); } // ADR-0032 §10.4: projection-list alias accumulator for // ORDER BY completion candidates. @@ -1038,14 +1054,465 @@ fn walk_scoped_subgrammar( }), }; } + // ADR-0032 §10.3 stage 2 — pick up a pending CTE harvest + // request armed by the immediately-preceding cte_name ident. + // Clear unconditionally: a non-matching body must not leave + // stale state for a later unrelated ScopedSubgrammar. 
+ let pending_cte = ctx.pending_cte_harvest.take(); ctx.from_scope_stack .push(crate::dsl::walker::context::ScopeFrame::default()); let result = walk_node(source, pos, inner, ctx, path, per_byte); + + // Harvest happens only on a fully-matched body. Speculative + // walks that NoMatch / Incomplete / Fail leave the placeholder + // empty (the outer-frame state is also discarded in the + // speculative path, so this is correct). + if let (Some(req), NodeWalkResult::Matched { end, .. }) = + (pending_cte, &result) + { + run_cte_harvest(ctx, path, source, pos, *end, &req); + } + ctx.from_scope_stack.pop(); ctx.subgrammar_depth = saved_depth; result } +/// Run the §10.3 stage-2 harvest after a CTE body's +/// `ScopedSubgrammar` matched, while the body's frame is still +/// on top of `from_scope_stack`. +/// +/// Reads the body's projection items out of the matched path's +/// byte range, classifies each via the six derivation rules, +/// applies any `(col-list)` positional rename, and writes the +/// derived columns into the placeholder `CteBinding` in the +/// outer (now `len - 2`) frame. +fn run_cte_harvest( + ctx: &mut WalkContext, + path: &MatchedPath, + _source: &str, + body_start: usize, + body_end: usize, + req: &crate::dsl::walker::context::PendingCteHarvest, +) { + use crate::dsl::walker::context::{CteColumn, ScopeFrame}; + use crate::dsl::walker::outcome::{MatchedItem, MatchedKind}; + + // The body's frame is at the top of the stack while the + // harvest runs. Need this for from_scope lookups in the + // derivation rules. + let body_frame: &ScopeFrame = match ctx.from_scope_stack.last() { + Some(f) => f, + None => return, + }; + + // Compute body_depth = paren-balance over path items strictly + // before body_start. The `(` immediately preceding the body + // is at the outer depth and increments to the body's depth; + // body_start is INSIDE that paren. 
+ let mut prefix_depth: i32 = 0; + for item in &path.items { + if item.span.0 >= body_start { + break; + } + match item.kind { + MatchedKind::Punct('(') => prefix_depth += 1, + MatchedKind::Punct(')') => prefix_depth -= 1, + _ => {} + } + } + let body_depth = prefix_depth; + + // The path items strictly inside the body byte range. + let body_items: Vec<&MatchedItem> = path + .items + .iter() + .filter(|i| i.span.0 >= body_start && i.span.1 <= body_end) + .collect(); + + // Track depth within the body. First leg's projection list + // begins at the first body-depth SELECT and ends at the + // first body-depth FROM/WHERE/etc OR set-op keyword OR end. + let mut depth = body_depth; + let mut select_idx: Option = None; + let mut end_idx: usize = body_items.len(); + for (i, item) in body_items.iter().enumerate() { + let cur = depth; + match item.kind { + MatchedKind::Punct('(') => depth += 1, + MatchedKind::Punct(')') => depth -= 1, + _ => {} + } + if cur != body_depth { + continue; + } + match item.kind { + MatchedKind::Word("select") if select_idx.is_none() => { + select_idx = Some(i + 1); // start of projection list + } + MatchedKind::Word( + "from" | "where" | "group" | "having" | "order" + | "limit" | "offset" | "union" | "intersect" + | "except", + ) if select_idx.is_some() => { + end_idx = i; + break; + } + _ => {} + } + } + let Some(start_idx) = select_idx else { + return; + }; + if start_idx >= end_idx { + return; + } + + // Split the projection-list slice into individual items by + // commas at body_depth. 
+ let mut item_slices: Vec<&[&MatchedItem]> = Vec::new(); + let mut depth_scan = body_depth; + let mut slice_start = start_idx; + for i in start_idx..end_idx { + let cur = depth_scan; + match body_items[i].kind { + MatchedKind::Punct('(') => depth_scan += 1, + MatchedKind::Punct(')') => depth_scan -= 1, + MatchedKind::Punct(',') if cur == body_depth => { + item_slices.push(&body_items[slice_start..i]); + slice_start = i + 1; + } + _ => {} + } + } + if slice_start < end_idx { + item_slices.push(&body_items[slice_start..end_idx]); + } + + // Classify each projection item per ADR-0032 §10.3. + let mut derived: Vec = Vec::new(); + for slice in item_slices { + classify_projection_item( + slice, + body_frame, + &ctx.from_scope_stack, + &mut derived, + ); + } + + // Apply (c1, c2, …) positional rename if provided. Types + // are preserved; names overridden by the col_list. Arity + // mismatch is emitted as `diagnostic.cte_arity_mismatch` + // on the cte_name span before any padding/truncation so + // the diagnostic carries the *true* derived count. + if !req.col_list.is_empty() { + let declared = req.col_list.len(); + let actual = derived.len(); + if declared != actual { + use crate::dsl::walker::outcome::{Diagnostic, Severity}; + ctx.pending_diagnostics.push(Diagnostic { + severity: Severity::Error, + span: req.cte_name_span, + message: crate::friendly::translate( + "diagnostic.cte_arity_mismatch", + &[ + ("cte", &req.cte_name as &dyn std::fmt::Display), + ("declared", &declared as &dyn std::fmt::Display), + ("actual", &actual as &dyn std::fmt::Display), + ], + ), + }); + } + for (i, name) in req.col_list.iter().enumerate() { + if let Some(col) = derived.get_mut(i) { + col.name = Some(name.clone()); + } else { + // col_list has MORE entries than derived items — + // synthesize a typeless slot with the declared + // name so qualified-prefix completion still + // surfaces it. 
+ derived.push(CteColumn { + name: Some(name.clone()), + type_: None, + }); + } + } + // Truncate any extras when derived > declared, so the + // CTE's externally visible arity matches the col-list + // declaration. (The diagnostic above already captured + // the original derived count.) + if derived.len() > declared { + derived.truncate(declared); + } + } + + // Write into the outer frame's placeholder. + let stack_len = ctx.from_scope_stack.len(); + if stack_len >= 2 + && let Some(outer) = ctx.from_scope_stack.get_mut(stack_len - 2) + && let Some(placeholder) = + outer.cte_bindings.get_mut(req.placeholder_index) + { + placeholder.columns = derived; + } +} + +/// Classify one projection item by examining its leading +/// terminals and append its derived CteColumn(s) to `out`. The +/// six rules of ADR-0032 §10.3. +fn classify_projection_item( + slice: &[&crate::dsl::walker::outcome::MatchedItem], + body_frame: &crate::dsl::walker::context::ScopeFrame, + scope_stack: &[crate::dsl::walker::context::ScopeFrame], + out: &mut Vec, +) { + use crate::dsl::grammar::IdentSource; + use crate::dsl::walker::context::CteColumn; + use crate::dsl::walker::outcome::MatchedKind; + + // Strip an optional trailing `[AS] alias` from the slice so + // shape detection can examine just the expression part. + let (expr_slice, alias) = strip_trailing_alias(slice); + + // Rule 1: `*` — every column from body_frame.from_scope. + // When a binding represents a CTE reference (its columns are + // empty because it wasn't a base-table lookup), resolve + // through to the in-scope CteBinding so nested CTEs project + // correctly. + if expr_slice.len() == 1 + && matches!(expr_slice[0].kind, MatchedKind::Punct('*')) + { + for binding in &body_frame.from_scope { + for col in expand_binding(binding, scope_stack) { + out.push(col); + } + } + return; + } + + // Rule 2: `t.*` — every column from binding `t`. 
+ if expr_slice.len() == 3 + && matches!( + expr_slice[0].kind, + MatchedKind::Ident { role: "qualified_star_qualifier", .. } + ) + && matches!(expr_slice[1].kind, MatchedKind::Punct('.')) + && matches!(expr_slice[2].kind, MatchedKind::Punct('*')) + { + let qual = &expr_slice[0].text; + if let Some(binding) = body_frame.from_scope.iter().find(|b| { + b.alias + .as_deref() + .is_some_and(|a| a.eq_ignore_ascii_case(qual)) + || b.table.eq_ignore_ascii_case(qual) + }) { + for col in expand_binding(binding, scope_stack) { + out.push(col); + } + } + return; + } + + // Rule 3: bare `col` — a single sql_expr_ident terminal. + if expr_slice.len() == 1 + && matches!( + expr_slice[0].kind, + MatchedKind::Ident { + source: IdentSource::Columns, + role: "sql_expr_ident", + } + ) + { + let col_text = &expr_slice[0].text; + let resolved_type = resolve_bare_column_type_in_frame( + body_frame, + scope_stack, + col_text, + ); + let name = alias.unwrap_or_else(|| col_text.clone()); + out.push(CteColumn { + name: Some(name), + type_: resolved_type, + }); + return; + } + + // Rule 4: qualified `t.col` — three-token shape with the + // sql_expr_qualified_ref role on the tail ident. + if expr_slice.len() == 3 + && matches!( + expr_slice[0].kind, + MatchedKind::Ident { + source: IdentSource::Columns, + role: "sql_expr_ident", + } + ) + && matches!(expr_slice[1].kind, MatchedKind::Punct('.')) + && matches!( + expr_slice[2].kind, + MatchedKind::Ident { + source: IdentSource::Columns, + role: "sql_expr_qualified_ref", + } + ) + { + let qual = &expr_slice[0].text; + let col_text = &expr_slice[2].text; + let resolved_type = resolve_qualified_column_type( + body_frame, + scope_stack, + qual, + col_text, + ); + let name = alias.unwrap_or_else(|| col_text.clone()); + out.push(CteColumn { + name: Some(name), + type_: resolved_type, + }); + return; + } + + // Rule 5 / 6: computed expression — name = alias if present, + // else None. Type = None either way (ADR-0032 Amendment 1). 
+ out.push(CteColumn { + name: alias, + type_: None, + }); +} + +/// Peel a trailing `[AS] alias` off the projection-item slice +/// if present. Returns (expr_slice_without_alias, Some(alias)) +/// or (slice, None) if no alias is detected. +fn strip_trailing_alias<'a>( + slice: &'a [&'a crate::dsl::walker::outcome::MatchedItem], +) -> ( + &'a [&'a crate::dsl::walker::outcome::MatchedItem], + Option, +) { + use crate::dsl::grammar::IdentSource; + use crate::dsl::walker::outcome::MatchedKind; + + if slice.is_empty() { + return (slice, None); + } + let last = slice[slice.len() - 1]; + if matches!( + last.kind, + MatchedKind::Ident { + source: IdentSource::NewName, + role: "projection_alias", + } + ) { + // Optional preceding `AS` keyword. + if slice.len() >= 2 + && matches!( + slice[slice.len() - 2].kind, + MatchedKind::Word("as") + ) + { + return ( + &slice[..slice.len() - 2], + Some(last.text.clone()), + ); + } + return (&slice[..slice.len() - 1], Some(last.text.clone())); + } + (slice, None) +} + +fn resolve_bare_column_type_in_frame( + frame: &crate::dsl::walker::context::ScopeFrame, + scope_stack: &[crate::dsl::walker::context::ScopeFrame], + column: &str, +) -> Option { + let mut found = None; + for binding in &frame.from_scope { + for col in expand_binding(binding, scope_stack) { + if col + .name + .as_deref() + .is_some_and(|n| n.eq_ignore_ascii_case(column)) + { + if found.is_some() { + return None; // ambiguous — no type + } + found = col.type_; + } + } + } + found +} + +fn resolve_qualified_column_type( + frame: &crate::dsl::walker::context::ScopeFrame, + scope_stack: &[crate::dsl::walker::context::ScopeFrame], + qualifier: &str, + column: &str, +) -> Option { + let binding = frame.from_scope.iter().find(|b| { + b.alias + .as_deref() + .is_some_and(|a| a.eq_ignore_ascii_case(qualifier)) + || b.table.eq_ignore_ascii_case(qualifier) + })?; + expand_binding(binding, scope_stack) + .into_iter() + .find(|c| { + c.name + .as_deref() + .is_some_and(|n| 
n.eq_ignore_ascii_case(column)) + }) + .and_then(|c| c.type_) +} + +/// Resolve a `TableBinding` to its column list as `CteColumn`s. +/// +/// Base-table bindings carry typed `TableColumn`s populated from +/// the schema cache — convert them directly. CTE-source bindings +/// (the binding's `columns` is empty because the FROM name +/// didn't match a base table) look up the matching `CteBinding` +/// in any in-scope frame and return its `columns` verbatim. +/// +/// This is the bridge that lets a nested CTE's outer harvest see +/// the inner CTE's derived columns: the body's `FROM inner` +/// produces an empty-columns binding, but `expand_binding` +/// resolves it through the inner CteBinding (which has its +/// derived columns by the time the outer harvest runs, because +/// the inner body's harvest fires on inner-body exit, before the +/// outer body exits). +/// +/// A self-reference inside a `WITH RECURSIVE` body sees the +/// placeholder (empty columns) and the resolution returns empty +/// — that's correct, since the harvest only fires on the +/// non-recursive (first) leg per §10.3. 
+fn expand_binding( + binding: &crate::dsl::walker::context::TableBinding, + scope_stack: &[crate::dsl::walker::context::ScopeFrame], +) -> Vec { + use crate::dsl::walker::context::CteColumn; + + if !binding.columns.is_empty() { + return binding + .columns + .iter() + .map(|c| CteColumn { + name: Some(c.name.clone()), + type_: Some(c.user_type), + }) + .collect(); + } + for frame in scope_stack.iter().rev() { + if let Some(cte) = frame + .cte_bindings + .iter() + .find(|c| c.name.eq_ignore_ascii_case(&binding.table)) + { + return cte.columns.clone(); + } + } + Vec::new() +} + fn merge_expected(dst: &mut Vec, src: Vec) { for e in src { if !dst.contains(&e) { @@ -1417,10 +1884,13 @@ mod tests { ); assert_eq!(ctes.len(), 1); assert_eq!(ctes[0].name, "cte_x"); - // Output column derivation pending — placeholder's - // columns stays empty until the §10.3 stage-2 harvest - // is implemented. - assert!(ctes[0].columns.is_empty()); + // §10.3 stage-2 harvest produces one CteColumn per + // projection item. `SELECT 1` is a computed expression + // without an alias → `CteColumn { name: None, type_: + // None }`. + assert_eq!(ctes[0].columns.len(), 1); + assert!(ctes[0].columns[0].name.is_none()); + assert!(ctes[0].columns[0].type_.is_none()); } #[test] @@ -1490,4 +1960,246 @@ mod tests { ); assert_eq!(aliases, vec!["outer_b".to_string()]); } + + // ---- §10.3 stage-2 CTE column-derivation harvest ---- + + /// Schema-aware walk variant — returns the outer frame's + /// `cte_bindings` after walking the input. 
+ fn cte_bindings_after_walk_with_schema( + input: &str, + schema: &crate::completion::SchemaCache, + ) -> Vec { + let mut ctx = WalkContext::with_schema(schema); + ctx.mode = crate::mode::Mode::Advanced; + let mut path = MatchedPath::new(); + let mut per_byte = Vec::new(); + let result = walk_node( + input, + 0, + &crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT, + &mut ctx, + &mut path, + &mut per_byte, + ); + assert!( + matches!(result, NodeWalkResult::Matched { .. }), + "{input:?} should match: got {result:?}" + ); + ctx.from_scope_stack[0].cte_bindings.clone() + } + + fn schema_users() -> crate::completion::SchemaCache { + use crate::completion::{SchemaCache, TableColumn}; + use crate::dsl::types::Type; + let mut s = SchemaCache::default(); + s.tables.push("users".to_string()); + s.columns.push("id".to_string()); + s.columns.push("name".to_string()); + s.columns.push("age".to_string()); + s.table_columns.insert( + "users".to_string(), + vec![ + TableColumn { name: "id".to_string(), user_type: Type::Int }, + TableColumn { name: "name".to_string(), user_type: Type::Text }, + TableColumn { name: "age".to_string(), user_type: Type::Int }, + ], + ); + s + } + + #[test] + fn cte_harvest_star_expands_from_scope() { + // Rule 1: `SELECT *` body — derived columns = every + // column from the body frame's from_scope, with types. 
+ let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select * from users) select * from x", + &schema, + ); + assert_eq!(ctes.len(), 1); + assert_eq!(ctes[0].columns.len(), 3); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id")); + assert_eq!( + ctes[0].columns[0].type_, + Some(crate::dsl::types::Type::Int), + ); + assert_eq!(ctes[0].columns[1].name.as_deref(), Some("name")); + assert_eq!( + ctes[0].columns[1].type_, + Some(crate::dsl::types::Type::Text), + ); + assert_eq!(ctes[0].columns[2].name.as_deref(), Some("age")); + } + + #[test] + fn cte_harvest_qualified_star_expands_one_binding() { + // Rule 2: `t.*` — every column from binding `t`. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select u.* from users u) select * from x", + &schema, + ); + assert_eq!(ctes.len(), 1); + assert_eq!(ctes[0].columns.len(), 3); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id")); + } + + #[test] + fn cte_harvest_bare_ref_with_alias() { + // Rule 5 variant: `col AS alias` — name = alias, type + // preserved from the source column. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select name as label from users) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("label")); + assert_eq!( + ctes[0].columns[0].type_, + Some(crate::dsl::types::Type::Text), + ); + } + + #[test] + fn cte_harvest_bare_ref_without_alias_uses_column_name() { + // Rule 3: bare `col` — name = column name, type from + // source column. 
+ let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select age from users) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("age")); + assert_eq!( + ctes[0].columns[0].type_, + Some(crate::dsl::types::Type::Int), + ); + } + + #[test] + fn cte_harvest_qualified_ref() { + // Rule 4: `t.col` — name = column, type from binding. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select u.name from users u) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("name")); + assert_eq!( + ctes[0].columns[0].type_, + Some(crate::dsl::types::Type::Text), + ); + } + + #[test] + fn cte_harvest_computed_no_alias_is_unnamed() { + // Rule 6: computed expression without alias → name = + // None, type = None. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select age + 1 from users) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert!(ctes[0].columns[0].name.is_none()); + assert!(ctes[0].columns[0].type_.is_none()); + } + + #[test] + fn cte_harvest_computed_with_alias() { + // Rule 5: computed expression with alias → name = + // alias, type = None (Amendment 1). + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select age + 1 as years from users) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("years")); + assert!(ctes[0].columns[0].type_.is_none()); + } + + #[test] + fn cte_harvest_compound_takes_first_leg() { + // For UNION / INTERSECT / EXCEPT bodies, columns come + // from the first leg per ADR-0032 §10.3. 
+ let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x as (select id from users union select age from users) select * from x", + &schema, + ); + // First leg: `select id from users` → one column `id`, + // type Int. Second leg ignored. + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id")); + } + + #[test] + fn cte_harvest_recursive_uses_non_recursive_leg() { + // WITH RECURSIVE — the first (non-recursive) leg + // dictates columns. The recursive leg self-references + // the CTE name; we don't try to introspect. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with recursive r as (select id from users union all select id from r) select * from r", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 1); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("id")); + } + + #[test] + fn cte_harvest_sibling_b_sees_a_columns() { + // Sibling CTEs at the same level. When `b`'s body + // walks, the outer scope's cte_bindings already + // contains `a` (with harvested columns) and `b`'s + // placeholder. `b`'s `FROM a` produces an empty-columns + // TableBinding which `expand_binding` resolves through + // the in-scope `a` CteBinding. So `*` in `b`'s body + // expands to `a`'s columns. 
+ let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with a as (select id, name from users), b as (select * from a) select * from b", + &schema, + ); + let b = ctes.iter().find(|c| c.name == "b").expect("b binding"); + assert_eq!(b.columns.len(), 2); + assert_eq!(b.columns[0].name.as_deref(), Some("id")); + assert_eq!( + b.columns[0].type_, + Some(crate::dsl::types::Type::Int), + ); + assert_eq!(b.columns[1].name.as_deref(), Some("name")); + assert_eq!( + b.columns[1].type_, + Some(crate::dsl::types::Type::Text), + ); + } + + #[test] + fn cte_harvest_col_list_renames_positionally() { + // `WITH x(a, b, c) AS (SELECT * FROM users)` — + // positional rename overrides derived names; types + // preserved. + let schema = schema_users(); + let ctes = cte_bindings_after_walk_with_schema( + "with x (a, b, c) as (select * from users) select * from x", + &schema, + ); + assert_eq!(ctes[0].columns.len(), 3); + assert_eq!(ctes[0].columns[0].name.as_deref(), Some("a")); + assert_eq!( + ctes[0].columns[0].type_, + Some(crate::dsl::types::Type::Int), + ); + assert_eq!(ctes[0].columns[1].name.as_deref(), Some("b")); + assert_eq!( + ctes[0].columns[1].type_, + Some(crate::dsl::types::Type::Text), + ); + assert_eq!(ctes[0].columns[2].name.as_deref(), Some("c")); + } } diff --git a/src/dsl/walker/mod.rs b/src/dsl/walker/mod.rs index 4e589f8..5fe46e9 100644 --- a/src/dsl/walker/mod.rs +++ b/src/dsl/walker/mod.rs @@ -1797,8 +1797,16 @@ pub fn walk<'a>( // operator slot is highlighted rather than the engine // wording shown at execution time. d.extend(compound_arity_diagnostics(&path)); + // ADR-0032 §10.3 / §11.2 — diagnostics emitted during + // the walk by node handlers with direct context the + // post-walk passes can't reconstruct (primarily the + // CTE harvest's arity-check at body-frame exit). Drain + // unconditionally so accumulated entries don't leak + // into a subsequent walk via a re-used WalkContext. 
+ d.extend(std::mem::take(&mut ctx.pending_diagnostics)); d } else { + ctx.pending_diagnostics.clear(); Vec::new() }; // Expression WARNING diagnostics — type-mismatched @@ -4038,6 +4046,76 @@ mod tests { ); } + // ---- ADR-0032 §11.2 — cte_arity_mismatch ---- + + #[test] + fn cte_arity_mismatch_when_col_list_shorter() { + // `WITH x(a, b) AS (SELECT 1, 2, 3)` — declared 2, + // derived 3 → fires. + let schema = schema_with("base", &[("id", Type::Int)]); + let diags = diag_keys( + "with x (a, b) as (select 1, 2, 3) select * from x", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("CTE `x`") + && d.contains("declares 2 columns") + && d.contains("body has 3") + }), + "expected cte_arity_mismatch (declared 2, actual 3); got {diags:?}", + ); + } + + #[test] + fn cte_arity_mismatch_when_col_list_longer() { + // `WITH x(a, b, c) AS (SELECT 1)` — declared 3, + // derived 1 → fires. + let schema = schema_with("base", &[("id", Type::Int)]); + let diags = diag_keys( + "with x (a, b, c) as (select 1) select * from x", + &schema, + ); + assert!( + diags.iter().any(|d| { + d.contains("CTE `x`") + && d.contains("declares 3 columns") + && d.contains("body has 1") + }), + "expected cte_arity_mismatch (declared 3, actual 1); got {diags:?}", + ); + } + + #[test] + fn cte_arity_match_no_diagnostic() { + // `WITH x(a, b) AS (SELECT 1, 2)` — matched arity, no + // diagnostic. + let schema = schema_with("base", &[("id", Type::Int)]); + let diags = diag_keys( + "with x (a, b) as (select 1, 2) select * from x", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("declares")), + "matched arity should not fire; got {diags:?}", + ); + } + + #[test] + fn cte_arity_no_col_list_no_diagnostic() { + // No explicit col-list → no arity check (derived + // columns are the canonical view). 
+ let schema = schema_with("base", &[("id", Type::Int)]); + let diags = diag_keys( + "with x as (select 1, 2, 3) select * from x", + &schema, + ); + assert!( + !diags.iter().any(|d| d.contains("declares")), + "no col-list should suppress arity check; got {diags:?}", + ); + } + #[test] fn alias_in_inner_subquery_does_not_affect_outer_aliases() { // The inner `AS y` is inside parens (depth > 0) and