walker: populate cte_bindings placeholders + projection_aliases (ADR-0032 §10.3 stage 1 / §10.4)

Sub-phase 2b checkpoints 4 and 5 combined — adds the
placeholder CTE binding push (§10.3 stage 1) and the
projection alias accumulator (§10.4).

Node::Ident gains two more flags, mechanically applied to
every existing site:

- `writes_cte_name: bool` — push a placeholder `CteBinding`
  (name only, empty columns) onto the top `ScopeFrame`'s
  `cte_bindings`. Set on `CTE_NAME_IDENT` in sql_select.rs.
  Fires BEFORE the body's `ScopedSubgrammar` enters (the
  CTE-def Seq's ident slot precedes the body's `(`), so the
  body can self-reference the CTE name as a valid table source
  (WITH RECURSIVE).
- `writes_projection_alias: bool` — append the matched name to
  the top frame's `projection_aliases`. Set on
  `PROJECTION_BARE_ALIAS_IDENT` so both the AS-form
  (`a AS alpha`) and bare-form (`a alpha`) paths capture
  cleanly. The ident is shared by both paths through
  `PROJECTION_AS_ALIAS` and the lookahead factory, so
  capturing on the ident itself covers both forms with no
  duplication.

The §10.3 stage-2 harvest (deriving CTE output columns from the
body's projection per the six derivation rules in the ADR's
table) is deferred — the placeholder's `columns`
stays empty until the harvest is wired. This is a deliberate
scope limit: the presence of the placeholder name is sufficient for
the schema-existence diagnostic (2d) to recognize CTE names as
valid table sources, and the qualified-prefix completion (2e)
will populate the columns when the harvest hook is added there.
Tests below assert the placeholder-name behavior; the
column-derivation tests from plan §2b's exit gate will be
satisfied incrementally as later sub-phases need them.

Tests (8 new, all green):

- Single CTE → one placeholder binding with the matched name.
- Multiple CTEs → placeholders in declaration order.
- Recursive CTE → name visible inside body (the body's
  `from r` reference parses; verified by the walk completing).
- Projection aliases via AS form → captured into the top
  frame's `projection_aliases`.
- Projection aliases via bare form → captured.
- Mixed alias forms → captured in projection order, with
  unaliased projection items absent from the alias list.
- No aliases → empty `projection_aliases`.
- CTE body aliases do not leak to outer scope (the body's
  frame pops on `ScopedSubgrammar` exit, taking its
  projection_aliases with it).

All 1358 previous tests still pass. Test totals: 1366
passing, 0 failed, 1 ignored. Clippy clean.

This closes out the scope-accumulator side of sub-phase 2b.
The remaining 2b-style work — full CTE column-derivation
harvest per §10.3's six rules — folds into 2d (where the
arity-check pass needs declared-vs-derived column counts) and
2e (where qualified-prefix completion needs CTE columns).
This commit is contained in:
claude@clouddev1
2026-05-20 15:29:08 +00:00
parent b522d09f5a
commit 4ff054ca75
9 changed files with 247 additions and 4 deletions
+147
View File
@@ -185,6 +185,8 @@ fn walk_node_inner(
writes_column,
writes_user_listed_column,
writes_table_alias,
writes_cte_name,
writes_projection_alias,
} => walk_ident(
source,
pos,
@@ -195,6 +197,8 @@ fn walk_node_inner(
*writes_column,
*writes_user_listed_column,
*writes_table_alias,
*writes_cte_name,
*writes_projection_alias,
ctx,
path,
per_byte,
@@ -369,6 +373,8 @@ fn walk_ident(
writes_column: bool,
writes_user_listed_column: bool,
writes_table_alias: bool,
writes_cte_name: bool,
writes_projection_alias: bool,
ctx: &mut WalkContext,
path: &mut MatchedPath,
per_byte: &mut Vec<ByteClass>,
@@ -429,6 +435,34 @@ fn walk_ident(
{
binding.alias = Some(text.clone());
}
// ADR-0032 §10.3 stage 1: push a placeholder CteBinding into
// the top (outer) frame before the body's ScopedSubgrammar
// pushes its own frame. The body can self-reference the CTE
// name as a table source (WITH RECURSIVE), and downstream
// CTE-name validators see the binding. The body-frame-exit
// harvest (§10.3 stage 2) is structurally hooked but the six
// derivation rules for output columns are pending — the
// placeholder's `columns` stays empty until a later sub-phase
// wires the harvest. Diagnostic / completion machinery in 2d
// and 2e can already use the name-presence to resolve "is
// this an in-scope CTE?".
if writes_cte_name
&& let Some(frame) = ctx.from_scope_stack.last_mut()
{
frame
.cte_bindings
.push(crate::dsl::walker::context::CteBinding {
name: text.clone(),
columns: Vec::new(),
});
}
// ADR-0032 §10.4: projection-list alias accumulator for
// ORDER BY completion candidates.
if writes_projection_alias
&& let Some(frame) = ctx.from_scope_stack.last_mut()
{
frame.projection_aliases.push(text.clone());
}
if writes_column && matches!(src, crate::dsl::grammar::IdentSource::Columns) {
ctx.current_column = ctx.current_table_columns.as_ref().and_then(|cols| {
cols.iter()
@@ -1343,4 +1377,117 @@ mod tests {
let bindings = from_scope_after_walk("select 1");
assert!(bindings.is_empty());
}
// ---- cte_bindings & projection_aliases (ADR-0032 §10.3 / §10.4) ----
/// Test helper: runs the walker over `input` with the top-level
/// SELECT grammar, asserts that the walk matched, and returns clones
/// of the `cte_bindings` and `projection_aliases` stored on the
/// scope frame at index 0 of `from_scope_stack`.
fn frame_state_after_walk(
    input: &str,
) -> (
    Vec<crate::dsl::walker::context::CteBinding>,
    Vec<String>,
) {
    let mut walk_ctx = WalkContext::new();
    let mut matched_path = MatchedPath::new();
    let mut byte_classes = Vec::new();

    let result = walk_node(
        input,
        0,
        &crate::dsl::grammar::sql_select::SQL_SELECT_STATEMENT,
        &mut walk_ctx,
        &mut matched_path,
        &mut byte_classes,
    );
    // Any non-matching outcome is a test failure; surface both the
    // input and the walker's result in the panic message.
    assert!(
        matches!(result, NodeWalkResult::Matched { .. }),
        "{input:?} should match: got {result:?}"
    );

    let outer_frame = &walk_ctx.from_scope_stack[0];
    let ctes = outer_frame.cte_bindings.clone();
    let aliases = outer_frame.projection_aliases.clone();
    (ctes, aliases)
}
#[test]
fn cte_name_pushes_placeholder_binding() {
    // A single CTE definition should leave exactly one binding on
    // the outer frame, carrying the CTE's name.
    let sql = "with cte_x as (select 1) select * from cte_x";
    let (bindings, _aliases) = frame_state_after_walk(sql);

    assert_eq!(bindings.len(), 1);
    let placeholder = &bindings[0];
    assert_eq!(placeholder.name, "cte_x");
    // The binding is a name-only placeholder: output columns are
    // not derived yet (the §10.3 stage-2 harvest is pending), so
    // the column list must be empty.
    assert!(placeholder.columns.is_empty());
}
#[test]
fn multiple_ctes_push_in_order() {
    // Two CTE definitions should yield two placeholder bindings,
    // ordered the same way they were declared.
    let sql = "with a as (select 1), b as (select 2) select * from b";
    let (bindings, _aliases) = frame_state_after_walk(sql);

    let names: Vec<&str> = bindings
        .iter()
        .map(|binding| binding.name.as_str())
        .collect();
    assert_eq!(names, ["a", "b"]);
}
#[test]
fn recursive_cte_name_visible_in_body() {
    // The placeholder for `r` is pushed before the body's
    // ScopedSubgrammar enters, so the self-reference `from r`
    // inside the body parses. The helper asserts that the walk
    // matched, which is what verifies the parse here.
    let sql = "with recursive r as (select 1 union all select 2 from r) select * from r";
    let (bindings, _aliases) = frame_state_after_walk(sql);

    assert_eq!(bindings.len(), 1);
    assert_eq!(bindings[0].name, "r");
}
#[test]
fn projection_aliases_captured_via_as_form() {
    // `expr AS alias` items are recorded in projection order.
    let sql = "select a as alpha, b as beta from t";
    let (_ctes, captured) = frame_state_after_walk(sql);
    assert_eq!(captured, ["alpha", "beta"]);
}
#[test]
fn projection_aliases_captured_via_bare_form() {
    // `expr alias` (no AS keyword) items are recorded as well.
    let sql = "select a alpha, b beta from t";
    let (_ctes, captured) = frame_state_after_walk(sql);
    assert_eq!(captured, ["alpha", "beta"]);
}
#[test]
fn projection_aliases_mixed_forms() {
    // AS-form and bare-form aliases interleave in projection
    // order; the unaliased item `c` contributes nothing.
    let sql = "select a as alpha, b beta, c, d as delta from t";
    let (_ctes, captured) = frame_state_after_walk(sql);

    let expected = ["alpha", "beta", "delta"];
    assert_eq!(captured, expected);
}
#[test]
fn projection_aliases_empty_when_no_aliases() {
    // A projection list without any aliases records nothing.
    let sql = "select a, b from t";
    let (_ctes, captured) = frame_state_after_walk(sql);
    assert!(captured.is_empty());
}
#[test]
fn cte_body_aliases_do_not_leak_to_outer_scope() {
    // Aliases declared inside the CTE body belong to the body's
    // own scope frame, which is popped when the body exits. Only
    // the outer SELECT's alias should remain on the outer frame.
    let sql = "with x as (select a as inner_a from t) select b as outer_b from x";
    let (_ctes, outer_aliases) = frame_state_after_walk(sql);
    assert_eq!(outer_aliases, ["outer_b"]);
}
}