rdbms-playground/src/dsl/grammar/data.rs

//! Data command nodes (ADR-0024 §migration Phase D).
//!
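//! Data commands at four entry words: `show` (`show data`, `show table`,
//! plus the list-all and per-item `show` forms), `insert`, `update`,
//! `delete`. The `explain` wrapper shapes over these commands are
//! defined here as well. The walker route owns these end-to-end.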
//!
//! Schema awareness (ADR-0024 §Phase D): the DSL value slots are
//! wired to `DynamicSubgrammar(column_value_list)` /
//! `current_column_value` (see `INSERT_VALUES_LIST`,
//! `insert_first_paren`, `PER_COLUMN_VALUE`), so the schema reference
//! that flows through `parse_command` unfolds a typed slot per column:
//! numeric-shape mismatch is caught at parse (`int`/`decimal`/`bool`
//! slots in `shared.rs`) and the full semantic type (`date` / `shortid`
//! format) is validated at bind time. So the simple-mode DSL gives data
//! values per-column feedback end-to-end.
//!
//! The advanced-mode SQL DML surface (`build_sql_insert` /
//! `build_sql_update` below) is a separate path: it executes the
//! validated statement verbatim (ADR-0030 §4) and is NOT yet wired to
//! the typed slots. ADR-0036 closes the resulting value-feedback gap
//! without a grammar change by *capturing* each literal value position
//! at parse (`capture_literal_rows` / `capture_set_literals`) and
//! validating it against the column type in the worker — Phase 3 will
//! later swap that capture for the same typed slots used here, adding
//! live hints/highlighting.

use crate::dsl::command::{Command, Expr, RowFilter, ShowListKind};
use crate::dsl::grammar::{
    CommandNode, IdentSource, Node, NumberValidator, ValidationError, Word, expr,
    shared::{
        FALLBACK_VALUE_LIST, column_value_list, count_tuple_values,
        current_column_value, insert_target_columns,
    },
    sql_delete, sql_insert, sql_select, sql_update,
};
use crate::dsl::walker::context::WalkContext;
use crate::dsl::value::Value;
use crate::dsl::walker::outcome::{MatchedItem, MatchedKind, MatchedPath};

// =================================================================
// Building blocks
// =================================================================

/// Table-name slot that completes from the known tables and binds
/// nothing into the walk context (every `writes_*` flag is `false`).
/// Used by `show table <T>`.
const TABLE_NAME_EXISTING: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables at the table-source slot
    // (ADR-0030 §6 — "every table-source slot"), matching the SQL
    // grammar's `reject_internal_table`. Without this, simple-mode DSL
    // data commands could read/write the internal metadata tables
    // even though advanced-mode SQL rejects them (ADR-0033
    // Amendment 3 / `/runda` finding B).
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Table-name slot variant that populates
/// `WalkContext::current_table_columns` (ADR-0024 §Phase D).
/// Used by `insert into <T> …` so the inner value list can
/// dispatch typed slots per column. Differs from
/// `TABLE_NAME_EXISTING` only in `writes_table: true`.
const TABLE_NAME_INSERT: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
    // finding B) — see `TABLE_NAME_EXISTING`.
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: true,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

// =================================================================
// show — `show (data|table) <T>`
// =================================================================

/// Node sequence for `data <T> [where <expr>] [limit <n>]`. The `show`
/// word itself is not part of this sequence. The slice is also reused
/// by the `explain show data …` wrapper (`EXPLAIN_SHOW_DATA`).
const SHOW_DATA_NODES: &[Node] = &[
    Node::Word(Word::keyword("data")),
    // `writes_table` so the optional `where` expression's
    // column slots resolve against this table for completion.
    TABLE_NAME_WRITES,
    Node::Optional(&WHERE_CLAUSE),
    Node::Optional(&LIMIT_CLAUSE),
];
/// `show data …` alternative of `SHOW_CHOICES`.
const SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);

/// Node sequence for `table <T>`: the keyword followed by a table name
/// that binds nothing into the walk context.
const SHOW_TABLE_NODES: &[Node] = &[
    Node::Word(Word::keyword("table")),
    TABLE_NAME_EXISTING,
];
/// `show table …` alternative of `SHOW_CHOICES`.
const SHOW_TABLE: Node = Node::Seq(SHOW_TABLE_NODES);

// `show tables` / `show relationships` / `show indexes` — the
// list-all forms (V5). Each is a single keyword with no argument;
// the executor lists every item of the kind. Distinct keyword
// tokens (`tables` ≠ `table`), so Choice ordering is irrelevant.
/// `show tables` — bare keyword, no argument.
const SHOW_TABLES: Node = Node::Word(Word::keyword("tables"));
/// `show relationships` — bare keyword, no argument.
const SHOW_RELATIONSHIPS: Node = Node::Word(Word::keyword("relationships"));
/// `show indexes` — bare keyword, no argument.
const SHOW_INDEXES: Node = Node::Word(Word::keyword("indexes"));

// `show relationship <name>` / `show index <name>` — singular
// per-item detail (V5a). The name slot reuses the existing
// completion sources (relationship / index names). Distinct
// keyword tokens from the plurals (`relationship` ≠
// `relationships`), so Choice ordering is irrelevant.
/// Relationship-name slot: completes from `IdentSource::Relationships`,
/// has no validator, and writes nothing into the walk context.
/// `build_show` reads it back through the `relationship_name` role.
const SHOW_RELATIONSHIP_NAME: Node = Node::Ident {
    source: IdentSource::Relationships,
    role: "relationship_name",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
/// Node sequence for `relationship <name>`.
const SHOW_RELATIONSHIP_NODES: &[Node] = &[
    Node::Word(Word::keyword("relationship")),
    SHOW_RELATIONSHIP_NAME,
];
/// `show relationship <name>` alternative of `SHOW_CHOICES`.
const SHOW_RELATIONSHIP: Node = Node::Seq(SHOW_RELATIONSHIP_NODES);

/// Index-name slot: completes from `IdentSource::Indexes`, has no
/// validator, and writes nothing into the walk context. `build_show`
/// reads it back through the `index_name` role.
const SHOW_INDEX_NAME: Node = Node::Ident {
    source: IdentSource::Indexes,
    role: "index_name",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
/// Node sequence for `index <name>`.
const SHOW_INDEX_NODES: &[Node] =
    &[Node::Word(Word::keyword("index")), SHOW_INDEX_NAME];
/// `show index <name>` alternative of `SHOW_CHOICES`.
const SHOW_INDEX: Node = Node::Seq(SHOW_INDEX_NODES);

/// Every form accepted after the `show` word. Each alternative starts
/// with a different keyword (`data`, `table`, `tables`, `relationships`,
/// `indexes`, `relationship`, `index`).
const SHOW_CHOICES: &[Node] = &[
    SHOW_DATA,
    SHOW_TABLE,
    SHOW_TABLES,
    SHOW_RELATIONSHIPS,
    SHOW_INDEXES,
    SHOW_RELATIONSHIP,
    SHOW_INDEX,
];
/// Shape of the `show` command: a choice over `SHOW_CHOICES`.
const SHOW_SHAPE: Node = Node::Choice(SHOW_CHOICES);

// =================================================================
// insert — `insert into <T> (<a>,<b>,…) values (<v>,<v>,…)`
//        | `insert into <T> values (<v>,…)`
//        | `insert into <T> (<v>,…)`
// =================================================================
//
// Forms A (with column list) and C (bare value list) both start
// with `(`. The walker's "first commit wins" Choice semantics
// can't pick between them after the `(` matches, so the first
// paren's contents are resolved by a `Node::Lookahead` factory
// (`insert_first_paren`): it peeks the first token to decide.
//
// - First token is a value literal (number / string /
//   null / true / false) → Form C → the typed `column_value_list`
//   (same dispatch contract as Form B — ADR-0024 §Phase D Form-C
//   type-awareness). Form C values are now type-checked at parse
//   time, not only at bind time.
// - Otherwise (column-name identifier, or an empty paren) →
//   Form A → a repeated column-name list. The idents write
//   `WalkContext::user_listed_columns` so the trailing
//   `values (…)` slots mirror the user's selection.

/// Form A's column-name slot. `static` (not `const`) so the
/// `insert_first_paren` factory can take a `&'static` reference
/// to it when building the repeated list at walk time.
///
/// Matched names carry the `insert_first_item` role (read back by
/// `build_insert`) and set `writes_user_listed_column`.
static FORM_A_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "insert_first_item",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: false,
    writes_user_listed_column: true,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};
/// Separator between Form A column names; `static` for the same
/// `&'static` reason as `FORM_A_COLUMN`.
static INSERT_COMMA: Node = Node::Punct(',');

/// Lookahead factory for the contents of the first `(` after
/// `insert into <T>` (ADR-0024 §Phase D Form-C type-awareness).
///
/// Inspects the first token at `pos`: a value literal means the paren
/// is a Form C value list; anything else means Form A's column-name
/// list.
fn insert_first_paren(ctx: &WalkContext, source: &str, pos: usize) -> Node {
    if !first_paren_item_is_value_literal(source, pos) {
        // Form A (or Form A in progress / empty paren): one or more
        // comma-separated column names.
        return Node::Repeated {
            inner: &FORM_A_COLUMN,
            separator: Some(&INSERT_COMMA),
            min: 1,
        };
    }
    // Form C — bare value list. Arity-gated exactly like Form B's
    // `values (…)`: a correct-count tuple gets the typed per-column
    // slots; a wrong-count tuple routes to the type-blind fallback
    // so it still matches and the arity diagnostic fires (issue #17).
    dsl_insert_value_list(ctx, source, pos)
}

/// Reports whether the first token at or after `pos` (leading
/// whitespace skipped) is a value literal: a string literal, a number
/// literal, or one of the words `null` / `true` / `false` compared
/// ASCII-case-insensitively.
///
/// A `true` result marks the insert paren as a Form C value list. Any
/// other identifier (a column name), punctuation such as `)`, or
/// running out of input returns `false`, which callers treat as Form A.
fn first_paren_item_is_value_literal(source: &str, pos: usize) -> bool {
    use crate::dsl::walker::lex_helpers::{
        consume_ident, consume_number_literal, consume_string_literal,
        skip_whitespace,
    };
    // Identifier-shaped words that are values rather than column names.
    const LITERAL_WORDS: [&str; 3] = ["null", "true", "false"];

    let start = skip_whitespace(source, pos);
    if start >= source.len() {
        // Nothing left to inspect — treat as Form A.
        return false;
    }
    if consume_string_literal(source, start).is_some()
        || consume_number_literal(source, start).is_some()
    {
        return true;
    }
    match consume_ident(source, start) {
        Some((from, to)) => {
            let word = &source[from..to];
            LITERAL_WORDS
                .iter()
                .any(|literal| word.eq_ignore_ascii_case(literal))
        }
        // Punctuation (e.g. `)`) — treat as Form A.
        None => false,
    }
}

/// Contents of the first `(` … `)` after the table name, chosen at walk
/// time by `insert_first_paren` (Form A column list vs. Form C values).
const INSERT_PAREN_LIST: Node = Node::Lookahead(insert_first_paren);

/// Insert value-list arity gate (issue #17) — the simple-mode DSL
/// counterpart of the advanced grammar's `tuple_value_list`
/// (`sql_insert.rs`).
///
/// Picks the node for one insert value tuple starting at `pos`:
///
/// - a tuple whose value count fits the target columns gets the typed
///   per-column slots ([`column_value_list`]);
/// - a wrong-count tuple gets the type-blind [`FALLBACK_VALUE_LIST`], so
///   it still structurally matches and the per-tuple arity diagnostic
///   (ADR-0033 §8.1, made mode-aware for issue #17) fires its friendly
///   message instead of a bare "expected `,`/`)`".
///
/// "Fits" means an exact count for a closed tuple, and "not yet more
/// than the target" for a tuple that is still open.
///
/// Target arity comes from [`insert_target_columns`] — the same source
/// `column_value_list` uses, so gate and slots never disagree. `None`
/// (schemaless / unknown table / all-auto-generated) → fallback: either
/// we can't gate (schemaless) or the all-auto case wants the tuple to
/// match so the diagnostic can explain it.
///
/// **Simple-mode only.** The fallback routing is a simple-mode
/// behaviour. In advanced mode the DSL insert node must stay strict —
/// otherwise a non-SQL shape like Form C (`insert into T (1, 2)`, no
/// `values`) would spuriously match here and be accepted in advanced
/// mode, where SQL requires `values` and the dedicated SQL grammar
/// (`sql_insert.rs`) owns inserts. Keeping advanced strict preserves the
/// pre-#17 advanced behaviour exactly (issue #17).
fn dsl_insert_value_list(ctx: &WalkContext, source: &str, pos: usize) -> Node {
    // Any mode other than simple: always the typed slots, never the
    // fallback.
    if ctx.mode != crate::mode::Mode::Simple {
        return Node::DynamicSubgrammar(column_value_list);
    }
    let target_arity = match insert_target_columns(ctx) {
        Some(cols) => cols.len(),
        None => return FALLBACK_VALUE_LIST,
    };
    let (supplied, closed) = count_tuple_values(source, pos);
    // A closed tuple must match exactly; an open one may still grow, so
    // it is only wrong once it already exceeds the target.
    let arity_mismatch = (closed && supplied != target_arity) || supplied > target_arity;
    if arity_mismatch {
        FALLBACK_VALUE_LIST
    } else {
        Node::DynamicSubgrammar(column_value_list)
    }
}

/// Schema-aware value list, arity-gated (issue #17): a correct-count
/// tuple unfolds to a `Seq` of typed slots per column (`int_slot`,
/// `text_slot`, …); a wrong-count tuple or a schemaless walk falls back
/// to the type-blind `Repeated(VALUE_LITERAL, ',', 1)` shape (ADR-0024
/// §Phase D §column_value_list).
const INSERT_VALUES_LIST: Node = Node::Lookahead(dsl_insert_value_list);

/// Node sequence for `values ( <value list> )`, used after the first
/// paren (Form A's trailing values clause).
const INSERT_OPTIONAL_VALUES_NODES: &[Node] = &[
    Node::Word(Word::keyword("values")),
    Node::Punct('('),
    INSERT_VALUES_LIST,
    Node::Punct(')'),
];
/// Optional `values (…)` suffix: present for Form A, absent for Form C.
const INSERT_OPTIONAL_VALUES: Node = Node::Optional(&Node::Seq(INSERT_OPTIONAL_VALUES_NODES));

/// Node sequence for the paren-first forms (A and C):
/// `( <first-paren list> ) [values ( <value list> )]`.
const INSERT_PAREN_FIRST_NODES: &[Node] = &[
    Node::Punct('('),
    INSERT_PAREN_LIST,
    Node::Punct(')'),
    INSERT_OPTIONAL_VALUES,
];
/// Forms A and C as a single alternative of `INSERT_AFTER_TABLE`.
const INSERT_PAREN_FIRST: Node = Node::Seq(INSERT_PAREN_FIRST_NODES);

/// Node sequence for Form B: `values ( <value list> )` directly after
/// the table name.
const INSERT_VALUES_KEYWORD_FIRST_NODES: &[Node] = &[
    Node::Word(Word::keyword("values")),
    Node::Punct('('),
    INSERT_VALUES_LIST,
    Node::Punct(')'),
];
/// Form B as a single alternative of `INSERT_AFTER_TABLE`.
const INSERT_VALUES_KEYWORD_FIRST: Node = Node::Seq(INSERT_VALUES_KEYWORD_FIRST_NODES);

/// What may follow the table name: Form B (starts with `values`) or the
/// paren-first Forms A / C (start with `(`).
const INSERT_AFTER_TABLE_CHOICES: &[Node] =
    &[INSERT_VALUES_KEYWORD_FIRST, INSERT_PAREN_FIRST];
/// Choice over `INSERT_AFTER_TABLE_CHOICES`.
const INSERT_AFTER_TABLE: Node = Node::Choice(INSERT_AFTER_TABLE_CHOICES);

/// Node sequence for `into <T> …`; the `insert` word itself is not part
/// of this sequence.
const INSERT_NODES: &[Node] = &[
    Node::Word(Word::keyword("into")),
    TABLE_NAME_INSERT,
    INSERT_AFTER_TABLE,
];
/// Shape of the `insert` command.
const INSERT_SHAPE: Node = Node::Seq(INSERT_NODES);

// =================================================================
// update — `update <T> set <col>=<v>[, <col>=<v>] (where … | --all-rows)`
// =================================================================

/// Table-name slot that populates `current_table_columns` so
/// the inner `set <col>=<value>` / `where <col>=<value>` slots
/// can resolve column types (Phase D).
const TABLE_NAME_WRITES: Node = Node::Ident {
    source: IdentSource::Tables,
    // Reject `__rdbms_*` internal tables (ADR-0030 §6; `/runda`
    // finding B) — see `TABLE_NAME_EXISTING`. Shared by `update`,
    // `delete`, and `show data`, so all three reject the internal
    // metadata tables, matching the SQL grammar.
    role: "table_name",
    validator: Some(sql_select::reject_internal_table),
    highlight_override: None,
    writes_table: true,
    writes_column: false,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Column-name slot in `set col = …` — resolves the column's
/// type into `current_column` so the value slot dispatches per
/// column type (Phase D). Matched names carry the
/// `update_set_column` role.
const SET_COLUMN: Node = Node::Ident {
    source: IdentSource::Columns,
    role: "update_set_column",
    validator: None,
    highlight_override: None,
    writes_table: false,
    writes_column: true,
    writes_user_listed_column: false,
    writes_table_alias: false,
    writes_cte_name: false,
    writes_projection_alias: false,
};

/// Value slot resolved at walk time from
/// `WalkContext::current_column`. Falls back to the schemaless
/// value-literal choice when no current_column is bound.
const PER_COLUMN_VALUE: Node = Node::DynamicSubgrammar(current_column_value);

/// Node sequence for one assignment: `<col> = <value>`.
const UPDATE_ASSIGNMENT_NODES: &[Node] = &[
    SET_COLUMN,
    Node::Punct('='),
    PER_COLUMN_VALUE,
];
/// A single `<col> = <value>` assignment.
const UPDATE_ASSIGNMENT: Node = Node::Seq(UPDATE_ASSIGNMENT_NODES);
/// One or more assignments separated by commas.
const UPDATE_ASSIGNMENTS: Node = Node::Repeated {
    inner: &UPDATE_ASSIGNMENT,
    separator: Some(&Node::Punct(',')),
    min: 1,
};

/// `where <expr>` — the complex WHERE-expression fragment
/// (ADR-0026). The grammar tier is defined once in
/// `grammar::expr` and reached here through `Subgrammar`.
const WHERE_CLAUSE_NODES: &[Node] = &[
    Node::Word(Word::keyword("where")),
    Node::Subgrammar(&expr::OR_EXPR),
];
/// The `where <expr>` clause as a single node.
const WHERE_CLAUSE: Node = Node::Seq(WHERE_CLAUSE_NODES);

/// Row-filter alternatives for `update` / `delete`: a `where` clause or
/// the `all-rows` flag.
const FILTER_CHOICES: &[Node] = &[WHERE_CLAUSE, Node::Flag("all-rows")];
/// Non-optional row filter — `UPDATE_NODES` and `DELETE_NODES` include
/// it directly rather than wrapping it in `Node::Optional`.
const FILTER_CLAUSE: Node = Node::Choice(FILTER_CHOICES);

/// Validator for the `<n>` in `limit <n>` (ADR-0026 §5).
///
/// Accepts any text that parses as a `u64`, so fractional and negative
/// literals are rejected at parse time.
///
/// # Errors
///
/// Returns a `parse.custom.bind_type_mismatch` error carrying the
/// offending text (`found`) and the expected kind (`expected`).
fn validate_limit_count(value: &str) -> Result<(), ValidationError> {
    match value.parse::<u64>() {
        Ok(_) => Ok(()),
        Err(_) => Err(ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", value.to_string()),
                ("expected", "non-negative integer".to_string()),
            ],
        }),
    }
}
/// `validate_limit_count` typed as a `NumberValidator` so it can be
/// plugged into the `NumberLit` node below.
const LIMIT_VALIDATOR: NumberValidator = validate_limit_count;

/// `limit <n>` clause, optional on `show data` (ADR-0026 §5).
const LIMIT_CLAUSE_NODES: &[Node] = &[
    Node::Word(Word::keyword("limit")),
    Node::NumberLit {
        validator: Some(LIMIT_VALIDATOR),
    },
];
/// The `limit <n>` clause as a single node.
const LIMIT_CLAUSE: Node = Node::Seq(LIMIT_CLAUSE_NODES);

/// Node sequence for `<T> set <assignments> <filter>`; the `update`
/// word itself is not part of this sequence. Reused by the
/// `explain update …` wrapper (`EXPLAIN_UPDATE`).
const UPDATE_NODES: &[Node] = &[
    TABLE_NAME_WRITES,
    Node::Word(Word::keyword("set")),
    UPDATE_ASSIGNMENTS,
    FILTER_CLAUSE,
];
/// Shape of the `update` command.
const UPDATE_SHAPE: Node = Node::Seq(UPDATE_NODES);

// =================================================================
// delete — `delete from <T> (where … | --all-rows)`
// =================================================================

/// Node sequence for `from <T> <filter>`; the `delete` word itself is
/// not part of this sequence. Reused by the `explain delete …` wrapper
/// (`EXPLAIN_DELETE`).
const DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("from")),
    TABLE_NAME_WRITES,
    FILTER_CLAUSE,
];
/// Shape of the `delete` command.
const DELETE_SHAPE: Node = Node::Seq(DELETE_NODES);

// =================================================================
// explain — `explain (show data … | update … | delete from …)`
// =================================================================
//
// ADR-0028 §1: `explain` is a top-level command whose shape is a
// `Choice` over the three explainable query commands. The inner
// query grammars are *referenced* through `Subgrammar`, not
// duplicated — so an explained command is parsed, completed,
// hinted and highlighted exactly as it is on its own.
//
// `Subgrammar` needs a `&'static Node`; `SHOW_DATA` /
// `UPDATE_SHAPE` / `DELETE_SHAPE` are `const` (and cannot be
// referenced as `&'static`). These three thin `static` wrappers
// over the existing `_NODES` slices give the references without
// any churn to the standalone command shapes. `explain show`
// references `EXPLAIN_SHOW_DATA` directly (not the `show`
// command's `data | table` choice) — `explain` covers `show
// data` only (ADR-0028 §1).

// `static` wrappers over the standalone commands' `_NODES` slices, so
// `Node::Subgrammar` below has a `&'static Node` to point at.
static EXPLAIN_SHOW_DATA: Node = Node::Seq(SHOW_DATA_NODES);
static EXPLAIN_UPDATE: Node = Node::Seq(UPDATE_NODES);
static EXPLAIN_DELETE: Node = Node::Seq(DELETE_NODES);

/// `show` followed by the `show data` grammar only.
const EXPLAIN_SHOW_NODES: &[Node] = &[
    Node::Word(Word::keyword("show")),
    Node::Subgrammar(&EXPLAIN_SHOW_DATA),
];
/// `update` followed by the `update` grammar.
const EXPLAIN_UPDATE_NODES: &[Node] = &[
    Node::Word(Word::keyword("update")),
    Node::Subgrammar(&EXPLAIN_UPDATE),
];
/// `delete` followed by the `delete` grammar.
const EXPLAIN_DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("delete")),
    Node::Subgrammar(&EXPLAIN_DELETE),
];
/// The three explainable DSL commands, each introduced by its own word.
const EXPLAIN_CHOICES: &[Node] = &[
    Node::Seq(EXPLAIN_SHOW_NODES),
    Node::Seq(EXPLAIN_UPDATE_NODES),
    Node::Seq(EXPLAIN_DELETE_NODES),
];
/// Shape of the DSL `explain` command.
const EXPLAIN_SHAPE: Node = Node::Choice(EXPLAIN_CHOICES);

// --- explain over advanced-mode SQL (ADR-0039) -------------------
//
// The SQL inner mirrors the DSL inner above, but wraps the SQL
// command shapes (the same nodes the standalone `SELECT` / `WITH` /
// `SQL_*` commands use). This shape backs a *second* `explain`
// CommandNode (`EXPLAIN_SQL`, registered `Advanced`); the registry's
// shared-entry-word dispatch tries it first in advanced mode and
// falls back to the `Simple` DSL `EXPLAIN` when a branch can't match
// (e.g. `explain show data …`, or a DSL-only `--all-rows`). `select`
// and `with` are SQL-only, so they only ever resolve here.

/// `select` followed by the SQL `SELECT` tail grammar.
const EXPLAIN_SELECT_NODES: &[Node] = &[
    Node::Word(Word::keyword("select")),
    Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
];
/// `with` followed by the SQL `WITH` tail grammar.
const EXPLAIN_WITH_NODES: &[Node] = &[
    Node::Word(Word::keyword("with")),
    Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
];
/// `insert` followed by the SQL insert shape.
const EXPLAIN_SQL_INSERT_NODES: &[Node] = &[
    Node::Word(Word::keyword("insert")),
    Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
];
/// `update` followed by the SQL update shape.
const EXPLAIN_SQL_UPDATE_NODES: &[Node] = &[
    Node::Word(Word::keyword("update")),
    Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE),
];
/// `delete` followed by the SQL delete shape.
const EXPLAIN_SQL_DELETE_NODES: &[Node] = &[
    Node::Word(Word::keyword("delete")),
    Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE),
];
/// The five explainable SQL statements, each introduced by its own word.
const EXPLAIN_SQL_CHOICES: &[Node] = &[
    Node::Seq(EXPLAIN_SELECT_NODES),
    Node::Seq(EXPLAIN_WITH_NODES),
    Node::Seq(EXPLAIN_SQL_INSERT_NODES),
    Node::Seq(EXPLAIN_SQL_UPDATE_NODES),
    Node::Seq(EXPLAIN_SQL_DELETE_NODES),
];
/// Shape of the SQL `explain` command.
const EXPLAIN_SQL_SHAPE: Node = Node::Choice(EXPLAIN_SQL_CHOICES);

// =================================================================
// select — SQL `SELECT` (advanced mode; ADR-0030 §6, ADR-0031)
// =================================================================
//
// Phase 1's single-table `SELECT`: a projection, a `FROM` table,
// and optional `WHERE` / `ORDER BY` / `LIMIT`. The projection,
// `WHERE` and `ORDER BY` expression slots reference the SQL
// expression grammar (ADR-0031) through `Subgrammar`, so SQL gets
// the same completion / highlighting / hints as the DSL for free.
//
// Advanced mode only — the walker's mode gate (ADR-0030 §2,
// `grammar::is_advanced_only`) refuses `select` in simple mode
// with the "this is SQL" hint, so this grammar is never reached
// there.
//
// `JOIN`s, `GROUP BY` / `HAVING`, subqueries, `UNION`, CTEs, and
// `OFFSET` are ADR-0030 Phase 2 ("`SELECT` — full"); implicit
// column aliasing (`select a x`) and qualified `t.*` are out of
// Phase 1 (see the inline notes).

// SQL expression slot — `Node::Subgrammar(&sql_expr::SQL_OR_EXPR)`
// is inlined at each use site to avoid a Rust const-evaluation
// cycle through the sql_expr ⇄ sql_select recursion (see the
// matching note in sql_select.rs).

// Phase 1's local `SELECT_*` grammar nodes have been retired in
// favour of `sql_select::SQL_SELECT_TAIL` (ADR-0032 sub-phase
// 2c). The shape definition that `data::SELECT` references now
// lives in the dedicated `sql_select` module — including the
// `reject_internal_table` validator, the `LIMIT` count
// validator, and the projection / FROM / WHERE / ORDER BY
// machinery. The full §1 grammar (JOIN, GROUP BY, HAVING,
// set-ops, qualified refs, subqueries, CTEs) is admitted as a
// natural superset.

// =================================================================
// AST builders
// =================================================================

/// Text of the first matched identifier in `path` whose role equals
/// `role`, or `None` when the path holds no identifier with that role.
fn ident_text<'a>(path: &'a MatchedPath, role: &str) -> Option<&'a str> {
    path.items
        .iter()
        .find(|item| matches!(&item.kind, MatchedKind::Ident { role: found, .. } if *found == role))
        .map(|item| item.text.as_str())
}

/// Owned text of the first identifier in `path` with the given `role`.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error whose `detail` is
/// `missing <role>` when no such identifier was matched.
fn require_ident(path: &MatchedPath, role: &'static str) -> Result<String, ValidationError> {
    match ident_text(path, role) {
        Some(text) => Ok(text.to_string()),
        None => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", format!("missing {role}"))],
        }),
    }
}

/// Map a matched value-literal item to its `Value`.
///
/// The keywords `null`, `true` and `false` become `Value::Null` /
/// `Value::Bool`; number and string literals keep their matched text as
/// `Value::Number` / `Value::Text`. Every other item yields `None`.
///
/// `pub(crate)` so `grammar::ddl` can reuse it when collecting a
/// `default <literal>` column constraint (ADR-0029).
pub(crate) fn item_to_value(item: &MatchedItem) -> Option<Value> {
    let value = match &item.kind {
        MatchedKind::NumberLit => Value::Number(item.text.clone()),
        MatchedKind::StringLit => Value::Text(item.text.clone()),
        MatchedKind::Word("null") => Value::Null,
        MatchedKind::Word("true") => Value::Bool(true),
        MatchedKind::Word("false") => Value::Bool(false),
        _ => return None,
    };
    Some(value)
}

/// Build the `Command` for a matched `show …` path.
///
/// The subcommand is the second matched keyword in the path.
/// `data` is delegated to `build_show_data`, `table` yields
/// `Command::ShowTable`, and every other recognised keyword yields a
/// `Command::ShowList` — without a name for the list-all plurals, with
/// the named item for the singular forms.
///
/// # Errors
///
/// Returns `parse.error_wrapper` for an unrecognised (or absent)
/// subcommand, and propagates the error from a missing required name.
fn build_show(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let subcommand = path
        .items
        .iter()
        .filter_map(|item| match &item.kind {
            MatchedKind::Word(word) => Some(*word),
            _ => None,
        })
        .nth(1);

    // `data` and `table` build their own commands and return early;
    // everything else reduces to a (kind, optional name) pair.
    let (kind, name) = match subcommand {
        Some("data") => return build_show_data(path, _source),
        Some("table") => {
            return Ok(Command::ShowTable {
                name: require_ident(path, "table_name")?,
            });
        }
        // List-all forms have no argument.
        Some("tables") => (ShowListKind::Tables, None),
        Some("relationships") => (ShowListKind::Relationships, None),
        Some("indexes") => (ShowListKind::Indexes, None),
        // V5a singular per-item detail — carry the named item.
        Some("relationship") => (
            ShowListKind::Relationships,
            Some(require_ident(path, "relationship_name")?),
        ),
        Some("index") => (
            ShowListKind::Indexes,
            Some(require_ident(path, "index_name")?),
        ),
        _ => {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "unknown show subcommand".to_string())],
            });
        }
    };
    Ok(Command::ShowList { kind, name })
}

/// Assemble `Command::ShowData` from a matched path.
///
/// All three parts (table name, optional filter, optional limit) are
/// located by role or keyword rather than by fixed position, so the
/// same builder serves the standalone `show data` command and the
/// `explain show data …` wrapper, where the entry-word offset shifts.
fn build_show_data(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let name = require_ident(path, "table_name")?;
    let filter = build_show_filter(path)?;
    let limit = build_show_limit(path)?;
    Ok(Command::ShowData { name, filter, limit })
}

/// The optional `where <expr>` of a `show data`.
///
/// Returns `Ok(None)` when the path has no `where` keyword. Otherwise
/// the expression terminals are the items strictly between
/// `Word("where")` and the first `Word("limit")` (or the end of the path
/// when there is no limit) — neither the `limit` keyword nor any
/// expression keyword collide, so the slice is exact.
fn build_show_filter(path: &MatchedPath) -> Result<Option<Expr>, ValidationError> {
    let items = &path.items;
    let where_idx = match items
        .iter()
        .position(|item| matches!(&item.kind, MatchedKind::Word("where")))
    {
        Some(idx) => idx,
        None => return Ok(None),
    };
    let expr_end = items
        .iter()
        .position(|item| matches!(&item.kind, MatchedKind::Word("limit")))
        .unwrap_or(items.len());
    let filter = expr::build_expr(&items[where_idx + 1..expr_end])?;
    Ok(Some(filter))
}

/// The optional `limit <n>` of a `show data`.
///
/// Returns `Ok(None)` when the path has no `limit` keyword; otherwise
/// parses the item that follows it as a `u64`. The grammar's
/// `LIMIT_VALIDATOR` already constrained `<n>` to a non-negative
/// integer, so the parse here cannot realistically fail.
///
/// # Errors
///
/// `parse.error_wrapper` when nothing follows `limit`, and
/// `parse.custom.bind_type_mismatch` when the following text is not a
/// `u64`.
fn build_show_limit(path: &MatchedPath) -> Result<Option<u64>, ValidationError> {
    let items = &path.items;
    let limit_idx = match items
        .iter()
        .position(|item| matches!(&item.kind, MatchedKind::Word("limit")))
    {
        Some(idx) => idx,
        None => return Ok(None),
    };
    let Some(count) = items.get(limit_idx + 1) else {
        return Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing limit count".to_string())],
        });
    };
    match count.text.parse::<u64>() {
        Ok(n) => Ok(Some(n)),
        Err(_) => Err(ValidationError {
            message_key: "parse.custom.bind_type_mismatch",
            args: vec![
                ("found", count.text.clone()),
                ("expected", "non-negative integer".to_string()),
            ],
        }),
    }
}

/// Build `Command::Insert` from a matched `insert into <T> …` path.
///
/// Three surface forms are recognised from the matched items:
///
/// - **Form B** — `insert into T values (…)`: the item right after the
///   table name is `Word("values")`; no column list.
/// - **Form A** — `insert into T (a, b) values (…)`: the first paren
///   holds column names (role `insert_first_item`) and a `values`
///   keyword follows.
/// - **Form C** — `insert into T (…)`: no `values` keyword; the first
///   paren holds the values themselves.
///
/// # Errors
///
/// - `parse.error_wrapper` when the table name, the opening `(`, the
///   `values` keyword, or Form A's column names are missing.
/// - `parse.custom.insert_form_a_missing_values` when the first paren
///   lists column names but no `values (…)` follows.
fn build_insert(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let table = require_ident(path, "table_name")?;

    // A `values` keyword anywhere in the path separates Forms A / B
    // (keyword present) from Form C (keyword absent).
    let saw_values = path
        .items
        .iter()
        .any(|i| matches!(i.kind, MatchedKind::Word("values")));

    // Index of the table-name match — everything after it is the
    // form-specific tail.
    let table_idx = path
        .items
        .iter()
        .position(|i| matches!(&i.kind, MatchedKind::Ident { role: "table_name", .. }))
        .ok_or_else(|| ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing table".to_string())],
        })?;

    // Form B (values keyword right after table): no column list,
    // values come from the single paren-bounded list.
    let form_b = matches!(
        path.items.get(table_idx + 1).map(|i| &i.kind),
        Some(MatchedKind::Word("values"))
    );
    if form_b {
        let values = collect_values_in_parens(path, table_idx + 1)?;
        return Ok(Command::Insert {
            table,
            columns: None,
            values,
        });
    }

    // Form A or C: both open with a paren right after the table. The
    // grammar resolved its contents via `insert_first_paren` to either
    // column names (Form A) or value literals (Form C).
    let first_paren_idx = path
        .items
        .iter()
        .enumerate()
        .skip(table_idx + 1)
        .find(|(_, i)| matches!(i.kind, MatchedKind::Punct('(')))
        .map(|(idx, _)| idx)
        .ok_or_else(|| ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing `(`".to_string())],
        })?;

    // Column names listed inside the first paren. `FORM_A_COLUMN` tags
    // them with role "insert_first_item"; a Form C value list
    // contributes none.
    let listed_columns: Vec<String> = path
        .items
        .iter()
        .filter_map(|i| match &i.kind {
            MatchedKind::Ident {
                role: "insert_first_item",
                ..
            } => Some(i.text.clone()),
            _ => None,
        })
        .collect();

    if saw_values {
        // Form A: first paren = column names; second paren = values.
        if listed_columns.is_empty() {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "expected column names in `insert into T (…)`".to_string())],
            });
        }
        // Find the `values` keyword and the next `(` — the values
        // run starts after that `(`.
        let values_idx = path
            .items
            .iter()
            .enumerate()
            .skip(first_paren_idx)
            .find(|(_, i)| matches!(i.kind, MatchedKind::Word("values")))
            .map(|(i, _)| i)
            .ok_or_else(|| ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "missing `values` keyword".to_string())],
            })?;
        let values = collect_values_in_parens(path, values_idx + 1)?;
        Ok(Command::Insert {
            table,
            columns: Some(listed_columns),
            values,
        })
    } else {
        // Form C: the first paren contained the value list; the values
        // are identified by their natural MatchedKind
        // (Word/NumberLit/StringLit).
        //
        // Form-A-without-`values` recovery: when the user types
        // `insert into T (col)` (column-shaped item, no `values`
        // keyword), the grammar walks to a complete match but the
        // user almost certainly meant Form A and forgot the
        // `values (...)` suffix. Reject here with a ValidationError —
        // the walker classifies validation errors as `at_eof: true`,
        // so the input renderer surfaces this as IncompleteAtEof
        // (mid-typing) rather than dispatching a logically-broken
        // Form C insert with an empty value list.
        if !listed_columns.is_empty() {
            return Err(ValidationError {
                message_key: "parse.custom.insert_form_a_missing_values",
                args: vec![("columns", listed_columns.join(", "))],
            });
        }
        let values = collect_values_in_parens(path, first_paren_idx)?;
        Ok(Command::Insert {
            table,
            columns: None,
            values,
        })
    }
}

/// Gather the `Value`s of the first `(…)` group found at or after
/// `start_idx` in the matched path.
///
/// Scanning stops at the first `)` seen after the opening paren; if the
/// path ends before a `)` is seen, whatever was gathered is returned.
/// Items that `item_to_value` does not recognise (separators, etc.) are
/// skipped silently.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when no `(` exists at or after
/// `start_idx`.
fn collect_values_in_parens(
    path: &MatchedPath,
    start_idx: usize,
) -> Result<Vec<Value>, ValidationError> {
    let mut rest = path.items.iter().skip(start_idx);
    // Advance past the opening paren; everything before it is ignored.
    let opened = rest.any(|it| matches!(it.kind, MatchedKind::Punct('(')));
    if !opened {
        return Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing `(`".to_string())],
        });
    }
    let mut values = Vec::new();
    for it in rest {
        match &it.kind {
            // First closing paren ends the group.
            MatchedKind::Punct(')') => break,
            // A further `(` inside the group contributes nothing.
            MatchedKind::Punct('(') => {}
            _ => values.extend(item_to_value(it)),
        }
    }
    Ok(values)
}

/// Build the DSL `Command::Update` from a matched path: the target
/// table (`table_name` ident), the `set` assignments, and the row
/// filter (`where …` or `--all-rows`). `_source` is unused.
fn build_update(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    // Fields are evaluated in written order, so the first failing piece
    // (table, then assignments, then filter) is the error reported.
    Ok(Command::Update {
        table: require_ident(path, "table_name")?,
        assignments: collect_assignments(path)?,
        filter: collect_filter(path)?,
    })
}

/// Collect the `column = value` pairs of a DSL `update … set` list.
///
/// Each `update_set_column` ident starts an assignment; the items up to
/// and including the next `=` are skipped, and the item right after the
/// `=` must convert to a `Value`.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when the path ends before a
/// value item is found, or when that item is not a value literal.
fn collect_assignments(
    path: &MatchedPath,
) -> Result<Vec<(String, Value)>, ValidationError> {
    let mut pairs = Vec::new();
    let mut items = path.items.iter();
    while let Some(lhs) = items.next() {
        let is_set_column = matches!(
            lhs.kind,
            MatchedKind::Ident {
                role: "update_set_column",
                ..
            }
        );
        if !is_set_column {
            continue;
        }
        // Consume everything up to and including the `=`; the result is
        // irrelevant — a missing `=` simply exhausts the iterator and is
        // reported as a missing value below.
        let _ = items
            .by_ref()
            .find(|it| matches!(it.kind, MatchedKind::Punct('=')));
        let Some(rhs) = items.next() else {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "missing assignment value".to_string())],
            });
        };
        let Some(value) = item_to_value(rhs) else {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "expected value literal".to_string())],
            });
        };
        pairs.push((lhs.text.clone(), value));
    }
    Ok(pairs)
}

/// Derive the row filter of a DSL `update` / `delete`.
///
/// An `all-rows` flag anywhere in the path wins and yields
/// `RowFilter::AllRows`; otherwise the terminals after the first
/// `where` word are handed to `expr::build_expr`.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when neither the flag nor a
/// `where` word is present, and propagates any expression-build error.
fn collect_filter(path: &MatchedPath) -> Result<RowFilter, ValidationError> {
    let items = &path.items;
    let wants_all_rows = items
        .iter()
        .any(|it| matches!(it.kind, MatchedKind::Flag("all-rows")));
    if wants_all_rows {
        return Ok(RowFilter::AllRows);
    }
    let Some(where_idx) = items
        .iter()
        .position(|it| matches!(&it.kind, MatchedKind::Word("where")))
    else {
        return Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing where or --all-rows".to_string())],
        });
    };
    // `where` closes out update / delete, so the remainder of the path
    // is the predicate expression in its entirety.
    let predicate = expr::build_expr(&items[where_idx + 1..])?;
    Ok(RowFilter::Where(predicate))
}

/// Build the DSL `Command::Delete` from a matched path: the target
/// table (`table_name` ident) plus the row filter (`where …` or
/// `--all-rows`). `_source` is unused.
fn build_delete(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    Ok(Command::Delete {
        table: require_ident(path, "table_name")?,
        filter: collect_filter(path)?,
    })
}

/// Build `Command::Explain` for the DSL forms (ADR-0028 §1).
///
/// The matched words run `[explain, show|update|delete, …]`, so the
/// second word names the wrapped command. That command is produced by
/// its ordinary standalone builder (`build_show_data` / `build_update`
/// / `build_delete`); those builders locate their pieces by role, so
/// the extra leading `explain` word does not disturb them.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when the second word is not one
/// of the three supported targets, and propagates inner-builder errors.
fn build_explain(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    let target = path
        .items
        .iter()
        .filter_map(|it| {
            if let MatchedKind::Word(w) = &it.kind {
                Some(*w)
            } else {
                None
            }
        })
        .nth(1);
    let wrapped = match target {
        Some("show") => build_show_data(path, _source),
        Some("update") => build_update(path, _source),
        Some("delete") => build_delete(path, _source),
        _ => Err(ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "unknown explain target".to_string())],
        }),
    }?;
    Ok(Command::Explain {
        query: Box::new(wrapped),
    })
}

/// Build `Command::Explain` over an advanced-mode SQL inner
/// (ADR-0039).
///
/// The carried SQL text must exclude the `explain` prefix —
/// `EXPLAIN QUERY PLAN` runs over the inner statement, not the wrapper —
/// so the inner text is sliced from `source` starting at the inner entry
/// keyword's span.
///
/// The inner builders, however, are called with the *full* `source`:
/// the spans recorded in `path` are byte offsets into the full line, and
/// `build_sql_insert` slices `source` by those spans (row source,
/// literal-row bounds). Handing it the prefix-stripped text would shift
/// every offset by the prefix length, yielding a wrong row source or an
/// out-of-range slice. Only the resulting command's `sql` field is then
/// replaced with the prefix-stripped text.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when the path has no second
/// word or that word is not a supported SQL entry keyword, and
/// propagates inner-builder errors.
fn build_explain_sql(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    // Words in the path: [0] is the `explain` entry word, [1] is the
    // inner entry keyword (select / with / insert / update / delete).
    let (inner_word, inner_start) = path
        .items
        .iter()
        .filter_map(|i| match &i.kind {
            MatchedKind::Word(w) => Some((*w, i.span.0)),
            _ => None,
        })
        .nth(1)
        .ok_or_else(|| ValidationError {
            message_key: "parse.error_wrapper",
            args: vec![("detail", "missing explain target".to_string())],
        })?;
    let inner_sql = source[inner_start..].trim();
    // Build against the full line so span offsets stay valid.
    let mut inner = match inner_word {
        "select" | "with" => build_select(path, source)?,
        "insert" => build_sql_insert(path, source)?,
        "update" => build_sql_update(path, source)?,
        "delete" => build_sql_delete(path, source)?,
        _ => {
            return Err(ValidationError {
                message_key: "parse.error_wrapper",
                args: vec![("detail", "unknown explain target".to_string())],
            });
        }
    };
    // Strip the `explain` prefix from the carried SQL text only.
    match &mut inner {
        Command::Select { sql, .. }
        | Command::SqlInsert { sql, .. }
        | Command::SqlUpdate { sql, .. }
        | Command::SqlDelete { sql, .. } => *sql = inner_sql.to_string(),
        // The builders above only produce the four variants handled.
        _ => {}
    }
    Ok(Command::Explain {
        query: Box::new(inner),
    })
}

// =================================================================
// replay — `replay <bare-path>` | `replay '<path>'`
// =================================================================
//
// Phase E (ADR-0024 §migration). The chumsky-side
// `try_parse_replay_with_bare_path` source-slice helper is
// retired here: walker BarePath consumes the unquoted form
// (terminating at whitespace per the path-bearing UX change),
// and StringLit consumes the quoted form. Paths with spaces
// must use the quoted form — same UX that `import` / `export`
// adopted in Phase A.

/// The two accepted spellings of the replay path: a quoted string
/// literal or an unquoted bare path.
const REPLAY_PATH_CHOICES: &[Node] = &[Node::StringLit, Node::BarePath];
/// Shape of the `replay` command after its entry word: exactly one of
/// [`REPLAY_PATH_CHOICES`].
const REPLAY_PATH: Node = Node::Choice(REPLAY_PATH_CHOICES);

/// Build `Command::Replay` from the first string-literal or bare-path
/// item in the matched path. `_source` is unused.
///
/// # Errors
///
/// Returns a `parse.error_wrapper` error when the path holds neither
/// kind of item.
fn build_replay(path: &MatchedPath, _source: &str) -> Result<Command, ValidationError> {
    for it in &path.items {
        if matches!(it.kind, MatchedKind::StringLit | MatchedKind::BarePath) {
            return Ok(Command::Replay {
                path: it.text.clone(),
            });
        }
    }
    Err(ValidationError {
        message_key: "parse.error_wrapper",
        args: vec![("detail", "missing path".to_string())],
    })
}

/// Build `Command::Select`, which carries the validated SQL text
/// verbatim (ADR-0030 §4/§6, ADR-0031 §2). No AST is built for a
/// `SELECT`: the walk has already confirmed the statement is within the
/// supported subset, and the worker executes it as text. `source` is
/// the full submitted line (on a `Match` outcome the `SELECT` shape
/// consumed all of it); it is stored with surrounding whitespace
/// trimmed. The matched path is not consulted.
fn build_select(_path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    let sql = source.trim().to_string();
    Ok(Command::Select { sql })
}

/// Build `Command::SqlInsert` from a validated SQL `INSERT`
/// (ADR-0033 §1). Extracts the target table from the matched path
/// so the worker re-persists the right CSV. `insert` is now the
/// real (shared) entry word, so the validated `source` runs
/// verbatim — like `build_select` (sub-phase 3j).
///
/// Besides the verbatim `sql`, the command carries:
/// - `target_table` — the `insert_target_table` ident (empty if absent);
/// - `listed_columns` — the explicit `(col, …)` list, in order;
/// - `row_source` — the `VALUES` / `SELECT` / `WITH` text without any
///   trailing `ON CONFLICT` / `RETURNING` clause or `;`;
/// - `returning` — whether a `RETURNING` clause is present;
/// - `literal_rows` — per-tuple literal captures for a `VALUES` source.
///
/// The path's spans are byte offsets into `source`. The row-source
/// slice is taken with a checked `str::get`, so offsets that do not fit
/// `source` (out of range, reversed, or not on a character boundary)
/// yield an empty row source instead of a panic.
fn build_sql_insert(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    let target_table = path
        .items
        .iter()
        .find_map(|item| match item.kind {
            MatchedKind::Ident {
                role: "insert_target_table",
                ..
            } => Some(item.text.clone()),
            _ => None,
        })
        .unwrap_or_default();
    // The user's explicit `(col, …)` list, in order (empty when the
    // form omits it). Sub-phase 3d reads this to decide which
    // `shortid` columns were left for the worker to auto-fill.
    let listed_columns: Vec<String> = path
        .items
        .iter()
        .filter_map(|item| match item.kind {
            MatchedKind::Ident {
                role: "insert_column",
                ..
            } => Some(item.text.clone()),
            _ => None,
        })
        .collect();
    // The row source is the `VALUES` / `SELECT` / `WITH` clause —
    // from that keyword up to (but not including) any trailing
    // clause: `ON CONFLICT …` (3h) or `RETURNING …` (3g), whichever
    // comes first, else the trailing `;` / end. Boundaries are
    // located by *Word token* in the path (not a text scan), so a
    // string literal like `values ('select')` can't be mistaken for
    // a keyword. Excluding the trailing clauses keeps the row source
    // independently preparable for `shortid` auto-fill (`VALUES …
    // ON CONFLICT …` / `VALUES … RETURNING …` are not valid
    // standalone statements), and the auto-fill rewrite re-appends
    // the trailing tail verbatim (see `do_sql_insert`).
    //
    // `ON CONFLICT`'s `on` is located via the unambiguous `conflict`
    // keyword that immediately follows it — a JOIN's `on` inside a
    // SELECT row source has no following `conflict`, so it is not
    // mistaken for a clause boundary.
    let on_conflict_start = path
        .items
        .windows(2)
        .find(|w| {
            matches!(w[0].kind, MatchedKind::Word("on"))
                && matches!(w[1].kind, MatchedKind::Word("conflict"))
        })
        .map(|w| w[0].span.0);
    let returning_start = path
        .items
        .iter()
        .find(|item| matches!(item.kind, MatchedKind::Word("returning")))
        .map(|item| item.span.0);
    let tail_start = [on_conflict_start, returning_start]
        .into_iter()
        .flatten()
        .min();
    // End of the row-source region: the first trailing clause, else the
    // end of the line. Shared by the text slice and the literal capture.
    let row_source_end = tail_start.unwrap_or(source.len());
    let row_source = path
        .items
        .iter()
        .find(|item| {
            matches!(item.kind, MatchedKind::Word("values" | "select" | "with"))
        })
        // Checked slice: `None` (→ empty row source) rather than a panic
        // when the span does not line up with `source`.
        .and_then(|item| source.get(item.span.0..row_source_end))
        .map(|text| text.trim().trim_end_matches(';').trim().to_string())
        .unwrap_or_default();
    // The entry word is the real `insert` keyword (sub-phase 3j),
    // so the validated line runs verbatim (grammar-as-text,
    // ADR-0030 §4) — no keyword reconstruction.
    let sql = source.trim().to_string();
    // Capture literal values per `VALUES` row for app-level type
    // validation + error enrichment (ADR-0036 Phase 1). Only for a
    // `VALUES` source (a `SELECT`/`WITH` source has no `values` keyword,
    // so this stays empty). Bounded to the row-source region by the same
    // end offset the row_source slice used.
    let values_start = path
        .items
        .iter()
        .find(|i| matches!(i.kind, MatchedKind::Word("values")))
        .map(|i| i.span.0);
    let literal_rows = values_start.map_or_else(Vec::new, |vs| {
        capture_literal_rows(path, vs, row_source_end)
    });
    Ok(Command::SqlInsert {
        sql,
        target_table,
        listed_columns,
        row_source,
        returning: path_has_returning(path),
        literal_rows,
    })
}

/// Extract, tuple by tuple, the literal values of a `VALUES` clause
/// from the already-matched path (ADR-0036 Phase 1) — no reparse.
///
/// Every tuple position becomes `Some(Value)` when it is a bare literal
/// (a signed number counts: the sign is folded into the number) and
/// `None` when it is an expression (`func(x)`, `a+1`, a subquery, a
/// column ref — nothing static to validate). Tuples are delimited by
/// their outer parens and positions by commas directly inside them;
/// deeper parens and commas belong to the position's own tokens.
///
/// `values_start` is the byte offset of the `values` keyword. Only items
/// whose start offset lies in `[values_start, tail_end)` are looked at,
/// which keeps a trailing `ON CONFLICT` / `RETURNING` clause out.
fn capture_literal_rows(
    path: &MatchedPath,
    values_start: usize,
    tail_end: usize,
) -> Vec<Vec<Option<Value>>> {
    let region = values_start..tail_end;
    let mut captured: Vec<Vec<Option<Value>>> = Vec::new();
    let mut row: Vec<Option<Value>> = Vec::new();
    // Tokens of the position currently being read.
    let mut tokens: Vec<&MatchedItem> = Vec::new();
    let mut nesting: i32 = 0;
    for item in path
        .items
        .iter()
        .filter(|it| region.contains(&it.span.0))
    {
        match &item.kind {
            MatchedKind::Word("values") => {}
            MatchedKind::Punct('(') => {
                if nesting == 0 {
                    // Outer paren: a fresh tuple starts.
                    row = Vec::new();
                    tokens.clear();
                } else {
                    tokens.push(item);
                }
                nesting += 1;
            }
            MatchedKind::Punct(')') => {
                match nesting {
                    1 => {
                        // Outer paren closes: finish the last position
                        // and the tuple itself.
                        row.push(classify_value_position(&tokens));
                        tokens.clear();
                        captured.push(std::mem::take(&mut row));
                    }
                    n if n > 1 => tokens.push(item),
                    _ => {}
                }
                nesting -= 1;
            }
            MatchedKind::Punct(',') if nesting == 1 => {
                row.push(classify_value_position(&tokens));
                tokens.clear();
            }
            _ => {
                if nesting >= 1 {
                    tokens.push(item);
                }
            }
        }
    }
    captured
}

/// Decide whether the tokens of one value position form a bare literal
/// (ADR-0036 §1).
///
/// - exactly one token → whatever `item_to_value` makes of it;
/// - a `-` or `+` followed by a number literal → `Value::Number`, with
///   `-` kept as a prefix and `+` dropped;
/// - anything else (including no tokens) → `None`, i.e. an expression.
fn classify_value_position(tokens: &[&MatchedItem]) -> Option<Value> {
    match tokens {
        [only] => item_to_value(only),
        [sign, digits] if matches!(digits.kind, MatchedKind::NumberLit) => match sign.kind {
            MatchedKind::Punct('-') => Some(Value::Number(format!("-{}", digits.text))),
            MatchedKind::Punct('+') => Some(Value::Number(digits.text.clone())),
            _ => None,
        },
        _ => None,
    }
}

/// Report whether a `RETURNING` clause was matched (ADR-0033 §5,
/// sub-phase 3g). The check looks for the `returning` *Word token* in
/// the path rather than scanning text, so the word appearing inside a
/// string literal is never taken for the keyword — the same path-based
/// approach `build_sql_insert` uses to find its row source.
fn path_has_returning(path: &MatchedPath) -> bool {
    for it in &path.items {
        if matches!(it.kind, MatchedKind::Word("returning")) {
            return true;
        }
    }
    false
}

/// Build `Command::SqlUpdate` from a validated SQL `UPDATE`
/// (ADR-0033 §2). The target table is read from the matched path so the
/// worker re-persists the right CSV. Since `update` is the real (shared)
/// entry word, the validated `source` is carried verbatim (trimmed)
/// (sub-phase 3j).
fn build_sql_update(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    // The first `table_name` ident is the UPDATE target: it comes before
    // any table mentioned inside a SET / WHERE subquery. Absent → empty.
    let target_table = path
        .items
        .iter()
        .find(|it| {
            matches!(
                it.kind,
                MatchedKind::Ident {
                    role: "table_name",
                    ..
                }
            )
        })
        .map(|it| it.text.clone())
        .unwrap_or_default();
    Ok(Command::SqlUpdate {
        sql: source.trim().to_string(),
        target_table,
        returning: path_has_returning(path),
        // Literal RHS of each top-level `SET col = <literal>`, taken from
        // the matched tokens (no reparse) for app-level type validation
        // and error enrichment (ADR-0036 Phase 2).
        set_literals: capture_set_literals(path),
    })
}

/// Extract the right-hand side of every top-level `SET col = …`
/// assignment from the matched path (ADR-0036 Phase 2) — no reparse.
///
/// Each assignment yields `(col, Some(Value))` when its RHS is a bare
/// literal (a signed number included) and `(col, None)` when it is an
/// expression (arithmetic, function call, scalar subquery, column ref —
/// nothing static to validate).
///
/// Boundaries: an assignment starts at an `update_set_column` ident —
/// a role emitted only for an assignment's LHS (column refs inside
/// expressions carry `sql_expr_ident` / `sql_expr_qualified_ref`).
/// Outside any parens, a comma separates assignments and a `where` /
/// `returning` keyword, a `;`, or the end of the path closes the SET
/// list. Inside parens none of those — nor `=` — has structural
/// meaning, so a function call or scalar subquery on the RHS cannot be
/// misread as a boundary or as the assignment operator.
fn capture_set_literals(path: &MatchedPath) -> Vec<(String, Option<Value>)> {
    // Close out the assignment in progress (if there is one) and reset
    // the RHS buffer.
    fn finish(
        column: &mut Option<String>,
        rhs: &mut Vec<&MatchedItem>,
        done: &mut Vec<(String, Option<Value>)>,
    ) {
        if let Some(name) = column.take() {
            done.push((name, classify_value_position(rhs)));
        }
        rhs.clear();
    }

    let mut captured: Vec<(String, Option<Value>)> = Vec::new();
    let mut nesting: i32 = 0;
    // State of the assignment being read: its column, the RHS tokens
    // gathered so far, and whether its `=` has already been passed.
    let mut column: Option<String> = None;
    let mut rhs: Vec<&MatchedItem> = Vec::new();
    let mut eq_seen = false;

    // Only the SET list matters: drop everything through the first `set`.
    let set_list = path
        .items
        .iter()
        .skip_while(|it| !matches!(it.kind, MatchedKind::Word("set")))
        .skip(1);

    for item in set_list {
        let at_top_level = nesting == 0;
        let ends_set_list = matches!(
            item.kind,
            MatchedKind::Word("where" | "returning") | MatchedKind::Punct(';')
        );
        if at_top_level && ends_set_list {
            break;
        }
        // Whether this token, if it is ordinary, belongs to the RHS.
        let in_rhs = column.is_some() && eq_seen;
        match &item.kind {
            MatchedKind::Punct('(') => {
                nesting += 1;
                if in_rhs {
                    rhs.push(item);
                }
            }
            MatchedKind::Punct(')') => {
                nesting -= 1;
                if in_rhs {
                    rhs.push(item);
                }
            }
            MatchedKind::Ident {
                role: "update_set_column",
                ..
            } if at_top_level => {
                // Next assignment's LHS: wrap up the one before it.
                finish(&mut column, &mut rhs, &mut captured);
                column = Some(item.text.clone());
                eq_seen = false;
            }
            MatchedKind::Punct(',') if at_top_level => {
                // Separator: wrap up; the following `update_set_column`
                // opens the next assignment.
                finish(&mut column, &mut rhs, &mut captured);
            }
            MatchedKind::Punct('=') if at_top_level && !eq_seen && column.is_some() => {
                // The assignment operator itself is not RHS content.
                eq_seen = true;
            }
            _ => {
                if in_rhs {
                    rhs.push(item);
                }
            }
        }
    }
    // The final assignment is closed by `where`/`returning`/`;`/EOF.
    finish(&mut column, &mut rhs, &mut captured);
    captured
}

/// Build `Command::SqlDelete` from a validated SQL `DELETE`
/// (ADR-0033 §1/§7). The target table is read from the matched path so
/// the worker re-persists the right CSV and snapshots the right inbound
/// children for cascade diffing. The WHERE clause is not captured: the
/// worker runs the verbatim SQL and never inspects the predicate
/// (Amendment 2). Since `delete` is the real (shared) entry word, the
/// validated `source` is carried verbatim (trimmed) (sub-phase 3j).
fn build_sql_delete(path: &MatchedPath, source: &str) -> Result<Command, ValidationError> {
    // The first `table_name` ident is the DELETE target: it comes before
    // any table mentioned inside a WHERE subquery. Absent → empty.
    let target_table = path
        .items
        .iter()
        .find(|it| {
            matches!(
                it.kind,
                MatchedKind::Ident {
                    role: "table_name",
                    ..
                }
            )
        })
        .map(|it| it.text.clone())
        .unwrap_or_default();
    Ok(Command::SqlDelete {
        sql: source.trim().to_string(),
        target_table,
        returning: path_has_returning(path),
    })
}

// =================================================================
// CommandNodes
// =================================================================

/// `show` command node: walks `SHOW_SHAPE` after the `show` entry word
/// and builds via `build_show`. Its usage lines cover the data, table,
/// tables, relationships, indexes, relationship and index forms.
pub static SHOW: CommandNode = CommandNode {
    entry: Word::keyword("show"),
    shape: SHOW_SHAPE,
    ast_builder: build_show,
    help_id: Some("data.show"),
    usage_ids: &[
        "parse.usage.show_data",
        "parse.usage.show_table",
        "parse.usage.show_tables",
        "parse.usage.show_relationships",
        "parse.usage.show_indexes",
        "parse.usage.show_relationship",
        "parse.usage.show_index",
    ],
};

/// DSL `insert` — the `Simple` node of the shared `insert` entry word
/// (its `Advanced` SQL counterpart is [`SQL_INSERT`]). Walks
/// `INSERT_SHAPE` and builds via `build_insert`.
pub static INSERT: CommandNode = CommandNode {
    entry: Word::keyword("insert"),
    shape: INSERT_SHAPE,
    ast_builder: build_insert,
    help_id: Some("data.insert"),
    usage_ids: &["parse.usage.insert"],
};

/// DSL `update` — the `Simple` node of the shared `update` entry word
/// (its `Advanced` SQL counterpart is [`SQL_UPDATE`]). Walks
/// `UPDATE_SHAPE` and builds via `build_update`.
pub static UPDATE: CommandNode = CommandNode {
    entry: Word::keyword("update"),
    shape: UPDATE_SHAPE,
    ast_builder: build_update,
    help_id: Some("data.update"),
    usage_ids: &["parse.usage.update"],
};

/// DSL `delete` — the `Simple` node of the shared `delete` entry word
/// (its `Advanced` SQL counterpart is [`SQL_DELETE`]). Walks
/// `DELETE_SHAPE` and builds via `build_delete`.
pub static DELETE: CommandNode = CommandNode {
    entry: Word::keyword("delete"),
    shape: DELETE_SHAPE,
    ast_builder: build_delete,
    help_id: Some("data.delete"),
    usage_ids: &["parse.usage.delete"],
};

/// `replay <bare-path>` | `replay '<path>'` — walks [`REPLAY_PATH`]
/// after the `replay` entry word and builds via `build_replay`.
pub static REPLAY: CommandNode = CommandNode {
    entry: Word::keyword("replay"),
    shape: REPLAY_PATH,
    ast_builder: build_replay,
    help_id: Some("data.replay"),
    usage_ids: &["parse.usage.replay"],
};

/// DSL `explain` — the `Simple` node of the shared `explain` entry word
/// (its `Advanced` SQL counterpart is [`EXPLAIN_SQL`]). Walks
/// `EXPLAIN_SHAPE` and builds via `build_explain`; it also carries the
/// help/usage ids for the entry word.
pub static EXPLAIN: CommandNode = CommandNode {
    entry: Word::keyword("explain"),
    shape: EXPLAIN_SHAPE,
    ast_builder: build_explain,
    help_id: Some("data.explain"),
    usage_ids: &["parse.usage.explain"],
};

/// `explain` over advanced-mode SQL (ADR-0039).
///
/// This is the `Advanced` node of the shared `explain` entry word; its
/// `Simple` DSL partner is [`EXPLAIN`]. In advanced mode the dispatcher
/// tries this SQL node first and falls back to the DSL node when no SQL
/// branch matches (`explain show data …`, or a DSL-only `--all-rows`);
/// in simple mode only the DSL node is reachable.
pub static EXPLAIN_SQL: CommandNode = CommandNode {
    entry: Word::keyword("explain"),
    shape: EXPLAIN_SQL_SHAPE,
    ast_builder: build_explain_sql,
    // Deliberately no `help_id` / `usage_ids`: as the `Advanced` half of
    // the shared `explain` entry word, this node defers to the `Simple`
    // `EXPLAIN` node's help/usage (which now covers the SQL forms too).
    // Same precedent as `SQL_INSERT`/`SQL_UPDATE`/`SQL_DELETE`;
    // otherwise `note_help` would print `explain` twice.
    help_id: None,
    usage_ids: &[],
};

/// SQL `SELECT` (ADR-0030 §6, ADR-0031, ADR-0032).
///
/// Advanced mode only — gated by `grammar::is_advanced_only`. The
/// registry's entry-word dispatch consumes the leading `SELECT` keyword
/// before the shape walks (sub-phase 2c migration), so the shape is the
/// post-`SELECT` portion of a top-level statement. `help_id` stays
/// `None` until the `help sql` page lands (ADR-0030 Phase 6).
pub static SELECT: CommandNode = CommandNode {
    entry: Word::keyword("select"),
    shape: Node::Subgrammar(&sql_select::SQL_SELECT_TAIL),
    ast_builder: build_select,
    help_id: None,
    usage_ids: &["parse.usage.select"],
};

/// `WITH …` top-level statement (ADR-0032 §4 / sub-phase 2c).
///
/// Advanced mode only. It is a separate node from `SELECT` so that the
/// registry's entry-word dispatch can route `with` and `select` to their
/// own shapes; both end in the same `Command::Select` AST because
/// execution is grammar-as-text (ADR-0030 §6, ADR-0031 §2).
pub static WITH: CommandNode = CommandNode {
    entry: Word::keyword("with"),
    shape: Node::Subgrammar(&sql_select::SQL_WITH_TAIL),
    ast_builder: build_select,
    help_id: None,
    usage_ids: &["parse.usage.with"],
};

/// SQL `INSERT` — the `Advanced`-category node of the shared
/// `insert` entry word (ADR-0033 §2, Amendment 1, sub-phase 3j).
///
/// `insert` is a shared entry word: this `Advanced` SQL node and
/// the `Simple` DSL [`INSERT`] node both register under `insert`.
/// In Advanced mode the dispatcher (`walker::walk` / `decide`)
/// tries this SQL node first and falls back to the DSL node when
/// the SQL shape does not match; in Simple mode only the DSL node
/// is reachable (Amendment 3 — command identity is the mode-rooted
/// grammar-path outcome).
pub static SQL_INSERT: CommandNode = CommandNode {
    entry: Word::keyword("insert"),
    shape: Node::Subgrammar(&sql_insert::SQL_INSERT_SHAPE),
    ast_builder: build_sql_insert,
    // No help/usage ids of its own: the `Simple` [`INSERT`] node that
    // shares the entry word carries `data.insert` / `parse.usage.insert`.
    help_id: None,
    usage_ids: &[],
};

/// SQL `UPDATE` — the `Advanced` node of the shared `update` word.
///
/// ADR-0033 §2 / Amendment 1, sub-phase 3j. Pairs with the `Simple`
/// DSL [`UPDATE`] node; dispatch is SQL-first / DSL-fallback in
/// Advanced mode, DSL-only in Simple.
pub static SQL_UPDATE: CommandNode = CommandNode {
    entry: Word::keyword("update"),
    shape: Node::Subgrammar(&sql_update::SQL_UPDATE_SHAPE),
    ast_builder: build_sql_update,
    // No help/usage ids of its own: the `Simple` [`UPDATE`] node that
    // shares the entry word carries `data.update` / `parse.usage.update`.
    help_id: None,
    usage_ids: &[],
};

/// SQL `DELETE` — the `Advanced` node of the shared `delete` word.
///
/// ADR-0033 §2 / Amendment 1, sub-phase 3j. Pairs with the `Simple`
/// DSL [`DELETE`] node; dispatch is SQL-first / DSL-fallback in
/// Advanced mode, DSL-only in Simple. In Advanced mode `delete from t
/// --all-rows` falls back to the DSL node (the SQL shape has no
/// `--all-rows`).
pub static SQL_DELETE: CommandNode = CommandNode {
    entry: Word::keyword("delete"),
    shape: Node::Subgrammar(&sql_delete::SQL_DELETE_SHAPE),
    ast_builder: build_sql_delete,
    // No help/usage ids of its own: the `Simple` [`DELETE`] node that
    // shares the entry word carries `data.delete` / `parse.usage.delete`.
    help_id: None,
    usage_ids: &[],
};

// =================================================================
// Tests — `explain` grammar (ADR-0028 §1)
// =================================================================

#[cfg(test)]
mod explain_tests {
    use super::Command;
    use crate::dsl::parser::{parse_command, parse_command_in_mode};
    use crate::mode::Mode;

    /// Peel the `Command::Explain` wrapper off an already-parsed
    /// command and hand back the wrapped inner command; anything
    /// that is not an `Explain` fails the test.
    fn unwrap_explain(parsed: Command) -> Command {
        match parsed {
            Command::Explain { query } => *query,
            other => panic!("expected Command::Explain, got {other:?}"),
        }
    }

    /// Simple-mode helper: parse `input` and return the command
    /// wrapped by `Command::Explain`.
    ///
    /// Used by the DSL-explain tests (ADR-0028). The mode is pinned
    /// explicitly because `parse_command` defaults to advanced,
    /// where `explain update` / `explain delete` now route to the
    /// SQL path; that advanced-mode SQL wrapping (ADR-0039) is
    /// exercised through `explain_inner_adv`.
    fn explain_inner(input: &str) -> Command {
        let parsed = parse_command_in_mode(input, Mode::Simple).expect("explain should parse");
        unwrap_explain(parsed)
    }

    /// Advanced-mode helper: same contract as `explain_inner`, but
    /// parsed with `Mode::Advanced`.
    fn explain_inner_adv(input: &str) -> Command {
        let parsed =
            parse_command_in_mode(input, Mode::Advanced).expect("advanced explain should parse");
        unwrap_explain(parsed)
    }

    // ---- ADR-0028: explain over the simple-mode DSL ------------

    #[test]
    fn explain_show_data_wraps_a_show_data() {
        let inner = explain_inner("explain show data Customers");
        assert!(matches!(inner, Command::ShowData { .. }));
    }

    #[test]
    fn explain_show_data_carries_where_and_limit_through() {
        let inner = explain_inner("explain show data Customers where id = 1 limit 5");
        match inner {
            Command::ShowData { name, filter, limit } => {
                assert_eq!(name, "Customers");
                assert!(filter.is_some(), "where clause should survive");
                assert_eq!(limit, Some(5));
            }
            other => panic!("expected ShowData, got {other:?}"),
        }
    }

    #[test]
    fn explain_update_wraps_an_update() {
        let inner = explain_inner("explain update Customers set Name='Bo' where id=1");
        assert!(matches!(inner, Command::Update { .. }));
    }

    #[test]
    fn explain_delete_wraps_a_delete() {
        let inner = explain_inner("explain delete from Customers where id=1");
        assert!(matches!(inner, Command::Delete { .. }));
    }

    #[test]
    fn explain_of_an_incomplete_update_is_a_parse_error() {
        // ADR-0028 §1: `explain` of an incomplete command fails the
        // same way the bare command would, and a DSL `update` needs
        // its `where` / `--all-rows`. The mode is pinned to Simple
        // because a where-less SQL UPDATE is valid in advanced mode
        // (ADR-0039).
        let outcome = parse_command_in_mode("explain update Customers set Name='Bo'", Mode::Simple);
        assert!(outcome.is_err());
    }

    #[test]
    fn explain_does_not_cover_show_table() {
        // ADR-0028 §1: only `show data` is explainable.
        let outcome = parse_command("explain show table Customers");
        assert!(outcome.is_err());
    }

    #[test]
    fn bare_explain_is_a_parse_error() {
        for input in ["explain", "explain show"] {
            assert!(parse_command(input).is_err());
        }
    }

    // ---- ADR-0039: explain over advanced-mode SQL --------------

    #[test]
    fn explain_select_wraps_a_select_with_clean_sql() {
        // ADR-0039: `EXPLAIN QUERY PLAN` runs over the inner SQL, so
        // the carried text must not start with the `explain` prefix.
        let inner = explain_inner_adv("explain select * from Customers");
        match inner {
            Command::Select { sql } => assert_eq!(sql, "select * from Customers"),
            other => panic!("expected Select, got {other:?}"),
        }
    }

    #[test]
    fn explain_with_cte_wraps_a_select() {
        let inner = explain_inner_adv(
            "explain with recent as (select * from Orders) select * from recent",
        );
        match inner {
            Command::Select { sql } => {
                assert!(sql.starts_with("with recent"), "clean inner sql: {sql}");
            }
            other => panic!("expected Select, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_insert_wraps_a_sql_insert() {
        let inner = explain_inner_adv("explain insert into Customers values (1, 'Bo')");
        match inner {
            Command::SqlInsert { sql, target_table, .. } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "insert into Customers values (1, 'Bo')");
            }
            other => panic!("expected SqlInsert, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_update_wraps_a_sql_update_with_clean_sql() {
        let inner = explain_inner_adv("explain update Customers set Name = 'Bo' where id = 1");
        match inner {
            Command::SqlUpdate { sql, target_table, .. } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "update Customers set Name = 'Bo' where id = 1");
            }
            other => panic!("expected SqlUpdate, got {other:?}"),
        }
    }

    #[test]
    fn explain_sql_delete_wraps_a_sql_delete() {
        let inner = explain_inner_adv("explain delete from Customers where id = 1");
        match inner {
            Command::SqlDelete { sql, target_table, .. } => {
                assert_eq!(target_table, "Customers");
                assert_eq!(sql, "delete from Customers where id = 1");
            }
            other => panic!("expected SqlDelete, got {other:?}"),
        }
    }

    #[test]
    fn explain_update_with_all_rows_flag_falls_back_to_dsl_in_advanced() {
        // The SQL update shape cannot consume the DSL-only
        // `--all-rows` flag, so the explain inner lands on the DSL
        // `Update` node — the same fallback the top-level
        // shared-word dispatch performs (ADR-0033).
        let inner = explain_inner_adv("explain update Customers set Name = 'Bo' --all-rows");
        assert!(matches!(inner, Command::Update { .. }));
    }

    #[test]
    fn explain_show_data_still_uses_dsl_in_advanced() {
        // There is no SQL form of `show data`, so advanced
        // `explain show data` resolves to the DSL inner.
        let inner = explain_inner_adv("explain show data Customers");
        assert!(matches!(inner, Command::ShowData { .. }));
    }

    #[test]
    fn explain_select_is_rejected_in_simple_mode() {
        // `select` exists only in advanced mode, hence no
        // simple-mode `explain select`.
        let outcome = parse_command_in_mode("explain select * from Customers", Mode::Simple);
        assert!(outcome.is_err());
    }

    #[test]
    fn explain_does_not_cover_ddl() {
        // ADR-0039 keeps DDL out of scope: EXPLAIN QUERY PLAN is for
        // DML/queries, and `explain` has no SQL DDL branch.
        let outcome = parse_command_in_mode("explain create table T (id int)", Mode::Advanced);
        assert!(outcome.is_err());
    }

    #[test]
    fn advanced_explain_completion_offers_the_sql_verbs() {
        // ADR-0039: after `explain ` in advanced mode the candidates
        // are the union across both `explain` CommandNodes — the SQL
        // verbs (select/with/insert/update/delete) plus the DSL
        // `show`. Shared-entry-word completion already aggregates,
        // so there is no UX gap.
        use crate::completion::{SchemaCache, candidates_at_cursor_in_mode};

        let input = "explain ";
        let schema = SchemaCache::default();
        let completion = candidates_at_cursor_in_mode(input, input.len(), &schema, Mode::Advanced)
            .expect("explain offers candidates");

        let mut names: Vec<&str> = Vec::with_capacity(completion.candidates.len());
        names.extend(completion.candidates.iter().map(|c| c.text.as_str()));

        for verb in ["select", "with", "insert", "update", "delete", "show"] {
            assert!(names.contains(&verb), "expected `{verb}` in {names:?}");
        }
    }
}