diff --git a/docs/adr/0042-h1a-parse-error-pedagogy-grammar-tree.md b/docs/adr/0042-h1a-parse-error-pedagogy-grammar-tree.md index 834df55..366e630 100644 --- a/docs/adr/0042-h1a-parse-error-pedagogy-grammar-tree.md +++ b/docs/adr/0042-h1a-parse-error-pedagogy-grammar-tree.md @@ -184,14 +184,12 @@ must not be re-built — `cte_arity_mismatch` and `compound_arity_mismatch` (`en-US.yaml:590-591`); for these the work is matrix coverage and, for CTE, auditing whether the diagnostic is *positioned at the CTE name* (easiest to fix) rather than the body. -The genuine **absences** the pass adds are: `RETURNING` column -scope, `CROSS JOIN` rejecting an `ON` clause, and INSERT…SELECT -projection/target column-count (verified absent from the catalog -2026-06-03). Each gets a matrix entry; fixes land as walker -diagnostics or `parse.custom.*` messages following the existing -patterns. The `:` one-shot escape (a simple-mode line run once in -advanced mode) is part of the advanced surface and gets at least one -near-miss matrix entry. +The remaining items were re-checked empirically at implementation +time (2026-06-05) and **most turned out already handled** — see the +Implementation-outcome section's advanced-SQL paragraph for the +corrected picture. The `:` one-shot escape (a simple-mode line run +once in advanced mode) is part of the advanced surface and is +covered by the mode-aware usage threading (G3). This stays clear of ADR-0019 §OOS-2 (advanced-SQL *engine-error* sanitisation): §OOS-2 reworks errors raised by *executing* SQL; @@ -260,6 +258,40 @@ in `tests/it/parse_error_pedagogy.rs`: - **G4** — `with` borrowed `select`'s usage; it gains its own `parse.usage.with` CTE template. +**Advanced-SQL pedagogy (§3) — empirical re-check (2026-06-05).** +§3 (drafted from a code survey) listed `RETURNING` scope, +`CROSS JOIN … ON`, and INSERT…SELECT column-count as absences. +Verifying each against the running app **reversed two of three**: + +- **INSERT…SELECT column-count** is *already handled* — a count + mismatch fires `verdict = Error` with "the column list names N + column(s) but M value(s) are given" (walker test + `insert_select_arity_mismatch_fires`). Not a gap. +- **RETURNING scope** is *already handled* — at a bare `returning` + position completion offers the table's columns; `returning + ` fires the `unknown_column` diagnostic. Not a gap. +- **`CROSS JOIN … ON`** *was* a genuine residual: the grammar + rejects the `on` but the structural error said only "expected end + of input". **Fixed** — `parse.cross_join_no_on` renders "a CROSS + JOIN has no ON clause — …" when the failing token is `on` and the + most recent consumed join is a CROSS join (a precise signature: + every other join requires `on`, so there `on` is expected, not a + failure). Render-only, no grammar change; two misfire guards + (plain join still asks for `on`; a stray `on` with no join does + not fire). The CTE/compound arity diagnostics noted above remain + present and correct. + +**Known low-priority residual (user-confirmed to defer).** At +*submit* time, an incomplete expression position that is not a +SELECT projection (bare `where `, `returning `, `having `, `set +col=`) still renders the raw ~14-item expression first-set; only the +SELECT projection is glossed (G2, keyed on the `distinct`+`all` +quantifier pair). This is low-impact because *typing*-time +completion already offers the correct candidates (columns, +functions, expression keywords) at those positions. Generalising the +gloss was considered and deferred — the payoff is small and a +broader render-side collapse adds misfire surface. + Coverage: the matrix covers, in both modes, every entry word's bare / missing-clause / wrong-token near-misses, the app-lifecycle trailing-junk cases, **and** the committed *multi-form* variants diff --git a/src/dsl/parser.rs b/src/dsl/parser.rs index 1fd2465..5b9ecab 100644 --- a/src/dsl/parser.rs +++ b/src/dsl/parser.rs @@ -315,12 +315,54 @@ fn is_select_projection_start(expected: &[crate::dsl::walker::outcome::Expectati has_word("distinct") && has_word("all") } +/// ADR-0042 §3: detect the `… CROSS JOIN on …` mistake. A +/// CROSS JOIN takes no `ON` clause; the grammar rejects the `on`, +/// but the bare structural error ("expected end of input") doesn't +/// teach why. `on` is unexpected at this position *only* when the +/// most recent join is a CROSS join — every other join flavour +/// requires `on`, so there `on` would be in the expected set, not a +/// failure. Detection: the failing token is the keyword `on`, and +/// the last `join` word in the consumed prefix is immediately +/// preceded by `cross`. Render-only; no grammar change. +fn is_cross_join_on(source: &str, position: usize) -> bool { + let rest = source[position.min(source.len())..].trim_start(); + let next_is_on = { + let mut chars = rest.chars(); + let starts_on = rest.len() >= 2 && rest[..2].eq_ignore_ascii_case("on"); + let boundary = chars + .nth(2) + .is_none_or(|c| !c.is_ascii_alphanumeric() && c != '_'); + starts_on && boundary + }; + if !next_is_on { + return false; + } + let consumed = &source[..position.min(source.len())]; + let words: Vec<&str> = consumed + .split(|c: char| !c.is_ascii_alphanumeric() && c != '_') + .filter(|w| !w.is_empty()) + .collect(); + match words.iter().rposition(|w| w.eq_ignore_ascii_case("join")) { + Some(i) if i > 0 => words[i - 1].eq_ignore_ascii_case("cross"), + _ => false, + } +} + fn format_walker_error( source: &str, position: usize, at_eof: bool, expected: &[crate::dsl::walker::outcome::Expectation], ) -> String { + if is_cross_join_on(source, position) { + let consumed = source[..position.min(source.len())].trim_end(); + let prefix = if consumed.is_empty() { + String::new() + } else { + format!("after `{consumed}`, ") + }; + return format!("{prefix}{}", crate::t!("parse.cross_join_no_on")); + } let joined = if is_select_projection_start(expected) { crate::t!("parse.expect.select_projection") } else { diff --git a/src/friendly/keys.rs b/src/friendly/keys.rs index 0dbc0e6..3c4d8fd 100644 --- a/src/friendly/keys.rs +++ b/src/friendly/keys.rs @@ -309,6 +309,7 @@ pub const KEYS_AND_PLACEHOLDERS: &[(&str, &[&str])] = &[ ("parse.usage.update", &[]), ("parse.usage.with", &[]), ("parse.expect.select_projection", &[]), + ("parse.cross_join_no_on", &[]), // ---- Project lifecycle event notes ---- ("project.export_failed", &["error"]), ("project.export_ok", &["path"]), diff --git a/src/friendly/strings/en-US.yaml b/src/friendly/strings/en-US.yaml index 0e37724..12fc0e3 100644 --- a/src/friendly/strings/en-US.yaml +++ b/src/friendly/strings/en-US.yaml @@ -500,6 +500,12 @@ parse: # completion/hints still expand the full first-set. expect: select_projection: "a projection: `*`, a column, or an expression" + # ADR-0042 §3: a CROSS JOIN pairs every row and takes no ON + # clause. The grammar rejects a following `on`; this message + # (rendered in place of the generic structural error when the + # most recent join is a CROSS join and the failing token is `on`) + # teaches the distinction instead of just "expected end of input". + cross_join_no_on: "a CROSS JOIN has no ON clause — it pairs every row; for a join condition use `JOIN … ON`, or filter with `WHERE`" # Per-command usage templates (ADR-0021 §1). Rendered under a # "usage:" prefix when a parse fails after consuming a # known command-entry keyword. The bracket convention `[...]` diff --git a/tests/it/parse_error_pedagogy.rs b/tests/it/parse_error_pedagogy.rs index aedd920..8f04c3e 100644 --- a/tests/it/parse_error_pedagogy.rs +++ b/tests/it/parse_error_pedagogy.rs @@ -255,6 +255,37 @@ fn advanced_mode_usage_block_shows_sql_and_dsl_forms() { assert!(sql_at < dsl_at, "SQL form should precede the DSL form\n{dump_msg}"); } +#[test] +fn advanced_cross_join_with_on_teaches_no_on_clause() { + // ADR-0042 §3: a CROSS JOIN has no ON clause. The grammar + // rejects a following `on`, but the bare structural error + // ("expected end of input") does not teach why. `on` is + // unexpected here only because the most recent join is a CROSS + // join — every other join flavour *requires* `on` — so the case + // is precisely detectable and gets a teaching message. + let input = "select * from a cross join b on x = y"; + let lines = advanced_error_lines_for(input); + let joined = lines.join("\n"); + let dump_msg = dump(input, &lines); + assert!( + joined.contains("a CROSS JOIN has no ON clause"), + "cross join + on should teach that CROSS JOIN takes no ON\n{dump_msg}", + ); + // Misfire guard 1: a plain JOIN missing its ON still asks for `on`. + let plain = advanced_error_lines_for("select * from a join b").join("\n"); + assert!( + plain.contains("expected `on`") && !plain.contains("CROSS JOIN"), + "a plain join must still ask for ON, not the cross-join message: {plain}", + ); + // Misfire guard 2: a stray `on` with no join present must NOT + // claim a CROSS JOIN. + let stray = advanced_error_lines_for("select * from a on x = y").join("\n"); + assert!( + !stray.contains("CROSS JOIN has no ON"), + "no cross join present — must not fire: {stray}", + ); +} + /// The advanced-mode near-miss matrix (ADR-0042 §1/§3). Mirrors /// the simple-mode matrix for the SQL surface. Every row must show /// a per-command `usage:` block (never the available-commands