//! The complex WHERE-expression grammar fragment and its AST //! builder (ADR-0026). //! //! # The grammar //! //! A boolean WHERE expression — `a = 1 AND (b > 2 OR c LIKE //! 'x%')` — is recursive and carries operator precedence. The //! fragment is a **stratified** grammar: one named `static` //! `Node` per precedence tier. Stratification removes left //! recursion (every recursion is guarded by a token) and //! encodes precedence in the layering, so there is no separate //! precedence-resolution pass at walk time. //! //! ```text //! or_expr := and_expr ( OR and_expr )* //! and_expr := not_expr ( AND not_expr )* //! not_expr := NOT not_expr | bool_primary //! bool_primary := ( or_expr ) | predicate //! predicate := operand predicate_tail //! operand := literal | column_ref //! ``` //! //! The only recursion is `( or_expr )` and `NOT not_expr`; each //! consumes a token (`(` or `not`) before recursing, so the //! greedy top-down walker always makes progress. Both recurse //! through [`Node::Subgrammar`] — a `Seq` / `Choice` embeds its //! children by value and so cannot close a cycle. //! //! `predicate_tail` is factored so the shared `operand` prefix //! is matched once and the `Choice` branches discriminate on a //! token that fails cleanly (`NoMatch`, not a committed //! failure) — see the inline note on `PREDICATE_TAIL_CHOICES`. //! //! # Building the AST //! //! Per ADR-0026 §3 (option-1 realization, confirmed during //! implementation): the stratified grammar above is walked //! normally — its terminals flow into the flat `MatchedPath`, //! driving highlight / completion / the expected-set like any //! other command. [`build_expr`] then folds that flat //! terminal slice into a recursive [`Expr`]. The fold is a //! deterministic recursive descent mirroring the six grammar //! tiers; because the walk has already validated the input it //! cannot fail on well-formed input — the `Result` guards only //! against an internal grammar/builder drift bug. //! //! 
This is a second, submit-time-only pass over the expression //! tokens (parse-for-dispatch — per-keystroke completion never //! calls `build_expr`). It keeps the walker a pure structural //! matcher: `Expr` lives entirely in this builder and the //! command AST, never in the walker. The grammar tiers own //! validation / highlight / completion / no-left-recursion; //! `build_expr` owns the tree shape. use crate::dsl::command::{CompareOp, Expr, Operand, Predicate}; use crate::dsl::grammar::{HintMode, IdentSource, Node, ValidationError, Word}; use crate::dsl::value::Value; use crate::dsl::walker::context::WalkContext; use crate::dsl::walker::outcome::{MatchedItem, MatchedKind}; // ================================================================= // operand := null | true | false | | | // ================================================================= /// A column reference inside an expression. The `expr_column` /// role lets [`build_expr`] (and the command AST builders) tell /// an expression column apart from other identifier slots. /// /// `writes_column` so that, once a predicate's left operand /// resolves to a known column, the walker records it in /// `WalkContext::current_column` — the right operand's /// schema-aware slot (`where_rhs_operand`) reads it to narrow /// the hint panel to that column's type (ADR-0026 §8). const EXPR_COLUMN: Node = Node::Ident { source: IdentSource::Columns, role: "expr_column", validator: None, highlight_override: None, writes_table: false, writes_column: true, writes_user_listed_column: false, writes_table_alias: false, writes_cte_name: false, writes_projection_alias: false, }; /// Operand alternatives. The literal keywords (`null` / `true` /// / `false`) come before the column slot so they parse as /// literals; any other identifier is a column reference. No /// validators — a type-mismatched literal in a comparison is /// flagged in the editor but still parses (ADR-0026 §7). 
static OPERAND_CHOICES: &[Node] = &[ Node::Word(Word::keyword("null")), Node::Word(Word::keyword("true")), Node::Word(Word::keyword("false")), Node::NumberLit { validator: None }, Node::StringLit, EXPR_COLUMN, ]; /// The operand alternatives as a single node — the permissive /// inner of the schema-aware right-hand operand slot. static OPERAND_NODE: Node = Node::Choice(OPERAND_CHOICES); /// The right-hand operand of a predicate — the comparison RHS, /// a `LIKE` pattern, the `BETWEEN` bounds, the `IN` items — /// resolved at walk time (ADR-0026 §8). /// /// When the predicate's left operand resolved to a known /// column, this wraps the operand in a `TypedValueSlot` keyed /// on that column's user-facing type, so the hint panel /// narrows to the column's type and names the column. /// Otherwise a generic value-literal `Hinted` slot. Either way /// the inner grammar is the *permissive* operand choice /// (`OPERAND_NODE`) — a type-mismatched literal still matches /// (ADR-0026 §7); the mismatch is an editor flag, never a /// parse failure. fn where_rhs_operand(ctx: &WalkContext) -> Node { ctx.current_column.as_ref().map_or_else( || Node::Hinted { mode: HintMode::ProseOnly("hint.value_literal_slot"), inner: &OPERAND_NODE, }, |col| { // `Box::leak` mirrors `shared::slot_for_column` — // the leak is per distinct column (the walker // memoizes `DynamicSubgrammar` resolution on // `current_column`), not per keystroke. 
let leaked: &'static str = Box::leak(col.name.clone().into_boxed_str()); Node::TypedValueSlot { ty: col.user_type, column_name: Some(leaked), inner: &OPERAND_NODE, } }, ) } // ================================================================= // cmp_op := <= | <> | >= | != | < | > | = // ================================================================= // // Two-character operators come before their one-character // prefixes: `walk_literal` matches `<` against the `<` of `<=` // (the `<` has no word-boundary lookahead), so `<=` / `<>` must // be tried first or `a <= 1` would match a bare `<` and then // fail expecting an operand at `=`. static CMP_OP_CHOICES: &[Node] = &[ Node::Literal("<="), Node::Literal("<>"), Node::Literal(">="), Node::Literal("!="), Node::Literal("<"), Node::Literal(">"), Node::Literal("="), ]; // ================================================================= // predicate_tail branches // ================================================================= /// `cmp_op operand`. The right operand is the schema-aware /// `where_rhs_operand` so the hint panel can narrow to the /// left column's type. static COMPARE_FORM_NODES: &[Node] = &[ Node::Choice(CMP_OP_CHOICES), Node::DynamicSubgrammar(where_rhs_operand), ]; /// `IS [NOT] NULL`. static IS_NULL_NODES: &[Node] = &[ Node::Word(Word::keyword("is")), Node::Optional(&Node::Word(Word::keyword("not"))), Node::Word(Word::keyword("null")), ]; /// `LIKE operand`. static LIKE_FORM_NODES: &[Node] = &[ Node::Word(Word::keyword("like")), Node::DynamicSubgrammar(where_rhs_operand), ]; /// `BETWEEN operand AND operand`. The inner `and` is consumed /// here, so a stray `and` at the `and_expr` tier is always a /// connective. static BETWEEN_FORM_NODES: &[Node] = &[ Node::Word(Word::keyword("between")), Node::DynamicSubgrammar(where_rhs_operand), Node::Word(Word::keyword("and")), Node::DynamicSubgrammar(where_rhs_operand), ]; /// `IN ( operand [, operand]* )`. 
static IN_FORM_NODES: &[Node] = &[
    Node::Word(Word::keyword("in")),
    Node::Punct('('),
    // One or more comma-separated operands.
    Node::Repeated {
        inner: &Node::DynamicSubgrammar(where_rhs_operand),
        separator: Some(&Node::Punct(',')),
        min: 1,
    },
    Node::Punct(')'),
];

/// The negatable predicate bodies — each starts with a
/// distinct keyword, so this `Choice` discriminates cleanly.
static NEGATABLE_CHOICES: &[Node] = &[
    Node::Seq(LIKE_FORM_NODES),
    Node::Seq(BETWEEN_FORM_NODES),
    Node::Seq(IN_FORM_NODES),
];

/// `NOT (LIKE … | BETWEEN … | IN …)` — the infix `NOT` is
/// factored in front of the negatable choice (rather than
/// repeated inside each, which would strand `not between` on
/// the `LIKE` branch).
static NOT_NEGATABLE_NODES: &[Node] = &[
    Node::Word(Word::keyword("not")),
    Node::Choice(NEGATABLE_CHOICES),
];

/// `predicate_tail := cmp_op operand | IS [NOT] NULL
///                  | NOT negatable | negatable`.
///
/// Branch discrimination relies on each branch's *first* child
/// reporting a clean `NoMatch` on a non-match: branch 1 is a
/// `Choice` of punctuation operators, the rest start with a
/// `Word`. Crucially **no branch starts with an `Optional`** —
/// an `Optional`-first `Seq` always "commits", which turns its
/// failure into an `Incomplete` that the walker's `Choice`
/// returns early, discarding every sibling branch's expected
/// set (and so its completion candidates). The infix `NOT` is
/// therefore its own explicit `NOT negatable` branch, with a
/// bare `negatable` branch alongside.
static PREDICATE_TAIL_CHOICES: &[Node] = &[
    Node::Seq(COMPARE_FORM_NODES),
    Node::Seq(IS_NULL_NODES),
    Node::Seq(NOT_NEGATABLE_NODES),
    // The bare `negatable` alternatives, listed inline.
    Node::Seq(LIKE_FORM_NODES),
    Node::Seq(BETWEEN_FORM_NODES),
    Node::Seq(IN_FORM_NODES),
];

// =================================================================
// The stratified precedence tiers
// =================================================================

/// `predicate := operand predicate_tail`.
static PREDICATE_NODES: &[Node] = &[
    Node::Choice(OPERAND_CHOICES),
    Node::Choice(PREDICATE_TAIL_CHOICES),
];

/// The `predicate` tier as a single node.
static PREDICATE: Node = Node::Seq(PREDICATE_NODES);

/// `bool_primary := ( or_expr ) | predicate`.
///
/// The parenthesised branch: `(` is consumed before recursing
/// into `OR_EXPR` through `Node::Subgrammar`.
static PAREN_GROUP_NODES: &[Node] = &[
    Node::Punct('('),
    Node::Subgrammar(&OR_EXPR),
    Node::Punct(')'),
];

/// The two `bool_primary` alternatives — parenthesised group
/// first, then a plain predicate.
static BOOL_PRIMARY_CHOICES: &[Node] = &[
    Node::Seq(PAREN_GROUP_NODES),
    Node::Subgrammar(&PREDICATE),
];

/// The `bool_primary` tier as a single node.
static BOOL_PRIMARY: Node = Node::Choice(BOOL_PRIMARY_CHOICES);

/// `not_expr := NOT not_expr | bool_primary`.
///
/// The prefix-`NOT` branch: `not` is consumed before recursing
/// into `NOT_EXPR` through `Node::Subgrammar`.
static NOT_FORM_NODES: &[Node] = &[
    Node::Word(Word::keyword("not")),
    Node::Subgrammar(&NOT_EXPR),
];

/// The two `not_expr` alternatives — prefix `NOT` first, then
/// a `bool_primary`.
static NOT_EXPR_CHOICES: &[Node] = &[
    Node::Seq(NOT_FORM_NODES),
    Node::Subgrammar(&BOOL_PRIMARY),
];

/// The `not_expr` tier as a single node.
static NOT_EXPR: Node = Node::Choice(NOT_EXPR_CHOICES);

/// `and_expr := not_expr ( AND not_expr )*`.
///
/// One `AND not_expr` repetition.
static AND_TAIL_NODES: &[Node] = &[
    Node::Word(Word::keyword("and")),
    Node::Subgrammar(&NOT_EXPR),
];

/// The `AND not_expr` repetition body as a single node.
static AND_TAIL: Node = Node::Seq(AND_TAIL_NODES);

/// A leading `not_expr` followed by zero or more `AND_TAIL`s.
static AND_EXPR_NODES: &[Node] = &[
    Node::Subgrammar(&NOT_EXPR),
    Node::Repeated {
        inner: &AND_TAIL,
        separator: None,
        min: 0,
    },
];

/// The `and_expr` tier as a single node.
static AND_EXPR: Node = Node::Seq(AND_EXPR_NODES);

/// `or_expr := and_expr ( OR and_expr )*` — the fragment entry
/// point. `update` / `delete` / `show data` reference this
/// through `Node::Subgrammar(&OR_EXPR)`.
///
/// One `OR and_expr` repetition.
static OR_TAIL_NODES: &[Node] = &[
    Node::Word(Word::keyword("or")),
    Node::Subgrammar(&AND_EXPR),
];

/// The `OR and_expr` repetition body as a single node.
static OR_TAIL: Node = Node::Seq(OR_TAIL_NODES);

/// A leading `and_expr` followed by zero or more `OR_TAIL`s.
static OR_EXPR_NODES: &[Node] = &[
    Node::Subgrammar(&AND_EXPR),
    Node::Repeated {
        inner: &OR_TAIL,
        separator: None,
        min: 0,
    },
];

/// The `or_expr` tier — the loosest-binding tier and the root
/// of the expression grammar.
pub static OR_EXPR: Node = Node::Seq(OR_EXPR_NODES);

/// The WHERE-expression fragment, ready to drop into a command
/// `Seq` as one node. Walking it consumes a whole `or_expr`.
pub static EXPRESSION: Node = Node::Subgrammar(&OR_EXPR); // ================================================================= // build_expr — fold the flat terminal slice into an `Expr` // ================================================================= /// Reconstruct an [`Expr`] from the matched-terminal slice the /// walker produced for a WHERE clause (ADR-0026 §3). /// /// On well-formed input — input the grammar above already /// accepted — this is infallible; the `Err` path guards only /// against a grammar/builder drift bug. pub fn build_expr(items: &[MatchedItem]) -> Result { let mut parser = ExprParser { items, pos: 0 }; let expr = parser.parse_or()?; if parser.pos == items.len() { Ok(expr) } else { Err(drift_error("unconsumed tokens after the expression")) } } /// Cursor over the flat terminal slice. The methods mirror the /// grammar tiers one-to-one. struct ExprParser<'a> { items: &'a [MatchedItem], pos: usize, } impl<'a> ExprParser<'a> { fn peek(&self) -> Option<&'a MatchedItem> { self.items.get(self.pos) } fn advance(&mut self) -> Option<&'a MatchedItem> { let item = self.items.get(self.pos); if item.is_some() { self.pos += 1; } item } /// The literal carried by a `Word` terminal at the cursor — /// keyword primaries (`and`, `is`, …) and `Literal`-matched /// operators (`<=`, `=`, …) both surface as `Word`. 
fn peek_word_text(&self) -> Option<&'static str> { match self.peek()?.kind { MatchedKind::Word(w) => Some(w), _ => None, } } fn peek_word(&self, primary: &str) -> bool { self.peek_word_text() == Some(primary) } fn peek_punct(&self, ch: char) -> bool { matches!(self.peek().map(|i| &i.kind), Some(MatchedKind::Punct(c)) if *c == ch) } fn expect_word(&mut self, primary: &str) -> Result<(), ValidationError> { if self.peek_word(primary) { self.pos += 1; Ok(()) } else { Err(drift_error(&format!("expected `{primary}`"))) } } fn expect_punct(&mut self, ch: char) -> Result<(), ValidationError> { if self.peek_punct(ch) { self.pos += 1; Ok(()) } else { Err(drift_error(&format!("expected `{ch}`"))) } } /// Consume an infix `NOT` if present, reporting whether one /// was there. fn take_optional_not(&mut self) -> bool { if self.peek_word("not") { self.pos += 1; true } else { false } } /// `or_expr := and_expr ( OR and_expr )*`. fn parse_or(&mut self) -> Result { let mut terms = vec![self.parse_and()?]; while self.peek_word("or") { self.pos += 1; terms.push(self.parse_and()?); } Ok(collapse(terms, Expr::Or)) } /// `and_expr := not_expr ( AND not_expr )*`. fn parse_and(&mut self) -> Result { let mut terms = vec![self.parse_not()?]; while self.peek_word("and") { self.pos += 1; terms.push(self.parse_not()?); } Ok(collapse(terms, Expr::And)) } /// `not_expr := NOT not_expr | bool_primary`. fn parse_not(&mut self) -> Result { if self.peek_word("not") { self.pos += 1; Ok(Expr::Not(Box::new(self.parse_not()?))) } else { self.parse_bool_primary() } } /// `bool_primary := ( or_expr ) | predicate`. fn parse_bool_primary(&mut self) -> Result { if self.peek_punct('(') { self.pos += 1; let inner = self.parse_or()?; self.expect_punct(')')?; Ok(inner) } else { Ok(Expr::Predicate(self.parse_predicate()?)) } } /// `predicate := operand predicate_tail`. 
fn parse_predicate(&mut self) -> Result { let left = self.parse_operand()?; // cmp_op operand if let Some(op) = self.peek_compare_op() { self.pos += 1; let right = self.parse_operand()?; return Ok(Predicate::Compare { left, op, right }); } // IS [NOT] NULL — the `NOT` here is *after* `IS`. if self.peek_word("is") { self.pos += 1; let negated = self.take_optional_not(); self.expect_word("null")?; return Ok(Predicate::IsNull { target: left, negated, }); } // [NOT] (LIKE … | BETWEEN … | IN …) — the `NOT` here is // a leading prefix factored out in front of all three. let negated = self.take_optional_not(); if self.peek_word("like") { self.pos += 1; let pattern = self.parse_operand()?; return Ok(Predicate::Like { target: left, pattern, negated, }); } if self.peek_word("between") { self.pos += 1; let low = self.parse_operand()?; self.expect_word("and")?; let high = self.parse_operand()?; return Ok(Predicate::Between { target: left, low, high, negated, }); } if self.peek_word("in") { self.pos += 1; self.expect_punct('(')?; let mut list = vec![self.parse_operand()?]; while self.peek_punct(',') { self.pos += 1; list.push(self.parse_operand()?); } self.expect_punct(')')?; return Ok(Predicate::In { target: left, items: list, negated, }); } Err(drift_error("expected a predicate operator")) } /// The comparison operator at the cursor, if any. fn peek_compare_op(&self) -> Option { Some(match self.peek_word_text()? { "=" => CompareOp::Eq, "!=" | "<>" => CompareOp::NotEq, "<" => CompareOp::Lt, "<=" => CompareOp::LtEq, ">" => CompareOp::Gt, ">=" => CompareOp::GtEq, _ => return None, }) } /// `operand := literal | column_ref`. /// /// Every operand records the byte `span` of the terminal it /// was built from — the precise highlight target for an /// expression WARNING (ADR-0027). 
fn parse_operand(&mut self) -> Result { let item = self .advance() .ok_or_else(|| drift_error("expected an operand"))?; let span = item.span; let literal = |value: Value| Operand::Literal { value, span }; match &item.kind { MatchedKind::Ident { role: "expr_column", .. } => { Ok(Operand::Column { name: item.text.clone(), span }) } MatchedKind::Word("null") => Ok(literal(Value::Null)), MatchedKind::Word("true") => Ok(literal(Value::Bool(true))), MatchedKind::Word("false") => Ok(literal(Value::Bool(false))), MatchedKind::NumberLit => { Ok(literal(Value::Number(item.text.clone()))) } MatchedKind::StringLit => { Ok(literal(Value::Text(item.text.clone()))) } _ => Err(drift_error("expected a column or literal operand")), } } } /// Collapse a precedence tier: a single child needs no wrapper /// (ADR-0026 §4 — single-child tiers collapse). fn collapse(mut terms: Vec, wrap: impl FnOnce(Vec) -> Expr) -> Expr { if terms.len() == 1 { terms .pop() .expect("collapse is only ever called with a non-empty tier") } else { wrap(terms) } } /// A "this should not happen" builder error. The walk against /// the stratified grammar already validated the input, so a /// failure here means the grammar and `build_expr` have drifted /// apart — a bug, surfaced loudly rather than silently /// mis-built. fn drift_error(detail: &str) -> ValidationError { ValidationError { message_key: "parse.error_wrapper", args: vec![("detail", format!("malformed WHERE expression: {detail}"))], } } #[cfg(test)] mod tests { use super::{OR_EXPR, build_expr}; use crate::dsl::command::{CompareOp, Expr, Operand, Predicate}; use crate::dsl::value::Value; use crate::dsl::walker::context::WalkContext; use crate::dsl::walker::driver::{NodeWalkResult, walk_node}; use crate::dsl::walker::outcome::MatchedPath; /// Walk `input` against the expression fragment and fold /// the result. Panics if the walk doesn't consume all of /// `input` — the test corpus is all well-formed. 
fn parse_expr(input: &str) -> Expr { let mut ctx = WalkContext::new(); let mut path = MatchedPath::new(); let mut per_byte = Vec::new(); let result = walk_node(input, 0, &OR_EXPR, &mut ctx, &mut path, &mut per_byte); match result { NodeWalkResult::Matched { end, .. } => { assert!( input[end..].trim().is_empty(), "{input:?} not fully consumed; tail: {:?}", &input[end..], ); } other => panic!("expected {input:?} to walk; got {other:?}"), } build_expr(&path.items).expect("build_expr on a walked expression") } fn col(name: &str) -> Operand { Operand::Column { name: name.to_string(), span: Operand::NO_SPAN, } } /// A literal operand with no source span — the span is /// ignored by `Operand`'s `PartialEq`, so test trees built /// with this compare equal to walked ones. fn lit(value: Value) -> Operand { Operand::Literal { value, span: Operand::NO_SPAN, } } fn num(n: &str) -> Operand { lit(Value::Number(n.to_string())) } fn compare(left: Operand, op: CompareOp, right: Operand) -> Expr { Expr::Predicate(Predicate::Compare { left, op, right }) } #[test] fn builds_a_simple_comparison() { assert_eq!( parse_expr("Age = 18"), compare(col("Age"), CompareOp::Eq, num("18")), ); } #[test] fn builds_every_comparison_operator() { let cases = [ ("a = 1", CompareOp::Eq), ("a != 1", CompareOp::NotEq), ("a <> 1", CompareOp::NotEq), ("a < 1", CompareOp::Lt), ("a <= 1", CompareOp::LtEq), ("a > 1", CompareOp::Gt), ("a >= 1", CompareOp::GtEq), ]; for (input, op) in cases { assert_eq!( parse_expr(input), compare(col("a"), op, num("1")), "operator parse for {input:?}", ); } } #[test] fn string_and_keyword_literal_operands() { assert_eq!( parse_expr("Name = 'Ada'"), compare( col("Name"), CompareOp::Eq, lit(Value::Text("Ada".to_string())), ), ); assert_eq!( parse_expr("Active = true"), compare(col("Active"), CompareOp::Eq, lit(Value::Bool(true))), ); assert_eq!( parse_expr("a = -7"), compare(col("a"), CompareOp::Eq, num("-7")), ); } #[test] fn and_is_n_ary_and_flattens() { let Expr::And(terms) = 
parse_expr("a = 1 and b = 2 and c = 3") else { panic!("expected a flat And of three predicates"); }; assert_eq!(terms.len(), 3, "a AND b AND c is one And of three"); } #[test] fn or_binds_looser_than_and() { // a = 1 OR b = 2 AND c = 3 == a=1 OR (b=2 AND c=3) let Expr::Or(terms) = parse_expr("a = 1 or b = 2 and c = 3") else { panic!("top level should be Or"); }; assert_eq!(terms.len(), 2); assert_eq!(terms[0], compare(col("a"), CompareOp::Eq, num("1"))); assert!( matches!(&terms[1], Expr::And(inner) if inner.len() == 2), "the AND should nest under the OR, got {:?}", terms[1], ); } #[test] fn parentheses_override_precedence() { // (a = 1 OR b = 2) AND c = 3 — the And is on top. let Expr::And(terms) = parse_expr("(a = 1 or b = 2) and c = 3") else { panic!("parenthesised OR should nest under the AND"); }; assert_eq!(terms.len(), 2); assert!(matches!(&terms[0], Expr::Or(inner) if inner.len() == 2)); } #[test] fn not_wraps_its_operand() { assert_eq!( parse_expr("not a = 1"), Expr::Not(Box::new(compare(col("a"), CompareOp::Eq, num("1")))), ); } #[test] fn like_predicate_with_and_without_not() { assert_eq!( parse_expr("Name like 'A%'"), Expr::Predicate(Predicate::Like { target: col("Name"), pattern: lit(Value::Text("A%".to_string())), negated: false, }), ); let Expr::Predicate(Predicate::Like { negated, .. }) = parse_expr("Name not like 'A%'") else { panic!("expected a negated Like"); }; assert!(negated); } #[test] fn between_predicate_consumes_its_inner_and() { assert_eq!( parse_expr("Age between 18 and 65"), Expr::Predicate(Predicate::Between { target: col("Age"), low: num("18"), high: num("65"), negated: false, }), ); // The BETWEEN's `and` must not be read as an `and_expr` // connective — this stays a single predicate. assert!(matches!( parse_expr("Age not between 1 and 9"), Expr::Predicate(Predicate::Between { negated: true, .. }), )); } #[test] fn in_predicate_collects_its_items() { let Expr::Predicate(Predicate::In { items, negated, .. 
}) = parse_expr("Status in (1, 2, 3)") else { panic!("expected an In predicate"); }; assert_eq!(items.len(), 3); assert!(!negated); assert!(matches!( parse_expr("Status not in (1)"), Expr::Predicate(Predicate::In { negated: true, .. }), )); } #[test] fn is_null_predicate_with_and_without_not() { assert_eq!( parse_expr("Email is null"), Expr::Predicate(Predicate::IsNull { target: col("Email"), negated: false, }), ); assert_eq!( parse_expr("Email is not null"), Expr::Predicate(Predicate::IsNull { target: col("Email"), negated: true, }), ); } #[test] fn nested_parentheses_round_trip() { // Exercises the Subgrammar recursion a few levels deep. let expr = parse_expr("((a = 1 and b = 2) or (c = 3))"); assert!(matches!(expr, Expr::Or(_) | Expr::And(_) | Expr::Predicate(_))); } #[test] fn case_insensitive_keywords() { // Keywords fold case; the built tree is identical. assert_eq!( parse_expr("a = 1 AND b = 2"), parse_expr("a = 1 and b = 2"), ); assert_eq!( parse_expr("Email IS NOT NULL"), parse_expr("Email is not null"), ); } }