From 59e6a541bffb1f70e23921fa32e6f52e6e0acc31 Mon Sep 17 00:00:00 2001
From: "claude@clouddev1"
Date: Mon, 18 May 2026 22:40:52 +0000
Subject: [PATCH] grammar: WHERE-expression fragment + Expr AST + build_expr
 (ADR-0026 step 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The stratified WHERE-expression grammar — or / and / not /
bool_primary / predicate tiers as named `static` Node fragments,
recursing through `Subgrammar`. Covers the six comparison operators
(`<>` and `!=` both NotEq), AND / OR / NOT, parentheses, LIKE / IN /
BETWEEN with optional infix NOT, and IS [NOT] NULL. `predicate_tail`
factors the shared operand prefix and the infix NOT so the Choice
branches discriminate on a cleanly-failing first token.

New recursive Expr / Predicate / Operand / CompareOp AST in
dsl::command. `build_expr` folds the flat matched-terminal slice into
an Expr — a deterministic recursive descent mirroring the grammar
tiers, with single-child tiers collapsing.

Per ADR-0026 §3 option 1: the walker stays a pure structural matcher;
Expr is assembled only in this submit-time fold.

Fragment + builder are unit-tested standalone (walk against &OR_EXPR,
then build_expr); not yet wired into any command.
---
 src/dsl/command.rs      |  77 +++++
 src/dsl/grammar/expr.rs | 738 ++++++++++++++++++++++++++++++++++++++++
 src/dsl/grammar/mod.rs  |   2 +-
 3 files changed, 816 insertions(+), 1 deletion(-)
 create mode 100644 src/dsl/grammar/expr.rs

diff --git a/src/dsl/command.rs b/src/dsl/command.rs
index 2b5bec8..4ba4827 100644
--- a/src/dsl/command.rs
+++ b/src/dsl/command.rs
@@ -239,6 +239,83 @@ pub enum RowFilter {
     AllRows,
 }
 
+/// A complex WHERE expression (ADR-0026 §4).
+///
+/// Built by `grammar::expr::build_expr` from the flat
+/// matched-terminal slice the walker produces for a `where`
+/// clause. The recursion mirrors the stratified expression
+/// grammar — `Or` / `And` are n-ary (a flat `a AND b AND c` is
+/// one `And` of three children), and single-child precedence
+/// tiers collapse so a bare predicate reached through the
+/// `or → and → not` layers is just the `Predicate`, not three
+/// wrappers.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Expr {
+    /// `a OR b OR …` — at least two children.
+    Or(Vec<Expr>),
+    /// `a AND b AND …` — at least two children.
+    And(Vec<Expr>),
+    /// `NOT <expr>`.
+    Not(Box<Expr>),
+    /// A leaf comparison / match test.
+    Predicate(Predicate),
+}
+
+/// A single comparison or match test inside an [`Expr`]
+/// (ADR-0026 §4). Operands are always a column reference or a
+/// literal — never a nested expression.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Predicate {
+    /// `<operand> <op> <operand>` — one of the six comparisons.
+    Compare {
+        left: Operand,
+        op: CompareOp,
+        right: Operand,
+    },
+    /// `<operand> [NOT] LIKE <pattern>` — `%` / `_` wildcards.
+    Like {
+        target: Operand,
+        pattern: Operand,
+        negated: bool,
+    },
+    /// `<operand> [NOT] BETWEEN <low> AND <high>`.
+    Between {
+        target: Operand,
+        low: Operand,
+        high: Operand,
+        negated: bool,
+    },
+    /// `<operand> [NOT] IN (<operand>[, <operand> …])`.
+    In {
+        target: Operand,
+        items: Vec<Operand>,
+        negated: bool,
+    },
+    /// `<operand> IS [NOT] NULL`.
+    IsNull { target: Operand, negated: bool },
+}
+
+/// A comparison operand — a column reference or a literal
+/// (ADR-0026 §1: operands are never nested expressions).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Operand {
+    Column(String),
+    Literal(Value),
+}
+
+/// The six comparison operators. `<>` and `!=` both parse to
+/// `NotEq` — `<>` is standard SQL, `!=` the common variant
+/// (ADR-0026 §1).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CompareOp {
+    Eq,
+    NotEq,
+    Lt,
+    LtEq,
+    Gt,
+    GtEq,
+}
+
 /// How a `drop relationship` command identifies the relationship
 /// to remove. Both forms are accepted; the executor resolves to
 /// a single row in the metadata table.
diff --git a/src/dsl/grammar/expr.rs b/src/dsl/grammar/expr.rs
new file mode 100644
index 0000000..e16c6be
--- /dev/null
+++ b/src/dsl/grammar/expr.rs
@@ -0,0 +1,738 @@
+//! The complex WHERE-expression grammar fragment and its AST
+//! builder (ADR-0026).
+//!
+//! # The grammar
+//!
+//! A boolean WHERE expression — `a = 1 AND (b > 2 OR c LIKE
+//! 'x%')` — is recursive and carries operator precedence. The
+//! fragment is a **stratified** grammar: one named `static`
+//! `Node` per precedence tier. Stratification removes left
+//! recursion (every recursion is guarded by a token) and
+//! encodes precedence in the layering, so there is no separate
+//! precedence-resolution pass at walk time.
+//!
+//! ```text
+//! or_expr      := and_expr ( OR and_expr )*
+//! and_expr     := not_expr ( AND not_expr )*
+//! not_expr     := NOT not_expr | bool_primary
+//! bool_primary := ( or_expr ) | predicate
+//! predicate    := operand predicate_tail
+//! operand      := literal | column_ref
+//! ```
+//!
+//! The only recursion is `( or_expr )` and `NOT not_expr`; each
+//! consumes a token (`(` or `not`) before recursing, so the
+//! greedy top-down walker always makes progress. Both recurse
+//! through [`Node::Subgrammar`] — a `Seq` / `Choice` embeds its
+//! children by value and so cannot close a cycle.
+//!
+//! `predicate_tail` is factored so the shared `operand` prefix
+//! is matched once and the `Choice` branches discriminate on a
+//! token that fails cleanly (`NoMatch`, not a committed
+//! failure) — see the inline note on `PREDICATE_TAIL_CHOICES`.
+//!
+//! # Building the AST
+//!
+//! Per ADR-0026 §3 (option-1 realization, confirmed during
+//! implementation): the stratified grammar above is walked
+//! normally — its terminals flow into the flat `MatchedPath`,
+//! driving highlight / completion / the expected-set like any
+//! other command. [`build_expr`] then folds that flat
+//! terminal slice into a recursive [`Expr`]. The fold is a
+//! deterministic recursive descent mirroring the six grammar
+//! tiers; because the walk has already validated the input it
+//! cannot fail on well-formed input — the `Result` guards only
+//! against an internal grammar/builder drift bug.
+//!
+//! This is a second, submit-time-only pass over the expression
+//! tokens (parse-for-dispatch — per-keystroke completion never
+//! calls `build_expr`). It keeps the walker a pure structural
+//! matcher: `Expr` lives entirely in this builder and the
+//! command AST, never in the walker. The grammar tiers own
+//! validation / highlight / completion / no-left-recursion;
+//! `build_expr` owns the tree shape.
+
+use crate::dsl::command::{CompareOp, Expr, Operand, Predicate};
+use crate::dsl::grammar::{IdentSource, Node, ValidationError, Word};
+use crate::dsl::value::Value;
+use crate::dsl::walker::outcome::{MatchedItem, MatchedKind};
+
+// =================================================================
+// operand := null | true | false | <number> | <string> | <column>
+// =================================================================
+
+/// A column reference inside an expression. The `expr_column`
+/// role lets [`build_expr`] (and the command AST builders) tell
+/// an expression column apart from other identifier slots.
+const EXPR_COLUMN: Node = Node::Ident {
+    source: IdentSource::Columns,
+    role: "expr_column",
+    validator: None,
+    highlight_override: None,
+    writes_table: false,
+    writes_column: false,
+    writes_user_listed_column: false,
+};
+
+/// Operand alternatives. The literal keywords (`null` / `true`
+/// / `false`) come before the column slot so they parse as
+/// literals; any other identifier is a column reference.
+static OPERAND_CHOICES: &[Node] = &[
+    Node::Word(Word::keyword("null")),
+    Node::Word(Word::keyword("true")),
+    Node::Word(Word::keyword("false")),
+    Node::NumberLit { validator: None },
+    Node::StringLit,
+    EXPR_COLUMN,
+];
+
+// =================================================================
+// cmp_op := <= | <> | >= | != | < | > | =
+// =================================================================
+//
+// Two-character operators come before their one-character
+// prefixes: `walk_literal` matches `<` against the `<` of `<=`
+// (the `<` has no word-boundary lookahead), so `<=` / `<>` must
+// be tried first or `a <= 1` would match a bare `<` and then
+// fail expecting an operand at `=`.
+
+static CMP_OP_CHOICES: &[Node] = &[
+    Node::Literal("<="),
+    Node::Literal("<>"),
+    Node::Literal(">="),
+    Node::Literal("!="),
+    Node::Literal("<"),
+    Node::Literal(">"),
+    Node::Literal("="),
+];
+
+// =================================================================
+// predicate_tail branches
+// =================================================================
+
+/// `cmp_op operand`.
+static COMPARE_FORM_NODES: &[Node] = &[
+    Node::Choice(CMP_OP_CHOICES),
+    Node::Choice(OPERAND_CHOICES),
+];
+
+/// `IS [NOT] NULL`.
+static IS_NULL_NODES: &[Node] = &[
+    Node::Word(Word::keyword("is")),
+    Node::Optional(&Node::Word(Word::keyword("not"))),
+    Node::Word(Word::keyword("null")),
+];
+
+/// `LIKE operand`.
+static LIKE_FORM_NODES: &[Node] = &[
+    Node::Word(Word::keyword("like")),
+    Node::Choice(OPERAND_CHOICES),
+];
+
+/// `BETWEEN operand AND operand`. The inner `and` is consumed
+/// here, so a stray `and` at the `and_expr` tier is always a
+/// connective.
+static BETWEEN_FORM_NODES: &[Node] = &[
+    Node::Word(Word::keyword("between")),
+    Node::Choice(OPERAND_CHOICES),
+    Node::Word(Word::keyword("and")),
+    Node::Choice(OPERAND_CHOICES),
+];
+
+/// `IN ( operand [, operand]* )`.
+static IN_FORM_NODES: &[Node] = &[
+    Node::Word(Word::keyword("in")),
+    Node::Punct('('),
+    Node::Repeated {
+        inner: &Node::Choice(OPERAND_CHOICES),
+        separator: Some(&Node::Punct(',')),
+        min: 1,
+    },
+    Node::Punct(')'),
+];
+
+/// The negatable predicates — each starts with a distinct
+/// keyword, so this `Choice` discriminates cleanly.
+static NEGATABLE_CHOICES: &[Node] = &[
+    Node::Seq(LIKE_FORM_NODES),
+    Node::Seq(BETWEEN_FORM_NODES),
+    Node::Seq(IN_FORM_NODES),
+];
+
+/// `[NOT] (LIKE … | BETWEEN … | IN …)`.
+static NEGATABLE_NODES: &[Node] = &[
+    Node::Optional(&Node::Word(Word::keyword("not"))),
+    Node::Choice(NEGATABLE_CHOICES),
+];
+
+/// `predicate_tail := cmp_op operand | IS [NOT] NULL | [NOT]
+/// negatable`.
+///
+/// Branch discrimination: a `Choice` branch falls through to
+/// the next branch only when its *first* child reports a clean
+/// `NoMatch`. Branch 1's first child is `Choice(CMP_OP_CHOICES)`
+/// (all punctuation — clean `NoMatch` on a non-operator);
+/// branch 2's is `Word("is")`. Branch 3 starts with
+/// `Optional(not)`, which always "matches" — so it must be
+/// **last**, and the infix `NOT` is factored out in front of
+/// the `LIKE` / `BETWEEN` / `IN` choice rather than repeated
+/// inside each (which would strand `not between` on the `LIKE`
+/// branch).
+static PREDICATE_TAIL_CHOICES: &[Node] = &[
+    Node::Seq(COMPARE_FORM_NODES),
+    Node::Seq(IS_NULL_NODES),
+    Node::Seq(NEGATABLE_NODES),
+];
+
+// =================================================================
+// The stratified precedence tiers
+// =================================================================
+
+/// `predicate := operand predicate_tail`.
+static PREDICATE_NODES: &[Node] = &[
+    Node::Choice(OPERAND_CHOICES),
+    Node::Choice(PREDICATE_TAIL_CHOICES),
+];
+static PREDICATE: Node = Node::Seq(PREDICATE_NODES);
+
+/// `bool_primary := ( or_expr ) | predicate`.
+static PAREN_GROUP_NODES: &[Node] = &[
+    Node::Punct('('),
+    Node::Subgrammar(&OR_EXPR),
+    Node::Punct(')'),
+];
+static BOOL_PRIMARY_CHOICES: &[Node] = &[
+    Node::Seq(PAREN_GROUP_NODES),
+    Node::Subgrammar(&PREDICATE),
+];
+static BOOL_PRIMARY: Node = Node::Choice(BOOL_PRIMARY_CHOICES);
+
+/// `not_expr := NOT not_expr | bool_primary`.
+static NOT_FORM_NODES: &[Node] = &[
+    Node::Word(Word::keyword("not")),
+    Node::Subgrammar(&NOT_EXPR),
+];
+static NOT_EXPR_CHOICES: &[Node] = &[
+    Node::Seq(NOT_FORM_NODES),
+    Node::Subgrammar(&BOOL_PRIMARY),
+];
+static NOT_EXPR: Node = Node::Choice(NOT_EXPR_CHOICES);
+
+/// `and_expr := not_expr ( AND not_expr )*`.
+static AND_TAIL_NODES: &[Node] = &[
+    Node::Word(Word::keyword("and")),
+    Node::Subgrammar(&NOT_EXPR),
+];
+static AND_TAIL: Node = Node::Seq(AND_TAIL_NODES);
+static AND_EXPR_NODES: &[Node] = &[
+    Node::Subgrammar(&NOT_EXPR),
+    Node::Repeated {
+        inner: &AND_TAIL,
+        separator: None,
+        min: 0,
+    },
+];
+static AND_EXPR: Node = Node::Seq(AND_EXPR_NODES);
+
+/// `or_expr := and_expr ( OR and_expr )*` — the fragment entry
+/// point. `update` / `delete` / `show data` reference this
+/// through `Node::Subgrammar(&OR_EXPR)`.
+static OR_TAIL_NODES: &[Node] = &[
+    Node::Word(Word::keyword("or")),
+    Node::Subgrammar(&AND_EXPR),
+];
+static OR_TAIL: Node = Node::Seq(OR_TAIL_NODES);
+static OR_EXPR_NODES: &[Node] = &[
+    Node::Subgrammar(&AND_EXPR),
+    Node::Repeated {
+        inner: &OR_TAIL,
+        separator: None,
+        min: 0,
+    },
+];
+pub static OR_EXPR: Node = Node::Seq(OR_EXPR_NODES);
+
+/// The WHERE-expression fragment, ready to drop into a command
+/// `Seq` as one node. Walking it consumes a whole `or_expr`.
+pub static EXPRESSION: Node = Node::Subgrammar(&OR_EXPR);
+
+// =================================================================
+// build_expr — fold the flat terminal slice into an `Expr`
+// =================================================================
+
+/// Reconstruct an [`Expr`] from the matched-terminal slice the
+/// walker produced for a WHERE clause (ADR-0026 §3).
+///
+/// On well-formed input — input the grammar above already
+/// accepted — this is infallible; the `Err` path guards only
+/// against a grammar/builder drift bug.
+pub fn build_expr(items: &[MatchedItem]) -> Result<Expr, ValidationError> {
+    let mut parser = ExprParser { items, pos: 0 };
+    let expr = parser.parse_or()?;
+    if parser.pos == items.len() {
+        Ok(expr)
+    } else {
+        Err(drift_error("unconsumed tokens after the expression"))
+    }
+}
+
+/// Cursor over the flat terminal slice. The methods mirror the
+/// grammar tiers one-to-one.
+struct ExprParser<'a> {
+    items: &'a [MatchedItem],
+    pos: usize,
+}
+
+impl<'a> ExprParser<'a> {
+    fn peek(&self) -> Option<&'a MatchedItem> {
+        self.items.get(self.pos)
+    }
+
+    fn advance(&mut self) -> Option<&'a MatchedItem> {
+        let item = self.items.get(self.pos);
+        if item.is_some() {
+            self.pos += 1;
+        }
+        item
+    }
+
+    /// The literal carried by a `Word` terminal at the cursor —
+    /// keyword primaries (`and`, `is`, …) and `Literal`-matched
+    /// operators (`<=`, `=`, …) both surface as `Word`.
+    fn peek_word_text(&self) -> Option<&'static str> {
+        match self.peek()?.kind {
+            MatchedKind::Word(w) => Some(w),
+            _ => None,
+        }
+    }
+
+    fn peek_word(&self, primary: &str) -> bool {
+        self.peek_word_text() == Some(primary)
+    }
+
+    fn peek_punct(&self, ch: char) -> bool {
+        matches!(self.peek().map(|i| &i.kind), Some(MatchedKind::Punct(c)) if *c == ch)
+    }
+
+    fn expect_word(&mut self, primary: &str) -> Result<(), ValidationError> {
+        if self.peek_word(primary) {
+            self.pos += 1;
+            Ok(())
+        } else {
+            Err(drift_error(&format!("expected `{primary}`")))
+        }
+    }
+
+    fn expect_punct(&mut self, ch: char) -> Result<(), ValidationError> {
+        if self.peek_punct(ch) {
+            self.pos += 1;
+            Ok(())
+        } else {
+            Err(drift_error(&format!("expected `{ch}`")))
+        }
+    }
+
+    /// Consume an infix `NOT` if present, reporting whether one
+    /// was there.
+    fn take_optional_not(&mut self) -> bool {
+        if self.peek_word("not") {
+            self.pos += 1;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// `or_expr := and_expr ( OR and_expr )*`.
+    fn parse_or(&mut self) -> Result<Expr, ValidationError> {
+        let mut terms = vec![self.parse_and()?];
+        while self.peek_word("or") {
+            self.pos += 1;
+            terms.push(self.parse_and()?);
+        }
+        Ok(collapse(terms, Expr::Or))
+    }
+
+    /// `and_expr := not_expr ( AND not_expr )*`.
+    fn parse_and(&mut self) -> Result<Expr, ValidationError> {
+        let mut terms = vec![self.parse_not()?];
+        while self.peek_word("and") {
+            self.pos += 1;
+            terms.push(self.parse_not()?);
+        }
+        Ok(collapse(terms, Expr::And))
+    }
+
+    /// `not_expr := NOT not_expr | bool_primary`.
+    fn parse_not(&mut self) -> Result<Expr, ValidationError> {
+        if self.peek_word("not") {
+            self.pos += 1;
+            Ok(Expr::Not(Box::new(self.parse_not()?)))
+        } else {
+            self.parse_bool_primary()
+        }
+    }
+
+    /// `bool_primary := ( or_expr ) | predicate`.
+    fn parse_bool_primary(&mut self) -> Result<Expr, ValidationError> {
+        if self.peek_punct('(') {
+            self.pos += 1;
+            let inner = self.parse_or()?;
+            self.expect_punct(')')?;
+            Ok(inner)
+        } else {
+            Ok(Expr::Predicate(self.parse_predicate()?))
+        }
+    }
+
+    /// `predicate := operand predicate_tail`.
+    fn parse_predicate(&mut self) -> Result<Predicate, ValidationError> {
+        let left = self.parse_operand()?;
+
+        // cmp_op operand
+        if let Some(op) = self.peek_compare_op() {
+            self.pos += 1;
+            let right = self.parse_operand()?;
+            return Ok(Predicate::Compare { left, op, right });
+        }
+
+        // IS [NOT] NULL — the `NOT` here is *after* `IS`.
+        if self.peek_word("is") {
+            self.pos += 1;
+            let negated = self.take_optional_not();
+            self.expect_word("null")?;
+            return Ok(Predicate::IsNull {
+                target: left,
+                negated,
+            });
+        }
+
+        // [NOT] (LIKE … | BETWEEN … | IN …) — the `NOT` here is
+        // a leading prefix factored out in front of all three.
+        let negated = self.take_optional_not();
+        if self.peek_word("like") {
+            self.pos += 1;
+            let pattern = self.parse_operand()?;
+            return Ok(Predicate::Like {
+                target: left,
+                pattern,
+                negated,
+            });
+        }
+        if self.peek_word("between") {
+            self.pos += 1;
+            let low = self.parse_operand()?;
+            self.expect_word("and")?;
+            let high = self.parse_operand()?;
+            return Ok(Predicate::Between {
+                target: left,
+                low,
+                high,
+                negated,
+            });
+        }
+        if self.peek_word("in") {
+            self.pos += 1;
+            self.expect_punct('(')?;
+            let mut list = vec![self.parse_operand()?];
+            while self.peek_punct(',') {
+                self.pos += 1;
+                list.push(self.parse_operand()?);
+            }
+            self.expect_punct(')')?;
+            return Ok(Predicate::In {
+                target: left,
+                items: list,
+                negated,
+            });
+        }
+
+        Err(drift_error("expected a predicate operator"))
+    }
+
+    /// The comparison operator at the cursor, if any.
+    fn peek_compare_op(&self) -> Option<CompareOp> {
+        Some(match self.peek_word_text()? {
+            "=" => CompareOp::Eq,
+            "!=" | "<>" => CompareOp::NotEq,
+            "<" => CompareOp::Lt,
+            "<=" => CompareOp::LtEq,
+            ">" => CompareOp::Gt,
+            ">=" => CompareOp::GtEq,
+            _ => return None,
+        })
+    }
+
+    /// `operand := literal | column_ref`.
+    fn parse_operand(&mut self) -> Result<Operand, ValidationError> {
+        let item = self
+            .advance()
+            .ok_or_else(|| drift_error("expected an operand"))?;
+        match &item.kind {
+            MatchedKind::Ident { role: "expr_column" } => {
+                Ok(Operand::Column(item.text.clone()))
+            }
+            MatchedKind::Word("null") => Ok(Operand::Literal(Value::Null)),
+            MatchedKind::Word("true") => Ok(Operand::Literal(Value::Bool(true))),
+            MatchedKind::Word("false") => Ok(Operand::Literal(Value::Bool(false))),
+            MatchedKind::NumberLit => {
+                Ok(Operand::Literal(Value::Number(item.text.clone())))
+            }
+            MatchedKind::StringLit => {
+                Ok(Operand::Literal(Value::Text(item.text.clone())))
+            }
+            _ => Err(drift_error("expected a column or literal operand")),
+        }
+    }
+}
+
+/// Collapse a precedence tier: a single child needs no wrapper
+/// (ADR-0026 §4 — single-child tiers collapse).
+fn collapse(mut terms: Vec<Expr>, wrap: impl FnOnce(Vec<Expr>) -> Expr) -> Expr {
+    if terms.len() == 1 {
+        terms
+            .pop()
+            .expect("collapse is only ever called with a non-empty tier")
+    } else {
+        wrap(terms)
+    }
+}
+
+/// A "this should not happen" builder error. The walk against
+/// the stratified grammar already validated the input, so a
+/// failure here means the grammar and `build_expr` have drifted
+/// apart — a bug, surfaced loudly rather than silently
+/// mis-built.
+fn drift_error(detail: &str) -> ValidationError {
+    ValidationError {
+        message_key: "parse.error_wrapper",
+        args: vec![("detail", format!("malformed WHERE expression: {detail}"))],
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{OR_EXPR, build_expr};
+    use crate::dsl::command::{CompareOp, Expr, Operand, Predicate};
+    use crate::dsl::value::Value;
+    use crate::dsl::walker::context::WalkContext;
+    use crate::dsl::walker::driver::{NodeWalkResult, walk_node};
+    use crate::dsl::walker::outcome::MatchedPath;
+
+    /// Walk `input` against the expression fragment and fold
+    /// the result. Panics if the walk doesn't consume all of
+    /// `input` — the test corpus is all well-formed.
+    fn parse_expr(input: &str) -> Expr {
+        let mut ctx = WalkContext::new();
+        let mut path = MatchedPath::new();
+        let mut per_byte = Vec::new();
+        let result =
+            walk_node(input, 0, &OR_EXPR, &mut ctx, &mut path, &mut per_byte);
+        match result {
+            NodeWalkResult::Matched { end, .. } => {
+                assert!(
+                    input[end..].trim().is_empty(),
+                    "{input:?} not fully consumed; tail: {:?}",
+                    &input[end..],
+                );
+            }
+            other => panic!("expected {input:?} to walk; got {other:?}"),
+        }
+        build_expr(&path.items).expect("build_expr on a walked expression")
+    }
+
+    fn col(name: &str) -> Operand {
+        Operand::Column(name.to_string())
+    }
+
+    fn num(n: &str) -> Operand {
+        Operand::Literal(Value::Number(n.to_string()))
+    }
+
+    fn compare(left: Operand, op: CompareOp, right: Operand) -> Expr {
+        Expr::Predicate(Predicate::Compare { left, op, right })
+    }
+
+    #[test]
+    fn builds_a_simple_comparison() {
+        assert_eq!(
+            parse_expr("Age = 18"),
+            compare(col("Age"), CompareOp::Eq, num("18")),
+        );
+    }
+
+    #[test]
+    fn builds_every_comparison_operator() {
+        let cases = [
+            ("a = 1", CompareOp::Eq),
+            ("a != 1", CompareOp::NotEq),
+            ("a <> 1", CompareOp::NotEq),
+            ("a < 1", CompareOp::Lt),
+            ("a <= 1", CompareOp::LtEq),
+            ("a > 1", CompareOp::Gt),
+            ("a >= 1", CompareOp::GtEq),
+        ];
+        for (input, op) in cases {
+            assert_eq!(
+                parse_expr(input),
+                compare(col("a"), op, num("1")),
+                "operator parse for {input:?}",
+            );
+        }
+    }
+
+    #[test]
+    fn string_and_keyword_literal_operands() {
+        assert_eq!(
+            parse_expr("Name = 'Ada'"),
+            compare(
+                col("Name"),
+                CompareOp::Eq,
+                Operand::Literal(Value::Text("Ada".to_string())),
+            ),
+        );
+        assert_eq!(
+            parse_expr("Active = true"),
+            compare(
+                col("Active"),
+                CompareOp::Eq,
+                Operand::Literal(Value::Bool(true)),
+            ),
+        );
+        assert_eq!(
+            parse_expr("a = -7"),
+            compare(col("a"), CompareOp::Eq, num("-7")),
+        );
+    }
+
+    #[test]
+    fn and_is_n_ary_and_flattens() {
+        let Expr::And(terms) = parse_expr("a = 1 and b = 2 and c = 3") else {
+            panic!("expected a flat And of three predicates");
+        };
+        assert_eq!(terms.len(), 3, "a AND b AND c is one And of three");
+    }
+
+    #[test]
+    fn or_binds_looser_than_and() {
+        // a = 1 OR b = 2 AND c = 3 == a=1 OR (b=2 AND c=3)
+        let Expr::Or(terms) = parse_expr("a = 1 or b = 2 and c = 3") else {
+            panic!("top level should be Or");
+        };
+        assert_eq!(terms.len(), 2);
+        assert_eq!(terms[0], compare(col("a"), CompareOp::Eq, num("1")));
+        assert!(
+            matches!(&terms[1], Expr::And(inner) if inner.len() == 2),
+            "the AND should nest under the OR, got {:?}",
+            terms[1],
+        );
+    }
+
+    #[test]
+    fn parentheses_override_precedence() {
+        // (a = 1 OR b = 2) AND c = 3 — the And is on top.
+        let Expr::And(terms) = parse_expr("(a = 1 or b = 2) and c = 3") else {
+            panic!("parenthesised OR should nest under the AND");
+        };
+        assert_eq!(terms.len(), 2);
+        assert!(matches!(&terms[0], Expr::Or(inner) if inner.len() == 2));
+    }
+
+    #[test]
+    fn not_wraps_its_operand() {
+        assert_eq!(
+            parse_expr("not a = 1"),
+            Expr::Not(Box::new(compare(col("a"), CompareOp::Eq, num("1")))),
+        );
+    }
+
+    #[test]
+    fn like_predicate_with_and_without_not() {
+        assert_eq!(
+            parse_expr("Name like 'A%'"),
+            Expr::Predicate(Predicate::Like {
+                target: col("Name"),
+                pattern: Operand::Literal(Value::Text("A%".to_string())),
+                negated: false,
+            }),
+        );
+        let Expr::Predicate(Predicate::Like { negated, .. }) =
+            parse_expr("Name not like 'A%'")
+        else {
+            panic!("expected a negated Like");
+        };
+        assert!(negated);
+    }
+
+    #[test]
+    fn between_predicate_consumes_its_inner_and() {
+        assert_eq!(
+            parse_expr("Age between 18 and 65"),
+            Expr::Predicate(Predicate::Between {
+                target: col("Age"),
+                low: num("18"),
+                high: num("65"),
+                negated: false,
+            }),
+        );
+        // The BETWEEN's `and` must not be read as an `and_expr`
+        // connective — this stays a single predicate.
+        assert!(matches!(
+            parse_expr("Age not between 1 and 9"),
+            Expr::Predicate(Predicate::Between { negated: true, .. }),
+        ));
+    }
+
+    #[test]
+    fn in_predicate_collects_its_items() {
+        let Expr::Predicate(Predicate::In { items, negated, .. }) =
+            parse_expr("Status in (1, 2, 3)")
+        else {
+            panic!("expected an In predicate");
+        };
+        assert_eq!(items.len(), 3);
+        assert!(!negated);
+        assert!(matches!(
+            parse_expr("Status not in (1)"),
+            Expr::Predicate(Predicate::In { negated: true, .. }),
+        ));
+    }
+
+    #[test]
+    fn is_null_predicate_with_and_without_not() {
+        assert_eq!(
+            parse_expr("Email is null"),
+            Expr::Predicate(Predicate::IsNull {
+                target: col("Email"),
+                negated: false,
+            }),
+        );
+        assert_eq!(
+            parse_expr("Email is not null"),
+            Expr::Predicate(Predicate::IsNull {
+                target: col("Email"),
+                negated: true,
+            }),
+        );
+    }
+
+    #[test]
+    fn nested_parentheses_round_trip() {
+        // Nested Subgrammar recursion: redundant parens leave no trace in the tree.
+        let expr = parse_expr("((a = 1 and b = 2) or (c = 3))");
+        assert_eq!(expr, parse_expr("a = 1 and b = 2 or c = 3"));
+    }
+
+    #[test]
+    fn case_insensitive_keywords() {
+        // Keywords fold case; the built tree is identical.
+        assert_eq!(
+            parse_expr("a = 1 AND b = 2"),
+            parse_expr("a = 1 and b = 2"),
+        );
+        assert_eq!(
+            parse_expr("Email IS NOT NULL"),
+            parse_expr("Email is not null"),
+        );
+    }
+}
diff --git a/src/dsl/grammar/mod.rs b/src/dsl/grammar/mod.rs
index 241ba99..0416fcb 100644
--- a/src/dsl/grammar/mod.rs
+++ b/src/dsl/grammar/mod.rs
@@ -25,6 +25,7 @@
 pub mod app;
 pub mod data;
 pub mod ddl;
+pub mod expr;
 pub mod shared;
 
 use crate::dsl::command::Command;
@@ -312,7 +313,6 @@ pub enum Node {
     /// builds a fresh node from the `WalkContext` at walk time;
     /// this one references a fixed fragment already in the
     /// grammar tree.
-    #[allow(dead_code)]
     Subgrammar(&'static Self),
     /// Resolves at walk time using the active `WalkContext`.
     /// Phase D+ uses this for `column_value_list`. The factory