//! Parse a simple `<col> IN ('a', 'b', …)` CHECK into its allowed
//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds
//! from the permitted values instead of generic text. Anything more
//! complex (ranges, expressions, multi-column, non-literal items)
//! returns `None`; the executor then best-effort generates and lets a
//! violation surface through the friendly-error layer.

/// Extract the string-literal values of a `<col> IN ( … )` CHECK.
///
/// Case-insensitive on the `IN` keyword and the column name; tolerates a
/// quoted column (`"status"`). Every list item must be a single-quoted
/// string literal (`''` is an embedded quote). Returns `None` for any
/// other shape, including an empty list.
///
/// Only the text up to the list's closing `)` is examined; whatever
/// follows the list is not inspected.
#[must_use]
pub fn parse_in_check_values(check: &str, column: &str) -> Option<Vec<String>> {
    let (in_idx, paren_open) = find_in_paren(check)?;
    // Both indices point at ASCII bytes (`i`/`I` and `(`), so slicing
    // here always lands on a UTF-8 character boundary.
    if !lhs_is_column(check[..in_idx].trim(), column) {
        return None;
    }
    let values = extract_quoted_list(&check[paren_open..])?;
    if values.is_empty() {
        None
    } else {
        Some(values)
    }
}

/// Whether `b` can be part of an identifier (ASCII letter, digit, or `_`).
const fn is_ident_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// Find the `IN` keyword (as a word, outside string literals) that is
/// followed by `(`. Returns `(byte index of `IN`, byte index of `(`)`.
fn find_in_paren(check: &str) -> Option<(usize, usize)> {
    let bytes = check.as_bytes();
    let mut i = 0;
    let mut in_quote = false;
    while i < bytes.len() {
        let b = bytes[i];
        if in_quote {
            // An escaped `''` simply closes and immediately reopens the
            // literal, so a plain toggle is enough to track it.
            if b == b'\'' {
                in_quote = false;
            }
            i += 1;
            continue;
        }
        if b == b'\'' {
            in_quote = true;
            i += 1;
            continue;
        }
        let is_in = (b == b'i' || b == b'I')
            && bytes.get(i + 1).is_some_and(|n| *n == b'n' || *n == b'N');
        if is_in {
            // Require word boundaries on both sides so `min(` or `index`
            // are not mistaken for the operator.
            let before_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
            let after = i + 2;
            let after_ok = bytes.get(after).is_none_or(|n| !is_ident_byte(*n));
            if before_ok && after_ok {
                let mut k = after;
                while bytes.get(k).is_some_and(u8::is_ascii_whitespace) {
                    k += 1;
                }
                if bytes.get(k) == Some(&b'(') {
                    return Some((i, k));
                }
            }
        }
        i += 1;
    }
    None
}

/// Whether `lhs` (the text left of `IN`) is exactly `column`, compared
/// ASCII-case-insensitively after trimming and after removing one pair
/// of surrounding double quotes if present.
fn lhs_is_column(lhs: &str, column: &str) -> bool {
    let t = lhs.trim();
    let stripped = t
        .strip_prefix('"')
        .and_then(|s| s.strip_suffix('"'))
        .unwrap_or(t);
    stripped.eq_ignore_ascii_case(column)
}

/// Parse `( 'a', 'b', … )` from a string starting at `(` into the
/// unescaped literals. `None` if any item is not a pure quoted literal.
fn extract_quoted_list(s: &str) -> Option<Vec<String>> {
    let mut chars = s.chars().peekable();
    if chars.next()? != '(' {
        return None;
    }
    let mut values = Vec::new();
    loop {
        while chars.peek().is_some_and(|c| c.is_whitespace()) {
            chars.next();
        }
        // Running out of input before the closing `)` yields `None` via `?`.
        match chars.peek()? {
            ')' => {
                chars.next();
                break;
            }
            '\'' => {
                let v = read_quoted(&mut chars)?;
                values.push(v);
                while chars.peek().is_some_and(|c| c.is_whitespace()) {
                    chars.next();
                }
                match chars.next()? {
                    ',' => {}
                    ')' => break,
                    _ => return None,
                }
            }
            _ => return None,
        }
    }
    Some(values)
}

/// Read a single-quoted string literal (cursor at the opening `'`),
/// unescaping `''` to `'`. Leaves the cursor just past the closing quote;
/// returns `None` if the literal is unterminated.
fn read_quoted(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
    if chars.next()? != '\'' {
        return None;
    }
    let mut out = String::new();
    loop {
        match chars.next()? {
            '\'' => {
                if chars.peek() == Some(&'\'') {
                    chars.next();
                    out.push('\'');
                } else {
                    return Some(out);
                }
            }
            c => out.push(c),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn parses_a_simple_in_check() {
        assert_eq!(
            parse_in_check_values("status IN ('active', 'closed')", "status"),
            Some(vec!["active".to_string(), "closed".to_string()])
        );
    }

    #[test]
    fn tolerates_a_quoted_column_and_lowercase_in() {
        assert_eq!(
            parse_in_check_values("\"status\" in ('a','b','c')", "status"),
            Some(vec!["a".into(), "b".into(), "c".into()])
        );
    }

    #[test]
    fn unescapes_embedded_quotes() {
        assert_eq!(
            parse_in_check_values("note IN ('it''s', 'ok')", "note"),
            Some(vec!["it's".into(), "ok".into()])
        );
    }

    #[test]
    fn handles_commas_and_parens_inside_literals() {
        assert_eq!(
            parse_in_check_values("label IN ('a, b', 'c)d')", "label"),
            Some(vec!["a, b".into(), "c)d".into()])
        );
    }

    #[test]
    fn rejects_non_literal_lists() {
        assert_eq!(parse_in_check_values("n IN (1, 2, 3)", "n"), None);
    }

    #[test]
    fn rejects_non_in_checks() {
        assert_eq!(parse_in_check_values("age >= 0", "age"), None);
        assert_eq!(parse_in_check_values("length(name) > 0", "name"), None);
    }

    #[test]
    fn rejects_when_lhs_is_a_different_column() {
        assert_eq!(parse_in_check_values("status IN ('a')", "role"), None);
    }

    #[test]
    fn does_not_trip_on_in_inside_a_word_or_literal() {
        // `min` contains "in" but is not the IN operator.
        assert_eq!(parse_in_check_values("min(x) > 0", "x"), None);
    }
}