41b7e9a049
One-time, mechanical reformat — no functional changes. The tree was not rustfmt-clean (~1800 hunks across ~100 files); this brings it to stock `cargo fmt` defaults so a `cargo fmt --check` CI gate can follow. Behaviour-preserving: 2509 pass / 0 fail / 1 ignored (unchanged baseline), clippy clean. A .git-blame-ignore-revs entry follows so `git blame` skips this commit.
198 lines
5.8 KiB
Rust
198 lines
5.8 KiB
Rust
//! Parse a simple `<column> IN ('a', 'b', …)` CHECK into its allowed
|
|
//! value list (ADR-0048 D17), so the common enum-as-CHECK pattern seeds
|
|
//! from the permitted values instead of generic text. Anything more
|
|
//! complex (ranges, expressions, multi-column, non-literal items)
|
|
//! returns `None`; the executor then best-effort generates and lets a
|
|
//! violation surface through the friendly-error layer.
|
|
|
|
/// Extract the string-literal values of a `<column> IN ( … )` CHECK.
|
|
///
|
|
/// Case-insensitive on the `IN` keyword and the column name; tolerates a
|
|
/// quoted column (`"status"`). Every list item must be a single-quoted
|
|
/// string literal (`''` is an embedded quote). Returns `None` for any
|
|
/// other shape.
|
|
#[must_use]
|
|
pub fn parse_in_check_values(check: &str, column: &str) -> Option<Vec<String>> {
|
|
let (in_idx, paren_open) = find_in_paren(check)?;
|
|
if !lhs_is_column(check[..in_idx].trim(), column) {
|
|
return None;
|
|
}
|
|
let values = extract_quoted_list(&check[paren_open..])?;
|
|
if values.is_empty() {
|
|
None
|
|
} else {
|
|
Some(values)
|
|
}
|
|
}
|
|
|
|
/// Whether `b` is an ASCII letter, an ASCII digit, or `_`.
///
/// Used to decide if a byte next to a candidate keyword would make that
/// keyword part of a longer word.
const fn is_ident_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
|
|
|
|
/// Find the `IN` keyword (as a word, outside string literals) that is
|
|
/// followed by `(`. Returns `(byte index of `IN`, byte index of `(`)`.
|
|
fn find_in_paren(check: &str) -> Option<(usize, usize)> {
|
|
let bytes = check.as_bytes();
|
|
let mut i = 0;
|
|
let mut in_quote = false;
|
|
while i < bytes.len() {
|
|
let b = bytes[i];
|
|
if in_quote {
|
|
if b == b'\'' {
|
|
in_quote = false;
|
|
}
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if b == b'\'' {
|
|
in_quote = true;
|
|
i += 1;
|
|
continue;
|
|
}
|
|
let is_in =
|
|
(b == b'i' || b == b'I') && bytes.get(i + 1).is_some_and(|n| *n == b'n' || *n == b'N');
|
|
if is_in {
|
|
let before_ok = i == 0 || !is_ident_byte(bytes[i - 1]);
|
|
let after = i + 2;
|
|
let after_ok = bytes.get(after).is_none_or(|n| !is_ident_byte(*n));
|
|
if before_ok && after_ok {
|
|
let mut k = after;
|
|
while bytes.get(k).is_some_and(u8::is_ascii_whitespace) {
|
|
k += 1;
|
|
}
|
|
if bytes.get(k) == Some(&b'(') {
|
|
return Some((i, k));
|
|
}
|
|
}
|
|
}
|
|
i += 1;
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Whether the left-hand side of the `IN` is exactly `column`.
///
/// Surrounding whitespace is ignored, and one pair of enclosing double
/// quotes is removed when both the opening and the closing quote are
/// present. The comparison ignores ASCII case.
fn lhs_is_column(lhs: &str, column: &str) -> bool {
    let trimmed = lhs.trim();
    // Needs at least two bytes so a lone `"` is not treated as both quotes.
    let fully_quoted = trimmed.len() >= 2 && trimmed.starts_with('"') && trimmed.ends_with('"');
    let name = if fully_quoted {
        &trimmed[1..trimmed.len() - 1]
    } else {
        trimmed
    };
    name.eq_ignore_ascii_case(column)
}
|
|
|
|
/// Parse `( 'a', 'b', … )` from a string starting at `(` into the
|
|
/// unescaped literals. `None` if any item is not a pure quoted literal.
|
|
fn extract_quoted_list(s: &str) -> Option<Vec<String>> {
|
|
let mut chars = s.chars().peekable();
|
|
if chars.next()? != '(' {
|
|
return None;
|
|
}
|
|
let mut values = Vec::new();
|
|
loop {
|
|
while chars.peek().is_some_and(|c| c.is_whitespace()) {
|
|
chars.next();
|
|
}
|
|
match chars.peek()? {
|
|
')' => {
|
|
chars.next();
|
|
break;
|
|
}
|
|
'\'' => {
|
|
let v = read_quoted(&mut chars)?;
|
|
values.push(v);
|
|
while chars.peek().is_some_and(|c| c.is_whitespace()) {
|
|
chars.next();
|
|
}
|
|
match chars.next()? {
|
|
',' => {}
|
|
')' => break,
|
|
_ => return None,
|
|
}
|
|
}
|
|
_ => return None,
|
|
}
|
|
}
|
|
Some(values)
|
|
}
|
|
|
|
/// Consume one single-quoted string literal from `chars`.
///
/// The next character must be the opening `'`. Inside the literal a doubled
/// quote (`''`) yields one `'`; a quote not followed by another quote ends
/// the literal and is consumed.
///
/// Returns the unescaped contents, or `None` when the input does not start
/// with `'` or runs out before the closing quote.
fn read_quoted(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<String> {
    const QUOTE: char = '\'';

    chars.next().filter(|first| *first == QUOTE)?;

    let mut literal = String::new();
    while let Some(ch) = chars.next() {
        if ch != QUOTE {
            literal.push(ch);
        } else if chars.next_if_eq(&QUOTE).is_some() {
            // Doubled quote: an escaped `'` that belongs to the literal.
            literal.push(QUOTE);
        } else {
            return Some(literal);
        }
    }

    // Ran off the end of the input without seeing the closing quote.
    None
}
|
|
|
|
/// Unit tests for the `IN (...)` CHECK parser: accepted shapes first, then
/// the shapes that must be rejected with `None`.
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    #[test]
    fn parses_a_simple_in_check() {
        assert_eq!(
            parse_in_check_values("status IN ('active', 'closed')", "status"),
            Some(vec!["active".to_string(), "closed".to_string()])
        );
    }

    #[test]
    fn tolerates_a_quoted_column_and_lowercase_in() {
        assert_eq!(
            parse_in_check_values("\"status\" in ('a','b','c')", "status"),
            Some(vec!["a".into(), "b".into(), "c".into()])
        );
    }

    #[test]
    fn unescapes_embedded_quotes() {
        assert_eq!(
            parse_in_check_values("note IN ('it''s', 'ok')", "note"),
            Some(vec!["it's".into(), "ok".into()])
        );
    }

    #[test]
    fn handles_commas_and_parens_inside_literals() {
        assert_eq!(
            parse_in_check_values("label IN ('a, b', 'c)d')", "label"),
            Some(vec!["a, b".into(), "c)d".into()])
        );
    }

    #[test]
    fn rejects_non_literal_lists() {
        assert_eq!(parse_in_check_values("n IN (1, 2, 3)", "n"), None);
    }

    #[test]
    fn rejects_non_in_checks() {
        assert_eq!(parse_in_check_values("age >= 0", "age"), None);
        assert_eq!(parse_in_check_values("length(name) > 0", "name"), None);
    }

    #[test]
    fn rejects_when_lhs_is_a_different_column() {
        assert_eq!(parse_in_check_values("status IN ('a')", "role"), None);
    }

    #[test]
    fn does_not_trip_on_in_inside_a_word_or_literal() {
        // `min` contains "in" but is not the IN operator.
        assert_eq!(parse_in_check_values("min(x) > 0", "x"), None);

        // An `in (` inside a string literal is not the operator either.
        // `find_in_paren` is checked directly so the quote tracking itself
        // is pinned, not just the end result.
        assert_eq!(find_in_paren("note <> 'x in (y)'"), None);
        assert_eq!(parse_in_check_values("note <> 'x in (y)'", "note"), None);

        // The scan resumes once the literal is closed: the keyword found is
        // the real one at byte 12, with its `(` at byte 15.
        assert_eq!(find_in_paren("'in (' || x IN ('a')"), Some((12, 15)));
    }
}
|