//! Byte-level helpers for the scannerless walker (ADR-0024
//! §scannerless).
//!
//! Each helper takes the source string and a byte position,
//! returns either `Some(end_position)` (matched, post-token end)
//! or `None` (didn't match here). Helpers are pure and span-
//! exact; multi-byte UTF-8 within identifiers and string
//! literals is handled byte-correctly.
//!
//! These helpers internally mirror the logic of the legacy
//! `dsl::lexer` module but are invoked per-position by the
//! walker rather than as a pre-pass.

/// Advance from `start` for as long as `accept` holds for the
/// current byte.
///
/// Returns the index of the first rejected byte, or `bytes.len()`
/// when every remaining byte is accepted. A `start` at or past the
/// end of `bytes` is returned unchanged.
fn scan_while(bytes: &[u8], start: usize, accept: impl Fn(u8) -> bool) -> usize {
    match bytes.get(start..) {
        Some(rest) => start + rest.iter().take_while(|&&b| accept(b)).count(),
        None => start,
    }
}

/// Whether `b` may appear after the first byte of an identifier:
/// ASCII alphanumeric or `_`.
fn is_ident_continue(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_'
}

/// Return the byte index of the first non-whitespace byte at or
/// after `start`. If the rest is all whitespace, returns
/// `source.len()`.
///
/// A `start` past the end of `source` is returned unchanged.
pub fn skip_whitespace(source: &str, start: usize) -> usize {
    scan_while(source.as_bytes(), start, |b| b.is_ascii_whitespace())
}

/// Identifier shape: ASCII letter or `_` to start, then ASCII
/// alphanumeric or `_`. Returns `Some((start, end))` on match.
///
/// Returns `None` when `start` is out of bounds or the byte at
/// `start` cannot begin an identifier.
pub fn consume_ident(source: &str, start: usize) -> Option<(usize, usize)> {
    let bytes = source.as_bytes();
    let first = *bytes.get(start)?;
    if !(first.is_ascii_alphabetic() || first == b'_') {
        return None;
    }
    Some((start, scan_while(bytes, start + 1, is_ident_continue)))
}

/// Try to match `keyword` at `position` case-insensitively.
///
/// The match must end at a non-identifier byte (or end-of-input)
/// so that `save` doesn't half-match the prefix of `saved`.
/// Returns the end byte index on match.
pub fn match_keyword(source: &str, position: usize, keyword: &str) -> Option<usize> {
    let bytes = source.as_bytes();
    // `checked_add` + `get` turn an out-of-range position into `None`
    // instead of an arithmetic overflow or an index panic.
    let end = position.checked_add(keyword.len())?;
    let candidate = bytes.get(position..end)?;
    if !candidate.eq_ignore_ascii_case(keyword.as_bytes()) {
        return None;
    }
    // Reject when the keyword is merely the prefix of a longer identifier.
    match bytes.get(end) {
        Some(&next) if is_ident_continue(next) => None,
        _ => Some(end),
    }
}

/// Bare-path token: a non-whitespace run.
///
/// Per ADR-0024 the path-bearing UX dropped the "spaces don't
/// need quoting" feature; paths with spaces use `StringLit`.
/// Phase A's `import` / `export` slots use this.
///
/// Returns `Some((start, end))` for a non-empty run, `None` when
/// `start` is at end-of-input or on a whitespace byte.
pub fn consume_bare_path(source: &str, start: usize) -> Option<(usize, usize)> {
    let end = scan_while(source.as_bytes(), start, |b| !b.is_ascii_whitespace());
    if end > start {
        Some((start, end))
    } else {
        None
    }
}

/// Match a single punctuation character at `position`.
///
/// Returns the byte index just past the character on match. The
/// comparison is made against the character's full UTF-8 encoding,
/// so a non-ASCII `ch` can never match an unrelated byte that
/// merely shares its low eight bits.
#[allow(dead_code)]
pub fn match_punct(source: &str, position: usize, ch: char) -> Option<usize> {
    let mut buf = [0u8; 4];
    let needle = ch.encode_utf8(&mut buf).as_bytes();
    let rest = source.as_bytes().get(position..)?;
    if rest.starts_with(needle) {
        Some(position + needle.len())
    } else {
        None
    }
}

/// Number literal: optional leading `-` (when adjacent to a digit),
/// then 1+ digits, optional `.` + 1+ digits.
///
/// Mirrors `dsl::lexer::lex_number`. Used by Phase B's `add 1:n
/// relationship` form (where the literal `1` lexes as a Number)
/// and by Phase D's value-literal slots.
///
/// Returns `Some((start, end))` on match. A `.` that is not
/// followed by a digit is left unconsumed.
pub fn consume_number_literal(source: &str, start: usize) -> Option<(usize, usize)> {
    let bytes = source.as_bytes();
    let is_digit = |b: u8| b.is_ascii_digit();

    // A `-` only counts as a sign when a digit follows immediately.
    let has_sign = bytes.get(start) == Some(&b'-')
        && bytes.get(start + 1).map_or(false, u8::is_ascii_digit);
    let digits_start = if has_sign { start + 1 } else { start };

    let int_end = scan_while(bytes, digits_start, is_digit);
    if int_end == digits_start {
        return None;
    }

    // The fractional part is taken only when at least one digit follows the dot.
    let mut end = int_end;
    if bytes.get(int_end) == Some(&b'.') {
        let frac_start = int_end + 1;
        let frac_end = scan_while(bytes, frac_start, is_digit);
        if frac_end > frac_start {
            end = frac_end;
        }
    }
    Some((start, end))
}

/// Flag token: `--name` where name is alphanumeric / `-` / `_`,
/// at least one character. Returns the span (including `--`) on
/// match. The caller checks `name` against an expected value.
pub fn consume_flag(source: &str, start: usize) -> Option<(usize, usize)> {
    let bytes = source.as_bytes();
    if !bytes.get(start..)?.starts_with(b"--") {
        return None;
    }
    let name_start = start + 2;
    let end = scan_while(bytes, name_start, |b| {
        b.is_ascii_alphanumeric() || b == b'-' || b == b'_'
    });
    // A bare `--` with no name is not a flag.
    if end == name_start {
        None
    } else {
        Some((start, end))
    }
}

/// Single-quoted string literal with `''` escape (mirrors
/// `dsl::lexer::lex_string`).
///
/// Returns `(start, end)` where end is past the closing quote,
/// plus the unescaped content. `None` when the literal is
/// unterminated or the position isn't at a `'`.
#[allow(dead_code)]
pub fn consume_string_literal(source: &str, start: usize) -> Option<((usize, usize), String)> {
    let bytes = source.as_bytes();
    if bytes.get(start) != Some(&b'\'') {
        return None;
    }
    let mut content = String::new();
    // `cursor` always sits just after an ASCII quote, so it is a char boundary.
    let mut cursor = start + 1;
    loop {
        // No further quote means the literal is unterminated.
        let quote = cursor + source.get(cursor..)?.find('\'')?;
        content.push_str(&source[cursor..quote]);
        if bytes.get(quote + 1) == Some(&b'\'') {
            // `''` is an escaped quote inside the literal.
            content.push('\'');
            cursor = quote + 2;
        } else {
            return Some(((start, quote + 1), content));
        }
    }
}