Two additive D7 catalogue rules, surfaced while writing the website seed docs. No change to the type fallback, executor, or grammar. #33 — year-like int columns. `published`/`birth_year` were just `int`, so they fell to the unbounded int path and produced nonsense (`9419`). Add an int-gated year rule (after the quantity rule, so `year_count` stays a count): `year`/`*_year`/`published`/`founded` -> a bounded 1950-2025 year (new `YearRecent`), or — when the name also carries `birth`/`born`/`dob`, e.g. `birth_year` — the dob-style birth window 1945-2007 (new `YearBirth`). Plain int; not added to the D9 named-generator vocabulary. #34 — conventional choice sets. A few enum-ish names have a near-canonical small set that reads far better than lorem text. Add a type-gated PickFrom lookup (reusing the existing generator): priority/prio and severity (text or int), rating/stars (int only). `status` is deliberately excluded (values too domain-specific) and keeps the D12 advisory; a user IN-CHECK still wins. `priority` leaves ENUM_TOKENS. ADR-0048 Amendment 1; +8 tests (incl. a column-fill integration test that also closes a pre-existing gap on that path).
This commit is contained in:
@@ -31,6 +31,16 @@ const RECENT_WINDOW_DAYS: i64 = 3 * 365;
|
||||
const ADULT_MIN_DAYS: i64 = 18 * 365;
|
||||
const ADULT_MAX_DAYS: i64 = 80 * 365;
|
||||
|
||||
/// Year windows for the `int`-typed year heuristics (issue #33),
|
||||
/// expressed relative to [`REF_YEAR`] so they advance with releases —
|
||||
/// the year siblings of the `DateRecent` / `DateAdult` windows above.
|
||||
/// `YearRecent` spans ~75 years (1950–2025 at REF_YEAR=2025), wide
|
||||
/// enough for `published` / `founded` / `release_year`; `YearBirth`
|
||||
/// mirrors the adult birth window (1945–2007 at REF_YEAR=2025).
|
||||
///
/// Width of the `YearRecent` window in years: generated values lie in
/// `REF_YEAR - YEAR_RECENT_SPAN ..= REF_YEAR` (both ends inclusive).
const YEAR_RECENT_SPAN: i32 = 75;
|
||||
/// Youngest age covered by `YearBirth`; the latest birth year emitted is
/// `REF_YEAR - YEAR_BIRTH_MIN_AGE`.
const YEAR_BIRTH_MIN_AGE: i32 = 18;
|
||||
/// Oldest age covered by `YearBirth`; the earliest birth year emitted is
/// `REF_YEAR - YEAR_BIRTH_MAX_AGE`.
const YEAR_BIRTH_MAX_AGE: i32 = 80;
|
||||
|
||||
/// Produce one value for `generator` against destination type `ty`.
|
||||
#[must_use]
|
||||
pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Value {
|
||||
@@ -71,6 +81,13 @@ pub fn generate_value(generator: &Generator, ty: Type, rng: &mut SeedRng) -> Val
|
||||
Generator::CurrencyAmount => currency_amount(ty, rng),
|
||||
Generator::Age => Value::Number(rng.random_range(18..=80).to_string()),
|
||||
Generator::SmallInt => Value::Number(rng.random_range(1..=100).to_string()),
|
||||
Generator::YearRecent => {
|
||||
Value::Number(rng.random_range((REF_YEAR - YEAR_RECENT_SPAN)..=REF_YEAR).to_string())
|
||||
}
|
||||
Generator::YearBirth => Value::Number(
|
||||
rng.random_range((REF_YEAR - YEAR_BIRTH_MAX_AGE)..=(REF_YEAR - YEAR_BIRTH_MIN_AGE))
|
||||
.to_string(),
|
||||
),
|
||||
Generator::DateRecent => Value::Text(format_date(random_past_date(rng, 0, RECENT_WINDOW_DAYS))),
|
||||
Generator::DateAdult => {
|
||||
Value::Text(format_date(random_past_date(rng, ADULT_MIN_DAYS, ADULT_MAX_DAYS)))
|
||||
@@ -489,6 +506,41 @@ mod tests {
|
||||
assert!(matches!(v, Value::Number(_)), "numeric pick should be a Number: {v:?}");
|
||||
}
|
||||
|
||||
#[test]
fn year_generators_stay_within_their_bounded_windows() {
    // Issue #33: both year generators emit a plain `int` inside a
    // bounded, plausible window — never the unbounded-int nonsense.
    //
    // The expected bounds are derived from the same constants the
    // generators use rather than hard-coded, because those windows are
    // defined relative to `REF_YEAR` and move when it is bumped. At
    // REF_YEAR = 2025 they are 1950–2025 (recent) and 1945–2007 (birth).
    let ref_year = super::REF_YEAR;
    let windows = [
        (
            "YearRecent",
            Generator::YearRecent,
            ref_year - super::YEAR_RECENT_SPAN,
            ref_year,
        ),
        (
            "YearBirth",
            Generator::YearBirth,
            ref_year - super::YEAR_BIRTH_MAX_AGE,
            ref_year - super::YEAR_BIRTH_MIN_AGE,
        ),
    ];

    // One shared RNG, sampled in the same order as before: 300 recent
    // years first, then 300 birth years.
    let mut rng = make_rng(Some(7));
    for (label, generator, lo, hi) in windows {
        // Guard against a mis-edited constant silently producing an
        // empty or inverted window.
        assert!(lo < hi, "{label} window [{lo},{hi}] is empty or inverted");
        for _ in 0..300 {
            let Value::Number(s) = generate_value(&generator, Type::Int, &mut rng) else {
                panic!("{label} must be a Number")
            };
            let n: i32 = s
                .parse()
                .unwrap_or_else(|e| panic!("{label} emitted a non-integer {s:?}: {e}"));
            assert!((lo..=hi).contains(&n), "{label} {n} out of [{lo},{hi}]");
        }
    }
}
|
||||
|
||||
#[test]
fn year_generators_are_deterministic_for_a_fixed_seed() {
    // Generating twice from the same seed must give the same value for
    // each of the two year generators.
    for generator in [Generator::YearRecent, Generator::YearBirth] {
        let first = gen_once(&generator, Type::Int, 42);
        let second = gen_once(&generator, Type::Int, 42);
        assert_eq!(first, second);
    }
}
|
||||
|
||||
#[test]
|
||||
fn int_range_stays_within_inclusive_bounds() {
|
||||
let g = Generator::Range { low: "10".into(), high: "20".into() };
|
||||
|
||||
+128
-2
@@ -57,9 +57,14 @@ fn choose_generator_inner(table: &str, col: &ColumnSpec) -> Generator {
|
||||
/// the post-seed advisory; such columns still receive generic text.
|
||||
#[must_use]
|
||||
pub fn is_enum_ish(name: &str) -> bool {
|
||||
// `priority` is intentionally absent: issue #34 gave it a built-in
|
||||
// value set (low/medium/high · 1/2/3), so it is no longer "filled
|
||||
// generically" and must not trigger the D13 advisory. `severity` /
|
||||
// `rating` / `stars` were never here. `status` stays — it is
|
||||
// deliberately left to the advisory (no built-in set).
|
||||
const ENUM_TOKENS: &[&str] = &[
|
||||
"role", "status", "state", "type", "kind", "category", "level",
|
||||
"tier", "stage", "priority", "gender",
|
||||
"tier", "stage", "gender",
|
||||
];
|
||||
let toks = tokens(name);
|
||||
toks.iter().any(|t| ENUM_TOKENS.contains(&t.as_str()))
|
||||
@@ -150,6 +155,49 @@ fn match_name_generator(table: &str, toks: &[String], ty: Type) -> Option<Genera
|
||||
if numeric && has_any(toks, &["quantity", "qty", "stock", "count"]) {
|
||||
return Some(Generator::SmallInt);
|
||||
}
|
||||
// — Year-as-int (issue #33) — bounded plausible years so the `int`
|
||||
// type fallback (D8) can't emit nonsense like `9419`. `int`-gated
|
||||
// (years are whole numbers) and placed *after* the quantity rule so
|
||||
// `year_count` (a count of years) stays a `SmallInt`. `birth`/`born`/
|
||||
// `dob` + year picks the birth window — the int sibling of the
|
||||
// `dob → DateAdult` rule above — otherwise a recent window covers
|
||||
// `year` / `*_year` / `published` / `founded`.
|
||||
if matches!(ty, Type::Int)
|
||||
&& (has_token(toks, "year") || has_any(toks, &["published", "founded"]))
|
||||
{
|
||||
return Some(if has_any(toks, &["birth", "born", "dob"]) {
|
||||
Generator::YearBirth
|
||||
} else {
|
||||
Generator::YearRecent
|
||||
});
|
||||
}
|
||||
|
||||
// — Conventional choice sets (issue #34) — a few enum-ish names have
|
||||
// a near-canonical small value set that reads far better than lorem
|
||||
// text. Type-gated; reuses `PickFrom`. Names *without* a canonical
|
||||
// set (`status`, `role`, `type`, …) stay unmatched → generic text +
|
||||
// the D12/D13 advisory. `status` is deliberately excluded: its real
|
||||
// values are too domain-specific (user-confirmed, issue #34). A
|
||||
// user-declared `IN`-CHECK still wins — it is resolved before this.
|
||||
if has_any(toks, &["priority", "prio"]) {
|
||||
if text {
|
||||
return Some(pick_from(&["low", "medium", "high"]));
|
||||
}
|
||||
if matches!(ty, Type::Int) {
|
||||
return Some(pick_from(&["1", "2", "3"]));
|
||||
}
|
||||
}
|
||||
if has_token(toks, "severity") {
|
||||
if text {
|
||||
return Some(pick_from(&["low", "medium", "high", "critical"]));
|
||||
}
|
||||
if matches!(ty, Type::Int) {
|
||||
return Some(pick_from(&["1", "2", "3", "4"]));
|
||||
}
|
||||
}
|
||||
if matches!(ty, Type::Int) && has_any(toks, &["rating", "stars"]) {
|
||||
return Some(pick_from(&["1", "2", "3", "4", "5"]));
|
||||
}
|
||||
|
||||
// — Temporal (bounded, D8) —
|
||||
if matches!(ty, Type::Date) && has_any(toks, &["dob", "birthday", "birthdate"]) {
|
||||
@@ -267,6 +315,14 @@ fn tokens(name: &str) -> Vec<String> {
|
||||
out
|
||||
}
|
||||
|
||||
/// A `PickFrom` generator from string-literal values (issue #34's
|
||||
/// conventional choice sets). `literal_to_value` interprets each entry
|
||||
/// by the destination type at generation time (an `int` column turns
|
||||
/// `"1"` into a number).
|
||||
fn pick_from(values: &[&str]) -> Generator {
|
||||
Generator::PickFrom(values.iter().map(|s| (*s).to_string()).collect())
|
||||
}
|
||||
|
||||
/// Reports whether `t` occurs among `toks`.
///
/// The match is exact string equality against a whole token — no
/// substring matching and no case folding happens here.
fn has_token(toks: &[String], t: &str) -> bool {
    toks.iter().map(String::as_str).any(|candidate| candidate == t)
}
|
||||
@@ -412,11 +468,81 @@ mod tests {
|
||||
assert!(is_enum_ish("status"));
|
||||
assert!(is_enum_ish("role"));
|
||||
assert!(is_enum_ish("order_state"));
|
||||
assert!(is_enum_ish("priority"));
|
||||
// Issue #34: `priority` gained a built-in value set, so it is no
|
||||
// longer advised (it is no longer "filled generically").
|
||||
assert!(!is_enum_ish("priority"));
|
||||
assert!(!is_enum_ish("severity"));
|
||||
assert!(!is_enum_ish("rating"));
|
||||
assert!(!is_enum_ish("email"));
|
||||
assert!(!is_enum_ish("first_name"));
|
||||
}
|
||||
|
||||
#[test]
fn year_like_int_columns_map_to_bounded_years() {
    // Issue #33: `int`-gated year heuristics, checked as a table of
    // (table, column, type, expected generator) in the original order.
    let cases = [
        // `birth` / `born` alongside a year token → the birth window.
        ("authors", "birth_year", Type::Int, Generator::YearBirth),
        ("authors", "birthYear", Type::Int, Generator::YearBirth),
        ("u", "year_born", Type::Int, Generator::YearBirth),
        // Every other year-like int → the recent window.
        ("books", "year", Type::Int, Generator::YearRecent),
        ("films", "release_year", Type::Int, Generator::YearRecent),
        ("books", "published", Type::Int, Generator::YearRecent),
        ("companies", "founded", Type::Int, Generator::YearRecent),
        // Type-gated: a text `year` is not a bounded-year int.
        ("books", "year", Type::Text, Generator::Generic),
        // `year_count` is a count, not a year — the quantity rule wins.
        ("t", "year_count", Type::Int, Generator::SmallInt),
    ];
    for (table, column, ty, expected) in cases {
        assert_eq!(choose(table, column, ty), expected, "{table}.{column}");
    }
}
|
||||
|
||||
#[test]
fn conventional_choice_sets_map_to_pick_from() {
    // Issue #33's sibling, issue #34: type-gated built-in value sets.
    // `expect_set` asserts that the column resolves to a `PickFrom`
    // holding exactly `values`, in order.
    let expect_set = |table: &str, column: &str, ty: Type, values: &[&str]| {
        let expected = Generator::PickFrom(values.iter().copied().map(String::from).collect());
        assert_eq!(choose(table, column, ty), expected);
    };

    expect_set("tickets", "priority", Type::Text, &["low", "medium", "high"]);
    expect_set("tickets", "prio", Type::Int, &["1", "2", "3"]);
    expect_set("bugs", "severity", Type::Text, &["low", "medium", "high", "critical"]);
    expect_set("bugs", "severity", Type::Int, &["1", "2", "3", "4"]);
    expect_set("reviews", "rating", Type::Int, &["1", "2", "3", "4", "5"]);
    expect_set("reviews", "stars", Type::Int, &["1", "2", "3", "4", "5"]);
}
|
||||
|
||||
#[test]
fn status_is_left_to_the_advisory_not_given_a_set() {
    // User-confirmed (issue #34): `status` gets no built-in value set.
    // It keeps the D12 "don't guess" stance — it resolves to the generic
    // generator and is still reported as enum-ish for the advisory.
    let chosen = choose("orders", "status", Type::Text);
    assert_eq!(chosen, Generator::Generic);

    let advised = is_enum_ish("status");
    assert!(advised);
}
|
||||
|
||||
#[test]
fn a_declared_in_check_still_wins_over_a_built_in_set() {
    // A declared `IN`-CHECK is the user's explicit intent, so its values
    // must be chosen over the issue-#34 default set for the same name.
    let spec = {
        let mut column = ColumnSpec::plain("priority", Type::Text);
        column.check_in_values = Some(vec!["p1".into(), "p2".into()]);
        column
    };
    let expected = Generator::PickFrom(vec!["p1".into(), "p2".into()]);

    assert_eq!(choose_generator("tickets", &spec), expected);
}
|
||||
|
||||
#[test]
|
||||
fn enum_ish_columns_fall_through_to_generic() {
|
||||
// No special generator — generic text + the advisory flags them.
|
||||
|
||||
@@ -149,6 +149,13 @@ pub enum Generator {
|
||||
Age,
|
||||
/// A small positive integer (quantities, counts).
|
||||
SmallInt,
|
||||
/// A plausible recent year as a plain `int` — `year` / `*_year` /
|
||||
/// `published` / `founded` columns (issue #33). Bounded window so the
|
||||
/// type-based `int` fallback can't emit nonsense like `9419`.
|
||||
YearRecent,
|
||||
/// A plausible birth year as a plain `int` — `birth_year` and kin
|
||||
/// (issue #33), the year-typed sibling of [`Self::DateAdult`].
|
||||
YearBirth,
|
||||
// — Temporal (bounded windows, D8) —
|
||||
/// A date within the last few years.
|
||||
DateRecent,
|
||||
|
||||
Reference in New Issue
Block a user