fix: ADR-0006 — clear redo when new work commits without a snapshot

/runda found silent data loss: with the non-fatal snapshot-failure
policy, a committed mutation whose snapshot couldn't be staged left
the redo stack stale (redo-clear was only a side effect of finalize),
so a later redo silently discarded the new work. Same gap in batches.

- SnapshotStore::clear_redo() drops the redo stack + payloads
- snapshot_then / end_batch call clear_redo() when committed user work
  has no staged snapshot; in the disk-full case the clear succeeds where
  a full backup couldn't (it needs only a tiny index write + payload deletes)
- unit test + integration regression (forced staging failure)
- ADR-0006 implementation note records the fix + residual edge

1698 passed / 0 failed / 1 ignored; clippy clean.
This commit is contained in:
claude@clouddev1
2026-05-24 21:10:44 +00:00
parent 5442cfc0b9
commit df6aa69155
4 changed files with 123 additions and 11 deletions
+20 -10
View File
@@ -1542,11 +1542,17 @@ fn snapshot_then<T>(
if committed {
batch.dirty = true;
}
} else if let (Some(store), Some(st)) = (snap, staged) {
let outcome = if committed {
store.finalize(st).map(|_| ())
} else {
store.discard(st)
} else if let Some(store) = snap {
let outcome = match staged {
Some(st) if committed => store.finalize(st).map(|_| ()),
Some(st) => store.discard(st),
// No snapshot was staged. If this is a committed user
// mutation (source present → staging FAILED, not just an
// internal op), the redo stack is now stale and must be
// cleared, or a later `redo` would silently discard this
// work. `finalize` would have done this; it didn't run.
None if committed && source.is_some() => store.clear_redo(),
None => Ok(()),
};
if let Err(e) = outcome {
warn!(error = %e, "undo snapshot bookkeeping failed (command already applied)");
@@ -1579,11 +1585,15 @@ fn end_batch(snap: Option<&SnapshotStore>, batch: &mut BatchState) {
warn!("EndBatch with no active batch; ignoring");
return;
}
if let (Some(store), Some(st)) = (snap, batch.staged.take()) {
let outcome = if batch.dirty {
store.finalize(st).map(|_| ())
} else {
store.discard(st)
if let Some(store) = snap {
let outcome = match batch.staged.take() {
Some(st) if batch.dirty => store.finalize(st).map(|_| ()),
Some(st) => store.discard(st),
// Boundary snapshot failed to stage but mutations
// committed: clear the now-stale redo stack (same
// data-loss guard as the per-command path).
None if batch.dirty => store.clear_redo(),
None => Ok(()),
};
if let Err(e) = outcome {
warn!(error = %e, "batch undo snapshot bookkeeping failed");