refactor(cleanback): make backups root configurable and update docs/tests

- require --backups-root instead of hardcoded /Backups
- update README examples and wording accordingly
- adjust CLI help text and internal path handling
- refactor unit and E2E tests to pass explicit backups root
- minor formatting and readability cleanups
This commit is contained in:
2025-12-31 08:31:43 +01:00
parent a628f8d6a9
commit bb5bdcf084
4 changed files with 171 additions and 84 deletions

View File

@@ -8,7 +8,7 @@
**Repository:** https://github.com/kevinveenbirkenbach/cleanup-failed-backups **Repository:** https://github.com/kevinveenbirkenbach/cleanup-failed-backups
`cleanback` validates and (optionally) cleans up **failed Docker backup directories**. `cleanback` validates and (optionally) cleans up **failed Docker backup directories**.
It scans backup folders under `/Backups`, uses :contentReference[oaicite:0]{index=0} to validate each subdirectory, and lets you delete the ones that fail validation. It scans backup folders under a configurable backups root (e.g. `/Backups`), uses `dirval` to validate each subdirectory, and lets you delete the ones that fail validation.
Validation runs **in parallel** for performance; deletions are controlled and can be **interactive** or **fully automatic**. Validation runs **in parallel** for performance; deletions are controlled and can be **interactive** or **fully automatic**.
@@ -51,7 +51,7 @@ pip install -e .
## 🔧 Requirements ## 🔧 Requirements
* Python **3.8+** * Python **3.8+**
* Access to the `/Backups` directory tree * Access to the backups root directory tree (e.g. `/Backups`)
* `dirval` (installed automatically via pip dependency) * `dirval` (installed automatically via pip dependency)
--- ---
@@ -69,7 +69,7 @@ cleanback
### Validate a single backup ID ### Validate a single backup ID
```bash ```bash
cleanback --id <ID> cleanback --backups-root /Backups --id <ID>
``` ```
Validates directories under: Validates directories under:
@@ -81,7 +81,7 @@ Validates directories under:
### Validate all backups ### Validate all backups
```bash ```bash
cleanback --all cleanback --backups-root /Backups --all
``` ```
Scans: Scans:
@@ -107,13 +107,13 @@ Scans:
```bash ```bash
# Validate a single backup and prompt on failures # Validate a single backup and prompt on failures
cleanback --id 2024-09-01T12-00-00 cleanback --backups-root /Backups --id 2024-09-01T12-00-00
# Validate everything with 8 workers and auto-delete failures # Validate everything with 8 workers and auto-delete failures
cleanback --all --workers 8 --yes cleanback --backups-root /Backups --all --workers 8 --yes
# Use a custom dirval binary and short timeout # Use a custom dirval binary and short timeout
cleanback --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0 cleanback --backups-root /Backups --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0
``` ```
--- ---

View File

@@ -3,8 +3,8 @@
Cleanup Failed Docker Backups — parallel validator (using dirval) Cleanup Failed Docker Backups — parallel validator (using dirval)
Validates backup subdirectories under: Validates backup subdirectories under:
- /Backups/<ID>/backup-docker-to-local (when --id is used) - <BACKUPS_ROOT>/<ID>/backup-docker-to-local (when --id is used)
- /Backups/*/backup-docker-to-local (when --all is used) - <BACKUPS_ROOT>/*/backup-docker-to-local (when --all is used)
For each subdirectory: For each subdirectory:
- Runs `dirval <subdir> --validate`. - Runs `dirval <subdir> --validate`.
@@ -19,17 +19,15 @@ Parallelism:
from __future__ import annotations from __future__ import annotations
import argparse import argparse
import sys import multiprocessing
import shutil import shutil
import subprocess import subprocess
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import multiprocessing
import time
BACKUPS_ROOT = Path("/Backups")
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -41,25 +39,28 @@ class ValidationResult:
stdout: str stdout: str
def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Path]: def discover_target_subdirs(
backups_root: Path, backup_id: Optional[str], all_mode: bool
) -> List[Path]:
""" """
Return a list of subdirectories to validate: Return a list of subdirectories to validate:
- If backup_id is given: /Backups/<id>/backup-docker-to-local/* (dirs only) - If backup_id is given: <root>/<id>/backup-docker-to-local/* (dirs only)
- If --all: for each /Backups/* that has backup-docker-to-local, include its subdirs - If --all: for each <root>/* that has backup-docker-to-local, include its subdirs
""" """
targets: List[Path] = [] targets: List[Path] = []
if not backups_root.is_dir():
raise FileNotFoundError(f"Backups root does not exist: {backups_root}")
if all_mode: if all_mode:
if not BACKUPS_ROOT.is_dir(): for backup_folder in sorted(p for p in backups_root.iterdir() if p.is_dir()):
raise FileNotFoundError(f"Backups root does not exist: {BACKUPS_ROOT}")
for backup_folder in sorted(p for p in BACKUPS_ROOT.iterdir() if p.is_dir()):
candidate = backup_folder / "backup-docker-to-local" candidate = backup_folder / "backup-docker-to-local"
if candidate.is_dir(): if candidate.is_dir():
targets.extend(sorted([p for p in candidate.iterdir() if p.is_dir()])) targets.extend(sorted([p for p in candidate.iterdir() if p.is_dir()]))
else: else:
if not backup_id: if not backup_id:
raise ValueError("Either --id or --all must be provided.") raise ValueError("Either --id or --all must be provided.")
base = BACKUPS_ROOT / backup_id / "backup-docker-to-local" base = backups_root / backup_id / "backup-docker-to-local"
if not base.is_dir(): if not base.is_dir():
raise FileNotFoundError(f"Directory does not exist: {base}") raise FileNotFoundError(f"Directory does not exist: {base}")
targets = sorted([p for p in base.iterdir() if p.is_dir()]) targets = sorted([p for p in base.iterdir() if p.is_dir()])
@@ -67,7 +68,9 @@ def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Pa
return targets return targets
def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> ValidationResult: def run_dirval_validate(
subdir: Path, dirval_cmd: str, timeout: float
) -> ValidationResult:
""" """
Execute dirval: Execute dirval:
<dirval_cmd> "<SUBDIR>" --validate <dirval_cmd> "<SUBDIR>" --validate
@@ -108,16 +111,23 @@ def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> Valida
) )
def parallel_validate(subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float) -> List[ValidationResult]: def parallel_validate(
subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float
) -> List[ValidationResult]:
results: List[ValidationResult] = [] results: List[ValidationResult] = []
if not subdirs: if not subdirs:
return results return results
print(f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})...") print(
f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})..."
)
start = time.time() start = time.time()
with ThreadPoolExecutor(max_workers=workers) as pool: with ThreadPoolExecutor(max_workers=workers) as pool:
future_map = {pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd for sd in subdirs} future_map = {
pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd
for sd in subdirs
}
for fut in as_completed(future_map): for fut in as_completed(future_map):
res = fut.result() res = fut.result()
status = "ok" if res.ok else "error" status = "ok" if res.ok else "error"
@@ -140,7 +150,7 @@ def print_dir_listing(path: Path, max_items: int = 50) -> None:
typ = "<DIR>" if entry.is_dir() else " " typ = "<DIR>" if entry.is_dir() else " "
print(f" {typ} {entry.name}") print(f" {typ} {entry.name}")
if i + 1 >= max_items and len(entries) > i + 1: if i + 1 >= max_items and len(entries) > i + 1:
print(f" ... (+{len(entries) - (i+1)} more)") print(f" ... (+{len(entries) - (i + 1)} more)")
break break
@@ -190,9 +200,24 @@ def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval." description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval."
) )
parser.add_argument(
"--backups-root",
required=True,
type=Path,
help="Root directory containing backup folders (required).",
)
scope = parser.add_mutually_exclusive_group(required=True) scope = parser.add_mutually_exclusive_group(required=True)
scope.add_argument("--id", dest="backup_id", help="Backup folder name under /Backups.") scope.add_argument(
scope.add_argument("--all", dest="all_mode", action="store_true", help="Scan all /Backups/* folders.") "--id", dest="backup_id", help="Backup folder name under backups root."
)
scope.add_argument(
"--all",
dest="all_mode",
action="store_true",
help="Scan all backups root/* folders.",
)
parser.add_argument( parser.add_argument(
"--dirval-cmd", "--dirval-cmd",
@@ -223,7 +248,9 @@ def main(argv: Optional[List[str]] = None) -> int:
args = parse_args(argv) args = parse_args(argv)
try: try:
subdirs = discover_target_subdirs(args.backup_id, bool(args.all_mode)) subdirs = discover_target_subdirs(
args.backups_root, args.backup_id, bool(args.all_mode)
)
except Exception as e: except Exception as e:
print(f"ERROR: {e}", file=sys.stderr) print(f"ERROR: {e}", file=sys.stderr)
return 2 return 2
@@ -242,7 +269,9 @@ def main(argv: Optional[List[str]] = None) -> int:
print(f"\n{len(failures)} directory(ies) failed validation.") print(f"\n{len(failures)} directory(ies) failed validation.")
deleted = process_deletions(failures, assume_yes=args.yes) deleted = process_deletions(failures, assume_yes=args.yes)
kept = len(failures) - deleted kept = len(failures) - deleted
print(f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}") print(
f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}"
)
return 0 return 0

View File

@@ -118,7 +118,7 @@ class CleanbackE2EDockerTests(unittest.TestCase):
env = os.environ.copy() env = os.environ.copy()
# Prepend fake dirval path for this test run # Prepend fake dirval path for this test run
env["PATH"] = f"{self.bin_dir}:{env.get('PATH','')}" env["PATH"] = f"{self.bin_dir}:{env.get('PATH', '')}"
# Run: python -m cleanback --id <ID> --yes # Run: python -m cleanback --id <ID> --yes
# We must point BACKUPS_ROOT to our run_root. Easiest: set /Backups = run_root # We must point BACKUPS_ROOT to our run_root. Easiest: set /Backups = run_root
@@ -131,11 +131,19 @@ class CleanbackE2EDockerTests(unittest.TestCase):
composite_id = f"{self.run_root.name}/{self.backup_id}" composite_id = f"{self.run_root.name}/{self.backup_id}"
cmd = [ cmd = [
"python", "-m", "cleanback", "python",
"--id", composite_id, "-m",
"--dirval-cmd", "dirval", "cleanback",
"--workers", "4", "--backups-root",
"--timeout", SHORT_TIMEOUT, "/Backups",
"--id",
composite_id,
"--dirval-cmd",
"dirval",
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes", "--yes",
] ]
proc = subprocess.run(cmd, text=True, capture_output=True, env=env) proc = subprocess.run(cmd, text=True, capture_output=True, env=env)
@@ -143,7 +151,10 @@ class CleanbackE2EDockerTests(unittest.TestCase):
self.assertEqual(proc.returncode, 0, msg=proc.stderr or proc.stdout) self.assertEqual(proc.returncode, 0, msg=proc.stderr or proc.stdout)
self.assertTrue(self.good.exists(), "good should remain") self.assertTrue(self.good.exists(), "good should remain")
self.assertFalse(self.bad.exists(), "bad should be deleted") self.assertFalse(self.bad.exists(), "bad should be deleted")
self.assertFalse(self.timeout.exists(), "timeout should be deleted (timeout treated as failure)") self.assertFalse(
self.timeout.exists(),
"timeout should be deleted (timeout treated as failure)",
)
self.assertIn("Summary:", proc.stdout) self.assertIn("Summary:", proc.stdout)

View File

@@ -16,8 +16,8 @@ from cleanback import __main__ as main # noqa: E402
# Keep tests snappy but reliable: # Keep tests snappy but reliable:
# - "timeout" dirs sleep 0.3s in fake dirval # - "timeout" dirs sleep 0.3s in fake dirval
# - we pass --timeout 0.1s -> they will time out # - we pass --timeout 0.1s -> they will time out
FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms
SHORT_TIMEOUT = "0.1" # 100 ms SHORT_TIMEOUT = "0.1" # 100 ms
FAKE_DIRVAL = f"""#!/usr/bin/env python3 FAKE_DIRVAL = f"""#!/usr/bin/env python3
import sys, time, argparse, pathlib import sys, time, argparse, pathlib
@@ -50,6 +50,7 @@ if __name__ == "__main__":
sys.exit(main()) sys.exit(main())
""" """
class CleanupBackupsUsingDirvalTests(unittest.TestCase): class CleanupBackupsUsingDirvalTests(unittest.TestCase):
def setUp(self): def setUp(self):
# temp /Backups root # temp /Backups root
@@ -89,12 +90,7 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
self.stdout_cm.__enter__() self.stdout_cm.__enter__()
self.stderr_cm.__enter__() self.stderr_cm.__enter__()
# Patch BACKUPS_ROOT to temp root
self.backups_patcher = patch.object(main, "BACKUPS_ROOT", self.backups_root)
self.backups_patcher.start()
def tearDown(self): def tearDown(self):
self.backups_patcher.stop()
self.stdout_cm.__exit__(None, None, None) self.stdout_cm.__exit__(None, None, None)
self.stderr_cm.__exit__(None, None, None) self.stderr_cm.__exit__(None, None, None)
self.tmpdir.cleanup() self.tmpdir.cleanup()
@@ -105,32 +101,52 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
out = self._stdout.getvalue() out = self._stdout.getvalue()
err = self._stderr.getvalue() err = self._stderr.getvalue()
dur = time.time() - start dur = time.time() - start
self._stdout.seek(0); self._stdout.truncate(0) self._stdout.seek(0)
self._stderr.seek(0); self._stderr.truncate(0) self._stdout.truncate(0)
self._stderr.seek(0)
self._stderr.truncate(0)
return rc, out, err, dur return rc, out, err, dur
def test_id_mode_yes_deletes_failures(self): def test_id_mode_yes_deletes_failures(self):
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--id", "ID1", [
"--dirval-cmd", str(self.dirval), "--backups-root",
"--workers", "4", str(self.backups_root),
"--timeout", SHORT_TIMEOUT, "--id",
"--yes", "ID1",
]) "--dirval-cmd",
str(self.dirval),
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out) self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.goodA.exists(), "goodA should remain") self.assertTrue(self.goodA.exists(), "goodA should remain")
self.assertFalse(self.badB.exists(), "badB should be deleted") self.assertFalse(self.badB.exists(), "badB should be deleted")
self.assertFalse(self.timeoutC.exists(), "timeoutC should be deleted (timeout treated as failure)") self.assertFalse(
self.timeoutC.exists(),
"timeoutC should be deleted (timeout treated as failure)",
)
self.assertIn("Summary:", out) self.assertIn("Summary:", out)
def test_all_mode(self): def test_all_mode(self):
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--all", [
"--dirval-cmd", str(self.dirval), "--backups-root",
"--workers", "4", str(self.backups_root),
"--timeout", SHORT_TIMEOUT, "--all",
"--yes", "--dirval-cmd",
]) str(self.dirval),
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out) self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.goodA.exists()) self.assertTrue(self.goodA.exists())
self.assertFalse(self.badB.exists()) self.assertFalse(self.badB.exists())
@@ -139,49 +155,80 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
self.assertFalse(self.badY.exists()) self.assertFalse(self.badY.exists())
def test_dirval_missing_errors(self): def test_dirval_missing_errors(self):
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--id", "ID1", [
"--dirval-cmd", str(self.backups_root / "nope-dirval"), "--backups-root",
"--timeout", SHORT_TIMEOUT, str(self.backups_root),
"--yes", "--id",
]) "ID1",
"--dirval-cmd",
str(self.backups_root / "nope-dirval"),
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out) self.assertEqual(rc, 0, msg=err or out)
self.assertIn("dirval not found", out + err) self.assertIn("dirval not found", out + err)
def test_no_targets_message(self): def test_no_targets_message(self):
empty = self.backups_root / "EMPTY" / "backup-docker-to-local" empty = self.backups_root / "EMPTY" / "backup-docker-to-local"
empty.mkdir(parents=True, exist_ok=True) empty.mkdir(parents=True, exist_ok=True)
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--id", "EMPTY", [
"--dirval-cmd", str(self.dirval), "--backups-root",
"--timeout", SHORT_TIMEOUT, str(self.backups_root),
]) "--id",
"EMPTY",
"--dirval-cmd",
str(self.dirval),
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0) self.assertEqual(rc, 0)
self.assertIn("No subdirectories to validate. Nothing to do.", out) self.assertIn("No subdirectories to validate. Nothing to do.", out)
def test_interactive_keeps_when_no(self): def test_interactive_keeps_when_no(self):
with patch("builtins.input", return_value=""): with patch("builtins.input", return_value=""):
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--id", "ID2", [
"--dirval-cmd", str(self.dirval), "--backups-root",
"--workers", "1", str(self.backups_root),
"--timeout", SHORT_TIMEOUT, "--id",
]) "ID2",
"--dirval-cmd",
str(self.dirval),
"--workers",
"1",
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0, msg=err or out) self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.badY.exists(), "badY should be kept without confirmation") self.assertTrue(self.badY.exists(), "badY should be kept without confirmation")
self.assertTrue(self.goodX.exists()) self.assertTrue(self.goodX.exists())
def test_interactive_yes_deletes(self): def test_interactive_yes_deletes(self):
with patch("builtins.input", return_value="y"): with patch("builtins.input", return_value="y"):
rc, out, err, _ = self.run_main([ rc, out, err, _ = self.run_main(
"--id", "ID2", [
"--dirval-cmd", str(self.dirval), "--backups-root",
"--workers", "1", str(self.backups_root),
"--timeout", SHORT_TIMEOUT, "--id",
]) "ID2",
"--dirval-cmd",
str(self.dirval),
"--workers",
"1",
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0, msg=err or out) self.assertEqual(rc, 0, msg=err or out)
self.assertFalse(self.badY.exists(), "badY should be deleted") self.assertFalse(self.badY.exists(), "badY should be deleted")
self.assertTrue(self.goodX.exists()) self.assertTrue(self.goodX.exists())
if __name__ == "__main__": if __name__ == "__main__":
unittest.main(verbosity=2) unittest.main(verbosity=2)