From bb5bdcf08484b73ef1fec036ae37eed3b7eee942 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Wed, 31 Dec 2025 08:31:43 +0100 Subject: [PATCH] refactor(cleanback): make backups root configurable and update docs/tests - require --backups-root instead of hardcoded /Backups - update README examples and wording accordingly - adjust CLI help text and internal path handling - refactor unit and E2E tests to pass explicit backups root - minor formatting and readability cleanups --- README.md | 14 ++-- src/cleanback/__main__.py | 75 +++++++++++++------ tests/e2e/test_e2e_docker.py | 25 +++++-- tests/unit/test_main.py | 141 +++++++++++++++++++++++------------ 4 files changed, 171 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index 13ac92b..f183f6a 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ **Repository:** https://github.com/kevinveenbirkenbach/cleanup-failed-backups `cleanback` validates and (optionally) cleans up **failed Docker backup directories**. -It scans backup folders under `/Backups`, uses :contentReference[oaicite:0]{index=0} to validate each subdirectory, and lets you delete the ones that fail validation. +It scans backup folders under a configurable backups root (e.g. `/Backups`), uses `dirval` to validate each subdirectory, and lets you delete the ones that fail validation. Validation runs **in parallel** for performance; deletions are controlled and can be **interactive** or **fully automatic**. @@ -51,7 +51,7 @@ pip install -e . ## 🔧 Requirements * Python **3.8+** -* Access to the `/Backups` directory tree +* Access to the backups root directory tree (e.g. `/Backups`) * `dirval` (installed automatically via pip dependency) --- @@ -69,7 +69,7 @@ cleanback ### Validate a single backup ID ```bash -cleanback --id +cleanback --backups-root /Backups --id ``` Validates directories under: @@ -81,7 +81,7 @@ Validates directories under: ### Validate all backups ```bash -cleanback --all +cleanback --backups-root /Backups --all ``` Scans: @@ -107,13 +107,13 @@ Scans: ```bash # Validate a single backup and prompt on failures -cleanback --id 2024-09-01T12-00-00 +cleanback --backups-root /Backups --id 2024-09-01T12-00-00 # Validate everything with 8 workers and auto-delete failures -cleanback --all --workers 8 --yes +cleanback --backups-root /Backups --all --workers 8 --yes # Use a custom dirval binary and short timeout -cleanback --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0 +cleanback --backups-root /Backups --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0 ``` --- diff --git a/src/cleanback/__main__.py b/src/cleanback/__main__.py index aba1a9b..f73a565 100755 --- a/src/cleanback/__main__.py +++ b/src/cleanback/__main__.py @@ -3,8 +3,8 @@ Cleanup Failed Docker Backups — parallel validator (using dirval) Validates backup subdirectories under: -- /Backups//backup-docker-to-local (when --id is used) -- /Backups/*/backup-docker-to-local (when --all is used) +- //backup-docker-to-local (when --id is used) +- /*/backup-docker-to-local (when --all is used) For each subdirectory: - Runs `dirval --validate`. @@ -19,17 +19,15 @@ Parallelism: from __future__ import annotations import argparse -import sys +import multiprocessing import shutil import subprocess +import sys +import time from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass from pathlib import Path from typing import List, Optional, Tuple -import multiprocessing -import time - -BACKUPS_ROOT = Path("/Backups") @dataclass(frozen=True) @@ -41,25 +39,28 @@ class ValidationResult: stdout: str -def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Path]: +def discover_target_subdirs( + backups_root: Path, backup_id: Optional[str], all_mode: bool +) -> List[Path]: """ Return a list of subdirectories to validate: - - If backup_id is given: /Backups//backup-docker-to-local/* (dirs only) - - If --all: for each /Backups/* that has backup-docker-to-local, include its subdirs + - If backup_id is given: //backup-docker-to-local/* (dirs only) + - If --all: for each /* that has backup-docker-to-local, include its subdirs """ targets: List[Path] = [] + if not backups_root.is_dir(): + raise FileNotFoundError(f"Backups root does not exist: {backups_root}") + if all_mode: - if not BACKUPS_ROOT.is_dir(): - raise FileNotFoundError(f"Backups root does not exist: {BACKUPS_ROOT}") - for backup_folder in sorted(p for p in BACKUPS_ROOT.iterdir() if p.is_dir()): + for backup_folder in sorted(p for p in backups_root.iterdir() if p.is_dir()): candidate = backup_folder / "backup-docker-to-local" if candidate.is_dir(): targets.extend(sorted([p for p in candidate.iterdir() if p.is_dir()])) else: if not backup_id: raise ValueError("Either --id or --all must be provided.") - base = BACKUPS_ROOT / backup_id / "backup-docker-to-local" + base = backups_root / backup_id / "backup-docker-to-local" if not base.is_dir(): raise FileNotFoundError(f"Directory does not exist: {base}") targets = sorted([p for p in base.iterdir() if p.is_dir()]) @@ -67,7 +68,9 @@ def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Pa return targets -def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> ValidationResult: +def run_dirval_validate( + subdir: Path, dirval_cmd: str, timeout: float +) -> ValidationResult: """ Execute dirval: "" --validate @@ -108,16 +111,23 @@ def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> Valida ) -def parallel_validate(subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float) -> List[ValidationResult]: +def parallel_validate( + subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float +) -> List[ValidationResult]: results: List[ValidationResult] = [] if not subdirs: return results - print(f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})...") + print( + f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})..." + ) start = time.time() with ThreadPoolExecutor(max_workers=workers) as pool: - future_map = {pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd for sd in subdirs} + future_map = { + pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd + for sd in subdirs + } for fut in as_completed(future_map): res = fut.result() status = "ok" if res.ok else "error" @@ -140,7 +150,7 @@ def print_dir_listing(path: Path, max_items: int = 50) -> None: typ = "" if entry.is_dir() else " " print(f" {typ} {entry.name}") if i + 1 >= max_items and len(entries) > i + 1: - print(f" ... (+{len(entries) - (i+1)} more)") + print(f" ... (+{len(entries) - (i + 1)} more)") break @@ -190,9 +200,24 @@ def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace: parser = argparse.ArgumentParser( description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval." ) + + parser.add_argument( + "--backups-root", + required=True, + type=Path, + help="Root directory containing backup folders (required).", + ) + scope = parser.add_mutually_exclusive_group(required=True) - scope.add_argument("--id", dest="backup_id", help="Backup folder name under /Backups.") - scope.add_argument("--all", dest="all_mode", action="store_true", help="Scan all /Backups/* folders.") + scope.add_argument( + "--id", dest="backup_id", help="Backup folder name under backups root." + ) + scope.add_argument( + "--all", + dest="all_mode", + action="store_true", + help="Scan all backups root/* folders.", + ) parser.add_argument( "--dirval-cmd", @@ -223,7 +248,9 @@ def main(argv: Optional[List[str]] = None) -> int: args = parse_args(argv) try: - subdirs = discover_target_subdirs(args.backup_id, bool(args.all_mode)) + subdirs = discover_target_subdirs( + args.backups_root, args.backup_id, bool(args.all_mode) + ) except Exception as e: print(f"ERROR: {e}", file=sys.stderr) return 2 @@ -242,7 +269,9 @@ def main(argv: Optional[List[str]] = None) -> int: print(f"\n{len(failures)} directory(ies) failed validation.") deleted = process_deletions(failures, assume_yes=args.yes) kept = len(failures) - deleted - print(f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}") + print( + f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}" + ) return 0 diff --git a/tests/e2e/test_e2e_docker.py b/tests/e2e/test_e2e_docker.py index 1efb770..6d99343 100644 --- a/tests/e2e/test_e2e_docker.py +++ b/tests/e2e/test_e2e_docker.py @@ -118,7 +118,7 @@ class CleanbackE2EDockerTests(unittest.TestCase): env = os.environ.copy() # Prepend fake dirval path for this test run - env["PATH"] = f"{self.bin_dir}:{env.get('PATH','')}" + env["PATH"] = f"{self.bin_dir}:{env.get('PATH', '')}" # Run: python -m cleanback --id --yes # We must point BACKUPS_ROOT to our run_root. Easiest: set /Backups = run_root @@ -131,11 +131,19 @@ class CleanbackE2EDockerTests(unittest.TestCase): composite_id = f"{self.run_root.name}/{self.backup_id}" cmd = [ - "python", "-m", "cleanback", - "--id", composite_id, - "--dirval-cmd", "dirval", - "--workers", "4", - "--timeout", SHORT_TIMEOUT, + "python", + "-m", + "cleanback", + "--backups-root", + "/Backups", + "--id", + composite_id, + "--dirval-cmd", + "dirval", + "--workers", + "4", + "--timeout", + SHORT_TIMEOUT, "--yes", ] proc = subprocess.run(cmd, text=True, capture_output=True, env=env) @@ -143,7 +151,10 @@ class CleanbackE2EDockerTests(unittest.TestCase): self.assertEqual(proc.returncode, 0, msg=proc.stderr or proc.stdout) self.assertTrue(self.good.exists(), "good should remain") self.assertFalse(self.bad.exists(), "bad should be deleted") - self.assertFalse(self.timeout.exists(), "timeout should be deleted (timeout treated as failure)") + self.assertFalse( + self.timeout.exists(), + "timeout should be deleted (timeout treated as failure)", + ) self.assertIn("Summary:", proc.stdout) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index a43e511..640a6c9 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -16,8 +16,8 @@ from cleanback import __main__ as main # noqa: E402 # Keep tests snappy but reliable: # - "timeout" dirs sleep 0.3s in fake dirval # - we pass --timeout 0.1s -> they will time out -FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms -SHORT_TIMEOUT = "0.1" # 100 ms +FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms +SHORT_TIMEOUT = "0.1" # 100 ms FAKE_DIRVAL = f"""#!/usr/bin/env python3 import sys, time, argparse, pathlib @@ -50,6 +50,7 @@ if __name__ == "__main__": sys.exit(main()) """ + class CleanupBackupsUsingDirvalTests(unittest.TestCase): def setUp(self): # temp /Backups root @@ -89,12 +90,7 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase): self.stdout_cm.__enter__() self.stderr_cm.__enter__() - # Patch BACKUPS_ROOT to temp root - self.backups_patcher = patch.object(main, "BACKUPS_ROOT", self.backups_root) - self.backups_patcher.start() - def tearDown(self): - self.backups_patcher.stop() self.stdout_cm.__exit__(None, None, None) self.stderr_cm.__exit__(None, None, None) self.tmpdir.cleanup() @@ -105,32 +101,52 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase): out = self._stdout.getvalue() err = self._stderr.getvalue() dur = time.time() - start - self._stdout.seek(0); self._stdout.truncate(0) - self._stderr.seek(0); self._stderr.truncate(0) + self._stdout.seek(0) + self._stdout.truncate(0) + self._stderr.seek(0) + self._stderr.truncate(0) return rc, out, err, dur def test_id_mode_yes_deletes_failures(self): - rc, out, err, _ = self.run_main([ - "--id", "ID1", - "--dirval-cmd", str(self.dirval), - "--workers", "4", - "--timeout", SHORT_TIMEOUT, - "--yes", - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--id", + "ID1", + "--dirval-cmd", + str(self.dirval), + "--workers", + "4", + "--timeout", + SHORT_TIMEOUT, + "--yes", + ] + ) self.assertEqual(rc, 0, msg=err or out) self.assertTrue(self.goodA.exists(), "goodA should remain") self.assertFalse(self.badB.exists(), "badB should be deleted") - self.assertFalse(self.timeoutC.exists(), "timeoutC should be deleted (timeout treated as failure)") + self.assertFalse( + self.timeoutC.exists(), + "timeoutC should be deleted (timeout treated as failure)", + ) self.assertIn("Summary:", out) def test_all_mode(self): - rc, out, err, _ = self.run_main([ - "--all", - "--dirval-cmd", str(self.dirval), - "--workers", "4", - "--timeout", SHORT_TIMEOUT, - "--yes", - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--all", + "--dirval-cmd", + str(self.dirval), + "--workers", + "4", + "--timeout", + SHORT_TIMEOUT, + "--yes", + ] + ) self.assertEqual(rc, 0, msg=err or out) self.assertTrue(self.goodA.exists()) self.assertFalse(self.badB.exists()) @@ -139,49 +155,80 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase): self.assertFalse(self.badY.exists()) def test_dirval_missing_errors(self): - rc, out, err, _ = self.run_main([ - "--id", "ID1", - "--dirval-cmd", str(self.backups_root / "nope-dirval"), - "--timeout", SHORT_TIMEOUT, - "--yes", - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--id", + "ID1", + "--dirval-cmd", + str(self.backups_root / "nope-dirval"), + "--timeout", + SHORT_TIMEOUT, + "--yes", + ] + ) self.assertEqual(rc, 0, msg=err or out) self.assertIn("dirval not found", out + err) def test_no_targets_message(self): empty = self.backups_root / "EMPTY" / "backup-docker-to-local" empty.mkdir(parents=True, exist_ok=True) - rc, out, err, _ = self.run_main([ - "--id", "EMPTY", - "--dirval-cmd", str(self.dirval), - "--timeout", SHORT_TIMEOUT, - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--id", + "EMPTY", + "--dirval-cmd", + str(self.dirval), + "--timeout", + SHORT_TIMEOUT, + ] + ) self.assertEqual(rc, 0) self.assertIn("No subdirectories to validate. Nothing to do.", out) def test_interactive_keeps_when_no(self): with patch("builtins.input", return_value=""): - rc, out, err, _ = self.run_main([ - "--id", "ID2", - "--dirval-cmd", str(self.dirval), - "--workers", "1", - "--timeout", SHORT_TIMEOUT, - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--id", + "ID2", + "--dirval-cmd", + str(self.dirval), + "--workers", + "1", + "--timeout", + SHORT_TIMEOUT, + ] + ) self.assertEqual(rc, 0, msg=err or out) self.assertTrue(self.badY.exists(), "badY should be kept without confirmation") self.assertTrue(self.goodX.exists()) def test_interactive_yes_deletes(self): with patch("builtins.input", return_value="y"): - rc, out, err, _ = self.run_main([ - "--id", "ID2", - "--dirval-cmd", str(self.dirval), - "--workers", "1", - "--timeout", SHORT_TIMEOUT, - ]) + rc, out, err, _ = self.run_main( + [ + "--backups-root", + str(self.backups_root), + "--id", + "ID2", + "--dirval-cmd", + str(self.dirval), + "--workers", + "1", + "--timeout", + SHORT_TIMEOUT, + ] + ) self.assertEqual(rc, 0, msg=err or out) self.assertFalse(self.badY.exists(), "badY should be deleted") self.assertTrue(self.goodX.exists()) + if __name__ == "__main__": unittest.main(verbosity=2)