From bb5bdcf08484b73ef1fec036ae37eed3b7eee942 Mon Sep 17 00:00:00 2001
From: Kevin Veen-Birkenbach <kevin@veen.world>
Date: Wed, 31 Dec 2025 08:31:43 +0100
Subject: [PATCH] refactor(cleanback): make backups root configurable and
 update docs/tests

- require --backups-root instead of hardcoded /Backups
- update README examples and wording accordingly
- adjust CLI help text and internal path handling
- refactor unit and E2E tests to pass explicit backups root
- minor formatting and readability cleanups
---
 README.md                    |  14 ++--
 src/cleanback/__main__.py    |  75 +++++++++++++------
 tests/e2e/test_e2e_docker.py |  25 +++++--
 tests/unit/test_main.py      | 141 +++++++++++++++++++++++------------
 4 files changed, 171 insertions(+), 84 deletions(-)
diff --git a/README.md b/README.md
index 13ac92b..f183f6a 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 **Repository:** https://github.com/kevinveenbirkenbach/cleanup-failed-backups
 
 `cleanback` validates and (optionally) cleans up **failed Docker backup directories**.  
-It scans backup folders under `/Backups`, uses :contentReference[oaicite:0]{index=0} to validate each subdirectory, and lets you delete the ones that fail validation.
+It scans backup folders under a configurable backups root (e.g. `/Backups`), uses `dirval` to validate each subdirectory, and lets you delete the ones that fail validation.
 
 Validation runs **in parallel** for performance; deletions are controlled and can be **interactive** or **fully automatic**.
 
@@ -51,7 +51,7 @@ pip install -e .
 ## 🔧 Requirements
 
 * Python **3.8+**
-* Access to the `/Backups` directory tree
+* Access to the backups root directory tree (e.g. `/Backups`)
 * `dirval` (installed automatically via pip dependency)
 
 ---
@@ -69,7 +69,7 @@ cleanback
 ### Validate a single backup ID
 
 ```bash
-cleanback --id <ID>
+cleanback --backups-root /Backups --id <ID>
 ```
 
 Validates directories under:
@@ -81,7 +81,7 @@ Validates directories under:
 ### Validate all backups
 
 ```bash
-cleanback --all
+cleanback --backups-root /Backups --all
 ```
 
 Scans:
@@ -107,13 +107,13 @@ Scans:
 
 ```bash
 # Validate a single backup and prompt on failures
-cleanback --id 2024-09-01T12-00-00
+cleanback --backups-root /Backups --id 2024-09-01T12-00-00
 
 # Validate everything with 8 workers and auto-delete failures
-cleanback --all --workers 8 --yes
+cleanback --backups-root /Backups --all --workers 8 --yes
 
 # Use a custom dirval binary and short timeout
-cleanback --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0
+cleanback --backups-root /Backups --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0
 ```
 
 ---
diff --git a/src/cleanback/__main__.py b/src/cleanback/__main__.py
index aba1a9b..f73a565 100755
--- a/src/cleanback/__main__.py
+++ b/src/cleanback/__main__.py
@@ -3,8 +3,8 @@
 Cleanup Failed Docker Backups — parallel validator (using dirval)
 
 Validates backup subdirectories under:
-- /Backups/<ID>/backup-docker-to-local          (when --id is used)
-- /Backups/*/backup-docker-to-local             (when --all is used)
+- <BACKUPS_ROOT>/<ID>/backup-docker-to-local          (when --id is used)
+- <BACKUPS_ROOT>/*/backup-docker-to-local             (when --all is used)
 
 For each subdirectory:
 - Runs `dirval <subdir> --validate`.
@@ -19,17 +19,15 @@ Parallelism:
 from __future__ import annotations
 
 import argparse
-import sys
+import multiprocessing
 import shutil
 import subprocess
+import sys
+import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional, Tuple
-import multiprocessing
-import time
-
-BACKUPS_ROOT = Path("/Backups")
 
 
 @dataclass(frozen=True)
@@ -41,25 +39,28 @@ class ValidationResult:
     stdout: str
 
 
-def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Path]:
+def discover_target_subdirs(
+    backups_root: Path, backup_id: Optional[str], all_mode: bool
+) -> List[Path]:
     """
     Return a list of subdirectories to validate:
-      - If backup_id is given: /Backups/<id>/backup-docker-to-local/* (dirs only)
-      - If --all: for each /Backups/* that has backup-docker-to-local, include its subdirs
+      - If backup_id is given: <root>/<id>/backup-docker-to-local/* (dirs only)
+      - If --all: for each <root>/* that has backup-docker-to-local, include its subdirs
     """
     targets: List[Path] = []
 
+    if not backups_root.is_dir():
+        raise FileNotFoundError(f"Backups root does not exist: {backups_root}")
+
     if all_mode:
-        if not BACKUPS_ROOT.is_dir():
-            raise FileNotFoundError(f"Backups root does not exist: {BACKUPS_ROOT}")
-        for backup_folder in sorted(p for p in BACKUPS_ROOT.iterdir() if p.is_dir()):
+        for backup_folder in sorted(p for p in backups_root.iterdir() if p.is_dir()):
             candidate = backup_folder / "backup-docker-to-local"
             if candidate.is_dir():
                 targets.extend(sorted([p for p in candidate.iterdir() if p.is_dir()]))
     else:
         if not backup_id:
             raise ValueError("Either --id or --all must be provided.")
-        base = BACKUPS_ROOT / backup_id / "backup-docker-to-local"
+        base = backups_root / backup_id / "backup-docker-to-local"
         if not base.is_dir():
             raise FileNotFoundError(f"Directory does not exist: {base}")
         targets = sorted([p for p in base.iterdir() if p.is_dir()])
@@ -67,7 +68,9 @@ def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Pa
     return targets
 
 
-def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> ValidationResult:
+def run_dirval_validate(
+    subdir: Path, dirval_cmd: str, timeout: float
+) -> ValidationResult:
     """
     Execute dirval:
         <dirval_cmd> "<SUBDIR>" --validate
@@ -108,16 +111,23 @@ def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> Valida
         )
 
 
-def parallel_validate(subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float) -> List[ValidationResult]:
+def parallel_validate(
+    subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float
+) -> List[ValidationResult]:
     results: List[ValidationResult] = []
     if not subdirs:
         return results
 
-    print(f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})...")
+    print(
+        f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})..."
+    )
     start = time.time()
 
     with ThreadPoolExecutor(max_workers=workers) as pool:
-        future_map = {pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd for sd in subdirs}
+        future_map = {
+            pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd
+            for sd in subdirs
+        }
         for fut in as_completed(future_map):
             res = fut.result()
             status = "ok" if res.ok else "error"
@@ -140,7 +150,7 @@ def print_dir_listing(path: Path, max_items: int = 50) -> None:
         typ = "<DIR>" if entry.is_dir() else "     "
         print(f"  {typ} {entry.name}")
         if i + 1 >= max_items and len(entries) > i + 1:
-            print(f"  ... (+{len(entries) - (i+1)} more)")
+            print(f"  ... (+{len(entries) - (i + 1)} more)")
             break
 
 
@@ -190,9 +200,24 @@ def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval."
     )
+
+    parser.add_argument(
+        "--backups-root",
+        required=True,
+        type=Path,
+        help="Root directory containing backup folders (required).",
+    )
+
     scope = parser.add_mutually_exclusive_group(required=True)
-    scope.add_argument("--id", dest="backup_id", help="Backup folder name under /Backups.")
-    scope.add_argument("--all", dest="all_mode", action="store_true", help="Scan all /Backups/* folders.")
+    scope.add_argument(
+        "--id", dest="backup_id", help="Backup folder name under backups root."
+    )
+    scope.add_argument(
+        "--all",
+        dest="all_mode",
+        action="store_true",
+        help="Scan all backups root/* folders.",
+    )
 
     parser.add_argument(
         "--dirval-cmd",
@@ -223,7 +248,9 @@ def main(argv: Optional[List[str]] = None) -> int:
     args = parse_args(argv)
 
     try:
-        subdirs = discover_target_subdirs(args.backup_id, bool(args.all_mode))
+        subdirs = discover_target_subdirs(
+            args.backups_root, args.backup_id, bool(args.all_mode)
+        )
     except Exception as e:
         print(f"ERROR: {e}", file=sys.stderr)
         return 2
@@ -242,7 +269,9 @@ def main(argv: Optional[List[str]] = None) -> int:
     print(f"\n{len(failures)} directory(ies) failed validation.")
     deleted = process_deletions(failures, assume_yes=args.yes)
     kept = len(failures) - deleted
-    print(f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}")
+    print(
+        f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}"
+    )
     return 0
 
 
diff --git a/tests/e2e/test_e2e_docker.py b/tests/e2e/test_e2e_docker.py
index 1efb770..6d99343 100644
--- a/tests/e2e/test_e2e_docker.py
+++ b/tests/e2e/test_e2e_docker.py
@@ -118,7 +118,7 @@ class CleanbackE2EDockerTests(unittest.TestCase):
         env = os.environ.copy()
 
         # Prepend fake dirval path for this test run
-        env["PATH"] = f"{self.bin_dir}:{env.get('PATH','')}"
+        env["PATH"] = f"{self.bin_dir}:{env.get('PATH', '')}"
 
         # Run: python -m cleanback --id <ID> --yes
         # We must point BACKUPS_ROOT to our run_root. Easiest: set /Backups = run_root
@@ -131,11 +131,19 @@ class CleanbackE2EDockerTests(unittest.TestCase):
         composite_id = f"{self.run_root.name}/{self.backup_id}"
 
         cmd = [
-            "python", "-m", "cleanback",
-            "--id", composite_id,
-            "--dirval-cmd", "dirval",
-            "--workers", "4",
-            "--timeout", SHORT_TIMEOUT,
+            "python",
+            "-m",
+            "cleanback",
+            "--backups-root",
+            "/Backups",
+            "--id",
+            composite_id,
+            "--dirval-cmd",
+            "dirval",
+            "--workers",
+            "4",
+            "--timeout",
+            SHORT_TIMEOUT,
             "--yes",
         ]
         proc = subprocess.run(cmd, text=True, capture_output=True, env=env)
@@ -143,7 +151,10 @@ class CleanbackE2EDockerTests(unittest.TestCase):
         self.assertEqual(proc.returncode, 0, msg=proc.stderr or proc.stdout)
         self.assertTrue(self.good.exists(), "good should remain")
         self.assertFalse(self.bad.exists(), "bad should be deleted")
-        self.assertFalse(self.timeout.exists(), "timeout should be deleted (timeout treated as failure)")
+        self.assertFalse(
+            self.timeout.exists(),
+            "timeout should be deleted (timeout treated as failure)",
+        )
         self.assertIn("Summary:", proc.stdout)
 
 
diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py
index a43e511..640a6c9 100644
--- a/tests/unit/test_main.py
+++ b/tests/unit/test_main.py
@@ -16,8 +16,8 @@ from cleanback import __main__ as main  # noqa: E402
 # Keep tests snappy but reliable:
 # - "timeout" dirs sleep 0.3s in fake dirval
 # - we pass --timeout 0.1s -> they will time out
-FAKE_TIMEOUT_SLEEP = 0.3   # 300 ms
-SHORT_TIMEOUT = "0.1"      # 100 ms
+FAKE_TIMEOUT_SLEEP = 0.3  # 300 ms
+SHORT_TIMEOUT = "0.1"  # 100 ms
 
 FAKE_DIRVAL = f"""#!/usr/bin/env python3
 import sys, time, argparse, pathlib
@@ -50,6 +50,7 @@ if __name__ == "__main__":
     sys.exit(main())
 """
 
+
 class CleanupBackupsUsingDirvalTests(unittest.TestCase):
     def setUp(self):
         # temp /Backups root
@@ -89,12 +90,7 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
         self.stdout_cm.__enter__()
         self.stderr_cm.__enter__()
 
-        # Patch BACKUPS_ROOT to temp root
-        self.backups_patcher = patch.object(main, "BACKUPS_ROOT", self.backups_root)
-        self.backups_patcher.start()
-
     def tearDown(self):
-        self.backups_patcher.stop()
         self.stdout_cm.__exit__(None, None, None)
         self.stderr_cm.__exit__(None, None, None)
         self.tmpdir.cleanup()
@@ -105,32 +101,52 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
         out = self._stdout.getvalue()
         err = self._stderr.getvalue()
         dur = time.time() - start
-        self._stdout.seek(0); self._stdout.truncate(0)
-        self._stderr.seek(0); self._stderr.truncate(0)
+        self._stdout.seek(0)
+        self._stdout.truncate(0)
+        self._stderr.seek(0)
+        self._stderr.truncate(0)
         return rc, out, err, dur
 
     def test_id_mode_yes_deletes_failures(self):
-        rc, out, err, _ = self.run_main([
-            "--id", "ID1",
-            "--dirval-cmd", str(self.dirval),
-            "--workers", "4",
-            "--timeout", SHORT_TIMEOUT,
-            "--yes",
-        ])
+        rc, out, err, _ = self.run_main(
+            [
+                "--backups-root",
+                str(self.backups_root),
+                "--id",
+                "ID1",
+                "--dirval-cmd",
+                str(self.dirval),
+                "--workers",
+                "4",
+                "--timeout",
+                SHORT_TIMEOUT,
+                "--yes",
+            ]
+        )
         self.assertEqual(rc, 0, msg=err or out)
         self.assertTrue(self.goodA.exists(), "goodA should remain")
         self.assertFalse(self.badB.exists(), "badB should be deleted")
-        self.assertFalse(self.timeoutC.exists(), "timeoutC should be deleted (timeout treated as failure)")
+        self.assertFalse(
+            self.timeoutC.exists(),
+            "timeoutC should be deleted (timeout treated as failure)",
+        )
         self.assertIn("Summary:", out)
 
     def test_all_mode(self):
-        rc, out, err, _ = self.run_main([
-            "--all",
-            "--dirval-cmd", str(self.dirval),
-            "--workers", "4",
-            "--timeout", SHORT_TIMEOUT,
-            "--yes",
-        ])
+        rc, out, err, _ = self.run_main(
+            [
+                "--backups-root",
+                str(self.backups_root),
+                "--all",
+                "--dirval-cmd",
+                str(self.dirval),
+                "--workers",
+                "4",
+                "--timeout",
+                SHORT_TIMEOUT,
+                "--yes",
+            ]
+        )
         self.assertEqual(rc, 0, msg=err or out)
         self.assertTrue(self.goodA.exists())
         self.assertFalse(self.badB.exists())
@@ -139,49 +155,80 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
         self.assertFalse(self.badY.exists())
 
     def test_dirval_missing_errors(self):
-        rc, out, err, _ = self.run_main([
-            "--id", "ID1",
-            "--dirval-cmd", str(self.backups_root / "nope-dirval"),
-            "--timeout", SHORT_TIMEOUT,
-            "--yes",
-        ])
+        rc, out, err, _ = self.run_main(
+            [
+                "--backups-root",
+                str(self.backups_root),
+                "--id",
+                "ID1",
+                "--dirval-cmd",
+                str(self.backups_root / "nope-dirval"),
+                "--timeout",
+                SHORT_TIMEOUT,
+                "--yes",
+            ]
+        )
         self.assertEqual(rc, 0, msg=err or out)
         self.assertIn("dirval not found", out + err)
 
     def test_no_targets_message(self):
         empty = self.backups_root / "EMPTY" / "backup-docker-to-local"
         empty.mkdir(parents=True, exist_ok=True)
-        rc, out, err, _ = self.run_main([
-            "--id", "EMPTY",
-            "--dirval-cmd", str(self.dirval),
-            "--timeout", SHORT_TIMEOUT,
-        ])
+        rc, out, err, _ = self.run_main(
+            [
+                "--backups-root",
+                str(self.backups_root),
+                "--id",
+                "EMPTY",
+                "--dirval-cmd",
+                str(self.dirval),
+                "--timeout",
+                SHORT_TIMEOUT,
+            ]
+        )
         self.assertEqual(rc, 0)
         self.assertIn("No subdirectories to validate. Nothing to do.", out)
 
     def test_interactive_keeps_when_no(self):
         with patch("builtins.input", return_value=""):
-            rc, out, err, _ = self.run_main([
-                "--id", "ID2",
-                "--dirval-cmd", str(self.dirval),
-                "--workers", "1",
-                "--timeout", SHORT_TIMEOUT,
-            ])
+            rc, out, err, _ = self.run_main(
+                [
+                    "--backups-root",
+                    str(self.backups_root),
+                    "--id",
+                    "ID2",
+                    "--dirval-cmd",
+                    str(self.dirval),
+                    "--workers",
+                    "1",
+                    "--timeout",
+                    SHORT_TIMEOUT,
+                ]
+            )
         self.assertEqual(rc, 0, msg=err or out)
         self.assertTrue(self.badY.exists(), "badY should be kept without confirmation")
         self.assertTrue(self.goodX.exists())
 
     def test_interactive_yes_deletes(self):
         with patch("builtins.input", return_value="y"):
-            rc, out, err, _ = self.run_main([
-                "--id", "ID2",
-                "--dirval-cmd", str(self.dirval),
-                "--workers", "1",
-                "--timeout", SHORT_TIMEOUT,
-            ])
+            rc, out, err, _ = self.run_main(
+                [
+                    "--backups-root",
+                    str(self.backups_root),
+                    "--id",
+                    "ID2",
+                    "--dirval-cmd",
+                    str(self.dirval),
+                    "--workers",
+                    "1",
+                    "--timeout",
+                    SHORT_TIMEOUT,
+                ]
+            )
         self.assertEqual(rc, 0, msg=err or out)
         self.assertFalse(self.badY.exists(), "badY should be deleted")
         self.assertTrue(self.goodX.exists())
 
+
 if __name__ == "__main__":
     unittest.main(verbosity=2)