3 Commits

Author SHA1 Message Date
bebf8d2273 Release version 1.1.0 2025-12-31 08:33:34 +01:00
bb5bdcf084 refactor(cleanback): make backups root configurable and update docs/tests
- require --backups-root instead of hardcoded /Backups
- update README examples and wording accordingly
- adjust CLI help text and internal path handling
- refactor unit and E2E tests to pass explicit backups root
- minor formatting and readability cleanups
2025-12-31 08:31:43 +01:00
a628f8d6a9 Ignored build files 2025-12-28 19:58:46 +01:00
7 changed files with 180 additions and 86 deletions

4
.gitignore vendored
View File

@@ -1 +1,3 @@
**__pycache__
**__pycache__
*.egg-info
dist/

View File

@@ -1,3 +1,8 @@
## [1.1.0] - 2025-12-31
* The backups directory is now set via the required `--backups-root` option instead of being hardcoded to `/Backups`.
## [1.0.0] - 2025-12-28
* Official Release 🥳

View File

@@ -8,7 +8,7 @@
**Repository:** https://github.com/kevinveenbirkenbach/cleanup-failed-backups
`cleanback` validates and (optionally) cleans up **failed Docker backup directories**.
It scans backup folders under `/Backups`, uses :contentReference[oaicite:0]{index=0} to validate each subdirectory, and lets you delete the ones that fail validation.
It scans backup folders under a configurable backups root (e.g. `/Backups`), uses `dirval` to validate each subdirectory, and lets you delete the ones that fail validation.
Validation runs **in parallel** for performance; deletions are controlled and can be **interactive** or **fully automatic**.
@@ -51,7 +51,7 @@ pip install -e .
## 🔧 Requirements
* Python **3.8+**
* Access to the `/Backups` directory tree
* Access to the backups root directory tree (e.g. `/Backups`)
* `dirval` (installed automatically via pip dependency)
---
@@ -69,7 +69,7 @@ cleanback
### Validate a single backup ID
```bash
cleanback --id <ID>
cleanback --backups-root /Backups --id <ID>
```
Validates directories under:
@@ -81,7 +81,7 @@ Validates directories under:
### Validate all backups
```bash
cleanback --all
cleanback --backups-root /Backups --all
```
Scans:
@@ -107,13 +107,13 @@ Scans:
```bash
# Validate a single backup and prompt on failures
cleanback --id 2024-09-01T12-00-00
cleanback --backups-root /Backups --id 2024-09-01T12-00-00
# Validate everything with 8 workers and auto-delete failures
cleanback --all --workers 8 --yes
cleanback --backups-root /Backups --all --workers 8 --yes
# Use a custom dirval binary and short timeout
cleanback --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0
cleanback --backups-root /Backups --all --dirval-cmd /usr/local/bin/dirval --timeout 5.0
```
---

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "cleanback"
version = "1.0.0"
version = "1.1.0"
description = "Cleanup Failed Docker Backups — parallel validator (using dirval)"
readme = "README.md"
requires-python = ">=3.8"

View File

@@ -3,8 +3,8 @@
Cleanup Failed Docker Backups — parallel validator (using dirval)
Validates backup subdirectories under:
- /Backups/<ID>/backup-docker-to-local (when --id is used)
- /Backups/*/backup-docker-to-local (when --all is used)
- <BACKUPS_ROOT>/<ID>/backup-docker-to-local (when --id is used)
- <BACKUPS_ROOT>/*/backup-docker-to-local (when --all is used)
For each subdirectory:
- Runs `dirval <subdir> --validate`.
@@ -19,17 +19,15 @@ Parallelism:
from __future__ import annotations
import argparse
import sys
import multiprocessing
import shutil
import subprocess
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple
import multiprocessing
import time
BACKUPS_ROOT = Path("/Backups")
@dataclass(frozen=True)
@@ -41,25 +39,28 @@ class ValidationResult:
stdout: str
def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Path]:
def discover_target_subdirs(
backups_root: Path, backup_id: Optional[str], all_mode: bool
) -> List[Path]:
"""
Return a list of subdirectories to validate:
- If backup_id is given: /Backups/<id>/backup-docker-to-local/* (dirs only)
- If --all: for each /Backups/* that has backup-docker-to-local, include its subdirs
- If backup_id is given: <root>/<id>/backup-docker-to-local/* (dirs only)
- If --all: for each <root>/* that has backup-docker-to-local, include its subdirs
"""
targets: List[Path] = []
if not backups_root.is_dir():
raise FileNotFoundError(f"Backups root does not exist: {backups_root}")
if all_mode:
if not BACKUPS_ROOT.is_dir():
raise FileNotFoundError(f"Backups root does not exist: {BACKUPS_ROOT}")
for backup_folder in sorted(p for p in BACKUPS_ROOT.iterdir() if p.is_dir()):
for backup_folder in sorted(p for p in backups_root.iterdir() if p.is_dir()):
candidate = backup_folder / "backup-docker-to-local"
if candidate.is_dir():
targets.extend(sorted([p for p in candidate.iterdir() if p.is_dir()]))
else:
if not backup_id:
raise ValueError("Either --id or --all must be provided.")
base = BACKUPS_ROOT / backup_id / "backup-docker-to-local"
base = backups_root / backup_id / "backup-docker-to-local"
if not base.is_dir():
raise FileNotFoundError(f"Directory does not exist: {base}")
targets = sorted([p for p in base.iterdir() if p.is_dir()])
@@ -67,7 +68,9 @@ def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Pa
return targets
def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> ValidationResult:
def run_dirval_validate(
subdir: Path, dirval_cmd: str, timeout: float
) -> ValidationResult:
"""
Execute dirval:
<dirval_cmd> "<SUBDIR>" --validate
@@ -108,16 +111,23 @@ def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> Valida
)
def parallel_validate(subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float) -> List[ValidationResult]:
def parallel_validate(
subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float
) -> List[ValidationResult]:
results: List[ValidationResult] = []
if not subdirs:
return results
print(f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})...")
print(
f"Validating {len(subdirs)} directories with {workers} workers (dirval: {dirval_cmd})..."
)
start = time.time()
with ThreadPoolExecutor(max_workers=workers) as pool:
future_map = {pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd for sd in subdirs}
future_map = {
pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): sd
for sd in subdirs
}
for fut in as_completed(future_map):
res = fut.result()
status = "ok" if res.ok else "error"
@@ -140,7 +150,7 @@ def print_dir_listing(path: Path, max_items: int = 50) -> None:
typ = "<DIR>" if entry.is_dir() else " "
print(f" {typ} {entry.name}")
if i + 1 >= max_items and len(entries) > i + 1:
print(f" ... (+{len(entries) - (i+1)} more)")
print(f" ... (+{len(entries) - (i + 1)} more)")
break
@@ -190,9 +200,24 @@ def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval."
)
parser.add_argument(
"--backups-root",
required=True,
type=Path,
help="Root directory containing backup folders (required).",
)
scope = parser.add_mutually_exclusive_group(required=True)
scope.add_argument("--id", dest="backup_id", help="Backup folder name under /Backups.")
scope.add_argument("--all", dest="all_mode", action="store_true", help="Scan all /Backups/* folders.")
scope.add_argument(
"--id", dest="backup_id", help="Backup folder name under backups root."
)
scope.add_argument(
"--all",
dest="all_mode",
action="store_true",
help="Scan all backups root/* folders.",
)
parser.add_argument(
"--dirval-cmd",
@@ -223,7 +248,9 @@ def main(argv: Optional[List[str]] = None) -> int:
args = parse_args(argv)
try:
subdirs = discover_target_subdirs(args.backup_id, bool(args.all_mode))
subdirs = discover_target_subdirs(
args.backups_root, args.backup_id, bool(args.all_mode)
)
except Exception as e:
print(f"ERROR: {e}", file=sys.stderr)
return 2
@@ -242,7 +269,9 @@ def main(argv: Optional[List[str]] = None) -> int:
print(f"\n{len(failures)} directory(ies) failed validation.")
deleted = process_deletions(failures, assume_yes=args.yes)
kept = len(failures) - deleted
print(f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}")
print(
f"\nSummary: deleted={deleted}, kept={kept}, ok={len(results) - len(failures)}"
)
return 0

View File

@@ -118,7 +118,7 @@ class CleanbackE2EDockerTests(unittest.TestCase):
env = os.environ.copy()
# Prepend fake dirval path for this test run
env["PATH"] = f"{self.bin_dir}:{env.get('PATH','')}"
env["PATH"] = f"{self.bin_dir}:{env.get('PATH', '')}"
# Run: python -m cleanback --backups-root /Backups --id <ID> --yes
# We must point BACKUPS_ROOT to our run_root. Easiest: set /Backups = run_root
@@ -131,11 +131,19 @@ class CleanbackE2EDockerTests(unittest.TestCase):
composite_id = f"{self.run_root.name}/{self.backup_id}"
cmd = [
"python", "-m", "cleanback",
"--id", composite_id,
"--dirval-cmd", "dirval",
"--workers", "4",
"--timeout", SHORT_TIMEOUT,
"python",
"-m",
"cleanback",
"--backups-root",
"/Backups",
"--id",
composite_id,
"--dirval-cmd",
"dirval",
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
proc = subprocess.run(cmd, text=True, capture_output=True, env=env)
@@ -143,7 +151,10 @@ class CleanbackE2EDockerTests(unittest.TestCase):
self.assertEqual(proc.returncode, 0, msg=proc.stderr or proc.stdout)
self.assertTrue(self.good.exists(), "good should remain")
self.assertFalse(self.bad.exists(), "bad should be deleted")
self.assertFalse(self.timeout.exists(), "timeout should be deleted (timeout treated as failure)")
self.assertFalse(
self.timeout.exists(),
"timeout should be deleted (timeout treated as failure)",
)
self.assertIn("Summary:", proc.stdout)

View File

@@ -16,8 +16,8 @@ from cleanback import __main__ as main # noqa: E402
# Keep tests snappy but reliable:
# - "timeout" dirs sleep 0.3s in fake dirval
# - we pass --timeout 0.1s -> they will time out
FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms
SHORT_TIMEOUT = "0.1" # 100 ms
FAKE_TIMEOUT_SLEEP = 0.3 # 300 ms
SHORT_TIMEOUT = "0.1" # 100 ms
FAKE_DIRVAL = f"""#!/usr/bin/env python3
import sys, time, argparse, pathlib
@@ -50,6 +50,7 @@ if __name__ == "__main__":
sys.exit(main())
"""
class CleanupBackupsUsingDirvalTests(unittest.TestCase):
def setUp(self):
# temp /Backups root
@@ -89,12 +90,7 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
self.stdout_cm.__enter__()
self.stderr_cm.__enter__()
# Patch BACKUPS_ROOT to temp root
self.backups_patcher = patch.object(main, "BACKUPS_ROOT", self.backups_root)
self.backups_patcher.start()
def tearDown(self):
self.backups_patcher.stop()
self.stdout_cm.__exit__(None, None, None)
self.stderr_cm.__exit__(None, None, None)
self.tmpdir.cleanup()
@@ -105,32 +101,52 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
out = self._stdout.getvalue()
err = self._stderr.getvalue()
dur = time.time() - start
self._stdout.seek(0); self._stdout.truncate(0)
self._stderr.seek(0); self._stderr.truncate(0)
self._stdout.seek(0)
self._stdout.truncate(0)
self._stderr.seek(0)
self._stderr.truncate(0)
return rc, out, err, dur
def test_id_mode_yes_deletes_failures(self):
rc, out, err, _ = self.run_main([
"--id", "ID1",
"--dirval-cmd", str(self.dirval),
"--workers", "4",
"--timeout", SHORT_TIMEOUT,
"--yes",
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--id",
"ID1",
"--dirval-cmd",
str(self.dirval),
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.goodA.exists(), "goodA should remain")
self.assertFalse(self.badB.exists(), "badB should be deleted")
self.assertFalse(self.timeoutC.exists(), "timeoutC should be deleted (timeout treated as failure)")
self.assertFalse(
self.timeoutC.exists(),
"timeoutC should be deleted (timeout treated as failure)",
)
self.assertIn("Summary:", out)
def test_all_mode(self):
rc, out, err, _ = self.run_main([
"--all",
"--dirval-cmd", str(self.dirval),
"--workers", "4",
"--timeout", SHORT_TIMEOUT,
"--yes",
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--all",
"--dirval-cmd",
str(self.dirval),
"--workers",
"4",
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.goodA.exists())
self.assertFalse(self.badB.exists())
@@ -139,49 +155,80 @@ class CleanupBackupsUsingDirvalTests(unittest.TestCase):
self.assertFalse(self.badY.exists())
def test_dirval_missing_errors(self):
rc, out, err, _ = self.run_main([
"--id", "ID1",
"--dirval-cmd", str(self.backups_root / "nope-dirval"),
"--timeout", SHORT_TIMEOUT,
"--yes",
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--id",
"ID1",
"--dirval-cmd",
str(self.backups_root / "nope-dirval"),
"--timeout",
SHORT_TIMEOUT,
"--yes",
]
)
self.assertEqual(rc, 0, msg=err or out)
self.assertIn("dirval not found", out + err)
def test_no_targets_message(self):
empty = self.backups_root / "EMPTY" / "backup-docker-to-local"
empty.mkdir(parents=True, exist_ok=True)
rc, out, err, _ = self.run_main([
"--id", "EMPTY",
"--dirval-cmd", str(self.dirval),
"--timeout", SHORT_TIMEOUT,
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--id",
"EMPTY",
"--dirval-cmd",
str(self.dirval),
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0)
self.assertIn("No subdirectories to validate. Nothing to do.", out)
def test_interactive_keeps_when_no(self):
with patch("builtins.input", return_value=""):
rc, out, err, _ = self.run_main([
"--id", "ID2",
"--dirval-cmd", str(self.dirval),
"--workers", "1",
"--timeout", SHORT_TIMEOUT,
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--id",
"ID2",
"--dirval-cmd",
str(self.dirval),
"--workers",
"1",
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0, msg=err or out)
self.assertTrue(self.badY.exists(), "badY should be kept without confirmation")
self.assertTrue(self.goodX.exists())
def test_interactive_yes_deletes(self):
with patch("builtins.input", return_value="y"):
rc, out, err, _ = self.run_main([
"--id", "ID2",
"--dirval-cmd", str(self.dirval),
"--workers", "1",
"--timeout", SHORT_TIMEOUT,
])
rc, out, err, _ = self.run_main(
[
"--backups-root",
str(self.backups_root),
"--id",
"ID2",
"--dirval-cmd",
str(self.dirval),
"--workers",
"1",
"--timeout",
SHORT_TIMEOUT,
]
)
self.assertEqual(rc, 0, msg=err or out)
self.assertFalse(self.badY.exists(), "badY should be deleted")
self.assertTrue(self.goodX.exists())
if __name__ == "__main__":
unittest.main(verbosity=2)