mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup-cleanup.git
synced 2025-09-16 07:06:06 +02:00
Add CI, dirval-based validator, tests, and docs updates
• Add GitHub Actions workflow (Ubuntu, Python 3.10–3.12) • Add Makefile (test + pkgmgr install note) • Add requirements.yml (pkgmgr: dirval) • Replace shell scripts with parallel validator main.py using dirval • Add unit tests with fake dirval and timeouts • Update README to new repo name and pkgmgr alias 'cleanback' • Add .gitignore for __pycache__ Conversation context: https://chatgpt.com/share/68c309bf-8818-800f-84d9-c4aa74a4544c
This commit is contained in:
250
main.py
Executable file
250
main.py
Executable file
@@ -0,0 +1,250 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Cleanup Failed Docker Backups — parallel validator (using dirval)
|
||||
|
||||
Validates backup subdirectories under:
|
||||
- /Backups/<ID>/backup-docker-to-local (when --id is used)
|
||||
- /Backups/*/backup-docker-to-local (when --all is used)
|
||||
|
||||
For each subdirectory:
|
||||
- Runs `dirval <subdir> --validate`.
|
||||
- If validation fails, it lists the contents and asks whether to delete.
|
||||
- With --yes, deletions happen automatically (no prompt).
|
||||
|
||||
Parallelism:
|
||||
- Validation runs in parallel (thread pool). Deletions are performed afterwards
|
||||
sequentially (to keep prompts sane).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
import multiprocessing
|
||||
import time
|
||||
|
||||
# Root directory that is scanned for backups: each backup ID is a folder
# directly below it, containing a "backup-docker-to-local" directory.
BACKUPS_ROOT = Path("/Backups")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ValidationResult:
    """
    Immutable outcome of one dirval run against a single backup subdirectory.

    Instances are created by run_dirval_validate; a result with ok=False marks
    the subdirectory as a deletion candidate.
    """

    # Directory that was passed to dirval.
    subdir: Path
    # True only when dirval exited with status 0.
    ok: bool
    # dirval's exit status; run_dirval_validate substitutes 124 on timeout
    # and 127 when the dirval command could not be found.
    returncode: int
    # Captured standard error (stripped), or a synthetic message for 124/127.
    stderr: str
    # Captured standard output (stripped); empty for 124/127.
    stdout: str
|
||||
|
||||
|
||||
def discover_target_subdirs(backup_id: Optional[str], all_mode: bool) -> List[Path]:
    """
    Collect the backup subdirectories that should be validated.

    - all_mode: every directory inside <BACKUPS_ROOT>/*/backup-docker-to-local,
      grouped by backup folder (folders and their subdirectories both sorted).
    - otherwise: the sorted directories inside
      <BACKUPS_ROOT>/<backup_id>/backup-docker-to-local.

    all_mode takes precedence over backup_id. Only directories are returned;
    plain files are ignored.

    Raises:
        ValueError: neither all_mode nor a non-empty backup_id was given.
        FileNotFoundError: the directory that has to be scanned does not exist.
    """

    def _sorted_dirs(parent: Path) -> List[Path]:
        # Directories directly below *parent*, in sorted order.
        return sorted(child for child in parent.iterdir() if child.is_dir())

    if not all_mode:
        if not backup_id:
            raise ValueError("Either --id or --all must be provided.")
        base = BACKUPS_ROOT / backup_id / "backup-docker-to-local"
        if not base.is_dir():
            raise FileNotFoundError(f"Directory does not exist: {base}")
        return _sorted_dirs(base)

    if not BACKUPS_ROOT.is_dir():
        raise FileNotFoundError(f"Backups root does not exist: {BACKUPS_ROOT}")

    collected: List[Path] = []
    for backup_folder in _sorted_dirs(BACKUPS_ROOT):
        candidate = backup_folder / "backup-docker-to-local"
        # Backup folders without the expected directory are skipped silently.
        if candidate.is_dir():
            collected.extend(_sorted_dirs(candidate))
    return collected
|
||||
|
||||
|
||||
def run_dirval_validate(subdir: Path, dirval_cmd: str, timeout: float) -> ValidationResult:
    """
    Execute dirval for one directory:
        <dirval_cmd> "<SUBDIR>" --validate

    Args:
        subdir: directory to validate.
        dirval_cmd: executable to run (a single program name or path; it is
            not split into multiple arguments and no shell is involved).
        timeout: seconds to wait before the run is abandoned.

    Returns:
        ValidationResult with ok = (returncode == 0). Two situations are
        reported with synthetic values instead of raising:
        - timeout expired        -> returncode 124, explanatory stderr
        - dirval_cmd not found   -> returncode 127, explanatory stderr

    Any other OSError raised while starting the process (for example a
    command that exists but is not executable) is not caught here.

    NOTE(review): 124 and 127 are reported as ok=False, i.e. indistinguishable
    from a genuine validation failure for callers that only look at `ok`.
    """
    cmd = [dirval_cmd, str(subdir), "--validate"]
    try:
        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
            text=True,
            # Output is only shown to the user. Decode leniently so that bytes
            # which are invalid in the locale encoding cannot raise
            # UnicodeDecodeError and abort the run despite a valid exit status.
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return ValidationResult(
            subdir=subdir,
            ok=False,
            returncode=124,
            stderr=f"dirval timed out after {timeout}s",
            stdout="",
        )
    except FileNotFoundError:
        return ValidationResult(
            subdir=subdir,
            ok=False,
            returncode=127,
            stderr=f"dirval not found (dirval-cmd: {dirval_cmd})",
            stdout="",
        )

    return ValidationResult(
        subdir=subdir,
        ok=(proc.returncode == 0),
        returncode=proc.returncode,
        stderr=(proc.stderr or "").strip(),
        stdout=(proc.stdout or "").strip(),
    )
|
||||
|
||||
|
||||
def parallel_validate(subdirs: List[Path], dirval_cmd: str, workers: int, timeout: float) -> List[ValidationResult]:
    """
    Validate all *subdirs* concurrently with run_dirval_validate.

    Args:
        subdirs: directories to validate.
        dirval_cmd: dirval executable, passed through to run_dirval_validate.
        workers: requested thread-pool size; values below 1 are treated as 1.
        timeout: per-directory timeout in seconds, passed through.

    Returns:
        One ValidationResult per entry of *subdirs*, in the same order as
        *subdirs* (an empty list for empty input). A "[ok]"/"[error]" progress
        line is printed per directory as soon as its validation completes, so
        the progress lines may appear in a different order.

    An exception raised by a worker is re-raised here.
    """
    results: List[ValidationResult] = []
    if not subdirs:
        return results

    # ThreadPoolExecutor raises ValueError for max_workers <= 0.
    pool_size = max(1, workers)

    print(f"Validating {len(subdirs)} directories with {pool_size} workers (dirval: {dirval_cmd})...")
    # Monotonic clock: the elapsed time must not be affected by wall-clock jumps.
    started = time.monotonic()

    by_position = {}
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        positions = {
            pool.submit(run_dirval_validate, sd, dirval_cmd, timeout): index
            for index, sd in enumerate(subdirs)
        }
        for fut in as_completed(positions):
            res = fut.result()
            status = "ok" if res.ok else "error"
            print(f"[{status}] {res.subdir}")
            by_position[positions[fut]] = res

    # Completion order depends on scheduling; hand results back in input order
    # so that later reports and prompts are reproducible between runs.
    results = [by_position[index] for index in range(len(subdirs))]

    elapsed = time.monotonic() - started
    print(f"Validation finished in {elapsed:.2f}s")
    return results
|
||||
|
||||
|
||||
def print_dir_listing(path: Path, max_items: int = 50) -> None:
    """
    Print the direct children of *path*, directories first, then by
    case-insensitive name.

    Args:
        path: directory to list.
        max_items: maximum number of entries to print. When more entries
            exist, a trailing " ... (+N more)" line reports how many were
            omitted. Zero or a negative value prints only that summary line.

    Listing is best effort: if the directory cannot be read, a single
    " (unable to list: <error>)" line is printed and nothing is raised.
    """
    try:
        # Stat every entry exactly once, and do it inside the try so that an
        # error while classifying an entry is reported like a listing error.
        classified = [(entry.is_dir(), entry.name) for entry in path.iterdir()]
    except Exception as e:
        print(f" (unable to list: {e})")
        return

    classified.sort(key=lambda item: (not item[0], item[1].lower()))

    shown = classified[: max(0, max_items)]
    for is_dir, name in shown:
        typ = "<DIR>" if is_dir else " "
        print(f" {typ} {name}")

    hidden = len(classified) - len(shown)
    if hidden > 0:
        print(f" ... (+{hidden} more)")
|
||||
|
||||
|
||||
def confirm(prompt: str) -> bool:
    """
    Ask a yes/no question on standard input.

    Returns True only for "y" or "yes" (case-insensitive, surrounding
    whitespace ignored). Any other answer, and end-of-input, means "no".
    """
    try:
        answer = input(prompt)
    except EOFError:
        # No more input available (e.g. non-interactive run): never confirm.
        return False
    return answer.strip().lower() in {"y", "yes"}
|
||||
|
||||
|
||||
def delete_path(path: Path) -> Tuple[Path, bool, Optional[str]]:
    """
    Remove the directory tree at *path* without raising.

    Returns a (path, success, error) triple: on success the error is None,
    on failure success is False and error is the exception text.
    """
    try:
        shutil.rmtree(path)
    except Exception as exc:
        return path, False, str(exc)
    return path, True, None
|
||||
|
||||
|
||||
def process_deletions(failures: List[ValidationResult], assume_yes: bool) -> int:
    """
    Walk through the failed validations one by one and delete on request.

    For each failure the captured dirval output and a directory listing are
    printed. The directory is deleted when *assume_yes* is set, or when the
    user confirms the prompt.

    Returns the number of directories that were actually deleted; declined
    prompts and failed deletions are not counted.
    """
    removed = 0
    separator = "\n" + "=" * 80

    for res in failures:
        print(separator)
        print(f"Validation failed for: {res.subdir}")
        if res.stderr:
            print(f"stderr: {res.stderr}")
        if res.stdout:
            print(f"stdout: {res.stdout}")
        print("Contents:")
        print_dir_listing(res.subdir)

        # The prompt is only shown when deletions are not pre-approved.
        if not assume_yes and not confirm("Delete this subdirectory? [y/N]: "):
            continue

        print(f"Deleting: {res.subdir}")
        path, ok, err = delete_path(res.subdir)
        if not ok:
            print(f"Failed to delete {path}: {err}")
            continue
        print(f"Deleted: {path}")
        removed += 1

    return removed
|
||||
|
||||
|
||||
def _positive_int(raw: str) -> int:
    """argparse type: an integer that must be at least 1."""
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {raw!r}")
    return value


def _positive_float(raw: str) -> float:
    """argparse type: a finite float that must be greater than 0."""
    try:
        value = float(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {raw!r}") from None
    # The chained comparison also rejects nan (all comparisons false) and inf.
    if not 0 < value < float("inf"):
        raise argparse.ArgumentTypeError(f"must be a finite number greater than 0, got {raw!r}")
    return value


def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """
    Parse the command line.

    Args:
        argv: argument list without the program name; None lets argparse
            read sys.argv.

    Returns:
        Namespace with backup_id, all_mode, dirval_cmd, workers, timeout, yes.
        Exactly one of --id / --all is required.

    Invalid input makes argparse print a usage error and exit with status 2.
    That includes --workers < 1, which a thread pool cannot be created with,
    and a --timeout that is not a finite number > 0, which would make every
    dirval run time out and thereby mark every directory as failed.
    """
    parser = argparse.ArgumentParser(
        description="Validate (and optionally delete) failed backup subdirectories in parallel using dirval."
    )
    scope = parser.add_mutually_exclusive_group(required=True)
    scope.add_argument("--id", dest="backup_id", help="Backup folder name under /Backups.")
    scope.add_argument("--all", dest="all_mode", action="store_true", help="Scan all /Backups/* folders.")

    parser.add_argument(
        "--dirval-cmd",
        default="dirval",
        help="dirval executable/command to run (default: 'dirval').",
    )
    parser.add_argument(
        "--workers",
        type=_positive_int,
        default=max(2, multiprocessing.cpu_count()),
        help="Number of parallel validator workers (default: CPU count, but at least 2).",
    )
    parser.add_argument(
        "--timeout",
        type=_positive_float,
        default=300.0,
        help="Per-directory dirval timeout in seconds (supports floats; default: 300).",
    )
    parser.add_argument(
        "--yes",
        action="store_true",
        help="Do not prompt; delete failing directories automatically.",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: Optional[List[str]] = None) -> int:
    """
    Command-line entry point: discover, validate, then offer deletions.

    Args:
        argv: argument list without the program name; None means sys.argv.

    Returns:
        2 when the target directories cannot be determined (the error is
        printed to stderr), otherwise 0 -- including when validations failed
        or deletions were declined or unsuccessful.
    """
    args = parse_args(argv)

    try:
        subdirs = discover_target_subdirs(args.backup_id, bool(args.all_mode))
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 2

    if not subdirs:
        print("No subdirectories to validate. Nothing to do.")
        return 0

    outcomes = parallel_validate(subdirs, args.dirval_cmd, args.workers, args.timeout)

    # NOTE(review): run_dirval_validate reports a timeout (124) and a missing
    # dirval command (127) as ok=False too, so with --yes those directories are
    # deleted although dirval never judged them -- confirm this is intended.
    failed = [outcome for outcome in outcomes if not outcome.ok]
    passed_count = len(outcomes) - len(failed)

    if not failed:
        print("\nAll directories validated successfully. No action required.")
        return 0

    print(f"\n{len(failed)} directory(ies) failed validation.")
    deleted = process_deletions(failed, assume_yes=args.yes)
    # "kept" covers declined prompts as well as deletions that failed.
    kept = len(failed) - deleted
    print(f"\nSummary: deleted={deleted}, kept={kept}, ok={passed_count}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run as a script: use main()'s return value as the process exit status.
    raise SystemExit(main())
|
Reference in New Issue
Block a user