3 Commits

Author SHA1 Message Date
e3cdfd6fc4 Release version 1.1.1 2025-12-28 22:52:31 +01:00
df32671cec fix(backup): fallback to file backup in dump-only mode when no DB dump is possible
- Change DB backup helpers to return whether a dump was actually produced
- Detect DB containers without successful dumps in --dump-only mode
- Fallback to file backups with a warning instead of skipping silently
- Refactor DB dump logic to return boolean status
- Add E2E test covering dump-only fallback when databases.csv entry is missing

https://chatgpt.com/share/6951a659-2b0c-800f-aafa-3e89ae1eb697
2025-12-28 22:51:12 +01:00
d563dce20f Ignored dist 2025-12-28 22:19:19 +01:00
8 changed files with 233 additions and 32 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
__pycache__
artifacts/
*.egg-info
dist/

View File

@@ -1,3 +1,8 @@
## [1.1.1] - 2025-12-28
* * **Backup:** In ***--dump-only*** mode, fall back to file backups with a warning when no database dump can be produced (e.g. missing `databases.csv` entry).
## [1.1.0] - 2025-12-28
* * **Backup:** Log a warning and skip database dumps when no databases.csv entry is present instead of raising an exception; introduce module-level logging and apply formatting cleanups across backup/restore code and tests.

Binary file not shown.

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "backup-docker-to-local"
-version = "1.1.0"
+version = "1.1.1"
description = "Backup Docker volumes to local with rsync and optional DB dumps."
readme = "README.md"
requires-python = ">=3.9"

View File

@@ -72,28 +72,27 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) ->
            return True
    return False


def backup_mariadb_or_postgres(
    *,
    container: str,
    volume_dir: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
    """
-    Returns True if the container is a DB container we handled.
+    Returns (is_db_container, dumped_any)
    """
    for img in ["mariadb", "postgres"]:
        if has_image(container, img):
-            backup_database(
+            dumped = backup_database(
                container=container,
                volume_dir=volume_dir,
                db_type=img,
                databases_df=databases_df,
                database_containers=database_containers,
            )
-            return True
+            return True, dumped
-    return False
+    return False, False


def _backup_dumps_for_volume(
@@ -102,21 +101,26 @@ def _backup_dumps_for_volume(
    vol_dir: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
    """
-    Create DB dumps for any mariadb/postgres containers attached to this volume.
-    Returns True if at least one dump was produced.
+    Returns (found_db_container, dumped_any)
    """
+    found_db = False
    dumped_any = False
    for c in containers:
-        if backup_mariadb_or_postgres(
+        is_db, dumped = backup_mariadb_or_postgres(
            container=c,
            volume_dir=vol_dir,
            databases_df=databases_df,
            database_containers=database_containers,
-        ):
+        )
+        if is_db:
+            found_db = True
+        if dumped:
            dumped_any = True
-    return dumped_any
+    return found_db, dumped_any


def main() -> int:
@@ -138,18 +142,26 @@ def main() -> int:
        vol_dir = create_volume_directory(version_dir, volume_name)

-        # Old behavior: DB dumps are additional to file backups.
-        _backup_dumps_for_volume(
+        found_db, dumped_any = _backup_dumps_for_volume(
            containers=containers,
            vol_dir=vol_dir,
            databases_df=databases_df,
            database_containers=args.database_containers,
        )

-        # dump-only: skip ALL file rsync backups
+        # dump-only logic:
        if args.dump_only:
+            if found_db and not dumped_any:
+                print(
+                    f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
+                    flush=True,
+                )
+                # continue to file backup below
+            else:
+                # keep old behavior: skip file backups
-            continue
+                continue

        # skip file backup if all linked containers are ignored
        if volume_is_fully_ignored(containers, args.images_no_backup_required):
            print(

View File

@@ -3,9 +3,8 @@ from __future__ import annotations
import os
import pathlib
import re
-import pandas
import logging
+import pandas

from .shell import BackupException, execute_shell_command
@@ -18,9 +17,7 @@ def get_instance(container: str, database_containers: list[str]) -> str:
    return re.split(r"(_|-)(database|db|postgres)", container)[0]


-def fallback_pg_dumpall(
-    container: str, username: str, password: str, out_file: str
-) -> None:
+def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
    cmd = (
        f"PGPASSWORD={password} docker exec -i {container} "
        f"pg_dumpall -U {username} -h localhost > {out_file}"
@@ -35,20 +32,25 @@ def backup_database(
    db_type: str,
    databases_df: "pandas.DataFrame",
    database_containers: list[str],
-) -> None:
+) -> bool:
+    """
+    Returns True if at least one dump file was produced, else False.
+    """
    instance_name = get_instance(container, database_containers)
    entries = databases_df.loc[databases_df["instance"] == instance_name]

    if entries.empty:
-        log.warning("No entry found for instance '%s'", instance_name)
-        return
+        log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
+        return False

    out_dir = os.path.join(volume_dir, "sql")
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

-    for row in entries.iloc:
-        db_name = row["database"]
-        user = row["username"]
-        password = row["password"]
+    produced = False
+
+    for row in entries.itertuples(index=False):
+        db_name = row.database
+        user = row.username
+        password = row.password

        dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
@@ -58,13 +60,15 @@ def backup_database(
                f"-u {user} -p{password} {db_name} > {dump_file}"
            )
            execute_shell_command(cmd)
+            produced = True
            continue

        if db_type == "postgres":
            cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")

            if not db_name:
                fallback_pg_dumpall(container, user, password, cluster_file)
-                return
+                return True

            try:
                cmd = (
@@ -72,6 +76,7 @@ def backup_database(
                    f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
                )
                execute_shell_command(cmd)
+                produced = True
            except BackupException as e:
                print(f"pg_dump failed: {e}", flush=True)
                print(
@@ -79,4 +84,7 @@ def backup_database(
                    flush=True,
                )
                fallback_pg_dumpall(container, user, password, cluster_file)
+                produced = True
            continue

+    return produced

View File

@@ -0,0 +1,175 @@
# tests/e2e/test_e2e_dump_only_fallback_to_files.py
import unittest
from .helpers import (
backup_path,
cleanup_docker,
create_minimal_compose_dir,
ensure_empty_dir,
latest_version_dir,
require_docker,
run,
unique,
write_databases_csv,
wait_for_postgres,
)
class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
    """E2E: with --dump-only and a DB container whose instance has no
    databases.csv entry, baudolo must print a warning and fall back to a
    file backup instead of skipping the volume silently.

    All docker setup and the backup/restore run happen once in setUpClass;
    the test methods only assert on the captured output and on the
    backup/restore artifacts.
    """

    @classmethod
    def setUpClass(cls) -> None:
        # Presumably skips (or fails) the class when no docker daemon is
        # reachable — confirm against helpers.require_docker.
        require_docker()

        # Unique prefix isolates this run's containers/volumes/paths.
        cls.prefix = unique("baudolo-e2e-dump-only-fallback")
        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
        ensure_empty_dir(cls.backups_dir)
        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
        cls.repo_name = cls.prefix

        cls.pg_container = f"{cls.prefix}-pg"
        cls.pg_volume = f"{cls.prefix}-pg-vol"
        cls.restore_volume = f"{cls.prefix}-restore-vol"
        # Tracked so tearDownClass can clean everything up.
        cls.containers = [cls.pg_container]
        cls.volumes = [cls.pg_volume, cls.restore_volume]

        run(["docker", "volume", "create", cls.pg_volume])

        # Start Postgres (creates a real DB volume)
        run(
            [
                "docker",
                "run",
                "-d",
                "--name",
                cls.pg_container,
                "-e",
                "POSTGRES_PASSWORD=pgpw",
                "-e",
                "POSTGRES_DB=appdb",
                "-e",
                "POSTGRES_USER=postgres",
                "-v",
                f"{cls.pg_volume}:/var/lib/postgresql/data",
                "postgres:16",
            ]
        )
        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)

        # Add a deterministic marker file into the volume
        cls.marker = "dump-only-fallback-marker"
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                f"echo '{cls.marker}' > /var/lib/postgresql/data/marker.txt",
            ]
        )

        # databases.csv WITHOUT matching entry for this instance -> should skip dump
        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
        write_databases_csv(cls.databases_csv, [])  # empty except header

        # Run baudolo with --dump-only and a DB container present:
        # Expected: WARNING + FALLBACK to file backup (files/ must exist)
        cmd = [
            "baudolo",
            "--compose-dir",
            cls.compose_dir,
            "--docker-compose-hard-restart-required",
            "mailu",
            "--repo-name",
            cls.repo_name,
            "--databases-csv",
            cls.databases_csv,
            "--backups-dir",
            cls.backups_dir,
            "--database-containers",
            cls.pg_container,
            "--images-no-stop-required",
            "postgres",
            "mariadb",
            "mysql",
            "alpine",
            "--dump-only",
        ]
        cp = run(cmd, capture=True, check=True)
        # Captured for the warning-message assertion below.
        cls.stdout = cp.stdout or ""

        # NOTE(review): cls.hash shadows the builtin hash() inside this class.
        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)

        # Restore files into a fresh volume to prove file backup happened
        run(["docker", "volume", "create", cls.restore_volume])
        run(
            [
                "baudolo-restore",
                "files",
                cls.restore_volume,
                cls.hash,
                cls.version,
                "--backups-dir",
                cls.backups_dir,
                "--repo-name",
                cls.repo_name,
                "--source-volume",
                cls.pg_volume,
                "--rsync-image",
                "ghcr.io/kevinveenbirkenbach/alpine-rsync",
            ]
        )

    @classmethod
    def tearDownClass(cls) -> None:
        # Best-effort removal of every container/volume created above.
        cleanup_docker(containers=cls.containers, volumes=cls.volumes)

    def test_warns_about_missing_dump_in_dump_only_mode(self) -> None:
        # The fallback warning must appear on baudolo's stdout.
        self.assertIn(
            "WARNING: dump-only requested but no DB dump was produced",
            self.stdout,
            f"Expected warning in baudolo output. STDOUT:\n{self.stdout}",
        )

    def test_files_backup_exists_due_to_fallback(self) -> None:
        # files/ under the volume's backup dir proves the rsync fallback ran.
        p = backup_path(
            self.backups_dir,
            self.repo_name,
            self.version,
            self.pg_volume,
        ) / "files"
        self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")

    def test_sql_dump_not_present(self) -> None:
        # There should be no sql dumps because databases.csv had no matching entry.
        sql_dir = backup_path(
            self.backups_dir,
            self.repo_name,
            self.version,
            self.pg_volume,
        ) / "sql"
        # Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
        if sql_dir.exists():
            dumps = list(sql_dir.glob("*.sql"))
            self.assertEqual(
                len(dumps),
                0,
                f"Did not expect SQL dump files, found: {dumps}",
            )

    def test_restored_files_contain_marker(self) -> None:
        # Read the marker back out of the restored volume.
        # NOTE(review): assumes helpers.run() captures stdout by default — confirm.
        p = run(
            [
                "docker",
                "run",
                "--rm",
                "-v",
                f"{self.restore_volume}:/data",
                "alpine:3.20",
                "sh",
                "-lc",
                "cat /data/marker.txt",
            ]
        )
        self.assertEqual((p.stdout or "").strip(), self.marker)