mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup.git
synced 2025-12-29 03:42:08 +00:00
fix(backup): fallback to file backup in dump-only mode when no DB dump is possible
- Change DB backup helpers to return whether a dump was actually produced - Detect DB containers without successful dumps in --dump-only mode - Fallback to file backups with a warning instead of skipping silently - Refactor DB dump logic to return boolean status - Add E2E test covering dump-only fallback when databases.csv entry is missing https://chatgpt.com/share/6951a659-2b0c-800f-aafa-3e89ae1eb697
This commit is contained in:
@@ -72,28 +72,27 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) ->
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def backup_mariadb_or_postgres(
|
||||
*,
|
||||
container: str,
|
||||
volume_dir: str,
|
||||
databases_df: "pandas.DataFrame",
|
||||
database_containers: list[str],
|
||||
) -> bool:
|
||||
) -> tuple[bool, bool]:
|
||||
"""
|
||||
Returns True if the container is a DB container we handled.
|
||||
Returns (is_db_container, dumped_any)
|
||||
"""
|
||||
for img in ["mariadb", "postgres"]:
|
||||
if has_image(container, img):
|
||||
backup_database(
|
||||
dumped = backup_database(
|
||||
container=container,
|
||||
volume_dir=volume_dir,
|
||||
db_type=img,
|
||||
databases_df=databases_df,
|
||||
database_containers=database_containers,
|
||||
)
|
||||
return True
|
||||
return False
|
||||
return True, dumped
|
||||
return False, False
|
||||
|
||||
|
||||
def _backup_dumps_for_volume(
|
||||
@@ -102,21 +101,26 @@ def _backup_dumps_for_volume(
|
||||
vol_dir: str,
|
||||
databases_df: "pandas.DataFrame",
|
||||
database_containers: list[str],
|
||||
) -> bool:
|
||||
) -> tuple[bool, bool]:
|
||||
"""
|
||||
Create DB dumps for any mariadb/postgres containers attached to this volume.
|
||||
Returns True if at least one dump was produced.
|
||||
Returns (found_db_container, dumped_any)
|
||||
"""
|
||||
found_db = False
|
||||
dumped_any = False
|
||||
|
||||
for c in containers:
|
||||
if backup_mariadb_or_postgres(
|
||||
is_db, dumped = backup_mariadb_or_postgres(
|
||||
container=c,
|
||||
volume_dir=vol_dir,
|
||||
databases_df=databases_df,
|
||||
database_containers=database_containers,
|
||||
):
|
||||
)
|
||||
if is_db:
|
||||
found_db = True
|
||||
if dumped:
|
||||
dumped_any = True
|
||||
return dumped_any
|
||||
|
||||
return found_db, dumped_any
|
||||
|
||||
|
||||
def main() -> int:
|
||||
@@ -138,17 +142,25 @@ def main() -> int:
|
||||
|
||||
vol_dir = create_volume_directory(version_dir, volume_name)
|
||||
|
||||
# Old behavior: DB dumps are additional to file backups.
|
||||
_backup_dumps_for_volume(
|
||||
found_db, dumped_any = _backup_dumps_for_volume(
|
||||
containers=containers,
|
||||
vol_dir=vol_dir,
|
||||
databases_df=databases_df,
|
||||
database_containers=args.database_containers,
|
||||
)
|
||||
|
||||
# dump-only: skip ALL file rsync backups
|
||||
# dump-only logic:
|
||||
if args.dump_only:
|
||||
continue
|
||||
if found_db and not dumped_any:
|
||||
print(
|
||||
f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
|
||||
flush=True,
|
||||
)
|
||||
# continue to file backup below
|
||||
else:
|
||||
# keep old behavior: skip file backups
|
||||
continue
|
||||
|
||||
|
||||
# skip file backup if all linked containers are ignored
|
||||
if volume_is_fully_ignored(containers, args.images_no_backup_required):
|
||||
|
||||
@@ -3,9 +3,8 @@ from __future__ import annotations
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
|
||||
import pandas
|
||||
import logging
|
||||
import pandas
|
||||
|
||||
from .shell import BackupException, execute_shell_command
|
||||
|
||||
@@ -18,9 +17,7 @@ def get_instance(container: str, database_containers: list[str]) -> str:
|
||||
return re.split(r"(_|-)(database|db|postgres)", container)[0]
|
||||
|
||||
|
||||
def fallback_pg_dumpall(
|
||||
container: str, username: str, password: str, out_file: str
|
||||
) -> None:
|
||||
def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
|
||||
cmd = (
|
||||
f"PGPASSWORD={password} docker exec -i {container} "
|
||||
f"pg_dumpall -U {username} -h localhost > {out_file}"
|
||||
@@ -35,20 +32,25 @@ def backup_database(
|
||||
db_type: str,
|
||||
databases_df: "pandas.DataFrame",
|
||||
database_containers: list[str],
|
||||
) -> None:
|
||||
) -> bool:
|
||||
"""
|
||||
Returns True if at least one dump file was produced, else False.
|
||||
"""
|
||||
instance_name = get_instance(container, database_containers)
|
||||
entries = databases_df.loc[databases_df["instance"] == instance_name]
|
||||
if entries.empty:
|
||||
log.warning("No entry found for instance '%s'", instance_name)
|
||||
return
|
||||
log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
|
||||
return False
|
||||
|
||||
out_dir = os.path.join(volume_dir, "sql")
|
||||
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for row in entries.iloc:
|
||||
db_name = row["database"]
|
||||
user = row["username"]
|
||||
password = row["password"]
|
||||
produced = False
|
||||
|
||||
for row in entries.itertuples(index=False):
|
||||
db_name = row.database
|
||||
user = row.username
|
||||
password = row.password
|
||||
|
||||
dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
|
||||
|
||||
@@ -58,13 +60,15 @@ def backup_database(
|
||||
f"-u {user} -p{password} {db_name} > {dump_file}"
|
||||
)
|
||||
execute_shell_command(cmd)
|
||||
produced = True
|
||||
continue
|
||||
|
||||
if db_type == "postgres":
|
||||
cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
|
||||
|
||||
if not db_name:
|
||||
fallback_pg_dumpall(container, user, password, cluster_file)
|
||||
return
|
||||
return True
|
||||
|
||||
try:
|
||||
cmd = (
|
||||
@@ -72,6 +76,7 @@ def backup_database(
|
||||
f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
|
||||
)
|
||||
execute_shell_command(cmd)
|
||||
produced = True
|
||||
except BackupException as e:
|
||||
print(f"pg_dump failed: {e}", flush=True)
|
||||
print(
|
||||
@@ -79,4 +84,7 @@ def backup_database(
|
||||
flush=True,
|
||||
)
|
||||
fallback_pg_dumpall(container, user, password, cluster_file)
|
||||
produced = True
|
||||
continue
|
||||
|
||||
return produced
|
||||
|
||||
175
tests/e2e/test_e2e_dump_only_fallback_to_files.py
Normal file
175
tests/e2e/test_e2e_dump_only_fallback_to_files.py
Normal file
@@ -0,0 +1,175 @@
|
||||
# tests/e2e/test_e2e_dump_only_fallback_to_files.py
|
||||
import unittest
|
||||
|
||||
from .helpers import (
|
||||
backup_path,
|
||||
cleanup_docker,
|
||||
create_minimal_compose_dir,
|
||||
ensure_empty_dir,
|
||||
latest_version_dir,
|
||||
require_docker,
|
||||
run,
|
||||
unique,
|
||||
write_databases_csv,
|
||||
wait_for_postgres,
|
||||
)
|
||||
|
||||
|
||||
class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls) -> None:
|
||||
require_docker()
|
||||
cls.prefix = unique("baudolo-e2e-dump-only-fallback")
|
||||
cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
|
||||
ensure_empty_dir(cls.backups_dir)
|
||||
|
||||
cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
|
||||
cls.repo_name = cls.prefix
|
||||
|
||||
cls.pg_container = f"{cls.prefix}-pg"
|
||||
cls.pg_volume = f"{cls.prefix}-pg-vol"
|
||||
cls.restore_volume = f"{cls.prefix}-restore-vol"
|
||||
|
||||
cls.containers = [cls.pg_container]
|
||||
cls.volumes = [cls.pg_volume, cls.restore_volume]
|
||||
|
||||
run(["docker", "volume", "create", cls.pg_volume])
|
||||
|
||||
# Start Postgres (creates a real DB volume)
|
||||
run(
|
||||
[
|
||||
"docker",
|
||||
"run",
|
||||
"-d",
|
||||
"--name",
|
||||
cls.pg_container,
|
||||
"-e",
|
||||
"POSTGRES_PASSWORD=pgpw",
|
||||
"-e",
|
||||
"POSTGRES_DB=appdb",
|
||||
"-e",
|
||||
"POSTGRES_USER=postgres",
|
||||
"-v",
|
||||
f"{cls.pg_volume}:/var/lib/postgresql/data",
|
||||
"postgres:16",
|
||||
]
|
||||
)
|
||||
wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
|
||||
|
||||
# Add a deterministic marker file into the volume
|
||||
cls.marker = "dump-only-fallback-marker"
|
||||
run(
|
||||
[
|
||||
"docker",
|
||||
"exec",
|
||||
cls.pg_container,
|
||||
"sh",
|
||||
"-lc",
|
||||
f"echo '{cls.marker}' > /var/lib/postgresql/data/marker.txt",
|
||||
]
|
||||
)
|
||||
|
||||
# databases.csv WITHOUT matching entry for this instance -> should skip dump
|
||||
cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
|
||||
write_databases_csv(cls.databases_csv, []) # empty except header
|
||||
|
||||
# Run baudolo with --dump-only and a DB container present:
|
||||
# Expected: WARNING + FALLBACK to file backup (files/ must exist)
|
||||
cmd = [
|
||||
"baudolo",
|
||||
"--compose-dir",
|
||||
cls.compose_dir,
|
||||
"--docker-compose-hard-restart-required",
|
||||
"mailu",
|
||||
"--repo-name",
|
||||
cls.repo_name,
|
||||
"--databases-csv",
|
||||
cls.databases_csv,
|
||||
"--backups-dir",
|
||||
cls.backups_dir,
|
||||
"--database-containers",
|
||||
cls.pg_container,
|
||||
"--images-no-stop-required",
|
||||
"postgres",
|
||||
"mariadb",
|
||||
"mysql",
|
||||
"alpine",
|
||||
"--dump-only",
|
||||
]
|
||||
cp = run(cmd, capture=True, check=True)
|
||||
|
||||
cls.stdout = cp.stdout or ""
|
||||
cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
|
||||
|
||||
# Restore files into a fresh volume to prove file backup happened
|
||||
run(["docker", "volume", "create", cls.restore_volume])
|
||||
run(
|
||||
[
|
||||
"baudolo-restore",
|
||||
"files",
|
||||
cls.restore_volume,
|
||||
cls.hash,
|
||||
cls.version,
|
||||
"--backups-dir",
|
||||
cls.backups_dir,
|
||||
"--repo-name",
|
||||
cls.repo_name,
|
||||
"--source-volume",
|
||||
cls.pg_volume,
|
||||
"--rsync-image",
|
||||
"ghcr.io/kevinveenbirkenbach/alpine-rsync",
|
||||
]
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls) -> None:
|
||||
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
|
||||
|
||||
def test_warns_about_missing_dump_in_dump_only_mode(self) -> None:
|
||||
self.assertIn(
|
||||
"WARNING: dump-only requested but no DB dump was produced",
|
||||
self.stdout,
|
||||
f"Expected warning in baudolo output. STDOUT:\n{self.stdout}",
|
||||
)
|
||||
|
||||
def test_files_backup_exists_due_to_fallback(self) -> None:
|
||||
p = backup_path(
|
||||
self.backups_dir,
|
||||
self.repo_name,
|
||||
self.version,
|
||||
self.pg_volume,
|
||||
) / "files"
|
||||
self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")
|
||||
|
||||
def test_sql_dump_not_present(self) -> None:
|
||||
# There should be no sql dumps because databases.csv had no matching entry.
|
||||
sql_dir = backup_path(
|
||||
self.backups_dir,
|
||||
self.repo_name,
|
||||
self.version,
|
||||
self.pg_volume,
|
||||
) / "sql"
|
||||
# Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
|
||||
if sql_dir.exists():
|
||||
dumps = list(sql_dir.glob("*.sql"))
|
||||
self.assertEqual(
|
||||
len(dumps),
|
||||
0,
|
||||
f"Did not expect SQL dump files, found: {dumps}",
|
||||
)
|
||||
|
||||
def test_restored_files_contain_marker(self) -> None:
|
||||
p = run(
|
||||
[
|
||||
"docker",
|
||||
"run",
|
||||
"--rm",
|
||||
"-v",
|
||||
f"{self.restore_volume}:/data",
|
||||
"alpine:3.20",
|
||||
"sh",
|
||||
"-lc",
|
||||
"cat /data/marker.txt",
|
||||
]
|
||||
)
|
||||
self.assertEqual((p.stdout or "").strip(), self.marker)
|
||||
Reference in New Issue
Block a user