mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup.git
synced 2025-12-29 03:42:08 +00:00
fix(backup): fallback to file backup in dump-only mode when no DB dump is possible
- Change DB backup helpers to return whether a dump was actually produced - Detect DB containers without successful dumps in --dump-only mode - Fallback to file backups with a warning instead of skipping silently - Refactor DB dump logic to return boolean status - Add E2E test covering dump-only fallback when databases.csv entry is missing https://chatgpt.com/share/6951a659-2b0c-800f-aafa-3e89ae1eb697
This commit is contained in:
@@ -72,28 +72,27 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) ->
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def backup_mariadb_or_postgres(
|
def backup_mariadb_or_postgres(
|
||||||
*,
|
*,
|
||||||
container: str,
|
container: str,
|
||||||
volume_dir: str,
|
volume_dir: str,
|
||||||
databases_df: "pandas.DataFrame",
|
databases_df: "pandas.DataFrame",
|
||||||
database_containers: list[str],
|
database_containers: list[str],
|
||||||
) -> bool:
|
) -> tuple[bool, bool]:
|
||||||
"""
|
"""
|
||||||
Returns True if the container is a DB container we handled.
|
Returns (is_db_container, dumped_any)
|
||||||
"""
|
"""
|
||||||
for img in ["mariadb", "postgres"]:
|
for img in ["mariadb", "postgres"]:
|
||||||
if has_image(container, img):
|
if has_image(container, img):
|
||||||
backup_database(
|
dumped = backup_database(
|
||||||
container=container,
|
container=container,
|
||||||
volume_dir=volume_dir,
|
volume_dir=volume_dir,
|
||||||
db_type=img,
|
db_type=img,
|
||||||
databases_df=databases_df,
|
databases_df=databases_df,
|
||||||
database_containers=database_containers,
|
database_containers=database_containers,
|
||||||
)
|
)
|
||||||
return True
|
return True, dumped
|
||||||
return False
|
return False, False
|
||||||
|
|
||||||
|
|
||||||
def _backup_dumps_for_volume(
|
def _backup_dumps_for_volume(
|
||||||
@@ -102,21 +101,26 @@ def _backup_dumps_for_volume(
|
|||||||
vol_dir: str,
|
vol_dir: str,
|
||||||
databases_df: "pandas.DataFrame",
|
databases_df: "pandas.DataFrame",
|
||||||
database_containers: list[str],
|
database_containers: list[str],
|
||||||
) -> bool:
|
) -> tuple[bool, bool]:
|
||||||
"""
|
"""
|
||||||
Create DB dumps for any mariadb/postgres containers attached to this volume.
|
Returns (found_db_container, dumped_any)
|
||||||
Returns True if at least one dump was produced.
|
|
||||||
"""
|
"""
|
||||||
|
found_db = False
|
||||||
dumped_any = False
|
dumped_any = False
|
||||||
|
|
||||||
for c in containers:
|
for c in containers:
|
||||||
if backup_mariadb_or_postgres(
|
is_db, dumped = backup_mariadb_or_postgres(
|
||||||
container=c,
|
container=c,
|
||||||
volume_dir=vol_dir,
|
volume_dir=vol_dir,
|
||||||
databases_df=databases_df,
|
databases_df=databases_df,
|
||||||
database_containers=database_containers,
|
database_containers=database_containers,
|
||||||
):
|
)
|
||||||
|
if is_db:
|
||||||
|
found_db = True
|
||||||
|
if dumped:
|
||||||
dumped_any = True
|
dumped_any = True
|
||||||
return dumped_any
|
|
||||||
|
return found_db, dumped_any
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
@@ -137,18 +141,26 @@ def main() -> int:
|
|||||||
containers = containers_using_volume(volume_name)
|
containers = containers_using_volume(volume_name)
|
||||||
|
|
||||||
vol_dir = create_volume_directory(version_dir, volume_name)
|
vol_dir = create_volume_directory(version_dir, volume_name)
|
||||||
|
|
||||||
# Old behavior: DB dumps are additional to file backups.
|
found_db, dumped_any = _backup_dumps_for_volume(
|
||||||
_backup_dumps_for_volume(
|
|
||||||
containers=containers,
|
containers=containers,
|
||||||
vol_dir=vol_dir,
|
vol_dir=vol_dir,
|
||||||
databases_df=databases_df,
|
databases_df=databases_df,
|
||||||
database_containers=args.database_containers,
|
database_containers=args.database_containers,
|
||||||
)
|
)
|
||||||
|
|
||||||
# dump-only: skip ALL file rsync backups
|
# dump-only logic:
|
||||||
if args.dump_only:
|
if args.dump_only:
|
||||||
continue
|
if found_db and not dumped_any:
|
||||||
|
print(
|
||||||
|
f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
# continue to file backup below
|
||||||
|
else:
|
||||||
|
# keep old behavior: skip file backups
|
||||||
|
continue
|
||||||
|
|
||||||
|
|
||||||
# skip file backup if all linked containers are ignored
|
# skip file backup if all linked containers are ignored
|
||||||
if volume_is_fully_ignored(containers, args.images_no_backup_required):
|
if volume_is_fully_ignored(containers, args.images_no_backup_required):
|
||||||
|
|||||||
@@ -3,9 +3,8 @@ from __future__ import annotations
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import pandas
|
|
||||||
import logging
|
import logging
|
||||||
|
import pandas
|
||||||
|
|
||||||
from .shell import BackupException, execute_shell_command
|
from .shell import BackupException, execute_shell_command
|
||||||
|
|
||||||
@@ -18,9 +17,7 @@ def get_instance(container: str, database_containers: list[str]) -> str:
|
|||||||
return re.split(r"(_|-)(database|db|postgres)", container)[0]
|
return re.split(r"(_|-)(database|db|postgres)", container)[0]
|
||||||
|
|
||||||
|
|
||||||
def fallback_pg_dumpall(
|
def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
|
||||||
container: str, username: str, password: str, out_file: str
|
|
||||||
) -> None:
|
|
||||||
cmd = (
|
cmd = (
|
||||||
f"PGPASSWORD={password} docker exec -i {container} "
|
f"PGPASSWORD={password} docker exec -i {container} "
|
||||||
f"pg_dumpall -U {username} -h localhost > {out_file}"
|
f"pg_dumpall -U {username} -h localhost > {out_file}"
|
||||||
@@ -35,20 +32,25 @@ def backup_database(
|
|||||||
db_type: str,
|
db_type: str,
|
||||||
databases_df: "pandas.DataFrame",
|
databases_df: "pandas.DataFrame",
|
||||||
database_containers: list[str],
|
database_containers: list[str],
|
||||||
) -> None:
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Returns True if at least one dump file was produced, else False.
|
||||||
|
"""
|
||||||
instance_name = get_instance(container, database_containers)
|
instance_name = get_instance(container, database_containers)
|
||||||
entries = databases_df.loc[databases_df["instance"] == instance_name]
|
entries = databases_df.loc[databases_df["instance"] == instance_name]
|
||||||
if entries.empty:
|
if entries.empty:
|
||||||
log.warning("No entry found for instance '%s'", instance_name)
|
log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
|
||||||
return
|
return False
|
||||||
|
|
||||||
out_dir = os.path.join(volume_dir, "sql")
|
out_dir = os.path.join(volume_dir, "sql")
|
||||||
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
|
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
for row in entries.iloc:
|
produced = False
|
||||||
db_name = row["database"]
|
|
||||||
user = row["username"]
|
for row in entries.itertuples(index=False):
|
||||||
password = row["password"]
|
db_name = row.database
|
||||||
|
user = row.username
|
||||||
|
password = row.password
|
||||||
|
|
||||||
dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
|
dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
|
||||||
|
|
||||||
@@ -58,13 +60,15 @@ def backup_database(
|
|||||||
f"-u {user} -p{password} {db_name} > {dump_file}"
|
f"-u {user} -p{password} {db_name} > {dump_file}"
|
||||||
)
|
)
|
||||||
execute_shell_command(cmd)
|
execute_shell_command(cmd)
|
||||||
|
produced = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if db_type == "postgres":
|
if db_type == "postgres":
|
||||||
cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
|
cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
|
||||||
|
|
||||||
if not db_name:
|
if not db_name:
|
||||||
fallback_pg_dumpall(container, user, password, cluster_file)
|
fallback_pg_dumpall(container, user, password, cluster_file)
|
||||||
return
|
return True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cmd = (
|
cmd = (
|
||||||
@@ -72,6 +76,7 @@ def backup_database(
|
|||||||
f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
|
f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
|
||||||
)
|
)
|
||||||
execute_shell_command(cmd)
|
execute_shell_command(cmd)
|
||||||
|
produced = True
|
||||||
except BackupException as e:
|
except BackupException as e:
|
||||||
print(f"pg_dump failed: {e}", flush=True)
|
print(f"pg_dump failed: {e}", flush=True)
|
||||||
print(
|
print(
|
||||||
@@ -79,4 +84,7 @@ def backup_database(
|
|||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
fallback_pg_dumpall(container, user, password, cluster_file)
|
fallback_pg_dumpall(container, user, password, cluster_file)
|
||||||
|
produced = True
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
return produced
|
||||||
|
|||||||
175
tests/e2e/test_e2e_dump_only_fallback_to_files.py
Normal file
175
tests/e2e/test_e2e_dump_only_fallback_to_files.py
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
# tests/e2e/test_e2e_dump_only_fallback_to_files.py
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from .helpers import (
|
||||||
|
backup_path,
|
||||||
|
cleanup_docker,
|
||||||
|
create_minimal_compose_dir,
|
||||||
|
ensure_empty_dir,
|
||||||
|
latest_version_dir,
|
||||||
|
require_docker,
|
||||||
|
run,
|
||||||
|
unique,
|
||||||
|
write_databases_csv,
|
||||||
|
wait_for_postgres,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls) -> None:
|
||||||
|
require_docker()
|
||||||
|
cls.prefix = unique("baudolo-e2e-dump-only-fallback")
|
||||||
|
cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
|
||||||
|
ensure_empty_dir(cls.backups_dir)
|
||||||
|
|
||||||
|
cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
|
||||||
|
cls.repo_name = cls.prefix
|
||||||
|
|
||||||
|
cls.pg_container = f"{cls.prefix}-pg"
|
||||||
|
cls.pg_volume = f"{cls.prefix}-pg-vol"
|
||||||
|
cls.restore_volume = f"{cls.prefix}-restore-vol"
|
||||||
|
|
||||||
|
cls.containers = [cls.pg_container]
|
||||||
|
cls.volumes = [cls.pg_volume, cls.restore_volume]
|
||||||
|
|
||||||
|
run(["docker", "volume", "create", cls.pg_volume])
|
||||||
|
|
||||||
|
# Start Postgres (creates a real DB volume)
|
||||||
|
run(
|
||||||
|
[
|
||||||
|
"docker",
|
||||||
|
"run",
|
||||||
|
"-d",
|
||||||
|
"--name",
|
||||||
|
cls.pg_container,
|
||||||
|
"-e",
|
||||||
|
"POSTGRES_PASSWORD=pgpw",
|
||||||
|
"-e",
|
||||||
|
"POSTGRES_DB=appdb",
|
||||||
|
"-e",
|
||||||
|
"POSTGRES_USER=postgres",
|
||||||
|
"-v",
|
||||||
|
f"{cls.pg_volume}:/var/lib/postgresql/data",
|
||||||
|
"postgres:16",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
|
||||||
|
|
||||||
|
# Add a deterministic marker file into the volume
|
||||||
|
cls.marker = "dump-only-fallback-marker"
|
||||||
|
run(
|
||||||
|
[
|
||||||
|
"docker",
|
||||||
|
"exec",
|
||||||
|
cls.pg_container,
|
||||||
|
"sh",
|
||||||
|
"-lc",
|
||||||
|
f"echo '{cls.marker}' > /var/lib/postgresql/data/marker.txt",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# databases.csv WITHOUT matching entry for this instance -> should skip dump
|
||||||
|
cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
|
||||||
|
write_databases_csv(cls.databases_csv, []) # empty except header
|
||||||
|
|
||||||
|
# Run baudolo with --dump-only and a DB container present:
|
||||||
|
# Expected: WARNING + FALLBACK to file backup (files/ must exist)
|
||||||
|
cmd = [
|
||||||
|
"baudolo",
|
||||||
|
"--compose-dir",
|
||||||
|
cls.compose_dir,
|
||||||
|
"--docker-compose-hard-restart-required",
|
||||||
|
"mailu",
|
||||||
|
"--repo-name",
|
||||||
|
cls.repo_name,
|
||||||
|
"--databases-csv",
|
||||||
|
cls.databases_csv,
|
||||||
|
"--backups-dir",
|
||||||
|
cls.backups_dir,
|
||||||
|
"--database-containers",
|
||||||
|
cls.pg_container,
|
||||||
|
"--images-no-stop-required",
|
||||||
|
"postgres",
|
||||||
|
"mariadb",
|
||||||
|
"mysql",
|
||||||
|
"alpine",
|
||||||
|
"--dump-only",
|
||||||
|
]
|
||||||
|
cp = run(cmd, capture=True, check=True)
|
||||||
|
|
||||||
|
cls.stdout = cp.stdout or ""
|
||||||
|
cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
|
||||||
|
|
||||||
|
# Restore files into a fresh volume to prove file backup happened
|
||||||
|
run(["docker", "volume", "create", cls.restore_volume])
|
||||||
|
run(
|
||||||
|
[
|
||||||
|
"baudolo-restore",
|
||||||
|
"files",
|
||||||
|
cls.restore_volume,
|
||||||
|
cls.hash,
|
||||||
|
cls.version,
|
||||||
|
"--backups-dir",
|
||||||
|
cls.backups_dir,
|
||||||
|
"--repo-name",
|
||||||
|
cls.repo_name,
|
||||||
|
"--source-volume",
|
||||||
|
cls.pg_volume,
|
||||||
|
"--rsync-image",
|
||||||
|
"ghcr.io/kevinveenbirkenbach/alpine-rsync",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def tearDownClass(cls) -> None:
|
||||||
|
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
|
||||||
|
|
||||||
|
def test_warns_about_missing_dump_in_dump_only_mode(self) -> None:
|
||||||
|
self.assertIn(
|
||||||
|
"WARNING: dump-only requested but no DB dump was produced",
|
||||||
|
self.stdout,
|
||||||
|
f"Expected warning in baudolo output. STDOUT:\n{self.stdout}",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_files_backup_exists_due_to_fallback(self) -> None:
|
||||||
|
p = backup_path(
|
||||||
|
self.backups_dir,
|
||||||
|
self.repo_name,
|
||||||
|
self.version,
|
||||||
|
self.pg_volume,
|
||||||
|
) / "files"
|
||||||
|
self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")
|
||||||
|
|
||||||
|
def test_sql_dump_not_present(self) -> None:
|
||||||
|
# There should be no sql dumps because databases.csv had no matching entry.
|
||||||
|
sql_dir = backup_path(
|
||||||
|
self.backups_dir,
|
||||||
|
self.repo_name,
|
||||||
|
self.version,
|
||||||
|
self.pg_volume,
|
||||||
|
) / "sql"
|
||||||
|
# Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
|
||||||
|
if sql_dir.exists():
|
||||||
|
dumps = list(sql_dir.glob("*.sql"))
|
||||||
|
self.assertEqual(
|
||||||
|
len(dumps),
|
||||||
|
0,
|
||||||
|
f"Did not expect SQL dump files, found: {dumps}",
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_restored_files_contain_marker(self) -> None:
|
||||||
|
p = run(
|
||||||
|
[
|
||||||
|
"docker",
|
||||||
|
"run",
|
||||||
|
"--rm",
|
||||||
|
"-v",
|
||||||
|
f"{self.restore_volume}:/data",
|
||||||
|
"alpine:3.20",
|
||||||
|
"sh",
|
||||||
|
"-lc",
|
||||||
|
"cat /data/marker.txt",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
self.assertEqual((p.stdout or "").strip(), self.marker)
|
||||||
Reference in New Issue
Block a user