diff --git a/src/baudolo/backup/app.py b/src/baudolo/backup/app.py
index 00fa9af..73a6219 100644
--- a/src/baudolo/backup/app.py
+++ b/src/baudolo/backup/app.py
@@ -72,28 +72,27 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) ->
             return True
     return False
 
-
 def backup_mariadb_or_postgres(
     *,
     container: str,
     volume_dir: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
     """
-    Returns True if the container is a DB container we handled.
+    Returns (is_db_container, dumped_any).
     """
     for img in ["mariadb", "postgres"]:
         if has_image(container, img):
-            backup_database(
+            dumped = backup_database(
                 container=container,
                 volume_dir=volume_dir,
                 db_type=img,
                 databases_df=databases_df,
                 database_containers=database_containers,
             )
-            return True
-    return False
+            return True, dumped
+    return False, False
 
 
 def _backup_dumps_for_volume(
@@ -102,21 +101,26 @@ def _backup_dumps_for_volume(
     vol_dir: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
     """
-    Create DB dumps for any mariadb/postgres containers attached to this volume.
-    Returns True if at least one dump was produced.
+    Returns (found_db_container, dumped_any).
     """
+    found_db = False
     dumped_any = False
+
     for c in containers:
-        if backup_mariadb_or_postgres(
+        is_db, dumped = backup_mariadb_or_postgres(
             container=c,
             volume_dir=vol_dir,
             databases_df=databases_df,
             database_containers=database_containers,
-        ):
+        )
+        if is_db:
+            found_db = True
+        if dumped:
             dumped_any = True
-    return dumped_any
+
+    return found_db, dumped_any
 
 
 def main() -> int:
@@ -137,18 +141,26 @@ def main() -> int:
         containers = containers_using_volume(volume_name)
         vol_dir = create_volume_directory(version_dir, volume_name)
-
-        # Old behavior: DB dumps are additional to file backups.
-        _backup_dumps_for_volume(
+
+        found_db, dumped_any = _backup_dumps_for_volume(
             containers=containers,
             vol_dir=vol_dir,
             databases_df=databases_df,
             database_containers=args.database_containers,
         )
 
-        # dump-only: skip ALL file rsync backups
+        # dump-only logic:
         if args.dump_only:
-            continue
+            if found_db and not dumped_any:
+                print(
+                    f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
+                    flush=True,
+                )
+                # continue to file backup below
+            else:
+                # keep old behavior: skip file backups
+                continue
 
+        # skip file backup if all linked containers are ignored
         if volume_is_fully_ignored(containers, args.images_no_backup_required):
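Note on the app.py change above: with --dump-only, a volume's file backup is now skipped only when the volume has no DB container at all or a dump was actually produced; a DB volume whose dump was skipped falls back to a file backup. A minimal sketch of that decision as a pure function, handy for table-driven tests; the function name is hypothetical and not part of the patch:

def should_skip_file_backup(dump_only: bool, found_db: bool, dumped_any: bool) -> bool:
    # Normal mode: file backups always run; DB dumps are additional.
    if not dump_only:
        return False
    # Fallback case: a DB container exists but no dump was produced,
    # so keep the file backup as a safety net.
    if found_db and not dumped_any:
        return False
    # dump-only honored: a dump was produced, or the volume has no DB container.
    return True

assert should_skip_file_backup(False, True, False) is False  # normal mode
assert should_skip_file_backup(True, True, True) is True     # dump produced
assert should_skip_file_backup(True, True, False) is False   # fallback to files
assert should_skip_file_backup(True, False, False) is True   # non-DB volume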
diff --git a/src/baudolo/backup/db.py b/src/baudolo/backup/db.py
index f36c2ba..f1798e2 100644
--- a/src/baudolo/backup/db.py
+++ b/src/baudolo/backup/db.py
@@ -3,9 +3,8 @@ from __future__ import annotations
 import os
 import pathlib
 import re
-
-import pandas
 import logging
+import pandas
 
 from .shell import BackupException, execute_shell_command
 
@@ -18,9 +17,7 @@ def get_instance(container: str, database_containers: list[str]) -> str:
     return re.split(r"(_|-)(database|db|postgres)", container)[0]
 
-def fallback_pg_dumpall(
-    container: str, username: str, password: str, out_file: str
-) -> None:
+def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
     cmd = (
         f"PGPASSWORD={password} docker exec -i {container} "
         f"pg_dumpall -U {username} -h localhost > {out_file}"
     )
@@ -35,20 +32,25 @@ def backup_database(
     db_type: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> None:
+) -> bool:
+    """
+    Returns True if at least one dump file was produced, else False.
+    """
     instance_name = get_instance(container, database_containers)
     entries = databases_df.loc[databases_df["instance"] == instance_name]
 
     if entries.empty:
-        log.warning("No entry found for instance '%s'", instance_name)
-        return
+        log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
+        return False
 
     out_dir = os.path.join(volume_dir, "sql")
     pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
 
-    for row in entries.iloc:
-        db_name = row["database"]
-        user = row["username"]
-        password = row["password"]
+    produced = False
+
+    for row in entries.itertuples(index=False):
+        db_name = row.database
+        user = row.username
+        password = row.password
 
         dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
@@ -58,13 +60,15 @@
                 f"-u {user} -p{password} {db_name} > {dump_file}"
             )
             execute_shell_command(cmd)
+            produced = True
             continue
 
         if db_type == "postgres":
             cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
+
             if not db_name:
                 fallback_pg_dumpall(container, user, password, cluster_file)
-                return
+                return True
 
             try:
                 cmd = (
@@ -72,6 +76,7 @@
                     f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
                 )
                 execute_shell_command(cmd)
+                produced = True
             except BackupException as e:
                 print(f"pg_dump failed: {e}", flush=True)
                 print(
@@ -79,4 +84,7 @@
                     flush=True,
                 )
                 fallback_pg_dumpall(container, user, password, cluster_file)
+                produced = True
             continue
+
+    return produced
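Note on the db.py change above: iterating entries.iloc only worked through the legacy __getitem__ iteration protocol, not a documented pandas API; itertuples(index=False) yields proper namedtuples instead. A small illustration of the DataFrame shape backup_database() expects, assuming the columns used above (instance, database, username, password); the rows are made up:

import pandas

databases_df = pandas.DataFrame(
    [
        {"instance": "app", "database": "appdb", "username": "postgres", "password": "pgpw"},
        # An empty database name on a postgres instance routes backup_database()
        # into the pg_dumpall cluster fallback.
        {"instance": "legacy", "database": "", "username": "postgres", "password": "pgpw"},
    ]
)

entries = databases_df.loc[databases_df["instance"] == "app"]
for row in entries.itertuples(index=False):
    print(row.database, row.username)  # -> appdb postgres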
diff --git a/tests/e2e/test_e2e_dump_only_fallback_to_files.py b/tests/e2e/test_e2e_dump_only_fallback_to_files.py
new file mode 100644
index 0000000..4be274f
--- /dev/null
+++ b/tests/e2e/test_e2e_dump_only_fallback_to_files.py
@@ -0,0 +1,175 @@
+# tests/e2e/test_e2e_dump_only_fallback_to_files.py
+import unittest
+
+from .helpers import (
+    backup_path,
+    cleanup_docker,
+    create_minimal_compose_dir,
+    ensure_empty_dir,
+    latest_version_dir,
+    require_docker,
+    run,
+    unique,
+    write_databases_csv,
+    wait_for_postgres,
+)
+
+
+class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        require_docker()
+        cls.prefix = unique("baudolo-e2e-dump-only-fallback")
+        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
+        ensure_empty_dir(cls.backups_dir)
+
+        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
+        cls.repo_name = cls.prefix
+
+        cls.pg_container = f"{cls.prefix}-pg"
+        cls.pg_volume = f"{cls.prefix}-pg-vol"
+        cls.restore_volume = f"{cls.prefix}-restore-vol"
+
+        cls.containers = [cls.pg_container]
+        cls.volumes = [cls.pg_volume, cls.restore_volume]
+
+        run(["docker", "volume", "create", cls.pg_volume])
+
+        # Start Postgres (creates a real DB volume)
+        run(
+            [
+                "docker",
+                "run",
+                "-d",
+                "--name",
+                cls.pg_container,
+                "-e",
+                "POSTGRES_PASSWORD=pgpw",
+                "-e",
+                "POSTGRES_DB=appdb",
+                "-e",
+                "POSTGRES_USER=postgres",
+                "-v",
+                f"{cls.pg_volume}:/var/lib/postgresql/data",
+                "postgres:16",
+            ]
+        )
+        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
+
+        # Add a deterministic marker file into the volume
+        cls.marker = "dump-only-fallback-marker"
+        run(
+            [
+                "docker",
+                "exec",
+                cls.pg_container,
+                "sh",
+                "-lc",
+                f"echo '{cls.marker}' > /var/lib/postgresql/data/marker.txt",
+            ]
+        )
+
+        # databases.csv WITHOUT a matching entry for this instance -> the dump should be skipped
+        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
+        write_databases_csv(cls.databases_csv, [])  # empty except header
+
+        # Run baudolo with --dump-only and a DB container present.
+        # Expected: WARNING + fallback to file backup (files/ must exist)
+        cmd = [
+            "baudolo",
+            "--compose-dir",
+            cls.compose_dir,
+            "--docker-compose-hard-restart-required",
+            "mailu",
+            "--repo-name",
+            cls.repo_name,
+            "--databases-csv",
+            cls.databases_csv,
+            "--backups-dir",
+            cls.backups_dir,
+            "--database-containers",
+            cls.pg_container,
+            "--images-no-stop-required",
+            "postgres",
+            "mariadb",
+            "mysql",
+            "alpine",
+            "--dump-only",
+        ]
+        cp = run(cmd, capture=True, check=True)
+
+        cls.stdout = cp.stdout or ""
+        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
+
+        # Restore files into a fresh volume to prove the file backup happened
+        run(["docker", "volume", "create", cls.restore_volume])
+        run(
+            [
+                "baudolo-restore",
+                "files",
+                cls.restore_volume,
+                cls.hash,
+                cls.version,
+                "--backups-dir",
+                cls.backups_dir,
+                "--repo-name",
+                cls.repo_name,
+                "--source-volume",
+                cls.pg_volume,
+                "--rsync-image",
+                "ghcr.io/kevinveenbirkenbach/alpine-rsync",
+            ]
+        )
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
+
+    def test_warns_about_missing_dump_in_dump_only_mode(self) -> None:
+        self.assertIn(
+            "WARNING: dump-only requested but no DB dump was produced",
+            self.stdout,
+            f"Expected warning in baudolo output. STDOUT:\n{self.stdout}",
+        )
+
+    def test_files_backup_exists_due_to_fallback(self) -> None:
+        p = backup_path(
+            self.backups_dir,
+            self.repo_name,
+            self.version,
+            self.pg_volume,
+        ) / "files"
+        self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")
+
+    def test_sql_dump_not_present(self) -> None:
+        # There should be no sql dumps because databases.csv had no matching entry.
+        sql_dir = backup_path(
+            self.backups_dir,
+            self.repo_name,
+            self.version,
+            self.pg_volume,
+        ) / "sql"
+        # Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
+        if sql_dir.exists():
+            dumps = list(sql_dir.glob("*.sql"))
+            self.assertEqual(
+                len(dumps),
+                0,
+                f"Did not expect SQL dump files, found: {dumps}",
+            )
+
+    def test_restored_files_contain_marker(self) -> None:
+        p = run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{self.restore_volume}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "cat /data/marker.txt",
+            ],
+            capture=True,
+        )
+        self.assertEqual((p.stdout or "").strip(), self.marker)
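Note on the test above: it imports several helpers from tests/e2e/helpers.py that this diff does not show. As one example, a plausible shape for wait_for_postgres(), polling pg_isready inside the container; this is a sketch under assumptions, not the repository's actual helper:

import subprocess
import time

def wait_for_postgres(container: str, *, user: str, timeout_s: int = 60) -> None:
    """Poll pg_isready in the container until it reports ready or the timeout expires."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        cp = subprocess.run(
            ["docker", "exec", container, "pg_isready", "-U", user],
            capture_output=True,
        )
        if cp.returncode == 0:
            return
        time.sleep(1.0)
    raise TimeoutError(f"Postgres in '{container}' not ready after {timeout_s}s")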