From c01ab55f2d41814ea3b58c935453ad6ddfa081da Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Mon, 29 Dec 2025 08:28:23 +0100 Subject: [PATCH] test(e2e): add dump-only-sql mixed-run + CLI contract coverage - rename dump-only flag to --dump-only-sql across docs and tests - update backup logic: skip files/ only for DB volumes when dumps succeed; fallback to files when dumps fail - extend e2e helpers to support dump_only_sql - add e2e mixed-run regression test (DB dump => no files/, non-DB => files/) - add e2e CLI/argparse contract test (--dump-only-sql present, --dump-only rejected) - fix e2e files test to expect file backups for non-DB volumes in dump-only-sql mode and verify restore - update changelog + README flag table https://chatgpt.com/share/69522d9c-ce08-800f-9070-71df3bd779ae --- CHANGELOG.md | 2 +- README.md | 2 +- src/baudolo/backup/app.py | 25 +-- src/baudolo/backup/cli.py | 11 +- tests/e2e/helpers.py | 6 +- .../test_e2e_cli_contract_dump_only_sql.py | 38 ++++ .../test_e2e_dump_only_fallback_to_files.py | 10 +- tests/e2e/test_e2e_dump_only_sql_mixed_run.py | 182 ++++++++++++++++++ tests/e2e/test_e2e_files_no_copy.py | 47 +++-- tests/e2e/test_e2e_mariadb_no_copy.py | 4 +- tests/e2e/test_e2e_postgres_no_copy.py | 2 +- 11 files changed, 291 insertions(+), 38 deletions(-) create mode 100644 tests/e2e/test_e2e_cli_contract_dump_only_sql.py create mode 100644 tests/e2e/test_e2e_dump_only_sql_mixed_run.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 41b1f20..7b5e66c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## [1.1.1] - 2025-12-28 -* * **Backup:** In ***--dump-only*** mode, fall back to file backups with a warning when no database dump can be produced (e.g. missing `databases.csv` entry). +* * **Backup:** In ***--dump-only-sql*** mode, fall back to file backups with a warning when no database dump can be produced (e.g. missing `databases.csv` entry). ## [1.1.0] - 2025-12-28 diff --git a/README.md b/README.md index 62eed98..7578eed 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,7 @@ baudolo \ | Flag | Description | | --------------- | ------------------------------------------- | | `--everything` | Always stop containers and re-run rsync | -| `--dump-only` | Only create SQL dumps, skip file backups | +| `--dump-only-sql`| Skip file backups only for DB volumes when dumps succeed; non-DB volumes are still backed up; fallback to files if no dump. | | `--shutdown` | Do not restart containers after backup | | `--backups-dir` | Backup root directory (default: `/Backups`) | | `--repo-name` | Backup namespace under machine hash | diff --git a/src/baudolo/backup/app.py b/src/baudolo/backup/app.py index 73a6219..b541cb5 100644 --- a/src/baudolo/backup/app.py +++ b/src/baudolo/backup/app.py @@ -149,17 +149,20 @@ def main() -> int: database_containers=args.database_containers, ) - # dump-only logic: - if args.dump_only: - if found_db and not dumped_any: - print( - f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.", - flush=True, - ) - # continue to file backup below - else: - # keep old behavior: skip file backups - continue + # dump-only-sql logic: + if args.dump_only_sql: + if found_db: + if not dumped_any: + print( + f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.", + flush=True, + ) + # fall through to file backup below + else: + # DB volume successfully dumped -> skip file backup + continue + # Non-DB volume -> always do file backup (fall through) + # skip file backup if all linked containers are ignored diff --git a/src/baudolo/backup/cli.py b/src/baudolo/backup/cli.py index 343fd8d..7615a91 100644 --- a/src/baudolo/backup/cli.py +++ b/src/baudolo/backup/cli.py @@ -68,10 +68,15 @@ def parse_args() -> argparse.Namespace: action="store_true", help="Do not restart containers after backup", ) + p.add_argument( - "--dump-only", + "--dump-only-sql", action="store_true", - help="Only create DB dumps (skip ALL file rsync backups)", + help=( + "Create database dumps only for DB volumes. " + "File backups are skipped for DB volumes if a dump succeeds, " + "but non-DB volumes are still backed up. " + "If a DB dump cannot be produced, baudolo falls back to a file backup." + ), ) - return p.parse_args() diff --git a/tests/e2e/helpers.py b/tests/e2e/helpers.py index 82a55a9..2d4a609 100644 --- a/tests/e2e/helpers.py +++ b/tests/e2e/helpers.py @@ -166,7 +166,7 @@ def backup_run( database_containers: list[str], images_no_stop_required: list[str], images_no_backup_required: list[str] | None = None, - dump_only: bool = False, + dump_only_sql: bool = False, ) -> None: cmd = [ "baudolo", @@ -187,8 +187,8 @@ def backup_run( ] if images_no_backup_required: cmd += ["--images-no-backup-required", *images_no_backup_required] - if dump_only: - cmd += ["--dump-only"] + if dump_only_sql: + cmd += ["--dump-only-sql"] try: run(cmd, capture=True, check=True) diff --git a/tests/e2e/test_e2e_cli_contract_dump_only_sql.py b/tests/e2e/test_e2e_cli_contract_dump_only_sql.py new file mode 100644 index 0000000..e028ced --- /dev/null +++ b/tests/e2e/test_e2e_cli_contract_dump_only_sql.py @@ -0,0 +1,38 @@ +import unittest + +from .helpers import run + + +class TestE2ECLIContractDumpOnlySql(unittest.TestCase): + def test_help_mentions_new_flag(self) -> None: + cp = run(["baudolo", "--help"], capture=True, check=True) + out = (cp.stdout or "") + "\n" + (cp.stderr or "") + self.assertIn( + "--dump-only-sql", + out, + f"Expected '--dump-only-sql' to appear in --help output. Output:\n{out}", + ) + + def test_help_does_not_mention_old_flag(self) -> None: + cp = run(["baudolo", "--help"], capture=True, check=True) + out = (cp.stdout or "") + "\n" + (cp.stderr or "") + self.assertNotIn( + "--dump-only", + out, + f"Did not expect legacy '--dump-only' to appear in --help output. Output:\n{out}", + ) + + def test_old_flag_is_rejected(self) -> None: + cp = run(["baudolo", "--dump-only"], capture=True, check=False) + self.assertEqual( + cp.returncode, + 2, + f"Expected exitcode 2 for unknown args, got {cp.returncode}\n" + f"STDOUT={cp.stdout}\nSTDERR={cp.stderr}", + ) + err = (cp.stderr or "") + "\n" + (cp.stdout or "") + # Argparse typically prints "unrecognized arguments" + self.assertTrue( + ("unrecognized arguments" in err) or ("usage:" in err.lower()), + f"Expected argparse-style error output. Output:\n{err}", + ) diff --git a/tests/e2e/test_e2e_dump_only_fallback_to_files.py b/tests/e2e/test_e2e_dump_only_fallback_to_files.py index 4be274f..cdb2231 100644 --- a/tests/e2e/test_e2e_dump_only_fallback_to_files.py +++ b/tests/e2e/test_e2e_dump_only_fallback_to_files.py @@ -19,7 +19,7 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase): @classmethod def setUpClass(cls) -> None: require_docker() - cls.prefix = unique("baudolo-e2e-dump-only-fallback") + cls.prefix = unique("baudolo-e2e-dump-only-sql-fallback") cls.backups_dir = f"/tmp/{cls.prefix}/Backups" ensure_empty_dir(cls.backups_dir) @@ -57,7 +57,7 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase): wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90) # Add a deterministic marker file into the volume - cls.marker = "dump-only-fallback-marker" + cls.marker = "dump-only-sql-fallback-marker" run( [ "docker", @@ -73,7 +73,7 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase): cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv" write_databases_csv(cls.databases_csv, []) # empty except header - # Run baudolo with --dump-only and a DB container present: + # Run baudolo with --dump-only-sql and a DB container present: # Expected: WARNING + FALLBACK to file backup (files/ must exist) cmd = [ "baudolo", @@ -94,7 +94,7 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase): "mariadb", "mysql", "alpine", - "--dump-only", + "--dump-only-sql", ] cp = run(cmd, capture=True, check=True) @@ -127,7 +127,7 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase): def test_warns_about_missing_dump_in_dump_only_mode(self) -> None: self.assertIn( - "WARNING: dump-only requested but no DB dump was produced", + "WARNING: dump-only-sql requested but no DB dump was produced", self.stdout, f"Expected warning in baudolo output. STDOUT:\n{self.stdout}", ) diff --git a/tests/e2e/test_e2e_dump_only_sql_mixed_run.py b/tests/e2e/test_e2e_dump_only_sql_mixed_run.py new file mode 100644 index 0000000..798a3b4 --- /dev/null +++ b/tests/e2e/test_e2e_dump_only_sql_mixed_run.py @@ -0,0 +1,182 @@ +import unittest + +from .helpers import ( + backup_path, + cleanup_docker, + create_minimal_compose_dir, + ensure_empty_dir, + latest_version_dir, + require_docker, + run, + unique, + wait_for_postgres, + write_databases_csv, +) + + +class TestE2EDumpOnlySqlMixedRun(unittest.TestCase): + @classmethod + def setUpClass(cls) -> None: + require_docker() + cls.prefix = unique("baudolo-e2e-dump-only-sql-mixed-run") + cls.backups_dir = f"/tmp/{cls.prefix}/Backups" + ensure_empty_dir(cls.backups_dir) + + cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}") + cls.repo_name = cls.prefix + + # --- Volumes --- + cls.db_volume = f"{cls.prefix}-vol-db" + cls.files_volume = f"{cls.prefix}-vol-files" + + # Track for cleanup + cls.containers: list[str] = [] + cls.volumes = [cls.db_volume, cls.files_volume] + + # Create volumes + run(["docker", "volume", "create", cls.db_volume]) + run(["docker", "volume", "create", cls.files_volume]) + + # Put a marker into the non-db volume + run( + [ + "docker", + "run", + "--rm", + "-v", + f"{cls.files_volume}:/data", + "alpine:3.20", + "sh", + "-lc", + "echo 'hello-non-db' > /data/hello.txt", + ] + ) + + # --- Start Postgres container using the DB volume --- + cls.pg_container = f"{cls.prefix}-pg" + cls.containers.append(cls.pg_container) + + cls.pg_password = "postgres" + cls.pg_db = "testdb" + cls.pg_user = "postgres" + + run( + [ + "docker", + "run", + "-d", + "--name", + cls.pg_container, + "-e", + f"POSTGRES_PASSWORD={cls.pg_password}", + "-v", + f"{cls.db_volume}:/var/lib/postgresql/data", + "postgres:16-alpine", + ] + ) + wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90) + + # Create deterministic content in DB so dump is non-empty + run( + [ + "docker", + "exec", + cls.pg_container, + "sh", + "-lc", + f'psql -U postgres -c "CREATE DATABASE {cls.pg_db};" || true', + ], + check=True, + ) + run( + [ + "docker", + "exec", + cls.pg_container, + "sh", + "-lc", + ( + f'psql -U postgres -d {cls.pg_db} -c ' + '"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);' + "INSERT INTO t(id,v) VALUES (1,'hello-db') " + "ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\"" + ), + ], + check=True, + ) + + # databases.csv with an entry => dump should succeed + cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv" + write_databases_csv( + cls.databases_csv, + [(cls.pg_container, cls.pg_db, cls.pg_user, cls.pg_password)], + ) + + # Run baudolo with dump-only-sql + cmd = [ + "baudolo", + "--compose-dir", + cls.compose_dir, + "--databases-csv", + cls.databases_csv, + "--database-containers", + cls.pg_container, + "--images-no-stop-required", + "alpine", + "postgres", + "mariadb", + "mysql", + "--dump-only-sql", + "--backups-dir", + cls.backups_dir, + "--repo-name", + cls.repo_name, + ] + cp = run(cmd, capture=True, check=True) + cls.stdout = cp.stdout + cls.stderr = cp.stderr + + cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name) + + @classmethod + def tearDownClass(cls) -> None: + cleanup_docker(containers=cls.containers, volumes=cls.volumes) + + def test_db_volume_has_dump_and_no_files_dir(self) -> None: + base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume) + + dumps = base / "dumps" + files = base / "files" + + self.assertTrue(dumps.exists(), f"Expected dumps dir for DB volume at: {dumps}") + self.assertFalse( + files.exists(), + f"Did not expect files dir for DB volume when dump succeeded at: {files}", + ) + + # Optional: at least one dump file exists + dump_files = list(dumps.glob("*.sql")) + list(dumps.glob("*.sql.gz")) + self.assertTrue( + dump_files, + f"Expected at least one SQL dump file in {dumps}, found none.", + ) + + def test_non_db_volume_has_files_dir(self) -> None: + base = backup_path( + self.backups_dir, self.repo_name, self.version, self.files_volume + ) + files = base / "files" + self.assertTrue( + files.exists(), + f"Expected files dir for non-DB volume at: {files}", + ) + + def test_dump_only_sql_does_not_disable_non_db_files_backup(self) -> None: + # Regression guard: even with --dump-only-sql, non-DB volumes must still be backed up as files + base = backup_path( + self.backups_dir, self.repo_name, self.version, self.files_volume + ) + self.assertTrue( + (base / "files").exists(), + f"Expected non-DB volume files backup to exist at: {base / 'files'}", + ) diff --git a/tests/e2e/test_e2e_files_no_copy.py b/tests/e2e/test_e2e_files_no_copy.py index 1d8ce3e..6f6a892 100644 --- a/tests/e2e/test_e2e_files_no_copy.py +++ b/tests/e2e/test_e2e_files_no_copy.py @@ -26,10 +26,10 @@ class TestE2EFilesNoCopy(unittest.TestCase): cls.repo_name = cls.prefix cls.volume_src = f"{cls.prefix}-vol-src" - cls.volume_dst = f"{cls.prefix}-vol-dst" - cls.containers = [] - cls.volumes = [cls.volume_src, cls.volume_dst] + cls.containers: list[str] = [] + cls.volumes = [cls.volume_src] + # Create source volume and write a marker file run(["docker", "volume", "create", cls.volume_src]) run( [ @@ -48,7 +48,7 @@ class TestE2EFilesNoCopy(unittest.TestCase): cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv" write_databases_csv(cls.databases_csv, []) - # dump-only => NO file rsync backups + # dump-only-sql => non-DB volumes are STILL backed up as files backup_run( backups_dir=cls.backups_dir, repo_name=cls.repo_name, @@ -56,28 +56,32 @@ class TestE2EFilesNoCopy(unittest.TestCase): databases_csv=cls.databases_csv, database_containers=["dummy-db"], images_no_stop_required=["alpine", "postgres", "mariadb", "mysql"], - dump_only=True, + dump_only_sql=True, ) cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name) + # Wipe the volume to ensure restore actually restores something + run(["docker", "volume", "rm", "-f", cls.volume_src]) + run(["docker", "volume", "create", cls.volume_src]) + @classmethod def tearDownClass(cls) -> None: cleanup_docker(containers=cls.containers, volumes=cls.volumes) - def test_files_backup_not_present(self) -> None: + def test_files_backup_present_for_non_db_volume(self) -> None: p = ( backup_path(self.backups_dir, self.repo_name, self.version, self.volume_src) / "files" ) - self.assertFalse(p.exists(), f"Did not expect files backup dir at: {p}") + self.assertTrue(p.exists(), f"Expected files backup dir at: {p}") - def test_restore_files_fails_expected(self) -> None: + def test_restore_files_succeeds_and_restores_content(self) -> None: p = run( [ "baudolo-restore", "files", - self.volume_dst, + self.volume_src, self.hash, self.version, "--backups-dir", @@ -89,6 +93,27 @@ class TestE2EFilesNoCopy(unittest.TestCase): ) self.assertEqual( p.returncode, - 2, - f"Expected exitcode 2, got {p.returncode}\nSTDOUT={p.stdout}\nSTDERR={p.stderr}", + 0, + f"Expected exitcode 0, got {p.returncode}\nSTDOUT={p.stdout}\nSTDERR={p.stderr}", + ) + + cp = run( + [ + "docker", + "run", + "--rm", + "-v", + f"{self.volume_src}:/data", + "alpine:3.20", + "sh", + "-lc", + "cat /data/hello.txt", + ], + capture=True, + check=True, + ) + self.assertEqual( + cp.stdout.strip(), + "hello", + f"Unexpected restored content. STDOUT={cp.stdout}\nSTDERR={cp.stderr}", ) diff --git a/tests/e2e/test_e2e_mariadb_no_copy.py b/tests/e2e/test_e2e_mariadb_no_copy.py index 470ca61..68a0dca 100644 --- a/tests/e2e/test_e2e_mariadb_no_copy.py +++ b/tests/e2e/test_e2e_mariadb_no_copy.py @@ -87,7 +87,7 @@ class TestE2EMariaDBNoCopy(unittest.TestCase): [(cls.db_container, cls.db_name, cls.db_user, cls.db_password)], ) - # dump-only => no files + # dump-only-sql => no files backup_run( backups_dir=cls.backups_dir, repo_name=cls.repo_name, @@ -95,7 +95,7 @@ class TestE2EMariaDBNoCopy(unittest.TestCase): databases_csv=cls.databases_csv, database_containers=[cls.db_container], images_no_stop_required=["mariadb", "mysql", "alpine", "postgres"], - dump_only=True, + dump_only_sql=True, ) cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name) diff --git a/tests/e2e/test_e2e_postgres_no_copy.py b/tests/e2e/test_e2e_postgres_no_copy.py index d0c6c84..28570a8 100644 --- a/tests/e2e/test_e2e_postgres_no_copy.py +++ b/tests/e2e/test_e2e_postgres_no_copy.py @@ -75,7 +75,7 @@ class TestE2EPostgresNoCopy(unittest.TestCase): databases_csv=cls.databases_csv, database_containers=[cls.pg_container], images_no_stop_required=["postgres", "mariadb", "mysql", "alpine"], - dump_only=True, + dump_only_sql=True, ) cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)