fix(seed): handle empty databases.csv and add unit tests

- Gracefully handle empty databases.csv by creating header columns and emitting a warning
- Add _empty_df() helper for consistent DataFrame initialization
- Add unit tests for baudolo-seed including empty-file regression case
- Apply minor formatting fixes across backup and e2e test files

https://chatgpt.com/share/69628f0b-8744-800f-b08d-2633e05167da
This commit is contained in:
2026-01-10 18:40:22 +01:00
parent e4bc075474
commit d976640312
8 changed files with 289 additions and 39 deletions

View File

@@ -133,22 +133,28 @@ class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
)
def test_files_backup_exists_due_to_fallback(self) -> None:
p = backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.pg_volume,
) / "files"
p = (
backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.pg_volume,
)
/ "files"
)
self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")
def test_sql_dump_not_present(self) -> None:
# There should be no sql dumps because databases.csv had no matching entry.
sql_dir = backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.pg_volume,
) / "sql"
sql_dir = (
backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.pg_volume,
)
/ "sql"
)
# Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
if sql_dir.exists():
dumps = list(sql_dir.glob("*.sql"))

View File

@@ -96,10 +96,10 @@ class TestE2EDumpOnlySqlMixedRun(unittest.TestCase):
"sh",
"-lc",
(
f'psql -U postgres -d {cls.pg_db} -c '
f"psql -U postgres -d {cls.pg_db} -c "
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
'ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;"'
),
],
check=True,
@@ -143,7 +143,9 @@ class TestE2EDumpOnlySqlMixedRun(unittest.TestCase):
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
def test_db_volume_has_dump_and_no_files_dir(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
base = backup_path(
self.backups_dir, self.repo_name, self.version, self.db_volume
)
dumps = base / "sql"
files = base / "files"

View File

@@ -99,10 +99,10 @@ class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db1} -c '
f"psql -U {cls.pg_user} -d {cls.pg_db1} -c "
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db1') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
'ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;"'
),
],
check=True,
@@ -115,10 +115,10 @@ class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db2} -c '
f"psql -U {cls.pg_user} -d {cls.pg_db2} -c "
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db2') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
'ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;"'
),
],
check=True,
@@ -132,7 +132,16 @@ class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
instance = cls.pg_container
# Seed star entry (pg_dumpall)
run(["baudolo-seed", cls.databases_csv, instance, "*", cls.pg_user, cls.pg_password])
run(
[
"baudolo-seed",
cls.databases_csv,
instance,
"*",
cls.pg_user,
cls.pg_password,
]
)
# Seed concrete DB entry (pg_dump)
run(
@@ -177,7 +186,9 @@ class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
def test_db_volume_has_cluster_dump_and_concrete_db_dump_and_no_files(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
base = backup_path(
self.backups_dir, self.repo_name, self.version, self.db_volume
)
sql_dir = base / "sql"
files_dir = base / "files"
@@ -204,10 +215,14 @@ class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
)
def test_non_db_volume_still_has_files_backup(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.files_volume)
base = backup_path(
self.backups_dir, self.repo_name, self.version, self.files_volume
)
files_dir = base / "files"
self.assertTrue(files_dir.exists(), f"Expected files dir for non-DB volume at: {files_dir}")
self.assertTrue(
files_dir.exists(), f"Expected files dir for non-DB volume at: {files_dir}"
)
marker = files_dir / "hello.txt"
self.assertTrue(marker.is_file(), f"Expected marker file at: {marker}")