feat(backup): stricter databases.csv semantics + atomic SQL dumps

- read databases.csv with stable types (dtype=str, keep_default_na=False)
- validate database field: require '*' or concrete name (no empty/NaN)
- support Postgres cluster dumps via '*' entries (pg_dumpall)
- write SQL dumps atomically to avoid partial/empty files
- early-skip fully ignored volumes before creating backup directories
- update seed CLI to enforce new contract and update by (instance,database)
- adjust tests: sql dir naming + add E2E coverage for early-skip and '*' seeding
This commit is contained in:
2025-12-29 11:39:57 +01:00
parent c01ab55f2d
commit f3ef86a444
8 changed files with 667 additions and 109 deletions

View File

@@ -132,7 +132,12 @@ def main() -> int:
versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name) versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name)
version_dir = create_version_directory(versions_dir, backup_time) version_dir = create_version_directory(versions_dir, backup_time)
databases_df = pandas.read_csv(args.databases_csv, sep=";") # IMPORTANT:
# - keep_default_na=False prevents empty fields from turning into NaN
# - dtype=str keeps all columns stable for comparisons/validation
databases_df = pandas.read_csv(
args.databases_csv, sep=";", keep_default_na=False, dtype=str
)
print("💾 Start volume backups...", flush=True) print("💾 Start volume backups...", flush=True)
@@ -140,8 +145,16 @@ def main() -> int:
print(f"Start backup routine for volume: {volume_name}", flush=True) print(f"Start backup routine for volume: {volume_name}", flush=True)
containers = containers_using_volume(volume_name) containers = containers_using_volume(volume_name)
# EARLY SKIP: if all linked containers are ignored, do not create any dirs
if volume_is_fully_ignored(containers, args.images_no_backup_required):
print(
f"Skipping volume '{volume_name}' entirely (all linked containers are ignored).",
flush=True,
)
continue
vol_dir = create_volume_directory(version_dir, volume_name) vol_dir = create_volume_directory(version_dir, volume_name)
found_db, dumped_any = _backup_dumps_for_volume( found_db, dumped_any = _backup_dumps_for_volume(
containers=containers, containers=containers,
vol_dir=vol_dir, vol_dir=vol_dir,
@@ -163,16 +176,6 @@ def main() -> int:
continue continue
# Non-DB volume -> always do file backup (fall through) # Non-DB volume -> always do file backup (fall through)
# skip file backup if all linked containers are ignored
if volume_is_fully_ignored(containers, args.images_no_backup_required):
print(
f"Skipping file backup for volume '{volume_name}' (all linked containers are ignored).",
flush=True,
)
continue
if args.everything: if args.everything:
# "everything": always do pre-rsync, then stop + rsync again # "everything": always do pre-rsync, then stop + rsync again
backup_volume(versions_dir, volume_name, vol_dir) backup_volume(versions_dir, volume_name, vol_dir)

View File

@@ -4,6 +4,8 @@ import os
import pathlib import pathlib
import re import re
import logging import logging
from typing import Optional
import pandas import pandas
from .shell import BackupException, execute_shell_command from .shell import BackupException, execute_shell_command
@@ -12,17 +14,53 @@ log = logging.getLogger(__name__)
def get_instance(container: str, database_containers: list[str]) -> str: def get_instance(container: str, database_containers: list[str]) -> str:
"""
Derive a stable instance name from the container name.
"""
if container in database_containers: if container in database_containers:
return container return container
return re.split(r"(_|-)(database|db|postgres)", container)[0] return re.split(r"(_|-)(database|db|postgres)", container)[0]
def _validate_database_value(value: Optional[str], *, instance: str) -> str:
"""
Enforce explicit database semantics:
- "*" => dump ALL databases (cluster dump for Postgres)
- "<name>" => dump exactly this database
- "" => invalid configuration (would previously result in NaN / nan.backup.sql)
"""
v = (value or "").strip()
if v == "":
raise ValueError(
f"Invalid databases.csv entry for instance '{instance}': "
"column 'database' must be '*' or a concrete database name (not empty)."
)
return v
def _atomic_write_cmd(cmd: str, out_file: str) -> None:
"""
Write dump output atomically:
- write to <file>.tmp
- rename to <file> only on success
This prevents empty or partial dump files from being treated as valid backups.
"""
tmp = f"{out_file}.tmp"
execute_shell_command(f"{cmd} > {tmp}")
execute_shell_command(f"mv {tmp} {out_file}")
def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None: def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
"""
Perform a full Postgres cluster dump using pg_dumpall.
"""
cmd = ( cmd = (
f"PGPASSWORD={password} docker exec -i {container} " f"PGPASSWORD={password} docker exec -i {container} "
f"pg_dumpall -U {username} -h localhost > {out_file}" f"pg_dumpall -U {username} -h localhost"
) )
execute_shell_command(cmd) _atomic_write_cmd(cmd, out_file)
def backup_database( def backup_database(
@@ -34,12 +72,15 @@ def backup_database(
database_containers: list[str], database_containers: list[str],
) -> bool: ) -> bool:
""" """
Returns True if at least one dump file was produced, else False. Backup databases for a given DB container.
Returns True if at least one dump was produced.
""" """
instance_name = get_instance(container, database_containers) instance_name = get_instance(container, database_containers)
entries = databases_df.loc[databases_df["instance"] == instance_name]
entries = databases_df[databases_df["instance"] == instance_name]
if entries.empty: if entries.empty:
log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name) log.debug("No database entries for instance '%s'", instance_name)
return False return False
out_dir = os.path.join(volume_dir, "sql") out_dir = os.path.join(volume_dir, "sql")
@@ -48,43 +89,56 @@ def backup_database(
produced = False produced = False
for row in entries.itertuples(index=False): for row in entries.itertuples(index=False):
db_name = row.database raw_db = getattr(row, "database", "")
user = row.username user = (getattr(row, "username", "") or "").strip()
password = row.password password = (getattr(row, "password", "") or "").strip()
db_value = _validate_database_value(raw_db, instance=instance_name)
# Explicit: dump ALL databases
if db_value == "*":
if db_type != "postgres":
raise ValueError(
f"databases.csv entry for instance '{instance_name}': "
"'*' is currently only supported for Postgres."
)
cluster_file = os.path.join(
out_dir, f"{instance_name}.cluster.backup.sql"
)
fallback_pg_dumpall(container, user, password, cluster_file)
produced = True
continue
# Concrete database dump
db_name = db_value
dump_file = os.path.join(out_dir, f"{db_name}.backup.sql") dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
if db_type == "mariadb": if db_type == "mariadb":
cmd = ( cmd = (
f"docker exec {container} /usr/bin/mariadb-dump " f"docker exec {container} /usr/bin/mariadb-dump "
f"-u {user} -p{password} {db_name} > {dump_file}" f"-u {user} -p{password} {db_name}"
) )
execute_shell_command(cmd) _atomic_write_cmd(cmd, dump_file)
produced = True produced = True
continue continue
if db_type == "postgres": if db_type == "postgres":
cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
if not db_name:
fallback_pg_dumpall(container, user, password, cluster_file)
return True
try: try:
cmd = ( cmd = (
f"PGPASSWORD={password} docker exec -i {container} " f"PGPASSWORD={password} docker exec -i {container} "
f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}" f"pg_dump -U {user} -d {db_name} -h localhost"
) )
execute_shell_command(cmd) _atomic_write_cmd(cmd, dump_file)
produced = True produced = True
except BackupException as e: except BackupException as e:
print(f"pg_dump failed: {e}", flush=True) # Explicit DB dump failed -> hard error
print( raise BackupException(
f"Falling back to pg_dumpall for instance '{instance_name}'", f"Postgres dump failed for instance '{instance_name}', "
flush=True, f"database '{db_name}'. This database was explicitly configured "
"and therefore must succeed.\n"
f"{e}"
) )
fallback_pg_dumpall(container, user, password, cluster_file)
produced = True
continue continue
return produced return produced

View File

@@ -1,67 +1,106 @@
import pandas as pd #!/usr/bin/env python3
from __future__ import annotations
import argparse import argparse
import os import os
import re
import sys
import pandas as pd
from typing import Optional
def check_and_add_entry(file_path, instance, database, username, password): DB_NAME_RE = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_-]*$")
# Check if the file exists and is not empty
if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
# Read the existing CSV file with header
df = pd.read_csv(file_path, sep=";")
else:
# Create a new DataFrame with columns if file does not exist
df = pd.DataFrame(columns=["instance", "database", "username", "password"])
# Check if the entry exists and remove it def _validate_database_value(value: Optional[str], *, instance: str) -> str:
mask = ( v = (value or "").strip()
(df["instance"] == instance) if v == "":
& ( raise ValueError(
(df["database"] == database) f"Invalid databases.csv entry for instance '{instance}': "
| (((df["database"].isna()) | (df["database"] == "")) & (database == "")) "column 'database' must be '*' or a concrete database name (not empty)."
) )
& (df["username"] == username) if v == "*":
) return "*"
if v.lower() == "nan":
raise ValueError(
f"Invalid databases.csv entry for instance '{instance}': database must not be 'nan'."
)
if not DB_NAME_RE.match(v):
raise ValueError(
f"Invalid databases.csv entry for instance '{instance}': "
f"invalid database name '{v}'. Allowed: letters, numbers, '_' and '-'."
)
return v
if not df[mask].empty: def check_and_add_entry(
print("Replacing existing entry.") file_path: str,
df = df[~mask] instance: str,
database: Optional[str],
username: str,
password: str,
) -> None:
"""
Add or update an entry in databases.csv.
The function enforces strict validation:
- database MUST be set
- database MUST be '*' or a valid database name
"""
database = _validate_database_value(database, instance=instance)
if os.path.exists(file_path):
df = pd.read_csv(
file_path,
sep=";",
dtype=str,
keep_default_na=False,
)
else:
df = pd.DataFrame(
columns=["instance", "database", "username", "password"]
)
mask = (df["instance"] == instance) & (df["database"] == database)
if mask.any():
print("Updating existing entry.")
df.loc[mask, ["username", "password"]] = [username, password]
else: else:
print("Adding new entry.") print("Adding new entry.")
new_entry = pd.DataFrame(
[[instance, database, username, password]],
columns=["instance", "database", "username", "password"],
)
df = pd.concat([df, new_entry], ignore_index=True)
# Create a new DataFrame for the new entry
new_entry = pd.DataFrame(
[
{
"instance": instance,
"database": database,
"username": username,
"password": password,
}
]
)
# Add (or replace) the entry using concat
df = pd.concat([df, new_entry], ignore_index=True)
# Save the updated CSV file
df.to_csv(file_path, sep=";", index=False) df.to_csv(file_path, sep=";", index=False)
def main(): def main() -> None:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Check and replace (or add) a database entry in a CSV file." description="Seed or update databases.csv for backup configuration."
) )
parser.add_argument("file_path", help="Path to the CSV file") parser.add_argument("file", help="Path to databases.csv")
parser.add_argument("instance", help="Database instance") parser.add_argument("instance", help="Instance name (e.g. bigbluebutton)")
parser.add_argument("database", help="Database name") parser.add_argument(
parser.add_argument("username", help="Username") "database",
parser.add_argument("password", nargs="?", default="", help="Password (optional)") help="Database name or '*' to dump all databases",
)
parser.add_argument("username", help="Database username")
parser.add_argument("password", help="Database password")
args = parser.parse_args() args = parser.parse_args()
check_and_add_entry( try:
args.file_path, args.instance, args.database, args.username, args.password check_and_add_entry(
) file_path=args.file,
instance=args.instance,
database=args.database,
username=args.username,
password=args.password,
)
except Exception as exc:
print(f"ERROR: {exc}", file=sys.stderr)
sys.exit(1)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -13,15 +13,6 @@ class TestE2ECLIContractDumpOnlySql(unittest.TestCase):
f"Expected '--dump-only-sql' to appear in --help output. Output:\n{out}", f"Expected '--dump-only-sql' to appear in --help output. Output:\n{out}",
) )
def test_help_does_not_mention_old_flag(self) -> None:
cp = run(["baudolo", "--help"], capture=True, check=True)
out = (cp.stdout or "") + "\n" + (cp.stderr or "")
self.assertNotIn(
"--dump-only",
out,
f"Did not expect legacy '--dump-only' to appear in --help output. Output:\n{out}",
)
def test_old_flag_is_rejected(self) -> None: def test_old_flag_is_rejected(self) -> None:
cp = run(["baudolo", "--dump-only"], capture=True, check=False) cp = run(["baudolo", "--dump-only"], capture=True, check=False)
self.assertEqual( self.assertEqual(

View File

@@ -145,7 +145,7 @@ class TestE2EDumpOnlySqlMixedRun(unittest.TestCase):
def test_db_volume_has_dump_and_no_files_dir(self) -> None: def test_db_volume_has_dump_and_no_files_dir(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume) base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
dumps = base / "dumps" dumps = base / "sql"
files = base / "files" files = base / "files"
self.assertTrue(dumps.exists(), f"Expected dumps dir for DB volume at: {dumps}") self.assertTrue(dumps.exists(), f"Expected dumps dir for DB volume at: {dumps}")

View File

@@ -0,0 +1,131 @@
# tests/e2e/test_e2e_images_no_backup_required_early_skip.py
import unittest
from .helpers import (
backup_path,
cleanup_docker,
create_minimal_compose_dir,
ensure_empty_dir,
latest_version_dir,
require_docker,
run,
unique,
write_databases_csv,
)
class TestE2EImagesNoBackupRequiredEarlySkip(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
require_docker()
cls.prefix = unique("baudolo-e2e-early-skip-no-backup-required")
cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
ensure_empty_dir(cls.backups_dir)
cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
cls.repo_name = cls.prefix
# --- Docker resources ---
cls.redis_container = f"{cls.prefix}-redis"
cls.ignored_volume = f"{cls.prefix}-redis-vol"
cls.normal_volume = f"{cls.prefix}-files-vol"
cls.containers = [cls.redis_container]
cls.volumes = [cls.ignored_volume, cls.normal_volume]
# Create volumes
run(["docker", "volume", "create", cls.ignored_volume])
run(["docker", "volume", "create", cls.normal_volume])
# Start redis container using the ignored volume
run(
[
"docker",
"run",
"-d",
"--name",
cls.redis_container,
"-v",
f"{cls.ignored_volume}:/data",
"redis:alpine",
]
)
# Put deterministic content into the normal volume
run(
[
"docker",
"run",
"--rm",
"-v",
f"{cls.normal_volume}:/data",
"alpine:3.20",
"sh",
"-lc",
"mkdir -p /data && echo 'hello' > /data/hello.txt",
]
)
# databases.csv required by CLI (can be empty)
cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
write_databases_csv(cls.databases_csv, [])
# Run baudolo with images-no-backup-required redis
cmd = [
"baudolo",
"--compose-dir",
cls.compose_dir,
"--docker-compose-hard-restart-required",
"mailu",
"--repo-name",
cls.repo_name,
"--databases-csv",
cls.databases_csv,
"--backups-dir",
cls.backups_dir,
"--database-containers",
"dummy-db",
"--images-no-stop-required",
"alpine",
"redis",
"postgres",
"mariadb",
"mysql",
"--images-no-backup-required",
"redis",
]
cp = run(cmd, capture=True, check=True)
cls.stdout = cp.stdout or ""
cls.stderr = cp.stderr or ""
cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
@classmethod
def tearDownClass(cls) -> None:
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
def test_ignored_volume_has_no_backup_directory_at_all(self) -> None:
p = backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.ignored_volume,
)
self.assertFalse(
p.exists(),
f"Expected NO backup directory to be created for ignored volume, but found: {p}",
)
def test_normal_volume_is_still_backed_up(self) -> None:
p = (
backup_path(
self.backups_dir,
self.repo_name,
self.version,
self.normal_volume,
)
/ "files"
/ "hello.txt"
)
self.assertTrue(p.is_file(), f"Expected backed up file at: {p}")

View File

@@ -0,0 +1,217 @@
import unittest
from .helpers import (
backup_path,
cleanup_docker,
create_minimal_compose_dir,
ensure_empty_dir,
latest_version_dir,
require_docker,
run,
unique,
wait_for_postgres,
)
class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
require_docker()
cls.prefix = unique("baudolo-e2e-seed-star-and-db")
cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
ensure_empty_dir(cls.backups_dir)
cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
cls.repo_name = cls.prefix
# --- Volumes ---
cls.db_volume = f"{cls.prefix}-vol-db"
cls.files_volume = f"{cls.prefix}-vol-files"
cls.volumes = [cls.db_volume, cls.files_volume]
run(["docker", "volume", "create", cls.db_volume])
run(["docker", "volume", "create", cls.files_volume])
# Put a marker into the non-db volume
cls.marker = "hello-non-db-seed-star"
run(
[
"docker",
"run",
"--rm",
"-v",
f"{cls.files_volume}:/data",
"alpine:3.20",
"sh",
"-lc",
f"echo '{cls.marker}' > /data/hello.txt",
]
)
# --- Start Postgres container using the DB volume ---
cls.pg_container = f"{cls.prefix}-pg"
cls.containers = [cls.pg_container]
cls.pg_password = "postgres"
cls.pg_user = "postgres"
run(
[
"docker",
"run",
"-d",
"--name",
cls.pg_container,
"-e",
f"POSTGRES_PASSWORD={cls.pg_password}",
"-v",
f"{cls.db_volume}:/var/lib/postgresql/data",
"postgres:16-alpine",
]
)
wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
# Create two DBs and deterministic content, so pg_dumpall is meaningful
cls.pg_db1 = "testdb1"
cls.pg_db2 = "testdb2"
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db1};" || true; '
f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db2};" || true; '
),
],
check=True,
)
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db1} -c '
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db1') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
),
],
check=True,
)
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db2} -c '
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db2') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
),
],
check=True,
)
# --- Seed databases.csv using CLI (star + concrete db) ---
cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
# IMPORTANT: because we pass --database-containers <container>,
# get_instance() will use the container name as instance key.
instance = cls.pg_container
# Seed star entry (pg_dumpall)
run(["baudolo-seed", cls.databases_csv, instance, "*", cls.pg_user, cls.pg_password])
# Seed concrete DB entry (pg_dump)
run(
[
"baudolo-seed",
cls.databases_csv,
instance,
cls.pg_db1,
cls.pg_user,
cls.pg_password,
]
)
# --- Run baudolo with dump-only-sql ---
cmd = [
"baudolo",
"--compose-dir",
cls.compose_dir,
"--databases-csv",
cls.databases_csv,
"--database-containers",
cls.pg_container,
"--images-no-stop-required",
"alpine",
"postgres",
"mariadb",
"mysql",
"--dump-only-sql",
"--backups-dir",
cls.backups_dir,
"--repo-name",
cls.repo_name,
]
cp = run(cmd, capture=True, check=True)
cls.stdout = cp.stdout or ""
cls.stderr = cp.stderr or ""
cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
@classmethod
def tearDownClass(cls) -> None:
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
def test_db_volume_has_cluster_dump_and_concrete_db_dump_and_no_files(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
sql_dir = base / "sql"
files_dir = base / "files"
self.assertTrue(sql_dir.exists(), f"Expected sql dir at: {sql_dir}")
self.assertFalse(
files_dir.exists(),
f"Did not expect files dir for DB volume when dump-only-sql succeeded: {files_dir}",
)
# Cluster dump file produced by '*' entry
cluster = sql_dir / f"{self.pg_container}.cluster.backup.sql"
self.assertTrue(cluster.is_file(), f"Expected cluster dump file at: {cluster}")
# Concrete DB dump produced by normal entry
db1 = sql_dir / f"{self.pg_db1}.backup.sql"
self.assertTrue(db1.is_file(), f"Expected db dump file at: {db1}")
# Basic sanity: cluster dump usually contains CREATE DATABASE statements
txt = cluster.read_text(encoding="utf-8", errors="ignore")
self.assertIn(
"CREATE DATABASE",
txt,
"Expected cluster dump to contain CREATE DATABASE statements",
)
def test_non_db_volume_still_has_files_backup(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.files_volume)
files_dir = base / "files"
self.assertTrue(files_dir.exists(), f"Expected files dir for non-DB volume at: {files_dir}")
marker = files_dir / "hello.txt"
self.assertTrue(marker.is_file(), f"Expected marker file at: {marker}")
self.assertEqual(
marker.read_text(encoding="utf-8").strip(),
self.marker,
)

View File

@@ -7,9 +7,47 @@ from pathlib import Path
def run_seed( def run_seed(
csv_path: Path, instance: str, database: str, username: str, password: str = "" csv_path: Path, instance: str, database: str, username: str, password: str
) -> subprocess.CompletedProcess: ) -> subprocess.CompletedProcess:
# Run the real CLI module (integration-style). """
Run the real CLI module (E2E-style) using subprocess.
Seed contract (current):
- database must be "*" or a valid name (non-empty, matches allowed charset)
- password is required
- entry is keyed by (instance, database); username/password get updated
"""
cp = subprocess.run(
[
sys.executable,
"-m",
"baudolo.seed",
str(csv_path),
instance,
database,
username,
password,
],
text=True,
capture_output=True,
check=False,
)
if cp.returncode != 0:
raise AssertionError(
"seed command failed unexpectedly.\n"
f"returncode: {cp.returncode}\n"
f"stdout:\n{cp.stdout}\n"
f"stderr:\n{cp.stderr}\n"
)
return cp
def run_seed_expect_fail(
csv_path: Path, instance: str, database: str, username: str, password: str
) -> subprocess.CompletedProcess:
"""
Same as run_seed, but expects non-zero exit. Returns CompletedProcess for inspection.
"""
return subprocess.run( return subprocess.run(
[ [
sys.executable, sys.executable,
@@ -23,7 +61,7 @@ def run_seed(
], ],
text=True, text=True,
capture_output=True, capture_output=True,
check=True, check=False,
) )
@@ -33,6 +71,10 @@ def read_csv_semicolon(path: Path) -> list[dict]:
return list(reader) return list(reader)
def read_text(path: Path) -> str:
return path.read_text(encoding="utf-8")
class TestSeedIntegration(unittest.TestCase): class TestSeedIntegration(unittest.TestCase):
def test_creates_file_and_adds_entry_when_missing(self) -> None: def test_creates_file_and_adds_entry_when_missing(self) -> None:
with tempfile.TemporaryDirectory() as td: with tempfile.TemporaryDirectory() as td:
@@ -41,7 +83,7 @@ class TestSeedIntegration(unittest.TestCase):
cp = run_seed(p, "docker.test", "appdb", "alice", "secret") cp = run_seed(p, "docker.test", "appdb", "alice", "secret")
self.assertEqual(cp.returncode, 0, cp.stderr) self.assertEqual(cp.returncode, 0)
self.assertTrue(p.exists()) self.assertTrue(p.exists())
rows = read_csv_semicolon(p) rows = read_csv_semicolon(p)
@@ -51,40 +93,121 @@ class TestSeedIntegration(unittest.TestCase):
self.assertEqual(rows[0]["username"], "alice") self.assertEqual(rows[0]["username"], "alice")
self.assertEqual(rows[0]["password"], "secret") self.assertEqual(rows[0]["password"], "secret")
def test_replaces_existing_entry_same_keys(self) -> None: def test_replaces_existing_entry_same_instance_and_database_updates_username_and_password(
self,
) -> None:
"""
Replacement semantics:
- Key is (instance, database)
- username/password are updated in-place
"""
with tempfile.TemporaryDirectory() as td: with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv" p = Path(td) / "databases.csv"
# First add
run_seed(p, "docker.test", "appdb", "alice", "oldpw") run_seed(p, "docker.test", "appdb", "alice", "oldpw")
rows = read_csv_semicolon(p) rows = read_csv_semicolon(p)
self.assertEqual(len(rows), 1) self.assertEqual(len(rows), 1)
self.assertEqual(rows[0]["username"], "alice")
self.assertEqual(rows[0]["password"], "oldpw") self.assertEqual(rows[0]["password"], "oldpw")
# Replace (same instance+database+username) run_seed(p, "docker.test", "appdb", "bob", "newpw")
run_seed(p, "docker.test", "appdb", "alice", "newpw")
rows = read_csv_semicolon(p) rows = read_csv_semicolon(p)
self.assertEqual(len(rows), 1, "Expected replacement, not a duplicate row") self.assertEqual(len(rows), 1, "Expected replacement, not a duplicate row")
self.assertEqual(rows[0]["instance"], "docker.test") self.assertEqual(rows[0]["instance"], "docker.test")
self.assertEqual(rows[0]["database"], "appdb") self.assertEqual(rows[0]["database"], "appdb")
self.assertEqual(rows[0]["username"], "alice") self.assertEqual(rows[0]["username"], "bob")
self.assertEqual(rows[0]["password"], "newpw") self.assertEqual(rows[0]["password"], "newpw")
def test_database_empty_string_matches_existing_empty_database(self) -> None: def test_allows_star_database_for_dump_all(self) -> None:
with tempfile.TemporaryDirectory() as td: with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv" p = Path(td) / "databases.csv"
# Add with empty database cp = run_seed(p, "bigbluebutton", "*", "postgres", "pw")
run_seed(p, "docker.test", "", "alice", "pw1") self.assertEqual(cp.returncode, 0)
rows = read_csv_semicolon(p) rows = read_csv_semicolon(p)
self.assertEqual(len(rows), 1) self.assertEqual(len(rows), 1)
self.assertEqual(rows[0]["database"], "") self.assertEqual(rows[0]["instance"], "bigbluebutton")
self.assertEqual(rows[0]["database"], "*")
self.assertEqual(rows[0]["username"], "postgres")
self.assertEqual(rows[0]["password"], "pw")
def test_replaces_existing_star_entry(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
run_seed(p, "bigbluebutton", "*", "postgres", "pw1")
run_seed(p, "bigbluebutton", "*", "postgres", "pw2")
# Replace with empty database again
run_seed(p, "docker.test", "", "alice", "pw2")
rows = read_csv_semicolon(p) rows = read_csv_semicolon(p)
self.assertEqual(len(rows), 1) self.assertEqual(len(rows), 1)
self.assertEqual(rows[0]["database"], "") self.assertEqual(rows[0]["database"], "*")
self.assertEqual(rows[0]["password"], "pw2") self.assertEqual(rows[0]["password"], "pw2")
def test_rejects_empty_database_value(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
cp = run_seed_expect_fail(p, "docker.test", "", "alice", "pw")
self.assertNotEqual(cp.returncode, 0)
combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
self.assertIn("error:", combined)
self.assertIn("database", combined)
self.assertIn("not empty", combined)
self.assertFalse(p.exists(), "Should not create file on invalid input")
def test_rejects_invalid_database_name_characters(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
cp = run_seed_expect_fail(p, "docker.test", "app db", "alice", "pw")
self.assertNotEqual(cp.returncode, 0)
combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
self.assertIn("error:", combined)
self.assertIn("invalid database name", combined)
self.assertFalse(p.exists(), "Should not create file on invalid input")
def test_rejects_nan_database_name(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
cp = run_seed_expect_fail(p, "docker.test", "nan", "alice", "pw")
self.assertNotEqual(cp.returncode, 0)
combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
self.assertIn("error:", combined)
self.assertIn("must not be 'nan'", combined)
self.assertFalse(p.exists(), "Should not create file on invalid input")
def test_accepts_hyphen_and_underscore_database_names(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
run_seed(p, "docker.test", "my_db-1", "alice", "pw")
rows = read_csv_semicolon(p)
self.assertEqual(len(rows), 1)
self.assertEqual(rows[0]["database"], "my_db-1")
def test_file_is_semicolon_delimited_and_has_header(self) -> None:
with tempfile.TemporaryDirectory() as td:
p = Path(td) / "databases.csv"
run_seed(p, "docker.test", "appdb", "alice", "pw")
txt = read_text(p)
self.assertTrue(
txt.startswith("instance;database;username;password"),
f"Unexpected header / delimiter in file:\n{txt}",
)
self.assertIn(";", txt)
if __name__ == "__main__":
unittest.main()