feat(backup): stricter databases.csv semantics + atomic SQL dumps

- read databases.csv with stable types (dtype=str, keep_default_na=False) - validate database field: require '*' or concrete name (no empty/NaN) - support Postgres cluster dumps via '*' entries (pg_dumpall) - write SQL dumps atomically to avoid partial/empty files - early-skip fully ignored volumes before creating backup directories - update seed CLI to enforce new contract and update by (instance,database) - adjust tests: sql dir naming + add E2E coverage for early-skip and '*' seeding
2026-02-19 02:44:08 +00:00 · 2025-12-29 11:39:57 +01:00
parent c01ab55f2d
commit f3ef86a444
8 changed files with 667 additions and 109 deletions
--- a/src/baudolo/backup/app.py
+++ b/src/baudolo/backup/app.py
@@ -132,7 +132,12 @@ def main() -> int:
    versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name)
    version_dir = create_version_directory(versions_dir, backup_time)
-    databases_df = pandas.read_csv(args.databases_csv, sep=";")
+    # IMPORTANT:
    # - keep_default_na=False prevents empty fields from turning into NaN
    # - dtype=str keeps all columns stable for comparisons/validation
    databases_df = pandas.read_csv(
        args.databases_csv, sep=";", keep_default_na=False, dtype=str
    )
    print("💾 Start volume backups...", flush=True)
@@ -140,8 +145,16 @@ def main() -> int:
        print(f"Start backup routine for volume: {volume_name}", flush=True)
        containers = containers_using_volume(volume_name)
        # EARLY SKIP: if all linked containers are ignored, do not create any dirs
        if volume_is_fully_ignored(containers, args.images_no_backup_required):
            print(
                f"Skipping volume '{volume_name}' entirely (all linked containers are ignored).",
                flush=True,
            )
            continue
        vol_dir = create_volume_directory(version_dir, volume_name)
-            
+
        found_db, dumped_any = _backup_dumps_for_volume(
            containers=containers,
            vol_dir=vol_dir,
@@ -163,16 +176,6 @@ def main() -> int:
                    continue
            # Non-DB volume -> always do file backup (fall through)
        # skip file backup if all linked containers are ignored
        if volume_is_fully_ignored(containers, args.images_no_backup_required):
            print(
                f"Skipping file backup for volume '{volume_name}' (all linked containers are ignored).",
                flush=True,
            )
            continue
        if args.everything:
            # "everything": always do pre-rsync, then stop + rsync again
            backup_volume(versions_dir, volume_name, vol_dir)
--- a/src/baudolo/backup/db.py
+++ b/src/baudolo/backup/db.py
@@ -4,6 +4,8 @@ import os
 import pathlib
 import re
 import logging
 from typing import Optional
 import pandas
 from .shell import BackupException, execute_shell_command
@@ -12,17 +14,53 @@ log = logging.getLogger(__name__)
 def get_instance(container: str, database_containers: list[str]) -> str:
    """
    Derive a stable instance name from the container name.
    """
    if container in database_containers:
        return container
    return re.split(r"(_|-)(database|db|postgres)", container)[0]
 def _validate_database_value(value: Optional[str], *, instance: str) -> str:
    """
    Enforce explicit database semantics:
    - "*"       => dump ALL databases (cluster dump for Postgres)
    - "<name>"  => dump exactly this database
    - ""        => invalid configuration (would previously result in NaN / nan.backup.sql)
    """
    v = (value or "").strip()
    if v == "":
        raise ValueError(
            f"Invalid databases.csv entry for instance '{instance}': "
            "column 'database' must be '*' or a concrete database name (not empty)."
        )
    return v
 def _atomic_write_cmd(cmd: str, out_file: str) -> None:
    """
    Write dump output atomically:
    - write to <file>.tmp
    - rename to <file> only on success
    This prevents empty or partial dump files from being treated as valid backups.
    """
    tmp = f"{out_file}.tmp"
    execute_shell_command(f"{cmd} > {tmp}")
    execute_shell_command(f"mv {tmp} {out_file}")
 def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
    """
    Perform a full Postgres cluster dump using pg_dumpall.
    """
    cmd = (
        f"PGPASSWORD={password} docker exec -i {container} "
-        f"pg_dumpall -U {username} -h localhost > {out_file}"
+        f"pg_dumpall -U {username} -h localhost"
    )
-    execute_shell_command(cmd)
+    _atomic_write_cmd(cmd, out_file)
 def backup_database(
@@ -34,12 +72,15 @@ def backup_database(
    database_containers: list[str],
 ) -> bool:
    """
-    Returns True if at least one dump file was produced, else False.
+    Backup databases for a given DB container.
    Returns True if at least one dump was produced.
    """
    instance_name = get_instance(container, database_containers)
-    entries = databases_df.loc[databases_df["instance"] == instance_name]
+
    entries = databases_df[databases_df["instance"] == instance_name]
    if entries.empty:
-        log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
+        log.debug("No database entries for instance '%s'", instance_name)
        return False
    out_dir = os.path.join(volume_dir, "sql")
@@ -48,43 +89,56 @@ def backup_database(
    produced = False
    for row in entries.itertuples(index=False):
-        db_name = row.database
+        raw_db = getattr(row, "database", "")
-        user = row.username
+        user = (getattr(row, "username", "") or "").strip()
-        password = row.password
+        password = (getattr(row, "password", "") or "").strip()
        db_value = _validate_database_value(raw_db, instance=instance_name)
        # Explicit: dump ALL databases
        if db_value == "*":
            if db_type != "postgres":
                raise ValueError(
                    f"databases.csv entry for instance '{instance_name}': "
                    "'*' is currently only supported for Postgres."
                )
            cluster_file = os.path.join(
                out_dir, f"{instance_name}.cluster.backup.sql"
            )
            fallback_pg_dumpall(container, user, password, cluster_file)
            produced = True
            continue
        # Concrete database dump
        db_name = db_value
        dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
        if db_type == "mariadb":
            cmd = (
                f"docker exec {container} /usr/bin/mariadb-dump "
-                f"-u {user} -p{password} {db_name} > {dump_file}"
+                f"-u {user} -p{password} {db_name}"
            )
-            execute_shell_command(cmd)
+            _atomic_write_cmd(cmd, dump_file)
            produced = True
            continue
        if db_type == "postgres":
            cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
            if not db_name:
                fallback_pg_dumpall(container, user, password, cluster_file)
                return True
            try:
                cmd = (
                    f"PGPASSWORD={password} docker exec -i {container} "
-                    f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
+                    f"pg_dump -U {user} -d {db_name} -h localhost"
                )
-                execute_shell_command(cmd)
+                _atomic_write_cmd(cmd, dump_file)
                produced = True
            except BackupException as e:
-                print(f"pg_dump failed: {e}", flush=True)
+                # Explicit DB dump failed -> hard error
-                print(
+                raise BackupException(
-                    f"Falling back to pg_dumpall for instance '{instance_name}'",
+                    f"Postgres dump failed for instance '{instance_name}', "
-                    flush=True,
+                    f"database '{db_name}'. This database was explicitly configured "
                    "and therefore must succeed.\n"
                    f"{e}"
                )
                fallback_pg_dumpall(container, user, password, cluster_file)
                produced = True
            continue
    return produced
--- a/src/baudolo/seed/main.py
+++ b/src/baudolo/seed/main.py
@@ -1,67 +1,106 @@
-import pandas as pd
+#!/usr/bin/env python3
 from __future__ import annotations
 import argparse
 import os
 import re
 import sys
 import pandas as pd
 from typing import Optional
-def check_and_add_entry(file_path, instance, database, username, password):
+DB_NAME_RE = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_-]*$")
    # Check if the file exists and is not empty
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        # Read the existing CSV file with header
        df = pd.read_csv(file_path, sep=";")
    else:
        # Create a new DataFrame with columns if file does not exist
        df = pd.DataFrame(columns=["instance", "database", "username", "password"])
-    # Check if the entry exists and remove it
+def _validate_database_value(value: Optional[str], *, instance: str) -> str:
-    mask = (
+    v = (value or "").strip()
-        (df["instance"] == instance)
+    if v == "":
-        & (
+        raise ValueError(
-            (df["database"] == database)
+            f"Invalid databases.csv entry for instance '{instance}': "
-            | (((df["database"].isna()) | (df["database"] == "")) & (database == ""))
+            "column 'database' must be '*' or a concrete database name (not empty)."
        )
-        & (df["username"] == username)
+    if v == "*":
-    )
+        return "*"
    if v.lower() == "nan":
        raise ValueError(
            f"Invalid databases.csv entry for instance '{instance}': database must not be 'nan'."
        )
    if not DB_NAME_RE.match(v):
        raise ValueError(
            f"Invalid databases.csv entry for instance '{instance}': "
            f"invalid database name '{v}'. Allowed: letters, numbers, '_' and '-'."
        )
    return v
-    if not df[mask].empty:
+def check_and_add_entry(
-        print("Replacing existing entry.")
+    file_path: str,
-        df = df[~mask]
+    instance: str,
    database: Optional[str],
    username: str,
    password: str,
 ) -> None:
    """
    Add or update an entry in databases.csv.
    The function enforces strict validation:
    - database MUST be set
    - database MUST be '*' or a valid database name
    """
    database = _validate_database_value(database, instance=instance)
    if os.path.exists(file_path):
        df = pd.read_csv(
            file_path,
            sep=";",
            dtype=str,
            keep_default_na=False,
        )
    else:
        df = pd.DataFrame(
            columns=["instance", "database", "username", "password"]
        )
    mask = (df["instance"] == instance) & (df["database"] == database)
    if mask.any():
        print("Updating existing entry.")
        df.loc[mask, ["username", "password"]] = [username, password]
    else:
        print("Adding new entry.")
        new_entry = pd.DataFrame(
            [[instance, database, username, password]],
            columns=["instance", "database", "username", "password"],
        )
        df = pd.concat([df, new_entry], ignore_index=True)
    # Create a new DataFrame for the new entry
    new_entry = pd.DataFrame(
        [
            {
                "instance": instance,
                "database": database,
                "username": username,
                "password": password,
            }
        ]
    )
    # Add (or replace) the entry using concat
    df = pd.concat([df, new_entry], ignore_index=True)
    # Save the updated CSV file
    df.to_csv(file_path, sep=";", index=False)
-def main():
+def main() -> None:
    parser = argparse.ArgumentParser(
-        description="Check and replace (or add) a database entry in a CSV file."
+        description="Seed or update databases.csv for backup configuration."
    )
-    parser.add_argument("file_path", help="Path to the CSV file")
+    parser.add_argument("file", help="Path to databases.csv")
-    parser.add_argument("instance", help="Database instance")
+    parser.add_argument("instance", help="Instance name (e.g. bigbluebutton)")
-    parser.add_argument("database", help="Database name")
+    parser.add_argument(
-    parser.add_argument("username", help="Username")
+        "database",
-    parser.add_argument("password", nargs="?", default="", help="Password (optional)")
+        help="Database name or '*' to dump all databases",
    )
    parser.add_argument("username", help="Database username")
    parser.add_argument("password", help="Database password")
    args = parser.parse_args()
-    check_and_add_entry(
+    try:
-        args.file_path, args.instance, args.database, args.username, args.password
+        check_and_add_entry(
-    )
+            file_path=args.file,
            instance=args.instance,
            database=args.database,
            username=args.username,
            password=args.password,
        )
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(1)
 if __name__ == "__main__":
--- a/tests/e2e/test_e2e_cli_contract_dump_only_sql.py
+++ b/tests/e2e/test_e2e_cli_contract_dump_only_sql.py
@@ -13,15 +13,6 @@ class TestE2ECLIContractDumpOnlySql(unittest.TestCase):
            f"Expected '--dump-only-sql' to appear in --help output. Output:\n{out}",
        )
    def test_help_does_not_mention_old_flag(self) -> None:
        cp = run(["baudolo", "--help"], capture=True, check=True)
        out = (cp.stdout or "") + "\n" + (cp.stderr or "")
        self.assertNotIn(
            "--dump-only",
            out,
            f"Did not expect legacy '--dump-only' to appear in --help output. Output:\n{out}",
        )
    def test_old_flag_is_rejected(self) -> None:
        cp = run(["baudolo", "--dump-only"], capture=True, check=False)
        self.assertEqual(
--- a/tests/e2e/test_e2e_dump_only_sql_mixed_run.py
+++ b/tests/e2e/test_e2e_dump_only_sql_mixed_run.py
@@ -145,7 +145,7 @@ class TestE2EDumpOnlySqlMixedRun(unittest.TestCase):
    def test_db_volume_has_dump_and_no_files_dir(self) -> None:
        base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
-        dumps = base / "dumps"
+        dumps = base / "sql"
        files = base / "files"
        self.assertTrue(dumps.exists(), f"Expected dumps dir for DB volume at: {dumps}")
--- a/tests/e2e/test_e2e_images_no_backup_required_early_skip.py
+++ b/tests/e2e/test_e2e_images_no_backup_required_early_skip.py
@@ -0,0 +1,131 @@
 # tests/e2e/test_e2e_images_no_backup_required_early_skip.py
 import unittest
 from .helpers import (
    backup_path,
    cleanup_docker,
    create_minimal_compose_dir,
    ensure_empty_dir,
    latest_version_dir,
    require_docker,
    run,
    unique,
    write_databases_csv,
 )
 class TestE2EImagesNoBackupRequiredEarlySkip(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        require_docker()
        cls.prefix = unique("baudolo-e2e-early-skip-no-backup-required")
        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
        ensure_empty_dir(cls.backups_dir)
        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
        cls.repo_name = cls.prefix
        # --- Docker resources ---
        cls.redis_container = f"{cls.prefix}-redis"
        cls.ignored_volume = f"{cls.prefix}-redis-vol"
        cls.normal_volume = f"{cls.prefix}-files-vol"
        cls.containers = [cls.redis_container]
        cls.volumes = [cls.ignored_volume, cls.normal_volume]
        # Create volumes
        run(["docker", "volume", "create", cls.ignored_volume])
        run(["docker", "volume", "create", cls.normal_volume])
        # Start redis container using the ignored volume
        run(
            [
                "docker",
                "run",
                "-d",
                "--name",
                cls.redis_container,
                "-v",
                f"{cls.ignored_volume}:/data",
                "redis:alpine",
            ]
        )
        # Put deterministic content into the normal volume
        run(
            [
                "docker",
                "run",
                "--rm",
                "-v",
                f"{cls.normal_volume}:/data",
                "alpine:3.20",
                "sh",
                "-lc",
                "mkdir -p /data && echo 'hello' > /data/hello.txt",
            ]
        )
        # databases.csv required by CLI (can be empty)
        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
        write_databases_csv(cls.databases_csv, [])
        # Run baudolo with images-no-backup-required redis
        cmd = [
            "baudolo",
            "--compose-dir",
            cls.compose_dir,
            "--docker-compose-hard-restart-required",
            "mailu",
            "--repo-name",
            cls.repo_name,
            "--databases-csv",
            cls.databases_csv,
            "--backups-dir",
            cls.backups_dir,
            "--database-containers",
            "dummy-db",
            "--images-no-stop-required",
            "alpine",
            "redis",
            "postgres",
            "mariadb",
            "mysql",
            "--images-no-backup-required",
            "redis",
        ]
        cp = run(cmd, capture=True, check=True)
        cls.stdout = cp.stdout or ""
        cls.stderr = cp.stderr or ""
        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
    @classmethod
    def tearDownClass(cls) -> None:
        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
    def test_ignored_volume_has_no_backup_directory_at_all(self) -> None:
        p = backup_path(
            self.backups_dir,
            self.repo_name,
            self.version,
            self.ignored_volume,
        )
        self.assertFalse(
            p.exists(),
            f"Expected NO backup directory to be created for ignored volume, but found: {p}",
        )
    def test_normal_volume_is_still_backed_up(self) -> None:
        p = (
            backup_path(
                self.backups_dir,
                self.repo_name,
                self.version,
                self.normal_volume,
            )
            / "files"
            / "hello.txt"
        )
        self.assertTrue(p.is_file(), f"Expected backed up file at: {p}")
--- a/tests/e2e/test_e2e_seed_star_and_db_entries_backup_postgres.py
+++ b/tests/e2e/test_e2e_seed_star_and_db_entries_backup_postgres.py
@@ -0,0 +1,217 @@
 import unittest
 from .helpers import (
    backup_path,
    cleanup_docker,
    create_minimal_compose_dir,
    ensure_empty_dir,
    latest_version_dir,
    require_docker,
    run,
    unique,
    wait_for_postgres,
 )
 class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        require_docker()
        cls.prefix = unique("baudolo-e2e-seed-star-and-db")
        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
        ensure_empty_dir(cls.backups_dir)
        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
        cls.repo_name = cls.prefix
        # --- Volumes ---
        cls.db_volume = f"{cls.prefix}-vol-db"
        cls.files_volume = f"{cls.prefix}-vol-files"
        cls.volumes = [cls.db_volume, cls.files_volume]
        run(["docker", "volume", "create", cls.db_volume])
        run(["docker", "volume", "create", cls.files_volume])
        # Put a marker into the non-db volume
        cls.marker = "hello-non-db-seed-star"
        run(
            [
                "docker",
                "run",
                "--rm",
                "-v",
                f"{cls.files_volume}:/data",
                "alpine:3.20",
                "sh",
                "-lc",
                f"echo '{cls.marker}' > /data/hello.txt",
            ]
        )
        # --- Start Postgres container using the DB volume ---
        cls.pg_container = f"{cls.prefix}-pg"
        cls.containers = [cls.pg_container]
        cls.pg_password = "postgres"
        cls.pg_user = "postgres"
        run(
            [
                "docker",
                "run",
                "-d",
                "--name",
                cls.pg_container,
                "-e",
                f"POSTGRES_PASSWORD={cls.pg_password}",
                "-v",
                f"{cls.db_volume}:/var/lib/postgresql/data",
                "postgres:16-alpine",
            ]
        )
        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
        # Create two DBs and deterministic content, so pg_dumpall is meaningful
        cls.pg_db1 = "testdb1"
        cls.pg_db2 = "testdb2"
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                (
                    f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db1};" || true; '
                    f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db2};" || true; '
                ),
            ],
            check=True,
        )
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                (
                    f'psql -U {cls.pg_user} -d {cls.pg_db1} -c '
                    '"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
                    "INSERT INTO t(id,v) VALUES (1,'hello-db1') "
                    "ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
                ),
            ],
            check=True,
        )
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                (
                    f'psql -U {cls.pg_user} -d {cls.pg_db2} -c '
                    '"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
                    "INSERT INTO t(id,v) VALUES (1,'hello-db2') "
                    "ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
                ),
            ],
            check=True,
        )
        # --- Seed databases.csv using CLI (star + concrete db) ---
        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
        # IMPORTANT: because we pass --database-containers <container>,
        # get_instance() will use the container name as instance key.
        instance = cls.pg_container
        # Seed star entry (pg_dumpall)
        run(["baudolo-seed", cls.databases_csv, instance, "*", cls.pg_user, cls.pg_password])
        # Seed concrete DB entry (pg_dump)
        run(
            [
                "baudolo-seed",
                cls.databases_csv,
                instance,
                cls.pg_db1,
                cls.pg_user,
                cls.pg_password,
            ]
        )
        # --- Run baudolo with dump-only-sql ---
        cmd = [
            "baudolo",
            "--compose-dir",
            cls.compose_dir,
            "--databases-csv",
            cls.databases_csv,
            "--database-containers",
            cls.pg_container,
            "--images-no-stop-required",
            "alpine",
            "postgres",
            "mariadb",
            "mysql",
            "--dump-only-sql",
            "--backups-dir",
            cls.backups_dir,
            "--repo-name",
            cls.repo_name,
        ]
        cp = run(cmd, capture=True, check=True)
        cls.stdout = cp.stdout or ""
        cls.stderr = cp.stderr or ""
        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
    @classmethod
    def tearDownClass(cls) -> None:
        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
    def test_db_volume_has_cluster_dump_and_concrete_db_dump_and_no_files(self) -> None:
        base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
        sql_dir = base / "sql"
        files_dir = base / "files"
        self.assertTrue(sql_dir.exists(), f"Expected sql dir at: {sql_dir}")
        self.assertFalse(
            files_dir.exists(),
            f"Did not expect files dir for DB volume when dump-only-sql succeeded: {files_dir}",
        )
        # Cluster dump file produced by '*' entry
        cluster = sql_dir / f"{self.pg_container}.cluster.backup.sql"
        self.assertTrue(cluster.is_file(), f"Expected cluster dump file at: {cluster}")
        # Concrete DB dump produced by normal entry
        db1 = sql_dir / f"{self.pg_db1}.backup.sql"
        self.assertTrue(db1.is_file(), f"Expected db dump file at: {db1}")
        # Basic sanity: cluster dump usually contains CREATE DATABASE statements
        txt = cluster.read_text(encoding="utf-8", errors="ignore")
        self.assertIn(
            "CREATE DATABASE",
            txt,
            "Expected cluster dump to contain CREATE DATABASE statements",
        )
    def test_non_db_volume_still_has_files_backup(self) -> None:
        base = backup_path(self.backups_dir, self.repo_name, self.version, self.files_volume)
        files_dir = base / "files"
        self.assertTrue(files_dir.exists(), f"Expected files dir for non-DB volume at: {files_dir}")
        marker = files_dir / "hello.txt"
        self.assertTrue(marker.is_file(), f"Expected marker file at: {marker}")
        self.assertEqual(
            marker.read_text(encoding="utf-8").strip(),
            self.marker,
        )
--- a/tests/integration/test_seed_integration.py
+++ b/tests/integration/test_seed_integration.py
@@ -7,9 +7,47 @@ from pathlib import Path
 def run_seed(
-    csv_path: Path, instance: str, database: str, username: str, password: str = ""
+    csv_path: Path, instance: str, database: str, username: str, password: str
 ) -> subprocess.CompletedProcess:
-    # Run the real CLI module (integration-style).
+    """
    Run the real CLI module (E2E-style) using subprocess.
    Seed contract (current):
    - database must be "*" or a valid name (non-empty, matches allowed charset)
    - password is required
    - entry is keyed by (instance, database); username/password get updated
    """
    cp = subprocess.run(
        [
            sys.executable,
            "-m",
            "baudolo.seed",
            str(csv_path),
            instance,
            database,
            username,
            password,
        ],
        text=True,
        capture_output=True,
        check=False,
    )
    if cp.returncode != 0:
        raise AssertionError(
            "seed command failed unexpectedly.\n"
            f"returncode: {cp.returncode}\n"
            f"stdout:\n{cp.stdout}\n"
            f"stderr:\n{cp.stderr}\n"
        )
    return cp
 def run_seed_expect_fail(
    csv_path: Path, instance: str, database: str, username: str, password: str
 ) -> subprocess.CompletedProcess:
    """
    Same as run_seed, but expects non-zero exit. Returns CompletedProcess for inspection.
    """
    return subprocess.run(
        [
            sys.executable,
@@ -23,7 +61,7 @@ def run_seed(
        ],
        text=True,
        capture_output=True,
-        check=True,
+        check=False,
    )
@@ -33,6 +71,10 @@ def read_csv_semicolon(path: Path) -> list[dict]:
        return list(reader)
 def read_text(path: Path) -> str:
    return path.read_text(encoding="utf-8")
 class TestSeedIntegration(unittest.TestCase):
    def test_creates_file_and_adds_entry_when_missing(self) -> None:
        with tempfile.TemporaryDirectory() as td:
@@ -41,7 +83,7 @@ class TestSeedIntegration(unittest.TestCase):
            cp = run_seed(p, "docker.test", "appdb", "alice", "secret")
-            self.assertEqual(cp.returncode, 0, cp.stderr)
+            self.assertEqual(cp.returncode, 0)
            self.assertTrue(p.exists())
            rows = read_csv_semicolon(p)
@@ -51,40 +93,121 @@ class TestSeedIntegration(unittest.TestCase):
            self.assertEqual(rows[0]["username"], "alice")
            self.assertEqual(rows[0]["password"], "secret")
-    def test_replaces_existing_entry_same_keys(self) -> None:
+    def test_replaces_existing_entry_same_instance_and_database_updates_username_and_password(
        self,
    ) -> None:
        """
        Replacement semantics:
        - Key is (instance, database)
        - username/password are updated in-place
        """
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            # First add
            run_seed(p, "docker.test", "appdb", "alice", "oldpw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["username"], "alice")
            self.assertEqual(rows[0]["password"], "oldpw")
-            # Replace (same instance+database+username)
+            run_seed(p, "docker.test", "appdb", "bob", "newpw")
            run_seed(p, "docker.test", "appdb", "alice", "newpw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1, "Expected replacement, not a duplicate row")
            self.assertEqual(rows[0]["instance"], "docker.test")
            self.assertEqual(rows[0]["database"], "appdb")
-            self.assertEqual(rows[0]["username"], "alice")
+            self.assertEqual(rows[0]["username"], "bob")
            self.assertEqual(rows[0]["password"], "newpw")
-    def test_database_empty_string_matches_existing_empty_database(self) -> None:
+    def test_allows_star_database_for_dump_all(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
-            # Add with empty database
+            cp = run_seed(p, "bigbluebutton", "*", "postgres", "pw")
-            run_seed(p, "docker.test", "", "alice", "pw1")
+            self.assertEqual(cp.returncode, 0)
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
-            self.assertEqual(rows[0]["database"], "")
+            self.assertEqual(rows[0]["instance"], "bigbluebutton")
            self.assertEqual(rows[0]["database"], "*")
            self.assertEqual(rows[0]["username"], "postgres")
            self.assertEqual(rows[0]["password"], "pw")
    def test_replaces_existing_star_entry(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "bigbluebutton", "*", "postgres", "pw1")
            run_seed(p, "bigbluebutton", "*", "postgres", "pw2")
            # Replace with empty database again
            run_seed(p, "docker.test", "", "alice", "pw2")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
-            self.assertEqual(rows[0]["database"], "")
+            self.assertEqual(rows[0]["database"], "*")
            self.assertEqual(rows[0]["password"], "pw2")
    def test_rejects_empty_database_value(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("database", combined)
            self.assertIn("not empty", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")
    def test_rejects_invalid_database_name_characters(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "app db", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("invalid database name", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")
    def test_rejects_nan_database_name(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "nan", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("must not be 'nan'", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")
    def test_accepts_hyphen_and_underscore_database_names(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "docker.test", "my_db-1", "alice", "pw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["database"], "my_db-1")
    def test_file_is_semicolon_delimited_and_has_header(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "docker.test", "appdb", "alice", "pw")
            txt = read_text(p)
            self.assertTrue(
                txt.startswith("instance;database;username;password"),
                f"Unexpected header / delimiter in file:\n{txt}",
            )
            self.assertIn(";", txt)
 if __name__ == "__main__":
    unittest.main()