11 Commits

Author SHA1 Message Date
e4bc075474 Release version 1.2.0 2025-12-29 11:46:39 +01:00
f3ef86a444 feat(backup): stricter databases.csv semantics + atomic SQL dumps
- read databases.csv with stable types (dtype=str, keep_default_na=False)
- validate database field: require '*' or concrete name (no empty/NaN)
- support Postgres cluster dumps via '*' entries (pg_dumpall)
- write SQL dumps atomically to avoid partial/empty files
- early-skip fully ignored volumes before creating backup directories
- update seed CLI to enforce new contract and update by (instance,database)
- adjust tests: sql dir naming + add E2E coverage for early-skip and '*' seeding
2025-12-29 11:39:57 +01:00
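
The two mechanisms this commit combines, strict databases.csv parsing and atomic dump files, are easy to get wrong with pandas and shell-redirection defaults. A minimal sketch of both ideas under the semantics described above; the helper names here are illustrative, not the actual baudolo API:

```python
import os
import subprocess
import pandas as pd

def read_databases_csv(path: str) -> pd.DataFrame:
    # dtype=str + keep_default_na=False: an empty 'database' field stays ""
    # instead of becoming NaN, which previously leaked into file names
    # like 'nan.backup.sql'.
    return pd.read_csv(path, sep=";", dtype=str, keep_default_na=False)

def atomic_dump(cmd: str, out_file: str) -> None:
    # Dump to a temporary file first and rename only on success, so a
    # failed or interrupted dump never leaves a truncated file behind
    # that would later be mistaken for a valid backup.
    tmp = f"{out_file}.tmp"
    with open(tmp, "wb") as fh:
        subprocess.run(cmd, shell=True, check=True, stdout=fh)
    os.replace(tmp, out_file)  # atomic rename on POSIX filesystems
```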
c01ab55f2d test(e2e): add dump-only-sql mixed-run + CLI contract coverage
- rename dump-only flag to --dump-only-sql across docs and tests
- update backup logic: skip files/ only for DB volumes when dumps succeed; fallback to files when dumps fail
- extend e2e helpers to support dump_only_sql
- add e2e mixed-run regression test (DB dump => no files/, non-DB => files/)
- add e2e CLI/argparse contract test (--dump-only-sql present, --dump-only rejected)
- fix e2e files test to expect file backups for non-DB volumes in dump-only-sql mode and verify restore
- update changelog + README flag table

https://chatgpt.com/share/69522d9c-ce08-800f-9070-71df3bd779ae
2025-12-29 08:28:23 +01:00
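
The per-volume behavior this commit describes reduces to a small predicate. A sketch under the same semantics (the function name is illustrative, not the real code):

```python
def needs_file_backup(dump_only_sql: bool, found_db: bool, dumped_any: bool) -> bool:
    # --dump-only-sql only affects DB volumes: a successful dump replaces
    # the file backup, a failed dump falls back to files, and non-DB
    # volumes are always backed up as files.
    if dump_only_sql and found_db and dumped_any:
        return False
    return True

# DB volume with a successful dump -> no files/ directory:
assert needs_file_backup(True, True, True) is False
# DB volume whose dump failed -> fallback to files:
assert needs_file_backup(True, True, False) is True
# Non-DB volume -> always files:
assert needs_file_backup(True, False, False) is True
```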
e3cdfd6fc4 Release version 1.1.1 2025-12-28 22:52:31 +01:00
df32671cec fix(backup): fallback to file backup in dump-only mode when no DB dump is possible
- Change DB backup helpers to return whether a dump was actually produced
- Detect DB containers without successful dumps in --dump-only mode
- Fallback to file backups with a warning instead of skipping silently
- Refactor DB dump logic to return boolean status
- Add E2E test covering dump-only fallback when databases.csv entry is missing

https://chatgpt.com/share/6951a659-2b0c-800f-aafa-3e89ae1eb697
2025-12-28 22:51:12 +01:00
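
The refactor hinges on helpers reporting whether a dump was actually produced instead of a bare "handled" flag. A sketch of that contract, with hypothetical stand-ins for the real container checks:

```python
def is_db_container(name: str) -> bool:
    # Hypothetical stand-in: the real code inspects the container's image.
    return "postgres" in name or "mariadb" in name

def try_dump(name: str) -> bool:
    # Hypothetical stand-in: the real code runs pg_dump/mariadb-dump and
    # reports whether a dump file was actually written.
    return False

def backup_dumps_for_volume(containers: list[str]) -> tuple[bool, bool]:
    # Returning (found_db, dumped_any) lets the caller distinguish a DB
    # volume whose dump produced nothing from a non-DB volume, and warn
    # plus fall back to a file backup instead of skipping silently.
    found_db, dumped_any = False, False
    for c in containers:
        if is_db_container(c):
            found_db = True
            dumped_any |= try_dump(c)
    return found_db, dumped_any
```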
d563dce20f Ignored dist 2025-12-28 22:19:19 +01:00
0222f7f109 Release version 1.1.0 2025-12-28 22:16:41 +01:00
6adafe6b1f fix(backup): log missing db config instead of raising
- Use module logger in backup/db.py
- Skip db dump when no databases.csv entry is present
- Apply black/formatting cleanup across backup/restore/tests

https://chatgpt.com/share/69519d45-b0dc-800f-acb6-6fb8504e9b46
2025-12-28 22:12:31 +01:00
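
The change is the standard module-logger pattern: a missing configuration entry becomes a logged skip rather than a raised exception. A minimal sketch, assuming a pandas DataFrame shaped like databases.csv:

```python
import logging
import pandas as pd

log = logging.getLogger(__name__)  # module-level logger, as in backup/db.py

def dump_if_configured(instance: str, databases_df: pd.DataFrame) -> bool:
    entries = databases_df[databases_df["instance"] == instance]
    if entries.empty:
        # Previously this raised BackupException; now it logs and skips.
        log.warning("No databases.csv entry for '%s'; skipping dump", instance)
        return False
    return True  # the real code would create the dumps here
```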
88b35ee923 backup(cli): use FHS-compliant default backup directory
- Replace dynamic repo name detection with stable default
- Switch default backups directory from /Backups to /var/lib/backup
- Align CLI defaults with Linux FHS best practices

https://chatgpt.com/share/69515eed-001c-800f-b1da-aee8d8683e63
2025-12-28 17:46:31 +01:00
71f79929be Changed pypi update mirror 2025-12-27 12:49:24 +01:00
0fb8efba4f Ignored .egg-info 2025-12-27 09:33:59 +01:00
31 changed files with 1555 additions and 293 deletions

.gitignore

@@ -1,2 +1,4 @@
 __pycache__
 artifacts/
+*.egg-info
+dist/

CHANGELOG.md

@@ -1,3 +1,25 @@
## [1.2.0] - 2025-12-29
* * Introduced **`--dump-only-sql`** mode for reliable, SQL-only database backups (replaces `--dump-only`).
* Database configuration in `databases.csv` is now **strict and explicit** (`*` or concrete database name only).
* **PostgreSQL cluster backups** are supported via `*`.
* SQL dumps are written **atomically** to avoid corrupted or empty files.
* Backups are **smarter and faster**: ignored volumes are skipped early, file backups run only when needed.
* Improved reliability through expanded end-to-end tests and safer defaults.
## [1.1.1] - 2025-12-28
* * **Backup:** In ***--dump-only-sql*** mode, fall back to file backups with a warning when no database dump can be produced (e.g. missing `databases.csv` entry).
## [1.1.0] - 2025-12-28
* * **Backup:** Log a warning and skip database dumps when no databases.csv entry is present instead of raising an exception; introduce module-level logging and apply formatting cleanups across backup/restore code and tests.
* **CLI:** Switch to an FHS-compliant default backup directory (/var/lib/backup) and use a stable default repository name instead of dynamic detection.
* **Maintenance:** Update mirror configuration and ignore generated .egg-info files.
## [1.0.0] - 2025-12-27 ## [1.0.0] - 2025-12-27
* Official Release 🥳 * Official Release 🥳


@@ -1,4 +1,4 @@
 git@github.com:kevinveenbirkenbach/backup-docker-to-local.git
 ssh://git@git.veen.world:2201/kevinveenbirkenbach/backup-docker-to-local.git
 ssh://git@code.infinito.nexus:2201/kevinveenbirkenbach/backup-docker-to-local.git
-https://pypi.org/project/baudolo/
+https://pypi.org/project/backup-docker-to-local/

README.md

@@ -134,7 +134,7 @@ baudolo \
 | Flag            | Description                                  |
 | --------------- | -------------------------------------------- |
 | `--everything`  | Always stop containers and re-run rsync      |
-| `--dump-only`   | Only create SQL dumps, skip file backups     |
+| `--dump-only-sql` | Skip file backups only for DB volumes when dumps succeed; non-DB volumes are still backed up; fallback to files if no dump. |
 | `--shutdown`    | Do not restart containers after backup       |
 | `--backups-dir` | Backup root directory (default: `/Backups`)  |
 | `--repo-name`   | Backup namespace under machine hash          |

pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "backup-docker-to-local"
-version = "1.0.0"
+version = "1.2.0"
 description = "Backup Docker volumes to local with rsync and optional DB dumps."
 readme = "README.md"
 requires-python = ">=3.9"


@@ -51,7 +51,9 @@ def is_image_ignored(container: str, images_no_backup_required: list[str]) -> bool:
     return any(pat in img for pat in images_no_backup_required)


-def volume_is_fully_ignored(containers: list[str], images_no_backup_required: list[str]) -> bool:
+def volume_is_fully_ignored(
+    containers: list[str], images_no_backup_required: list[str]
+) -> bool:
     """
     Skip file backup only if all containers linked to the volume are ignored.
     """
@@ -70,28 +72,27 @@ def requires_stop(containers: list[str], images_no_stop_required: list[str]) -> bool:
             return True
     return False


 def backup_mariadb_or_postgres(
     *,
     container: str,
     volume_dir: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
     """
-    Returns True if the container is a DB container we handled.
+    Returns (is_db_container, dumped_any).
     """
     for img in ["mariadb", "postgres"]:
         if has_image(container, img):
-            backup_database(
+            dumped = backup_database(
                 container=container,
                 volume_dir=volume_dir,
                 db_type=img,
                 databases_df=databases_df,
                 database_containers=database_containers,
             )
-            return True
-    return False
+            return True, dumped
+    return False, False


 def _backup_dumps_for_volume(
@@ -100,21 +101,26 @@ def _backup_dumps_for_volume(
     vol_dir: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> bool:
+) -> tuple[bool, bool]:
     """
-    Create DB dumps for any mariadb/postgres containers attached to this volume.
-    Returns True if at least one dump was produced.
+    Returns (found_db_container, dumped_any).
     """
+    found_db = False
     dumped_any = False
     for c in containers:
-        if backup_mariadb_or_postgres(
+        is_db, dumped = backup_mariadb_or_postgres(
             container=c,
             volume_dir=vol_dir,
             databases_df=databases_df,
             database_containers=database_containers,
-        ):
+        )
+        if is_db:
+            found_db = True
+        if dumped:
             dumped_any = True
-    return dumped_any
+    return found_db, dumped_any


 def main() -> int:
@@ -126,7 +132,12 @@ def main() -> int:
     versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name)
     version_dir = create_version_directory(versions_dir, backup_time)

-    databases_df = pandas.read_csv(args.databases_csv, sep=";")
+    # IMPORTANT:
+    # - keep_default_na=False prevents empty fields from turning into NaN
+    # - dtype=str keeps all columns stable for comparisons/validation
+    databases_df = pandas.read_csv(
+        args.databases_csv, sep=";", keep_default_na=False, dtype=str
+    )

     print("💾 Start volume backups...", flush=True)
@@ -134,27 +145,36 @@ def main() -> int:
         print(f"Start backup routine for volume: {volume_name}", flush=True)
         containers = containers_using_volume(volume_name)

+        # EARLY SKIP: if all linked containers are ignored, do not create any dirs
+        if volume_is_fully_ignored(containers, args.images_no_backup_required):
+            print(
+                f"Skipping volume '{volume_name}' entirely (all linked containers are ignored).",
+                flush=True,
+            )
+            continue
+
         vol_dir = create_volume_directory(version_dir, volume_name)

-        # Old behavior: DB dumps are additional to file backups.
-        _backup_dumps_for_volume(
+        found_db, dumped_any = _backup_dumps_for_volume(
             containers=containers,
             vol_dir=vol_dir,
             databases_df=databases_df,
             database_containers=args.database_containers,
         )

-        # dump-only: skip ALL file rsync backups
-        if args.dump_only:
-            continue
-
-        # skip file backup if all linked containers are ignored
-        if volume_is_fully_ignored(containers, args.images_no_backup_required):
-            print(
-                f"Skipping file backup for volume '{volume_name}' (all linked containers are ignored).",
-                flush=True,
-            )
-            continue
+        # dump-only-sql logic:
+        if args.dump_only_sql:
+            if found_db:
+                if not dumped_any:
+                    print(
+                        f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
+                        flush=True,
+                    )
+                    # fall through to file backup below
+                else:
+                    # DB volume successfully dumped -> skip file backup
+                    continue
+            # Non-DB volume -> always do file backup (fall through)

         if args.everything:
             # "everything": always do pre-rsync, then stop + rsync again
@@ -178,6 +198,8 @@ def main() -> int:
     print("Finished volume backups.", flush=True)

     print("Handling Docker Compose services...", flush=True)
-    handle_docker_compose_services(args.compose_dir, args.docker_compose_hard_restart_required)
+    handle_docker_compose_services(
+        args.compose_dir, args.docker_compose_hard_restart_required
+    )

     return 0

src/baudolo/backup/cli.py

@@ -2,22 +2,6 @@ from __future__ import annotations

 import argparse
 import os
-from pathlib import Path
-
-
-def _default_repo_name() -> str:
-    """
-    Derive the repository name from the folder that contains `src/`.
-
-    Expected layout:
-        <repo-root>/src/baudolo/backup/cli.py
-        => parents[0]=backup, [1]=baudolo, [2]=src, [3]=repo-root
-    """
-    try:
-        return Path(__file__).resolve().parents[3].name
-    except Exception:
-        return "backup-docker-to-local"


 def parse_args() -> argparse.Namespace:
@@ -41,7 +25,7 @@ def parse_args() -> argparse.Namespace:
     p.add_argument(
         "--repo-name",
-        default=_default_repo_name(),
+        default="backup-docker-to-local",
         help="Backup repo folder name under <backups-dir>/<machine-id>/ (default: git repo folder name)",
     )
     p.add_argument(
@@ -51,8 +35,8 @@ def parse_args() -> argparse.Namespace:
     )
     p.add_argument(
         "--backups-dir",
-        default="/Backups",
-        help="Backup root directory (default: /Backups)",
+        default="/var/lib/backup/",
+        help="Backup root directory (default: /var/lib/backup/)",
     )
     p.add_argument(
@@ -84,10 +68,15 @@ def parse_args() -> argparse.Namespace:
         action="store_true",
         help="Do not restart containers after backup",
     )
-    p.add_argument(
-        "--dump-only",
-        action="store_true",
-        help="Only create DB dumps (skip ALL file rsync backups)",
-    )
+    p.add_argument(
+        "--dump-only-sql",
+        action="store_true",
+        help=(
+            "Create database dumps only for DB volumes. "
+            "File backups are skipped for DB volumes if a dump succeeds, "
+            "but non-DB volumes are still backed up. "
+            "If a DB dump cannot be produced, baudolo falls back to a file backup."
+        ),
+    )
     return p.parse_args()


@@ -10,7 +10,9 @@ def hard_restart_docker_services(dir_path: str) -> None:
     subprocess.run(["docker-compose", "up", "-d"], cwd=dir_path, check=True)


-def handle_docker_compose_services(parent_directory: str, hard_restart_required: list[str]) -> None:
+def handle_docker_compose_services(
+    parent_directory: str, hard_restart_required: list[str]
+) -> None:
     for entry in os.scandir(parent_directory):
         if not entry.is_dir():
             continue

src/baudolo/backup/db.py

@@ -3,24 +3,64 @@ from __future__ import annotations

 import os
 import pathlib
 import re
+import logging
+from typing import Optional

 import pandas

 from .shell import BackupException, execute_shell_command

+log = logging.getLogger(__name__)
+

 def get_instance(container: str, database_containers: list[str]) -> str:
+    """
+    Derive a stable instance name from the container name.
+    """
     if container in database_containers:
         return container
     return re.split(r"(_|-)(database|db|postgres)", container)[0]


+def _validate_database_value(value: Optional[str], *, instance: str) -> str:
+    """
+    Enforce explicit database semantics:
+      - "*"      => dump ALL databases (cluster dump for Postgres)
+      - "<name>" => dump exactly this database
+      - ""       => invalid configuration (would previously result in NaN / nan.backup.sql)
+    """
+    v = (value or "").strip()
+    if v == "":
+        raise ValueError(
+            f"Invalid databases.csv entry for instance '{instance}': "
+            "column 'database' must be '*' or a concrete database name (not empty)."
+        )
+    return v
+
+
+def _atomic_write_cmd(cmd: str, out_file: str) -> None:
+    """
+    Write dump output atomically:
+      - write to <file>.tmp
+      - rename to <file> only on success
+    This prevents empty or partial dump files from being treated as valid backups.
+    """
+    tmp = f"{out_file}.tmp"
+    execute_shell_command(f"{cmd} > {tmp}")
+    execute_shell_command(f"mv {tmp} {out_file}")
+
+
 def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
+    """
+    Perform a full Postgres cluster dump using pg_dumpall.
+    """
     cmd = (
         f"PGPASSWORD={password} docker exec -i {container} "
-        f"pg_dumpall -U {username} -h localhost > {out_file}"
+        f"pg_dumpall -U {username} -h localhost"
     )
-    execute_shell_command(cmd)
+    _atomic_write_cmd(cmd, out_file)
@@ -30,44 +70,75 @@ def backup_database(
     db_type: str,
     databases_df: "pandas.DataFrame",
     database_containers: list[str],
-) -> None:
+) -> bool:
+    """
+    Backup databases for a given DB container.
+
+    Returns True if at least one dump was produced.
+    """
     instance_name = get_instance(container, database_containers)
-    entries = databases_df.loc[databases_df["instance"] == instance_name]
+
+    entries = databases_df[databases_df["instance"] == instance_name]
     if entries.empty:
-        raise BackupException(f"No entry found for instance '{instance_name}'")
+        log.debug("No database entries for instance '%s'", instance_name)
+        return False

     out_dir = os.path.join(volume_dir, "sql")
     pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

-    for row in entries.iloc:
-        db_name = row["database"]
-        user = row["username"]
-        password = row["password"]
+    produced = False
+
+    for row in entries.itertuples(index=False):
+        raw_db = getattr(row, "database", "")
+        user = (getattr(row, "username", "") or "").strip()
+        password = (getattr(row, "password", "") or "").strip()
+
+        db_value = _validate_database_value(raw_db, instance=instance_name)
+
+        # Explicit: dump ALL databases
+        if db_value == "*":
+            if db_type != "postgres":
+                raise ValueError(
+                    f"databases.csv entry for instance '{instance_name}': "
+                    "'*' is currently only supported for Postgres."
+                )
+            cluster_file = os.path.join(
+                out_dir, f"{instance_name}.cluster.backup.sql"
+            )
+            fallback_pg_dumpall(container, user, password, cluster_file)
+            produced = True
+            continue
+
+        # Concrete database dump
+        db_name = db_value
         dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")

         if db_type == "mariadb":
             cmd = (
                 f"docker exec {container} /usr/bin/mariadb-dump "
-                f"-u {user} -p{password} {db_name} > {dump_file}"
+                f"-u {user} -p{password} {db_name}"
             )
-            execute_shell_command(cmd)
+            _atomic_write_cmd(cmd, dump_file)
+            produced = True
             continue

         if db_type == "postgres":
-            cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
-            if not db_name:
-                fallback_pg_dumpall(container, user, password, cluster_file)
-                return
             try:
                 cmd = (
                     f"PGPASSWORD={password} docker exec -i {container} "
-                    f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
+                    f"pg_dump -U {user} -d {db_name} -h localhost"
                 )
-                execute_shell_command(cmd)
+                _atomic_write_cmd(cmd, dump_file)
+                produced = True
             except BackupException as e:
-                print(f"pg_dump failed: {e}", flush=True)
-                print(f"Falling back to pg_dumpall for instance '{instance_name}'", flush=True)
-                fallback_pg_dumpall(container, user, password, cluster_file)
+                # Explicit DB dump failed -> hard error
+                raise BackupException(
+                    f"Postgres dump failed for instance '{instance_name}', "
+                    f"database '{db_name}'. This database was explicitly configured "
+                    "and therefore must succeed.\n"
+                    f"{e}"
+                )
             continue
+
+    return produced


@@ -37,7 +37,9 @@ def change_containers_status(containers: list[str], status: str) -> None:
 def docker_volume_exists(volume: str) -> bool:
     # Avoid throwing exceptions for exists checks.
     try:
-        execute_shell_command(f"docker volume inspect {volume} >/dev/null 2>&1 && echo OK")
+        execute_shell_command(
+            f"docker volume inspect {volume} >/dev/null 2>&1 && echo OK"
+        )
         return True
     except Exception:
         return False


@@ -13,7 +13,9 @@ def get_storage_path(volume_name: str) -> str:
     return f"{path}/"


-def get_last_backup_dir(versions_dir: str, volume_name: str, current_backup_dir: str) -> str | None:
+def get_last_backup_dir(
+    versions_dir: str, volume_name: str, current_backup_dir: str
+) -> str | None:
     versions = sorted(os.listdir(versions_dir), reverse=True)
     for version in versions:
         candidate = os.path.join(versions_dir, version, volume_name, "files", "")
@@ -37,6 +39,8 @@ def backup_volume(versions_dir: str, volume_name: str, volume_dir: str) -> None:
         execute_shell_command(cmd)
     except BackupException as e:
         if "file has vanished" in str(e):
-            print("Warning: Some files vanished before transfer. Continuing.", flush=True)
+            print(
+                "Warning: Some files vanished before transfer. Continuing.", flush=True
+            )
         else:
             raise


@@ -66,7 +66,9 @@ def main(argv: list[str] | None = None) -> int:
     # ------------------------------------------------------------------
     # mariadb
     # ------------------------------------------------------------------
-    p_mdb = sub.add_parser("mariadb", help="Restore a single MariaDB/MySQL-compatible dump")
+    p_mdb = sub.add_parser(
+        "mariadb", help="Restore a single MariaDB/MySQL-compatible dump"
+    )
     _add_common_backup_args(p_mdb)
     p_mdb.add_argument("--container", required=True)
     p_mdb.add_argument("--db-name", required=True)


@@ -23,7 +23,9 @@ exit 42
             raise RuntimeError("empty client detection output")
         return out
     except Exception as e:
-        print("ERROR: neither 'mariadb' nor 'mysql' found in container.", file=sys.stderr)
+        print(
+            "ERROR: neither 'mariadb' nor 'mysql' found in container.", file=sys.stderr
+        )
         raise e
@@ -47,7 +49,14 @@ def restore_mariadb_sql(
     # MariaDB 11 images may not contain the mysql binary at all.
     docker_exec(
         container,
-        [client, "-u", user, f"--password={password}", "-e", "SET FOREIGN_KEY_CHECKS=0;"],
+        [
+            client,
+            "-u",
+            user,
+            f"--password={password}",
+            "-e",
+            "SET FOREIGN_KEY_CHECKS=0;",
+        ],
     )

     result = docker_exec(
@@ -80,10 +89,19 @@ def restore_mariadb_sql(
     docker_exec(
         container,
-        [client, "-u", user, f"--password={password}", "-e", "SET FOREIGN_KEY_CHECKS=1;"],
+        [
+            client,
+            "-u",
+            user,
+            f"--password={password}",
+            "-e",
+            "SET FOREIGN_KEY_CHECKS=1;",
+        ],
     )

     with open(sql_path, "rb") as f:
-        docker_exec(container, [client, "-u", user, f"--password={password}", db_name], stdin=f)
+        docker_exec(
+            container, [client, "-u", user, f"--password={password}", db_name], stdin=f
+        )

     print(f"MariaDB/MySQL restore complete for db '{db_name}'.")


@@ -6,7 +6,9 @@ import sys

 from .run import run, docker_volume_exists


-def restore_volume_files(volume_name: str, backup_files_dir: str, *, rsync_image: str) -> int:
+def restore_volume_files(
+    volume_name: str, backup_files_dir: str, *, rsync_image: str
+) -> int:
     if not os.path.isdir(backup_files_dir):
         print(f"ERROR: backup files dir not found: {backup_files_dir}", file=sys.stderr)
         return 2


@@ -1,50 +1,107 @@
-import pandas as pd
+#!/usr/bin/env python3
+from __future__ import annotations
+
 import argparse
 import os
+import re
+import sys
+
+import pandas as pd
+from typing import Optional

-def check_and_add_entry(file_path, instance, database, username, password):
-    # Check if the file exists and is not empty
-    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
-        # Read the existing CSV file with header
-        df = pd.read_csv(file_path, sep=';')
+DB_NAME_RE = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_-]*$")
+
+
+def _validate_database_value(value: Optional[str], *, instance: str) -> str:
+    v = (value or "").strip()
+    if v == "":
+        raise ValueError(
+            f"Invalid databases.csv entry for instance '{instance}': "
+            "column 'database' must be '*' or a concrete database name (not empty)."
+        )
+    if v == "*":
+        return "*"
+    if v.lower() == "nan":
+        raise ValueError(
+            f"Invalid databases.csv entry for instance '{instance}': database must not be 'nan'."
+        )
+    if not DB_NAME_RE.match(v):
+        raise ValueError(
+            f"Invalid databases.csv entry for instance '{instance}': "
+            f"invalid database name '{v}'. Allowed: letters, numbers, '_' and '-'."
+        )
+    return v
+
+
+def check_and_add_entry(
+    file_path: str,
+    instance: str,
+    database: Optional[str],
+    username: str,
+    password: str,
+) -> None:
+    """
+    Add or update an entry in databases.csv.
+
+    The function enforces strict validation:
+      - database MUST be set
+      - database MUST be '*' or a valid database name
+    """
+    database = _validate_database_value(database, instance=instance)
+
+    if os.path.exists(file_path):
+        df = pd.read_csv(
+            file_path,
+            sep=";",
+            dtype=str,
+            keep_default_na=False,
+        )
     else:
-        # Create a new DataFrame with columns if file does not exist
-        df = pd.DataFrame(columns=['instance', 'database', 'username', 'password'])
+        df = pd.DataFrame(
+            columns=["instance", "database", "username", "password"]
+        )

-    # Check if the entry exists and remove it
-    mask = (
-        (df['instance'] == instance) &
-        ((df['database'] == database) |
-         (((df['database'].isna()) | (df['database'] == '')) & (database == ''))) &
-        (df['username'] == username)
-    )
+    mask = (df["instance"] == instance) & (df["database"] == database)

-    if not df[mask].empty:
-        print("Replacing existing entry.")
-        df = df[~mask]
+    if mask.any():
+        print("Updating existing entry.")
+        df.loc[mask, ["username", "password"]] = [username, password]
     else:
         print("Adding new entry.")
+        new_entry = pd.DataFrame(
+            [[instance, database, username, password]],
+            columns=["instance", "database", "username", "password"],
+        )
+        df = pd.concat([df, new_entry], ignore_index=True)

-    # Create a new DataFrame for the new entry
-    new_entry = pd.DataFrame([{'instance': instance, 'database': database, 'username': username, 'password': password}])
-
-    # Add (or replace) the entry using concat
-    df = pd.concat([df, new_entry], ignore_index=True)
-
-    # Save the updated CSV file
-    df.to_csv(file_path, sep=';', index=False)
+    df.to_csv(file_path, sep=";", index=False)


-def main():
-    parser = argparse.ArgumentParser(description="Check and replace (or add) a database entry in a CSV file.")
-    parser.add_argument("file_path", help="Path to the CSV file")
-    parser.add_argument("instance", help="Database instance")
-    parser.add_argument("database", help="Database name")
-    parser.add_argument("username", help="Username")
-    parser.add_argument("password", nargs='?', default="", help="Password (optional)")
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Seed or update databases.csv for backup configuration."
+    )
+    parser.add_argument("file", help="Path to databases.csv")
+    parser.add_argument("instance", help="Instance name (e.g. bigbluebutton)")
+    parser.add_argument(
+        "database",
+        help="Database name or '*' to dump all databases",
+    )
+    parser.add_argument("username", help="Database username")
+    parser.add_argument("password", help="Database password")

     args = parser.parse_args()
-    check_and_add_entry(args.file_path, args.instance, args.database, args.username, args.password)
+
+    try:
+        check_and_add_entry(
+            file_path=args.file,
+            instance=args.instance,
+            database=args.database,
+            username=args.username,
+            password=args.password,
+        )
+    except Exception as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(1)


 if __name__ == "__main__":
     main()

tests/e2e/helpers.py

@@ -34,7 +34,9 @@ def run(
         raise


-def sh(cmd: str, *, capture: bool = True, check: bool = True) -> subprocess.CompletedProcess:
+def sh(
+    cmd: str, *, capture: bool = True, check: bool = True
+) -> subprocess.CompletedProcess:
     return run(["sh", "-lc", cmd], capture=capture, check=check)
@@ -63,24 +65,37 @@ def wait_for_log(container: str, pattern: str, timeout_s: int = 60) -> None:
     raise TimeoutError(f"Timed out waiting for log pattern '{pattern}' in {container}")


-def wait_for_postgres(container: str, *, user: str = "postgres", timeout_s: int = 90) -> None:
+def wait_for_postgres(
+    container: str, *, user: str = "postgres", timeout_s: int = 90
+) -> None:
     """
     Docker-outside-of-Docker friendly readiness: check from inside the DB container.
     """
     deadline = time.time() + timeout_s
     while time.time() < deadline:
         p = run(
-            ["docker", "exec", container, "sh", "-lc", f"pg_isready -U {user} -h localhost"],
+            [
+                "docker",
+                "exec",
+                container,
+                "sh",
+                "-lc",
+                f"pg_isready -U {user} -h localhost",
+            ],
             capture=True,
             check=False,
         )
         if p.returncode == 0:
             return
         time.sleep(1)
-    raise TimeoutError(f"Timed out waiting for Postgres readiness in container {container}")
+    raise TimeoutError(
+        f"Timed out waiting for Postgres readiness in container {container}"
+    )


-def wait_for_mariadb(container: str, *, root_password: str, timeout_s: int = 90) -> None:
+def wait_for_mariadb(
+    container: str, *, root_password: str, timeout_s: int = 90
+) -> None:
     """
     Liveness probe for MariaDB.
@@ -92,17 +107,28 @@ def wait_for_mariadb(container: str, *, root_password: str, timeout_s: int = 90) -> None:
     deadline = time.time() + timeout_s
     while time.time() < deadline:
         p = run(
-            ["docker", "exec", container, "sh", "-lc", "mariadb -uroot --protocol=socket -e \"SELECT 1;\""],
+            [
+                "docker",
+                "exec",
+                container,
+                "sh",
+                "-lc",
+                'mariadb -uroot --protocol=socket -e "SELECT 1;"',
+            ],
             capture=True,
             check=False,
         )
         if p.returncode == 0:
             return
         time.sleep(1)
-    raise TimeoutError(f"Timed out waiting for MariaDB readiness in container {container}")
+    raise TimeoutError(
+        f"Timed out waiting for MariaDB readiness in container {container}"
+    )


-def wait_for_mariadb_sql(container: str, *, user: str, password: str, timeout_s: int = 90) -> None:
+def wait_for_mariadb_sql(
+    container: str, *, user: str, password: str, timeout_s: int = 90
+) -> None:
     """
     SQL login readiness for the *dedicated test user* over TCP.
@@ -118,7 +144,7 @@ def wait_for_mariadb_sql(container: str, *, user: str, password: str, timeout_s: int = 90) -> None:
                 container,
                 "sh",
                 "-lc",
-                f"mariadb -h 127.0.0.1 -u{user} -p{password} -e \"SELECT 1;\"",
+                f'mariadb -h 127.0.0.1 -u{user} -p{password} -e "SELECT 1;"',
             ],
             capture=True,
             check=False,
@@ -126,7 +152,9 @@ def wait_for_mariadb_sql(container: str, *, user: str, password: str, timeout_s: int = 90) -> None:
         if p.returncode == 0:
             return
         time.sleep(1)
-    raise TimeoutError(f"Timed out waiting for MariaDB SQL login readiness in container {container}")
+    raise TimeoutError(
+        f"Timed out waiting for MariaDB SQL login readiness in container {container}"
+    )


 def backup_run(
@@ -138,22 +166,29 @@ def backup_run(
     database_containers: list[str],
     images_no_stop_required: list[str],
     images_no_backup_required: list[str] | None = None,
-    dump_only: bool = False,
+    dump_only_sql: bool = False,
 ) -> None:
     cmd = [
         "baudolo",
-        "--compose-dir", compose_dir,
-        "--docker-compose-hard-restart-required", "mailu",
-        "--repo-name", repo_name,
-        "--databases-csv", databases_csv,
-        "--backups-dir", backups_dir,
-        "--database-containers", *database_containers,
-        "--images-no-stop-required", *images_no_stop_required,
+        "--compose-dir",
+        compose_dir,
+        "--docker-compose-hard-restart-required",
+        "mailu",
+        "--repo-name",
+        repo_name,
+        "--databases-csv",
+        databases_csv,
+        "--backups-dir",
+        backups_dir,
+        "--database-containers",
+        *database_containers,
+        "--images-no-stop-required",
+        *images_no_stop_required,
     ]
     if images_no_backup_required:
         cmd += ["--images-no-backup-required", *images_no_backup_required]
-    if dump_only:
-        cmd += ["--dump-only"]
+    if dump_only_sql:
+        cmd += ["--dump-only-sql"]

     try:
         run(cmd, capture=True, check=True)


@@ -0,0 +1,29 @@
+import unittest
+
+from .helpers import run
+
+
+class TestE2ECLIContractDumpOnlySql(unittest.TestCase):
+    def test_help_mentions_new_flag(self) -> None:
+        cp = run(["baudolo", "--help"], capture=True, check=True)
+        out = (cp.stdout or "") + "\n" + (cp.stderr or "")
+        self.assertIn(
+            "--dump-only-sql",
+            out,
+            f"Expected '--dump-only-sql' to appear in --help output. Output:\n{out}",
+        )
+
+    def test_old_flag_is_rejected(self) -> None:
+        cp = run(["baudolo", "--dump-only"], capture=True, check=False)
+        self.assertEqual(
+            cp.returncode,
+            2,
+            f"Expected exitcode 2 for unknown args, got {cp.returncode}\n"
+            f"STDOUT={cp.stdout}\nSTDERR={cp.stderr}",
+        )
+        err = (cp.stderr or "") + "\n" + (cp.stdout or "")
+        # Argparse typically prints "unrecognized arguments"
+        self.assertTrue(
+            ("unrecognized arguments" in err) or ("usage:" in err.lower()),
+            f"Expected argparse-style error output. Output:\n{err}",
+        )


@@ -0,0 +1,175 @@
+# tests/e2e/test_e2e_dump_only_fallback_to_files.py
+import unittest
+
+from .helpers import (
+    backup_path,
+    cleanup_docker,
+    create_minimal_compose_dir,
+    ensure_empty_dir,
+    latest_version_dir,
+    require_docker,
+    run,
+    unique,
+    write_databases_csv,
+    wait_for_postgres,
+)
+
+
+class TestE2EDumpOnlyFallbackToFiles(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        require_docker()
+
+        cls.prefix = unique("baudolo-e2e-dump-only-sql-fallback")
+        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
+        ensure_empty_dir(cls.backups_dir)
+        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
+        cls.repo_name = cls.prefix
+
+        cls.pg_container = f"{cls.prefix}-pg"
+        cls.pg_volume = f"{cls.prefix}-pg-vol"
+        cls.restore_volume = f"{cls.prefix}-restore-vol"
+
+        cls.containers = [cls.pg_container]
+        cls.volumes = [cls.pg_volume, cls.restore_volume]
+
+        run(["docker", "volume", "create", cls.pg_volume])
+
+        # Start Postgres (creates a real DB volume)
+        run(
+            [
+                "docker",
+                "run",
+                "-d",
+                "--name",
+                cls.pg_container,
+                "-e",
+                "POSTGRES_PASSWORD=pgpw",
+                "-e",
+                "POSTGRES_DB=appdb",
+                "-e",
+                "POSTGRES_USER=postgres",
+                "-v",
+                f"{cls.pg_volume}:/var/lib/postgresql/data",
+                "postgres:16",
+            ]
+        )
+        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
+
+        # Add a deterministic marker file into the volume
+        cls.marker = "dump-only-sql-fallback-marker"
+        run(
+            [
+                "docker",
+                "exec",
+                cls.pg_container,
+                "sh",
+                "-lc",
+                f"echo '{cls.marker}' > /var/lib/postgresql/data/marker.txt",
+            ]
+        )
+
+        # databases.csv WITHOUT matching entry for this instance -> should skip dump
+        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
+        write_databases_csv(cls.databases_csv, [])  # empty except header
+
+        # Run baudolo with --dump-only-sql and a DB container present:
+        # Expected: WARNING + FALLBACK to file backup (files/ must exist)
+        cmd = [
+            "baudolo",
+            "--compose-dir",
+            cls.compose_dir,
+            "--docker-compose-hard-restart-required",
+            "mailu",
+            "--repo-name",
+            cls.repo_name,
+            "--databases-csv",
+            cls.databases_csv,
+            "--backups-dir",
+            cls.backups_dir,
+            "--database-containers",
+            cls.pg_container,
+            "--images-no-stop-required",
+            "postgres",
+            "mariadb",
+            "mysql",
+            "alpine",
+            "--dump-only-sql",
+        ]
+        cp = run(cmd, capture=True, check=True)
+        cls.stdout = cp.stdout or ""
+
+        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
+
+        # Restore files into a fresh volume to prove file backup happened
+        run(["docker", "volume", "create", cls.restore_volume])
+        run(
+            [
+                "baudolo-restore",
+                "files",
+                cls.restore_volume,
+                cls.hash,
+                cls.version,
+                "--backups-dir",
+                cls.backups_dir,
+                "--repo-name",
+                cls.repo_name,
+                "--source-volume",
+                cls.pg_volume,
+                "--rsync-image",
+                "ghcr.io/kevinveenbirkenbach/alpine-rsync",
+            ]
+        )
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
+
+    def test_warns_about_missing_dump_in_dump_only_mode(self) -> None:
+        self.assertIn(
+            "WARNING: dump-only-sql requested but no DB dump was produced",
+            self.stdout,
+            f"Expected warning in baudolo output. STDOUT:\n{self.stdout}",
+        )
+
+    def test_files_backup_exists_due_to_fallback(self) -> None:
+        p = backup_path(
+            self.backups_dir,
+            self.repo_name,
+            self.version,
+            self.pg_volume,
+        ) / "files"
+        self.assertTrue(p.is_dir(), f"Expected files backup dir at: {p}")
+
+    def test_sql_dump_not_present(self) -> None:
+        # There should be no sql dumps because databases.csv had no matching entry.
+        sql_dir = backup_path(
+            self.backups_dir,
+            self.repo_name,
+            self.version,
+            self.pg_volume,
+        ) / "sql"
+        # Could exist (dir created) in some edge cases, but should contain no *.sql dumps.
+        if sql_dir.exists():
+            dumps = list(sql_dir.glob("*.sql"))
+            self.assertEqual(
+                len(dumps),
+                0,
+                f"Did not expect SQL dump files, found: {dumps}",
+            )
+
+    def test_restored_files_contain_marker(self) -> None:
+        p = run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{self.restore_volume}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "cat /data/marker.txt",
+            ]
+        )
+        self.assertEqual((p.stdout or "").strip(), self.marker)


@@ -0,0 +1,182 @@
+import unittest
+
+from .helpers import (
+    backup_path,
+    cleanup_docker,
+    create_minimal_compose_dir,
+    ensure_empty_dir,
+    latest_version_dir,
+    require_docker,
+    run,
+    unique,
+    wait_for_postgres,
+    write_databases_csv,
+)
+
+
+class TestE2EDumpOnlySqlMixedRun(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        require_docker()
+
+        cls.prefix = unique("baudolo-e2e-dump-only-sql-mixed-run")
+        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
+        ensure_empty_dir(cls.backups_dir)
+        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
+        cls.repo_name = cls.prefix
+
+        # --- Volumes ---
+        cls.db_volume = f"{cls.prefix}-vol-db"
+        cls.files_volume = f"{cls.prefix}-vol-files"
+
+        # Track for cleanup
+        cls.containers: list[str] = []
+        cls.volumes = [cls.db_volume, cls.files_volume]
+
+        # Create volumes
+        run(["docker", "volume", "create", cls.db_volume])
+        run(["docker", "volume", "create", cls.files_volume])
+
+        # Put a marker into the non-db volume
+        run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{cls.files_volume}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "echo 'hello-non-db' > /data/hello.txt",
+            ]
+        )
+
+        # --- Start Postgres container using the DB volume ---
+        cls.pg_container = f"{cls.prefix}-pg"
+        cls.containers.append(cls.pg_container)
+
+        cls.pg_password = "postgres"
+        cls.pg_db = "testdb"
+        cls.pg_user = "postgres"
+
+        run(
+            [
+                "docker",
+                "run",
+                "-d",
+                "--name",
+                cls.pg_container,
+                "-e",
+                f"POSTGRES_PASSWORD={cls.pg_password}",
+                "-v",
+                f"{cls.db_volume}:/var/lib/postgresql/data",
+                "postgres:16-alpine",
+            ]
+        )
+        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
+
+        # Create deterministic content in DB so dump is non-empty
+        run(
+            [
+                "docker",
+                "exec",
+                cls.pg_container,
+                "sh",
+                "-lc",
+                f'psql -U postgres -c "CREATE DATABASE {cls.pg_db};" || true',
+            ],
+            check=True,
+        )
+        run(
+            [
+                "docker",
+                "exec",
+                cls.pg_container,
+                "sh",
+                "-lc",
+                (
+                    f'psql -U postgres -d {cls.pg_db} -c '
+                    '"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
+                    "INSERT INTO t(id,v) VALUES (1,'hello-db') "
+                    "ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
+                ),
+            ],
+            check=True,
+        )
+
+        # databases.csv with an entry => dump should succeed
+        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
+        write_databases_csv(
+            cls.databases_csv,
+            [(cls.pg_container, cls.pg_db, cls.pg_user, cls.pg_password)],
+        )
+
+        # Run baudolo with dump-only-sql
+        cmd = [
+            "baudolo",
+            "--compose-dir",
+            cls.compose_dir,
+            "--databases-csv",
+            cls.databases_csv,
+            "--database-containers",
+            cls.pg_container,
+            "--images-no-stop-required",
+            "alpine",
+            "postgres",
+            "mariadb",
+            "mysql",
+            "--dump-only-sql",
+            "--backups-dir",
+            cls.backups_dir,
+            "--repo-name",
+            cls.repo_name,
+        ]
+        cp = run(cmd, capture=True, check=True)
+        cls.stdout = cp.stdout
+        cls.stderr = cp.stderr
+
+        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
+
+    def test_db_volume_has_dump_and_no_files_dir(self) -> None:
+        base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
+        dumps = base / "sql"
+        files = base / "files"
+
+        self.assertTrue(dumps.exists(), f"Expected dumps dir for DB volume at: {dumps}")
+        self.assertFalse(
+            files.exists(),
+            f"Did not expect files dir for DB volume when dump succeeded at: {files}",
+        )
+
+        # Optional: at least one dump file exists
+        dump_files = list(dumps.glob("*.sql")) + list(dumps.glob("*.sql.gz"))
+        self.assertTrue(
+            dump_files,
+            f"Expected at least one SQL dump file in {dumps}, found none.",
+        )
+
+    def test_non_db_volume_has_files_dir(self) -> None:
+        base = backup_path(
+            self.backups_dir, self.repo_name, self.version, self.files_volume
+        )
+        files = base / "files"
+        self.assertTrue(
+            files.exists(),
+            f"Expected files dir for non-DB volume at: {files}",
+        )
+
+    def test_dump_only_sql_does_not_disable_non_db_files_backup(self) -> None:
+        # Regression guard: even with --dump-only-sql, non-DB volumes must still be backed up as files
+        base = backup_path(
+            self.backups_dir, self.repo_name, self.version, self.files_volume
+        )
+        self.assertTrue(
+            (base / "files").exists(),
+            f"Expected non-DB volume files backup to exist at: {base / 'files'}",
+        )


@@ -1,5 +1,4 @@
 import unittest
-from pathlib import Path

 from .helpers import (
     backup_run,
@@ -33,12 +32,19 @@ class TestE2EFilesFull(unittest.TestCase):
         # create source volume with a file
         run(["docker", "volume", "create", cls.volume_src])
-        run([
-            "docker", "run", "--rm",
-            "-v", f"{cls.volume_src}:/data",
-            "alpine:3.20",
-            "sh", "-lc", "mkdir -p /data && echo 'hello' > /data/hello.txt",
-        ])
+        run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{cls.volume_src}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "mkdir -p /data && echo 'hello' > /data/hello.txt",
+            ]
+        )

         # databases.csv (unused, but required by CLI)
         cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
@@ -75,20 +81,36 @@ class TestE2EFilesFull(unittest.TestCase):
     def test_restore_files_into_new_volume(self) -> None:
         # restore files from volume_src backup into volume_dst
-        run([
-            "baudolo-restore", "files",
-            self.volume_dst, self.hash, self.version,
-            "--backups-dir", self.backups_dir,
-            "--repo-name", self.repo_name,
-            "--source-volume", self.volume_src,
-            "--rsync-image", "ghcr.io/kevinveenbirkenbach/alpine-rsync",
-        ])
+        run(
+            [
+                "baudolo-restore",
+                "files",
+                self.volume_dst,
+                self.hash,
+                self.version,
+                "--backups-dir",
+                self.backups_dir,
+                "--repo-name",
+                self.repo_name,
+                "--source-volume",
+                self.volume_src,
+                "--rsync-image",
+                "ghcr.io/kevinveenbirkenbach/alpine-rsync",
+            ]
+        )

         # verify restored file exists in dst volume
-        p = run([
-            "docker", "run", "--rm",
-            "-v", f"{self.volume_dst}:/data",
-            "alpine:3.20",
-            "sh", "-lc", "cat /data/hello.txt",
-        ])
+        p = run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{self.volume_dst}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "cat /data/hello.txt",
+            ]
+        )

         self.assertEqual((p.stdout or "").strip(), "hello")


@@ -26,22 +26,29 @@ class TestE2EFilesNoCopy(unittest.TestCase):
         cls.repo_name = cls.prefix

         cls.volume_src = f"{cls.prefix}-vol-src"
-        cls.volume_dst = f"{cls.prefix}-vol-dst"
-        cls.containers = []
-        cls.volumes = [cls.volume_src, cls.volume_dst]
+        cls.containers: list[str] = []
+        cls.volumes = [cls.volume_src]

+        # Create source volume and write a marker file
         run(["docker", "volume", "create", cls.volume_src])
-        run([
-            "docker", "run", "--rm",
-            "-v", f"{cls.volume_src}:/data",
-            "alpine:3.20",
-            "sh", "-lc", "echo 'hello' > /data/hello.txt",
-        ])
+        run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{cls.volume_src}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "echo 'hello' > /data/hello.txt",
+            ]
+        )

         cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
         write_databases_csv(cls.databases_csv, [])

-        # dump-only => NO file rsync backups
+        # dump-only-sql => non-DB volumes are STILL backed up as files
         backup_run(
             backups_dir=cls.backups_dir,
             repo_name=cls.repo_name,
@@ -49,24 +56,64 @@ class TestE2EFilesNoCopy(unittest.TestCase):
             databases_csv=cls.databases_csv,
             database_containers=["dummy-db"],
             images_no_stop_required=["alpine", "postgres", "mariadb", "mysql"],
-            dump_only=True,
+            dump_only_sql=True,
         )
         cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)

+        # Wipe the volume to ensure restore actually restores something
+        run(["docker", "volume", "rm", "-f", cls.volume_src])
+        run(["docker", "volume", "create", cls.volume_src])
+
     @classmethod
     def tearDownClass(cls) -> None:
         cleanup_docker(containers=cls.containers, volumes=cls.volumes)

-    def test_files_backup_not_present(self) -> None:
-        p = backup_path(self.backups_dir, self.repo_name, self.version, self.volume_src) / "files"
-        self.assertFalse(p.exists(), f"Did not expect files backup dir at: {p}")
+    def test_files_backup_present_for_non_db_volume(self) -> None:
+        p = (
+            backup_path(self.backups_dir, self.repo_name, self.version, self.volume_src)
+            / "files"
+        )
+        self.assertTrue(p.exists(), f"Expected files backup dir at: {p}")

-    def test_restore_files_fails_expected(self) -> None:
-        p = run([
-            "baudolo-restore", "files",
-            self.volume_dst, self.hash, self.version,
-            "--backups-dir", self.backups_dir,
-            "--repo-name", self.repo_name,
-        ], check=False)
-        self.assertEqual(p.returncode, 2, f"Expected exitcode 2, got {p.returncode}\nSTDOUT={p.stdout}\nSTDERR={p.stderr}")
+    def test_restore_files_succeeds_and_restores_content(self) -> None:
+        p = run(
+            [
+                "baudolo-restore",
+                "files",
+                self.volume_src,
+                self.hash,
+                self.version,
+                "--backups-dir",
+                self.backups_dir,
+                "--repo-name",
+                self.repo_name,
+            ],
+            check=False,
+        )
+        self.assertEqual(
+            p.returncode,
+            0,
+            f"Expected exitcode 0, got {p.returncode}\nSTDOUT={p.stdout}\nSTDERR={p.stderr}",
+        )
+
+        cp = run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{self.volume_src}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "cat /data/hello.txt",
+            ],
+            capture=True,
+            check=True,
+        )
+        self.assertEqual(
+            cp.stdout.strip(),
+            "hello",
+            f"Unexpected restored content. STDOUT={cp.stdout}\nSTDERR={cp.stderr}",
+        )


@@ -0,0 +1,131 @@
+# tests/e2e/test_e2e_images_no_backup_required_early_skip.py
+import unittest
+
+from .helpers import (
+    backup_path,
+    cleanup_docker,
+    create_minimal_compose_dir,
+    ensure_empty_dir,
+    latest_version_dir,
+    require_docker,
+    run,
+    unique,
+    write_databases_csv,
+)
+
+
+class TestE2EImagesNoBackupRequiredEarlySkip(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        require_docker()
+
+        cls.prefix = unique("baudolo-e2e-early-skip-no-backup-required")
+        cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
+        ensure_empty_dir(cls.backups_dir)
+        cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
+        cls.repo_name = cls.prefix
+
+        # --- Docker resources ---
+        cls.redis_container = f"{cls.prefix}-redis"
+        cls.ignored_volume = f"{cls.prefix}-redis-vol"
+        cls.normal_volume = f"{cls.prefix}-files-vol"
+
+        cls.containers = [cls.redis_container]
+        cls.volumes = [cls.ignored_volume, cls.normal_volume]
+
+        # Create volumes
+        run(["docker", "volume", "create", cls.ignored_volume])
+        run(["docker", "volume", "create", cls.normal_volume])
+
+        # Start redis container using the ignored volume
+        run(
+            [
+                "docker",
+                "run",
+                "-d",
+                "--name",
+                cls.redis_container,
+                "-v",
+                f"{cls.ignored_volume}:/data",
+                "redis:alpine",
+            ]
+        )
+
+        # Put deterministic content into the normal volume
+        run(
+            [
+                "docker",
+                "run",
+                "--rm",
+                "-v",
+                f"{cls.normal_volume}:/data",
+                "alpine:3.20",
+                "sh",
+                "-lc",
+                "mkdir -p /data && echo 'hello' > /data/hello.txt",
+            ]
+        )
+
+        # databases.csv required by CLI (can be empty)
+        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
+        write_databases_csv(cls.databases_csv, [])
+
+        # Run baudolo with images-no-backup-required redis
+        cmd = [
+            "baudolo",
+            "--compose-dir",
+            cls.compose_dir,
+            "--docker-compose-hard-restart-required",
+            "mailu",
+            "--repo-name",
+            cls.repo_name,
+            "--databases-csv",
+            cls.databases_csv,
+            "--backups-dir",
+            cls.backups_dir,
+            "--database-containers",
+            "dummy-db",
+            "--images-no-stop-required",
+            "alpine",
+            "redis",
+            "postgres",
+            "mariadb",
+            "mysql",
+            "--images-no-backup-required",
+            "redis",
+        ]
+        cp = run(cmd, capture=True, check=True)
+        cls.stdout = cp.stdout or ""
+        cls.stderr = cp.stderr or ""
+
+        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        cleanup_docker(containers=cls.containers, volumes=cls.volumes)
+
+    def test_ignored_volume_has_no_backup_directory_at_all(self) -> None:
+        p = backup_path(
+            self.backups_dir,
+            self.repo_name,
+            self.version,
+            self.ignored_volume,
+        )
+        self.assertFalse(
+            p.exists(),
+            f"Expected NO backup directory to be created for ignored volume, but found: {p}",
+        )
+
+    def test_normal_volume_is_still_backed_up(self) -> None:
+        p = (
+            backup_path(
+                self.backups_dir,
+                self.repo_name,
+                self.version,
+                self.normal_volume,
+            )
+            / "files"
+            / "hello.txt"
+        )
+        self.assertTrue(p.is_file(), f"Expected backed up file at: {p}")


@@ -62,8 +62,12 @@ class TestE2EMariaDBFull(unittest.TestCase):
         )

         # Liveness + actual SQL login readiness (TCP)
-        wait_for_mariadb(cls.db_container, root_password=cls.root_password, timeout_s=90)
-        wait_for_mariadb_sql(cls.db_container, user=cls.db_user, password=cls.db_password, timeout_s=90)
+        wait_for_mariadb(
+            cls.db_container, root_password=cls.root_password, timeout_s=90
+        )
+        wait_for_mariadb_sql(
+            cls.db_container, user=cls.db_user, password=cls.db_password, timeout_s=90
+        )

         # Create table + data via the dedicated user (TCP)
         run(
@@ -74,14 +78,17 @@ class TestE2EMariaDBFull(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{cls.db_user} -p{cls.db_password} "
-                f"-e \"CREATE TABLE {cls.db_name}.t (id INT PRIMARY KEY, v VARCHAR(50)); "
+                f'-e "CREATE TABLE {cls.db_name}.t (id INT PRIMARY KEY, v VARCHAR(50)); '
                 f"INSERT INTO {cls.db_name}.t VALUES (1,'ok');\"",
             ]
         )

         cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
         # IMPORTANT: baudolo backup expects credentials for the DB dump.
-        write_databases_csv(cls.databases_csv, [(cls.db_container, cls.db_name, cls.db_user, cls.db_password)])
+        write_databases_csv(
+            cls.databases_csv,
+            [(cls.db_container, cls.db_name, cls.db_user, cls.db_password)],
+        )

         # Backup with file+dump
         backup_run(
@@ -104,7 +111,7 @@ class TestE2EMariaDBFull(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{cls.db_user} -p{cls.db_password} "
-                f"-e \"DROP TABLE {cls.db_name}.t;\"",
+                f'-e "DROP TABLE {cls.db_name}.t;"',
             ]
         )
@@ -137,7 +144,11 @@ class TestE2EMariaDBFull(unittest.TestCase):
         cleanup_docker(containers=cls.containers, volumes=cls.volumes)

     def test_dump_file_exists(self) -> None:
-        p = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume) / "sql" / f"{self.db_name}.backup.sql"
+        p = (
+            backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
+            / "sql"
+            / f"{self.db_name}.backup.sql"
+        )
         self.assertTrue(p.is_file(), f"Expected dump file at: {p}")

     def test_data_restored(self) -> None:
@@ -149,7 +160,7 @@ class TestE2EMariaDBFull(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{self.db_user} -p{self.db_password} "
-                f"-N -e \"SELECT v FROM {self.db_name}.t WHERE id=1;\"",
+                f'-N -e "SELECT v FROM {self.db_name}.t WHERE id=1;"',
             ]
         )
         self.assertEqual((p.stdout or "").strip(), "ok")


@@ -60,8 +60,12 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
             ]
         )

-        wait_for_mariadb(cls.db_container, root_password=cls.root_password, timeout_s=90)
-        wait_for_mariadb_sql(cls.db_container, user=cls.db_user, password=cls.db_password, timeout_s=90)
+        wait_for_mariadb(
+            cls.db_container, root_password=cls.root_password, timeout_s=90
+        )
+        wait_for_mariadb_sql(
+            cls.db_container, user=cls.db_user, password=cls.db_password, timeout_s=90
+        )

         # Create table + data (TCP)
         run(
@@ -72,15 +76,18 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{cls.db_user} -p{cls.db_password} "
-                f"-e \"CREATE TABLE {cls.db_name}.t (id INT PRIMARY KEY, v VARCHAR(50)); "
+                f'-e "CREATE TABLE {cls.db_name}.t (id INT PRIMARY KEY, v VARCHAR(50)); '
                 f"INSERT INTO {cls.db_name}.t VALUES (1,'ok');\"",
             ]
         )

         cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
-        write_databases_csv(cls.databases_csv, [(cls.db_container, cls.db_name, cls.db_user, cls.db_password)])
+        write_databases_csv(
+            cls.databases_csv,
+            [(cls.db_container, cls.db_name, cls.db_user, cls.db_password)],
+        )

-        # dump-only => no files
+        # dump-only-sql => no files
         backup_run(
             backups_dir=cls.backups_dir,
             repo_name=cls.repo_name,
@@ -88,7 +95,7 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
             databases_csv=cls.databases_csv,
             database_containers=[cls.db_container],
             images_no_stop_required=["mariadb", "mysql", "alpine", "postgres"],
-            dump_only=True,
+            dump_only_sql=True,
         )
         cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
@@ -102,7 +109,7 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{cls.db_user} -p{cls.db_password} "
-                f"-e \"DROP TABLE {cls.db_name}.t;\"",
+                f'-e "DROP TABLE {cls.db_name}.t;"',
             ]
         )
@@ -135,7 +142,10 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
         cleanup_docker(containers=cls.containers, volumes=cls.volumes)

     def test_files_backup_not_present(self) -> None:
-        p = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume) / "files"
+        p = (
+            backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
+            / "files"
+        )
         self.assertFalse(p.exists(), f"Did not expect files backup dir at: {p}")

     def test_data_restored(self) -> None:
@@ -147,7 +157,7 @@ class TestE2EMariaDBNoCopy(unittest.TestCase):
                 "sh",
                 "-lc",
                 f"mariadb -h 127.0.0.1 -u{self.db_user} -p{self.db_password} "
-                f"-N -e \"SELECT v FROM {self.db_name}.t WHERE id=1;\"",
+                f'-N -e "SELECT v FROM {self.db_name}.t WHERE id=1;"',
             ]
         )
         self.assertEqual((p.stdout or "").strip(), "ok")

View File

@@ -33,26 +33,42 @@ class TestE2EPostgresFull(unittest.TestCase):
        run(["docker", "volume", "create", cls.pg_volume])
        run(
            [
                "docker",
                "run",
                "-d",
                "--name",
                cls.pg_container,
                "-e",
                "POSTGRES_PASSWORD=pgpw",
                "-e",
                "POSTGRES_DB=appdb",
                "-e",
                "POSTGRES_USER=postgres",
                "-v",
                f"{cls.pg_volume}:/var/lib/postgresql/data",
                "postgres:16",
            ]
        )
        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
        # Create a table + data
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                "psql -U postgres -d appdb -c \"CREATE TABLE t (id int primary key, v text); INSERT INTO t VALUES (1,'ok');\"",
            ]
        )
        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
        write_databases_csv(
            cls.databases_csv, [(cls.pg_container, "appdb", "postgres", "pgpw")]
        )
        backup_run(
            backups_dir=cls.backups_dir,
@@ -66,37 +82,62 @@ class TestE2EPostgresFull(unittest.TestCase):
        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
        # Wipe schema
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                'psql -U postgres -d appdb -c "DROP TABLE t;"',
            ]
        )
        # Restore
        run(
            [
                "baudolo-restore",
                "postgres",
                cls.pg_volume,
                cls.hash,
                cls.version,
                "--backups-dir",
                cls.backups_dir,
                "--repo-name",
                cls.repo_name,
                "--container",
                cls.pg_container,
                "--db-name",
                "appdb",
                "--db-user",
                "postgres",
                "--db-password",
                "pgpw",
                "--empty",
            ]
        )
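        # Hedged recap of the restore CLI shape exercised above: positional
        # <engine> <volume> <hash> <version>, then connection flags. The
        # --empty flag is read here as "clear the target database before
        # replaying the dump", an assumption from context, not a documented
        # guarantee.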
    @classmethod
    def tearDownClass(cls) -> None:
        cleanup_docker(containers=cls.containers, volumes=cls.volumes)

    def test_dump_file_exists(self) -> None:
        p = (
            backup_path(self.backups_dir, self.repo_name, self.version, self.pg_volume)
            / "sql"
            / "appdb.backup.sql"
        )
        self.assertTrue(p.is_file(), f"Expected dump file at: {p}")

    def test_data_restored(self) -> None:
        p = run(
            [
                "docker",
                "exec",
                self.pg_container,
                "sh",
                "-lc",
                'psql -U postgres -d appdb -t -c "SELECT v FROM t WHERE id=1;"',
            ]
        )
        self.assertEqual((p.stdout or "").strip(), "ok")

View File

@@ -32,25 +32,41 @@ class TestE2EPostgresNoCopy(unittest.TestCase):
        cls.volumes = [cls.pg_volume]
        run(["docker", "volume", "create", cls.pg_volume])
        run(
            [
                "docker",
                "run",
                "-d",
                "--name",
                cls.pg_container,
                "-e",
                "POSTGRES_PASSWORD=pgpw",
                "-e",
                "POSTGRES_DB=appdb",
                "-e",
                "POSTGRES_USER=postgres",
                "-v",
                f"{cls.pg_volume}:/var/lib/postgresql/data",
                "postgres:16",
            ]
        )
        wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                "psql -U postgres -d appdb -c \"CREATE TABLE t (id int primary key, v text); INSERT INTO t VALUES (1,'ok');\"",
            ]
        )
        cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
        write_databases_csv(
            cls.databases_csv, [(cls.pg_container, "appdb", "postgres", "pgpw")]
        )
        backup_run(
            backups_dir=cls.backups_dir,
@@ -59,41 +75,65 @@ class TestE2EPostgresNoCopy(unittest.TestCase):
            databases_csv=cls.databases_csv,
            database_containers=[cls.pg_container],
            images_no_stop_required=["postgres", "mariadb", "mysql", "alpine"],
            dump_only_sql=True,
        )
        cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
        run(
            [
                "docker",
                "exec",
                cls.pg_container,
                "sh",
                "-lc",
                'psql -U postgres -d appdb -c "DROP TABLE t;"',
            ]
        )
        run(
            [
                "baudolo-restore",
                "postgres",
                cls.pg_volume,
                cls.hash,
                cls.version,
                "--backups-dir",
                cls.backups_dir,
                "--repo-name",
                cls.repo_name,
                "--container",
                cls.pg_container,
                "--db-name",
                "appdb",
                "--db-user",
                "postgres",
                "--db-password",
                "pgpw",
                "--empty",
            ]
        )

    @classmethod
    def tearDownClass(cls) -> None:
        cleanup_docker(containers=cls.containers, volumes=cls.volumes)

    def test_files_backup_not_present(self) -> None:
        p = (
            backup_path(self.backups_dir, self.repo_name, self.version, self.pg_volume)
            / "files"
        )
        self.assertFalse(p.exists(), f"Did not expect files backup dir at: {p}")

    def test_data_restored(self) -> None:
        p = run(
            [
                "docker",
                "exec",
                self.pg_container,
                "sh",
                "-lc",
                'psql -U postgres -d appdb -t -c "SELECT v FROM t WHERE id=1;"',
            ]
        )
        self.assertEqual((p.stdout or "").strip(), "ok")

View File

@@ -0,0 +1,217 @@
import unittest
from .helpers import (
backup_path,
cleanup_docker,
create_minimal_compose_dir,
ensure_empty_dir,
latest_version_dir,
require_docker,
run,
unique,
wait_for_postgres,
)
class TestE2ESeedStarAndDbEntriesBackupPostgres(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
require_docker()
cls.prefix = unique("baudolo-e2e-seed-star-and-db")
cls.backups_dir = f"/tmp/{cls.prefix}/Backups"
ensure_empty_dir(cls.backups_dir)
cls.compose_dir = create_minimal_compose_dir(f"/tmp/{cls.prefix}")
cls.repo_name = cls.prefix
# --- Volumes ---
cls.db_volume = f"{cls.prefix}-vol-db"
cls.files_volume = f"{cls.prefix}-vol-files"
cls.volumes = [cls.db_volume, cls.files_volume]
run(["docker", "volume", "create", cls.db_volume])
run(["docker", "volume", "create", cls.files_volume])
# Put a marker into the non-db volume
cls.marker = "hello-non-db-seed-star"
run(
[
"docker",
"run",
"--rm",
"-v",
f"{cls.files_volume}:/data",
"alpine:3.20",
"sh",
"-lc",
f"echo '{cls.marker}' > /data/hello.txt",
]
)
# --- Start Postgres container using the DB volume ---
cls.pg_container = f"{cls.prefix}-pg"
cls.containers = [cls.pg_container]
cls.pg_password = "postgres"
cls.pg_user = "postgres"
run(
[
"docker",
"run",
"-d",
"--name",
cls.pg_container,
"-e",
f"POSTGRES_PASSWORD={cls.pg_password}",
"-v",
f"{cls.db_volume}:/var/lib/postgresql/data",
"postgres:16-alpine",
]
)
wait_for_postgres(cls.pg_container, user="postgres", timeout_s=90)
# Create two DBs and deterministic content, so pg_dumpall is meaningful
cls.pg_db1 = "testdb1"
cls.pg_db2 = "testdb2"
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db1};" || true; '
f'psql -U {cls.pg_user} -c "CREATE DATABASE {cls.pg_db2};" || true; '
),
],
check=True,
)
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db1} -c '
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db1') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
),
],
check=True,
)
run(
[
"docker",
"exec",
cls.pg_container,
"sh",
"-lc",
(
f'psql -U {cls.pg_user} -d {cls.pg_db2} -c '
'"CREATE TABLE IF NOT EXISTS t (id INT PRIMARY KEY, v TEXT);'
"INSERT INTO t(id,v) VALUES (1,'hello-db2') "
"ON CONFLICT (id) DO UPDATE SET v=EXCLUDED.v;\""
),
],
check=True,
)
# --- Seed databases.csv using CLI (star + concrete db) ---
cls.databases_csv = f"/tmp/{cls.prefix}/databases.csv"
# IMPORTANT: because we pass --database-containers <container>,
# get_instance() will use the container name as instance key.
instance = cls.pg_container
# Seed star entry (pg_dumpall)
run(["baudolo-seed", cls.databases_csv, instance, "*", cls.pg_user, cls.pg_password])
# Seed concrete DB entry (pg_dump)
run(
[
"baudolo-seed",
cls.databases_csv,
instance,
cls.pg_db1,
cls.pg_user,
cls.pg_password,
]
)
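        # Hedged illustration: after the two seed calls above, databases.csv
        # is expected to hold one '*' row and one concrete row, e.g.
        #
        #   instance;database;username;password
        #   <pg_container>;*;postgres;postgres
        #   <pg_container>;testdb1;postgres;postgres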
# --- Run baudolo with dump-only-sql ---
cmd = [
"baudolo",
"--compose-dir",
cls.compose_dir,
"--databases-csv",
cls.databases_csv,
"--database-containers",
cls.pg_container,
"--images-no-stop-required",
"alpine",
"postgres",
"mariadb",
"mysql",
"--dump-only-sql",
"--backups-dir",
cls.backups_dir,
"--repo-name",
cls.repo_name,
]
cp = run(cmd, capture=True, check=True)
cls.stdout = cp.stdout or ""
cls.stderr = cp.stderr or ""
cls.hash, cls.version = latest_version_dir(cls.backups_dir, cls.repo_name)
@classmethod
def tearDownClass(cls) -> None:
cleanup_docker(containers=cls.containers, volumes=cls.volumes)
def test_db_volume_has_cluster_dump_and_concrete_db_dump_and_no_files(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.db_volume)
sql_dir = base / "sql"
files_dir = base / "files"
self.assertTrue(sql_dir.exists(), f"Expected sql dir at: {sql_dir}")
self.assertFalse(
files_dir.exists(),
f"Did not expect files dir for DB volume when dump-only-sql succeeded: {files_dir}",
)
# Cluster dump file produced by '*' entry
cluster = sql_dir / f"{self.pg_container}.cluster.backup.sql"
self.assertTrue(cluster.is_file(), f"Expected cluster dump file at: {cluster}")
# Concrete DB dump produced by normal entry
db1 = sql_dir / f"{self.pg_db1}.backup.sql"
self.assertTrue(db1.is_file(), f"Expected db dump file at: {db1}")
# Basic sanity: cluster dump usually contains CREATE DATABASE statements
txt = cluster.read_text(encoding="utf-8", errors="ignore")
self.assertIn(
"CREATE DATABASE",
txt,
"Expected cluster dump to contain CREATE DATABASE statements",
)
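    # Hedged sketch (an assumption, not the real implementation) of the
    # naming convention the assertions above pin down: '*' entries yield one
    # cluster-wide dump per instance, concrete entries one dump per database.
    #
    #   def dump_filename(instance: str, database: str) -> str:
    #       if database == "*":
    #           return f"{instance}.cluster.backup.sql"
    #       return f"{database}.backup.sql"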
def test_non_db_volume_still_has_files_backup(self) -> None:
base = backup_path(self.backups_dir, self.repo_name, self.version, self.files_volume)
files_dir = base / "files"
self.assertTrue(files_dir.exists(), f"Expected files dir for non-DB volume at: {files_dir}")
marker = files_dir / "hello.txt"
self.assertTrue(marker.is_file(), f"Expected marker file at: {marker}")
self.assertEqual(
marker.read_text(encoding="utf-8").strip(),
self.marker,
)
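
# Hedged sketch of the per-volume behaviour the two tests above assert
# (inferred from the observed layout, not taken from the actual backup code):
def _backup_targets(is_db_volume: bool, dump_ok: bool, dump_only_sql: bool) -> list[str]:
    # DB volume with a successful dump: sql/ always; files/ only when not
    # running in --dump-only-sql mode.
    if is_db_volume and dump_ok:
        return ["sql"] if dump_only_sql else ["sql", "files"]
    # Non-DB volumes, and DB volumes whose dump failed, fall back to files/.
    return ["files"]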

View File

@@ -6,8 +6,48 @@ import unittest
from pathlib import Path


def run_seed(
    csv_path: Path, instance: str, database: str, username: str, password: str
) -> subprocess.CompletedProcess:
    """
    Run the real CLI module (E2E-style) using subprocess.

    Seed contract (current):
    - database must be "*" or a valid name (non-empty, matches allowed charset)
    - password is required
    - entry is keyed by (instance, database); username/password get updated
    """
    cp = subprocess.run(
        [
            sys.executable,
            "-m",
            "baudolo.seed",
            str(csv_path),
            instance,
            database,
            username,
            password,
        ],
        text=True,
        capture_output=True,
        check=False,
    )
    if cp.returncode != 0:
        raise AssertionError(
            "seed command failed unexpectedly.\n"
            f"returncode: {cp.returncode}\n"
            f"stdout:\n{cp.stdout}\n"
            f"stderr:\n{cp.stderr}\n"
        )
    return cp
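

# For illustration (hedged): a successful
#   run_seed(p, "docker.test", "appdb", "alice", "secret")
# is expected to leave a semicolon-delimited file whose content is exactly:
#
#   instance;database;username;password
#   docker.test;appdb;alice;secret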
def run_seed_expect_fail(
    csv_path: Path, instance: str, database: str, username: str, password: str
) -> subprocess.CompletedProcess:
    """
    Same as run_seed, but expects non-zero exit. Returns CompletedProcess for inspection.
    """
    return subprocess.run(
        [
            sys.executable,
@@ -21,7 +61,7 @@ def run_seed(csv_path: Path, instance: str, database: str, username: str, passwo
        ],
        text=True,
        capture_output=True,
        check=False,
    )
@@ -31,6 +71,10 @@ def read_csv_semicolon(path: Path) -> list[dict]:
    return list(reader)


def read_text(path: Path) -> str:
    return path.read_text(encoding="utf-8")


class TestSeedIntegration(unittest.TestCase):
    def test_creates_file_and_adds_entry_when_missing(self) -> None:
        with tempfile.TemporaryDirectory() as td:
@@ -39,7 +83,7 @@ class TestSeedIntegration(unittest.TestCase):
            cp = run_seed(p, "docker.test", "appdb", "alice", "secret")
            self.assertEqual(cp.returncode, 0)
            self.assertTrue(p.exists())
            rows = read_csv_semicolon(p)
@@ -49,40 +93,121 @@ class TestSeedIntegration(unittest.TestCase):
            self.assertEqual(rows[0]["username"], "alice")
            self.assertEqual(rows[0]["password"], "secret")

    def test_replaces_existing_entry_same_instance_and_database_updates_username_and_password(
        self,
    ) -> None:
        """
        Replacement semantics:
        - Key is (instance, database)
        - username/password are updated in-place
        """
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            # First add
            run_seed(p, "docker.test", "appdb", "alice", "oldpw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["username"], "alice")
            self.assertEqual(rows[0]["password"], "oldpw")
            run_seed(p, "docker.test", "appdb", "bob", "newpw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1, "Expected replacement, not a duplicate row")
            self.assertEqual(rows[0]["instance"], "docker.test")
            self.assertEqual(rows[0]["database"], "appdb")
            self.assertEqual(rows[0]["username"], "bob")
            self.assertEqual(rows[0]["password"], "newpw")

    def test_allows_star_database_for_dump_all(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed(p, "bigbluebutton", "*", "postgres", "pw")
            self.assertEqual(cp.returncode, 0)
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["instance"], "bigbluebutton")
            self.assertEqual(rows[0]["database"], "*")
            self.assertEqual(rows[0]["username"], "postgres")
            self.assertEqual(rows[0]["password"], "pw")

    def test_replaces_existing_star_entry(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "bigbluebutton", "*", "postgres", "pw1")
            run_seed(p, "bigbluebutton", "*", "postgres", "pw2")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["database"], "*")
            self.assertEqual(rows[0]["password"], "pw2")

    def test_rejects_empty_database_value(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("database", combined)
            self.assertIn("not empty", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")

    def test_rejects_invalid_database_name_characters(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "app db", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("invalid database name", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")

    def test_rejects_nan_database_name(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            cp = run_seed_expect_fail(p, "docker.test", "nan", "alice", "pw")
            self.assertNotEqual(cp.returncode, 0)
            combined = ((cp.stdout or "") + "\n" + (cp.stderr or "")).lower()
            self.assertIn("error:", combined)
            self.assertIn("must not be 'nan'", combined)
            self.assertFalse(p.exists(), "Should not create file on invalid input")

    def test_accepts_hyphen_and_underscore_database_names(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "docker.test", "my_db-1", "alice", "pw")
            rows = read_csv_semicolon(p)
            self.assertEqual(len(rows), 1)
            self.assertEqual(rows[0]["database"], "my_db-1")
    def test_file_is_semicolon_delimited_and_has_header(self) -> None:
        with tempfile.TemporaryDirectory() as td:
            p = Path(td) / "databases.csv"
            run_seed(p, "docker.test", "appdb", "alice", "pw")
            txt = read_text(p)
            self.assertTrue(
                txt.startswith("instance;database;username;password"),
                f"Unexpected header / delimiter in file:\n{txt}",
            )
            self.assertIn(";", txt)


if __name__ == "__main__":
    unittest.main()

View File

@@ -6,7 +6,9 @@ from baudolo.backup.app import requires_stop

class TestRequiresStop(unittest.TestCase):
    @patch("baudolo.backup.app.get_image_info")
    def test_requires_stop_false_when_all_images_are_whitelisted(
        self, mock_get_image_info
    ):
        # All containers use images containing allowed substrings
        mock_get_image_info.side_effect = [
            "repo/mastodon:v4",
@@ -17,7 +19,9 @@ class TestRequiresStop(unittest.TestCase):
        self.assertFalse(requires_stop(containers, whitelist))
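    # Hedged sketch of the whitelist semantics exercised in these tests
    # (substring matching is an assumption implied by the image names used):
    #
    #   def requires_stop(containers, whitelist):
    #       images = [get_image_info(c) for c in containers]
    #       return any(
    #           not any(allowed in image for allowed in whitelist)
    #           for image in images
    #       )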
    @patch("baudolo.backup.app.get_image_info")
    def test_requires_stop_true_when_any_image_is_not_whitelisted(
        self, mock_get_image_info
    ):
        mock_get_image_info.side_effect = [
            "repo/mastodon:v4",
            "repo/nginx:latest",