mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup.git
synced 2025-12-29 11:43:17 +00:00
- read databases.csv with stable types (dtype=str, keep_default_na=False) - validate database field: require '*' or concrete name (no empty/NaN) - support Postgres cluster dumps via '*' entries (pg_dumpall) - write SQL dumps atomically to avoid partial/empty files - early-skip fully ignored volumes before creating backup directories - update seed CLI to enforce new contract and update by (instance,database) - adjust tests: sql dir naming + add E2E coverage for early-skip and '*' seeding
145 lines
4.4 KiB
Python
145 lines
4.4 KiB
Python
from __future__ import annotations
|
|
|
|
import os
|
|
import pathlib
|
|
import re
|
|
import logging
|
|
from typing import Optional
|
|
|
|
import pandas
|
|
|
|
from .shell import BackupException, execute_shell_command
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
def get_instance(container: str, database_containers: list[str]) -> str:
|
|
"""
|
|
Derive a stable instance name from the container name.
|
|
"""
|
|
if container in database_containers:
|
|
return container
|
|
return re.split(r"(_|-)(database|db|postgres)", container)[0]
|
|
|
|
|
|
def _validate_database_value(value: Optional[str], *, instance: str) -> str:
|
|
"""
|
|
Enforce explicit database semantics:
|
|
|
|
- "*" => dump ALL databases (cluster dump for Postgres)
|
|
- "<name>" => dump exactly this database
|
|
- "" => invalid configuration (would previously result in NaN / nan.backup.sql)
|
|
"""
|
|
v = (value or "").strip()
|
|
if v == "":
|
|
raise ValueError(
|
|
f"Invalid databases.csv entry for instance '{instance}': "
|
|
"column 'database' must be '*' or a concrete database name (not empty)."
|
|
)
|
|
return v
|
|
|
|
|
|
def _atomic_write_cmd(cmd: str, out_file: str) -> None:
|
|
"""
|
|
Write dump output atomically:
|
|
- write to <file>.tmp
|
|
- rename to <file> only on success
|
|
|
|
This prevents empty or partial dump files from being treated as valid backups.
|
|
"""
|
|
tmp = f"{out_file}.tmp"
|
|
execute_shell_command(f"{cmd} > {tmp}")
|
|
execute_shell_command(f"mv {tmp} {out_file}")
|
|
|
|
|
|
def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
|
|
"""
|
|
Perform a full Postgres cluster dump using pg_dumpall.
|
|
"""
|
|
cmd = (
|
|
f"PGPASSWORD={password} docker exec -i {container} "
|
|
f"pg_dumpall -U {username} -h localhost"
|
|
)
|
|
_atomic_write_cmd(cmd, out_file)
|
|
|
|
|
|
def backup_database(
|
|
*,
|
|
container: str,
|
|
volume_dir: str,
|
|
db_type: str,
|
|
databases_df: "pandas.DataFrame",
|
|
database_containers: list[str],
|
|
) -> bool:
|
|
"""
|
|
Backup databases for a given DB container.
|
|
|
|
Returns True if at least one dump was produced.
|
|
"""
|
|
instance_name = get_instance(container, database_containers)
|
|
|
|
entries = databases_df[databases_df["instance"] == instance_name]
|
|
if entries.empty:
|
|
log.debug("No database entries for instance '%s'", instance_name)
|
|
return False
|
|
|
|
out_dir = os.path.join(volume_dir, "sql")
|
|
pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
produced = False
|
|
|
|
for row in entries.itertuples(index=False):
|
|
raw_db = getattr(row, "database", "")
|
|
user = (getattr(row, "username", "") or "").strip()
|
|
password = (getattr(row, "password", "") or "").strip()
|
|
|
|
db_value = _validate_database_value(raw_db, instance=instance_name)
|
|
|
|
# Explicit: dump ALL databases
|
|
if db_value == "*":
|
|
if db_type != "postgres":
|
|
raise ValueError(
|
|
f"databases.csv entry for instance '{instance_name}': "
|
|
"'*' is currently only supported for Postgres."
|
|
)
|
|
|
|
cluster_file = os.path.join(
|
|
out_dir, f"{instance_name}.cluster.backup.sql"
|
|
)
|
|
fallback_pg_dumpall(container, user, password, cluster_file)
|
|
produced = True
|
|
continue
|
|
|
|
# Concrete database dump
|
|
db_name = db_value
|
|
dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
|
|
|
|
if db_type == "mariadb":
|
|
cmd = (
|
|
f"docker exec {container} /usr/bin/mariadb-dump "
|
|
f"-u {user} -p{password} {db_name}"
|
|
)
|
|
_atomic_write_cmd(cmd, dump_file)
|
|
produced = True
|
|
continue
|
|
|
|
if db_type == "postgres":
|
|
try:
|
|
cmd = (
|
|
f"PGPASSWORD={password} docker exec -i {container} "
|
|
f"pg_dump -U {user} -d {db_name} -h localhost"
|
|
)
|
|
_atomic_write_cmd(cmd, dump_file)
|
|
produced = True
|
|
except BackupException as e:
|
|
# Explicit DB dump failed -> hard error
|
|
raise BackupException(
|
|
f"Postgres dump failed for instance '{instance_name}', "
|
|
f"database '{db_name}'. This database was explicitly configured "
|
|
"and therefore must succeed.\n"
|
|
f"{e}"
|
|
)
|
|
continue
|
|
|
|
return produced
|