mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup.git
synced 2026-02-02 11:04:06 +00:00
fix(backup,ci): make databases.csv optional and upgrade Docker CLI in image
- Handle missing or empty databases.csv gracefully with warnings and an empty DataFrame
- Add unit tests for robust databases.csv loading behavior
- Adjust seed tests to assert warnings across multiple print calls
- Replace Debian docker.io with docker-ce-cli to avoid a Docker API version mismatch
- Install required build tools (curl, gnupg) for Docker repo setup

https://chatgpt.com/share/697e6d9d-6458-800f-9d12-1e337509be4e
This commit is contained in:
32
Dockerfile
32
Dockerfile
@@ -3,33 +3,35 @@ FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Runtime + build essentials:
|
||||
# - rsync: required for file backup/restore
|
||||
# - ca-certificates: TLS
|
||||
# - docker-cli: needed if you want to control the host Docker engine (via /var/run/docker.sock mount)
|
||||
# - make: to delegate install logic to Makefile
|
||||
#
|
||||
# Notes:
|
||||
# - On Debian slim, the docker client package is typically "docker.io".
|
||||
# - If you only want restore-without-docker, you can drop docker.io later.
|
||||
# Base deps for build/runtime + docker repo key
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
make \
|
||||
rsync \
|
||||
ca-certificates \
|
||||
docker.io \
|
||||
bash \
|
||||
curl \
|
||||
gnupg \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Docker CLI (docker-ce-cli) from Docker's official apt repo
|
||||
RUN bash -lc "set -euo pipefail \
|
||||
&& install -m 0755 -d /etc/apt/keyrings \
|
||||
&& curl -fsSL https://download.docker.com/linux/debian/gpg \
|
||||
| gpg --dearmor -o /etc/apt/keyrings/docker.gpg \
|
||||
&& chmod a+r /etc/apt/keyrings/docker.gpg \
|
||||
&& . /etc/os-release \
|
||||
&& echo \"deb [arch=\$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian \${VERSION_CODENAME} stable\" \
|
||||
> /etc/apt/sources.list.d/docker.list \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends docker-ce-cli \
|
||||
&& rm -rf /var/lib/apt/lists/*"
|
||||
|
||||
# Fail fast if docker client is missing
|
||||
RUN docker version || true
|
||||
RUN command -v docker
|
||||
|
||||
COPY . .
|
||||
|
||||
# All install decisions are handled by the Makefile.
|
||||
RUN make install
|
||||
|
||||
# Sensible defaults (can be overridden at runtime)
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Default: show CLI help
|
||||
CMD ["baudolo", "--help"]
|
||||
|
||||
@@ -2,10 +2,12 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import pandas
|
||||
from dirval import create_stamp_file
|
||||
from pandas.errors import EmptyDataError
|
||||
|
||||
from .cli import parse_args
|
||||
from .compose import handle_docker_compose_services
|
||||
@@ -96,6 +98,42 @@ def backup_mariadb_or_postgres(
|
||||
return False, False
|
||||
|
||||
|
||||
def _empty_databases_df() -> "pandas.DataFrame":
    """
    Create an empty DataFrame with the expected schema for databases.csv.

    This allows the backup to continue without DB dumps when the CSV is missing
    or empty (pandas EmptyDataError).

    Returns:
        A zero-row DataFrame whose columns are, in order:
        ``instance``, ``database``, ``username``, ``password``.
    """
    return pandas.DataFrame(columns=["instance", "database", "username", "password"])
|
||||
|
||||
|
||||
def _load_databases_df(csv_path: str) -> "pandas.DataFrame":
    """
    Load databases.csv robustly.

    - Missing file -> warn, continue with empty df
    - Empty file   -> warn, continue with empty df
    - Valid CSV    -> return dataframe

    Args:
        csv_path: Path to the semicolon-separated databases.csv file.

    Returns:
        The parsed DataFrame with every column read as ``str``, or the empty
        frame produced by ``_empty_databases_df()`` when the file is missing
        or empty. Warnings are written to stderr, not raised.
    """
    try:
        # keep_default_na=False keeps empty fields as "" instead of NaN;
        # dtype=str keeps all columns stable for comparisons/validation.
        return pandas.read_csv(csv_path, sep=";", keep_default_na=False, dtype=str)
    except FileNotFoundError:
        print(
            f"WARNING: databases.csv not found: {csv_path}. Continuing without database dumps.",
            file=sys.stderr,
            flush=True,
        )
        return _empty_databases_df()
    except EmptyDataError:
        print(
            f"WARNING: databases.csv exists but is empty: {csv_path}. Continuing without database dumps.",
            file=sys.stderr,
            flush=True,
        )
        return _empty_databases_df()
|
||||
|
||||
|
||||
def _backup_dumps_for_volume(
|
||||
*,
|
||||
containers: list[str],
|
||||
@@ -136,9 +174,10 @@ def main() -> int:
|
||||
# IMPORTANT:
|
||||
# - keep_default_na=False prevents empty fields from turning into NaN
|
||||
# - dtype=str keeps all columns stable for comparisons/validation
|
||||
databases_df = pandas.read_csv(
|
||||
args.databases_csv, sep=";", keep_default_na=False, dtype=str
|
||||
)
|
||||
#
|
||||
# Robust behavior:
|
||||
# - if the file is missing or empty, we continue without DB dumps.
|
||||
databases_df = _load_databases_df(args.databases_csv)
|
||||
|
||||
print("💾 Start volume backups...", flush=True)
|
||||
|
||||
@@ -168,7 +207,8 @@ def main() -> int:
|
||||
if found_db:
|
||||
if not dumped_any:
|
||||
print(
|
||||
f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
|
||||
f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. "
|
||||
"Falling back to file backup.",
|
||||
flush=True,
|
||||
)
|
||||
# fall through to file backup below
|
||||
|
||||
77
tests/unit/backup/test_app_databases_csv.py
Normal file
77
tests/unit/backup/test_app_databases_csv.py
Normal file
@@ -0,0 +1,77 @@
|
||||
import io
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from contextlib import redirect_stderr
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Adjust if your package name/import path differs.
|
||||
from baudolo.backup.app import _load_databases_df
|
||||
|
||||
|
||||
EXPECTED_COLUMNS = ["instance", "database", "username", "password"]
|
||||
|
||||
|
||||
class TestLoadDatabasesDf(unittest.TestCase):
    """Unit tests for ``_load_databases_df`` covering missing, empty and valid CSV files."""

    def test_missing_csv_is_handled_with_warning_and_empty_df(self) -> None:
        """A path that does not exist yields a stderr warning and an empty, correctly-shaped frame."""
        with tempfile.TemporaryDirectory() as td:
            missing_path = os.path.join(td, "does-not-exist.csv")

            buf = io.StringIO()
            with redirect_stderr(buf):
                df = _load_databases_df(missing_path)

            stderr = buf.getvalue()
            self.assertIn("WARNING:", stderr)
            self.assertIn("databases.csv not found", stderr)

            self.assertIsInstance(df, pd.DataFrame)
            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
            self.assertTrue(df.empty)

    def test_empty_csv_is_handled_with_warning_and_empty_df(self) -> None:
        """A zero-byte file yields a stderr warning and an empty, correctly-shaped frame."""
        with tempfile.TemporaryDirectory() as td:
            empty_path = os.path.join(td, "databases.csv")
            # Create an empty file (0 bytes)
            with open(empty_path, "w", encoding="utf-8") as f:
                f.write("")

            buf = io.StringIO()
            with redirect_stderr(buf):
                df = _load_databases_df(empty_path)

            stderr = buf.getvalue()
            self.assertIn("WARNING:", stderr)
            self.assertIn("exists but is empty", stderr)

            self.assertIsInstance(df, pd.DataFrame)
            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
            self.assertTrue(df.empty)

    def test_valid_csv_loads_without_warning(self) -> None:
        """A well-formed one-row CSV loads as-is and writes nothing to stderr."""
        with tempfile.TemporaryDirectory() as td:
            csv_path = os.path.join(td, "databases.csv")

            content = "instance;database;username;password\nmyapp;*;dbuser;secret\n"
            with open(csv_path, "w", encoding="utf-8") as f:
                f.write(content)

            buf = io.StringIO()
            with redirect_stderr(buf):
                df = _load_databases_df(csv_path)

            stderr = buf.getvalue()
            self.assertEqual(stderr, "")  # no warning expected

            self.assertIsInstance(df, pd.DataFrame)
            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
            self.assertEqual(len(df), 1)
            self.assertEqual(df.loc[0, "instance"], "myapp")
            self.assertEqual(df.loc[0, "database"], "*")
            self.assertEqual(df.loc[0, "username"], "dbuser")
            self.assertEqual(df.loc[0, "password"], "secret")
|
||||
|
||||
|
||||
# Allow running this test module directly (python test_app_databases_csv.py).
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -84,7 +84,9 @@ class TestSeedMain(unittest.TestCase):
|
||||
read_csv.assert_not_called()
|
||||
empty_df.assert_called_once()
|
||||
concat.assert_called_once()
|
||||
df_out.to_csv.assert_called_once_with("/tmp/databases.csv", sep=";", index=False)
|
||||
df_out.to_csv.assert_called_once_with(
|
||||
"/tmp/databases.csv", sep=";", index=False
|
||||
)
|
||||
|
||||
@patch("baudolo.seed.__main__.os.path.exists", return_value=True)
|
||||
@patch("baudolo.seed.__main__.pd.read_csv", side_effect=EmptyDataError("empty"))
|
||||
@@ -116,16 +118,26 @@ class TestSeedMain(unittest.TestCase):
|
||||
exists.assert_called_once_with("/tmp/databases.csv")
|
||||
read_csv.assert_called_once()
|
||||
empty_df.assert_called_once()
|
||||
|
||||
# warning printed to stderr
|
||||
self.assertTrue(print_.called)
|
||||
args, kwargs = print_.call_args
|
||||
self.assertIn("WARNING: databases.csv exists but is empty", args[0])
|
||||
self.assertEqual(kwargs.get("file"), seed_main.sys.stderr)
|
||||
|
||||
concat.assert_called_once()
|
||||
df_out.toF.to_csv.assert_not_called() # keep lint happy if you use it
|
||||
df_out.to_csv.assert_called_once_with("/tmp/databases.csv", sep=";", index=False)
|
||||
|
||||
# Assert: at least one print call contains the WARNING and prints to stderr
|
||||
warning_calls = []
|
||||
for call in print_.call_args_list:
|
||||
args, kwargs = call
|
||||
if args and "WARNING: databases.csv exists but is empty" in str(args[0]):
|
||||
warning_calls.append((args, kwargs))
|
||||
|
||||
self.assertTrue(
|
||||
warning_calls,
|
||||
"Expected a WARNING print when databases.csv is empty, but none was found.",
|
||||
)
|
||||
# Ensure the warning goes to stderr
|
||||
_, warn_kwargs = warning_calls[0]
|
||||
self.assertEqual(warn_kwargs.get("file"), seed_main.sys.stderr)
|
||||
|
||||
df_out.to_csv.assert_called_once_with(
|
||||
"/tmp/databases.csv", sep=";", index=False
|
||||
)
|
||||
|
||||
@patch("baudolo.seed.__main__.os.path.exists", return_value=True)
|
||||
@patch("baudolo.seed.__main__.pd.read_csv")
|
||||
@@ -172,7 +184,9 @@ class TestSeedMain(unittest.TestCase):
|
||||
|
||||
@patch("baudolo.seed.__main__.sys.exit")
|
||||
@patch("baudolo.seed.__main__.print")
|
||||
@patch("baudolo.seed.__main__.check_and_add_entry", side_effect=RuntimeError("boom"))
|
||||
@patch(
|
||||
"baudolo.seed.__main__.check_and_add_entry", side_effect=RuntimeError("boom")
|
||||
)
|
||||
@patch("baudolo.seed.__main__.argparse.ArgumentParser.parse_args")
|
||||
def test_main_exits_nonzero_on_error(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user