Summary of this patch (free text before the first "diff --git" header).

* Dockerfile: drop Debian's docker.io package and install docker-ce-cli from
  Docker's apt repository (curl and gnupg are added to fetch the repo key).
  The build-time client check uses `docker --version`, which needs no daemon.
* src/baudolo/backup/app.py: read databases.csv through _load_databases_df(),
  which warns on stderr and returns an empty DataFrame with the expected
  columns when the file is missing or empty.
* tests/unit/backup/test_app_databases_csv.py: new unit tests for
  _load_databases_df() (missing file, empty file, valid file).
* tests/unit/seed/test_main.py: wrap long calls and look through every
  recorded print() call for the empty-CSV warning.

diff --git a/Dockerfile b/Dockerfile
index 4e4a693..9895f85 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,33 +3,36 @@ FROM python:3.11-slim
 
 WORKDIR /app
 
-# Runtime + build essentials:
-# - rsync: required for file backup/restore
-# - ca-certificates: TLS
-# - docker-cli: needed if you want to control the host Docker engine (via /var/run/docker.sock mount)
-# - make: to delegate install logic to Makefile
-#
-# Notes:
-# - On Debian slim, the docker client package is typically "docker.io".
-# - If you only want restore-without-docker, you can drop docker.io later.
+# Base deps for build/runtime + docker repo key
 RUN apt-get update && apt-get install -y --no-install-recommends \
     make \
     rsync \
     ca-certificates \
-    docker.io \
     bash \
+    curl \
+    gnupg \
  && rm -rf /var/lib/apt/lists/*
 
+# Install Docker CLI (docker-ce-cli) from Docker's official apt repo
+RUN bash -lc "set -euo pipefail \
+  && install -m 0755 -d /etc/apt/keyrings \
+  && curl -fsSL https://download.docker.com/linux/debian/gpg \
+     | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \
+  && chmod a+r /etc/apt/keyrings/docker.gpg \
+  && . /etc/os-release \
+  && echo \"deb [arch=\$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian \${VERSION_CODENAME} stable\" \
+     > /etc/apt/sources.list.d/docker.list \
+  && apt-get update \
+  && apt-get install -y --no-install-recommends docker-ce-cli \
+  && rm -rf /var/lib/apt/lists/*"
+
 # Fail fast if docker client is missing
+# (client-only check: `docker --version` does not need a running daemon)
+RUN docker --version
 RUN command -v docker
 
 COPY . .
-
-# All install decisions are handled by the Makefile.
 RUN make install
 
-# Sensible defaults (can be overridden at runtime)
 ENV PYTHONUNBUFFERED=1
-
-# Default: show CLI help
 CMD ["baudolo", "--help"]
diff --git a/src/baudolo/backup/app.py b/src/baudolo/backup/app.py
index 8ffb296..8f2a951 100644
--- a/src/baudolo/backup/app.py
+++ b/src/baudolo/backup/app.py
@@ -2,10 +2,12 @@ from __future__ import annotations
 
 import os
 import pathlib
+import sys
 from datetime import datetime
 
 import pandas
 from dirval import create_stamp_file
+from pandas.errors import EmptyDataError
 
 from .cli import parse_args
 from .compose import handle_docker_compose_services
@@ -96,6 +98,42 @@ def backup_mariadb_or_postgres(
     return False, False
 
 
+def _empty_databases_df() -> "pandas.DataFrame":
+    """
+    Create an empty DataFrame with the expected schema for databases.csv.
+
+    This allows the backup to continue without DB dumps when the CSV is missing
+    or empty (pandas EmptyDataError).
+    """
+    return pandas.DataFrame(columns=["instance", "database", "username", "password"])
+
+
+def _load_databases_df(csv_path: str) -> "pandas.DataFrame":
+    """
+    Load databases.csv robustly.
+
+    - Missing file -> warn, continue with empty df
+    - Empty file -> warn, continue with empty df
+    - Valid CSV -> return dataframe
+    """
+    try:
+        return pandas.read_csv(csv_path, sep=";", keep_default_na=False, dtype=str)
+    except FileNotFoundError:
+        print(
+            f"WARNING: databases.csv not found: {csv_path}. Continuing without database dumps.",
+            file=sys.stderr,
+            flush=True,
+        )
+        return _empty_databases_df()
+    except EmptyDataError:
+        print(
+            f"WARNING: databases.csv exists but is empty: {csv_path}. Continuing without database dumps.",
+            file=sys.stderr,
+            flush=True,
+        )
+        return _empty_databases_df()
+
+
 def _backup_dumps_for_volume(
     *,
     containers: list[str],
@@ -136,9 +174,10 @@ def main() -> int:
     # IMPORTANT:
     # - keep_default_na=False prevents empty fields from turning into NaN
     # - dtype=str keeps all columns stable for comparisons/validation
-    databases_df = pandas.read_csv(
-        args.databases_csv, sep=";", keep_default_na=False, dtype=str
-    )
+    #
+    # Robust behavior:
+    # - if the file is missing or empty, we continue without DB dumps.
+    databases_df = _load_databases_df(args.databases_csv)
 
     print("💾 Start volume backups...", flush=True)
 
@@ -168,7 +207,8 @@ def main() -> int:
             if found_db:
                 if not dumped_any:
                     print(
-                        f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
+                        f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. "
+                        "Falling back to file backup.",
                         flush=True,
                     )
                     # fall through to file backup below
diff --git a/tests/unit/backup/test_app_databases_csv.py b/tests/unit/backup/test_app_databases_csv.py
new file mode 100644
index 0000000..3791416
--- /dev/null
+++ b/tests/unit/backup/test_app_databases_csv.py
@@ -0,0 +1,77 @@
+import io
+import os
+import tempfile
+import unittest
+from contextlib import redirect_stderr
+
+import pandas as pd
+
+# Adjust if your package name/import path differs.
+from baudolo.backup.app import _load_databases_df
+
+
+EXPECTED_COLUMNS = ["instance", "database", "username", "password"]
+
+
+class TestLoadDatabasesDf(unittest.TestCase):
+    def test_missing_csv_is_handled_with_warning_and_empty_df(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            missing_path = os.path.join(td, "does-not-exist.csv")
+
+            buf = io.StringIO()
+            with redirect_stderr(buf):
+                df = _load_databases_df(missing_path)
+
+            stderr = buf.getvalue()
+            self.assertIn("WARNING:", stderr)
+            self.assertIn("databases.csv not found", stderr)
+
+            self.assertIsInstance(df, pd.DataFrame)
+            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
+            self.assertTrue(df.empty)
+
+    def test_empty_csv_is_handled_with_warning_and_empty_df(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            empty_path = os.path.join(td, "databases.csv")
+            # Create an empty file (0 bytes)
+            with open(empty_path, "w", encoding="utf-8") as f:
+                f.write("")
+
+            buf = io.StringIO()
+            with redirect_stderr(buf):
+                df = _load_databases_df(empty_path)
+
+            stderr = buf.getvalue()
+            self.assertIn("WARNING:", stderr)
+            self.assertIn("exists but is empty", stderr)
+
+            self.assertIsInstance(df, pd.DataFrame)
+            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
+            self.assertTrue(df.empty)
+
+    def test_valid_csv_loads_without_warning(self) -> None:
+        with tempfile.TemporaryDirectory() as td:
+            csv_path = os.path.join(td, "databases.csv")
+
+            content = "instance;database;username;password\nmyapp;*;dbuser;secret\n"
+            with open(csv_path, "w", encoding="utf-8") as f:
+                f.write(content)
+
+            buf = io.StringIO()
+            with redirect_stderr(buf):
+                df = _load_databases_df(csv_path)
+
+            stderr = buf.getvalue()
+            self.assertEqual(stderr, "")  # no warning expected
+
+            self.assertIsInstance(df, pd.DataFrame)
+            self.assertListEqual(list(df.columns), EXPECTED_COLUMNS)
+            self.assertEqual(len(df), 1)
+            self.assertEqual(df.loc[0, "instance"], "myapp")
+            self.assertEqual(df.loc[0, "database"], "*")
+            self.assertEqual(df.loc[0, "username"], "dbuser")
+            self.assertEqual(df.loc[0, "password"], "secret")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/unit/seed/test_main.py b/tests/unit/seed/test_main.py
index ce9199b..a6e87b5 100644
--- a/tests/unit/seed/test_main.py
+++ b/tests/unit/seed/test_main.py
@@ -84,7 +84,9 @@ class TestSeedMain(unittest.TestCase):
         read_csv.assert_not_called()
         empty_df.assert_called_once()
         concat.assert_called_once()
-        df_out.to_csv.assert_called_once_with("/tmp/databases.csv", sep=";", index=False)
+        df_out.to_csv.assert_called_once_with(
+            "/tmp/databases.csv", sep=";", index=False
+        )
 
     @patch("baudolo.seed.__main__.os.path.exists", return_value=True)
     @patch("baudolo.seed.__main__.pd.read_csv", side_effect=EmptyDataError("empty"))
@@ -116,16 +118,26 @@ class TestSeedMain(unittest.TestCase):
         exists.assert_called_once_with("/tmp/databases.csv")
         read_csv.assert_called_once()
         empty_df.assert_called_once()
-
-        # warning printed to stderr
-        self.assertTrue(print_.called)
-        args, kwargs = print_.call_args
-        self.assertIn("WARNING: databases.csv exists but is empty", args[0])
-        self.assertEqual(kwargs.get("file"), seed_main.sys.stderr)
-
         concat.assert_called_once()
-        df_out.toF.to_csv.assert_not_called()  # keep lint happy if you use it
-        df_out.to_csv.assert_called_once_with("/tmp/databases.csv", sep=";", index=False)
+
+        # Assert: at least one print call contains the WARNING and prints to stderr
+        warning_calls = []
+        for call in print_.call_args_list:
+            args, kwargs = call
+            if args and "WARNING: databases.csv exists but is empty" in str(args[0]):
+                warning_calls.append((args, kwargs))
+
+        self.assertTrue(
+            warning_calls,
+            "Expected a WARNING print when databases.csv is empty, but none was found.",
+        )
+        # Ensure the warning goes to stderr
+        _, warn_kwargs = warning_calls[0]
+        self.assertEqual(warn_kwargs.get("file"), seed_main.sys.stderr)
+
+        df_out.to_csv.assert_called_once_with(
+            "/tmp/databases.csv", sep=";", index=False
+        )
 
     @patch("baudolo.seed.__main__.os.path.exists", return_value=True)
     @patch("baudolo.seed.__main__.pd.read_csv")
@@ -172,7 +184,9 @@ class TestSeedMain(unittest.TestCase):
 
     @patch("baudolo.seed.__main__.sys.exit")
     @patch("baudolo.seed.__main__.print")
-    @patch("baudolo.seed.__main__.check_and_add_entry", side_effect=RuntimeError("boom"))
+    @patch(
+        "baudolo.seed.__main__.check_and_add_entry", side_effect=RuntimeError("boom")
+    )
     @patch("baudolo.seed.__main__.argparse.ArgumentParser.parse_args")
     def test_main_exits_nonzero_on_error(
         self,