Implemented new parameters to make it more flexible for cymais

Kevin Veen-Birkenbach 2025-07-14 18:47:25 +02:00
parent 8c4ae60a6a
commit 2d2376eac8
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
7 changed files with 220 additions and 146 deletions

.gitignore (vendored): 1 line changed

@@ -1 +1,2 @@
databases.csv
__pycache__

Makefile (new file): 4 lines

@@ -0,0 +1,4 @@
.PHONY: test
test:
	python -m unittest discover -s tests/unit -p "test_*.py"

__init__.py (new, empty file)

backup-docker-to-local.py

@@ -16,10 +16,19 @@ class BackupException(Exception):

def execute_shell_command(command):
    """Execute a shell command and return its output."""
    print(command)
    process = subprocess.Popen(
        [command],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True
    )
    out, err = process.communicate()
    if process.returncode != 0:
        raise BackupException(
            f"Error in command: {command}\n"
            f"Output: {out}\nError: {err}\n"
            f"Exit code: {process.returncode}"
        )
    return [line.decode("utf-8") for line in out.splitlines()]

def create_version_directory():
@@ -34,8 +43,9 @@ def get_machine_id():

### GLOBAL CONFIGURATION ###
# Container names treated as special instances for database backups
SPECIAL_INSTANCES = ['central-mariadb', 'central-postgres']

# Images which do not require container stop for file backups
IMAGES_NO_STOP_REQUIRED = [
    'akaunting',
    'baserow',
@@ -49,13 +59,15 @@ IMAGES_NO_STOP_REQUIRED = [
    'openproject',
    'peertube',
    'pixelfed',
    'wordpress'
]

# Images to skip entirely
IMAGES_NO_BACKUP_REQUIRED = [
    'redis',
    'memcached'
]

# Compose dirs requiring hard restart
DOCKER_COMPOSE_HARD_RESTART_REQUIRED = ['mailu']

# DEFINE CONSTANTS
DIRNAME = os.path.dirname(__file__)
@@ -69,33 +81,20 @@ BACKUP_TIME = datetime.now().strftime("%Y%m%d%H%M%S")

VERSION_DIR = create_version_directory()

def get_instance(container):
    """Extract the database instance name based on container name."""
    if container in SPECIAL_INSTANCES:
        instance_name = container
    else:
        instance_name = re.split("(_|-)(database|db|postgres)", container)[0]
    print(f"Extracted instance name: {instance_name}")
    return instance_name
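
For reference, a minimal sketch of what the re.split() call yields; the container names below are purely illustrative, and a special instance such as central-postgres is returned verbatim before the regex is ever applied:

import re

# Illustrative container names, not taken from a real deployment.
for container in ["central-db", "nextcloud_database_1"]:
    parts = re.split("(_|-)(database|db|postgres)", container)
    print(container, "->", parts[0])
# central-db -> central
# nextcloud_database_1 -> nextcloud
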

def stamp_directory():
    """Stamp a directory using directory-validator."""
    stamp_command = (
        f"python {SCRIPTS_DIRECTORY}/directory-validator/"
        f"directory-validator.py --stamp {VERSION_DIR}"
    )
    try:
        execute_shell_command(stamp_command)
        print(f"Successfully stamped directory: {VERSION_DIR}")
@@ -107,66 +106,80 @@ def backup_database(container, volume_dir, db_type):
    """Backup database (MariaDB or PostgreSQL) if applicable."""
    print(f"Starting database backup for {container} using {db_type}...")
    instance_name = get_instance(container)
    database_entries = DATABASES.loc[DATABASES['instance'] == instance_name]
    if database_entries.empty:
        raise BackupException(f"No entry found for instance '{instance_name}'")
    for database_entry in database_entries.iloc:
        database_name = database_entry['database']
        database_username = database_entry['username']
        database_password = database_entry['password']
        backup_destination_dir = os.path.join(volume_dir, "sql")
        pathlib.Path(backup_destination_dir).mkdir(parents=True, exist_ok=True)
        backup_destination_file = os.path.join(
            backup_destination_dir,
            f"{database_name}.backup.sql"
        )
        if db_type == 'mariadb':
            cmd = (
                f"docker exec {container} "
                f"/usr/bin/mariadb-dump -u {database_username} "
                f"-p{database_password} {database_name} > {backup_destination_file}"
            )
            execute_shell_command(cmd)
        if db_type == 'postgres':
            cluster_file = os.path.join(
                backup_destination_dir,
                f"{instance_name}.cluster.backup.sql"
            )
            if not database_name:
                fallback_pg_dumpall(
                    container,
                    database_username,
                    database_password,
                    cluster_file
                )
                return
            try:
                if database_password:
                    cmd = (
                        f"PGPASSWORD={database_password} docker exec -i {container} "
                        f"pg_dump -U {database_username} -d {database_name} "
                        f"-h localhost > {backup_destination_file}"
                    )
                else:
                    cmd = (
                        f"docker exec -i {container} pg_dump -U {database_username} "
                        f"-d {database_name} -h localhost --no-password "
                        f"> {backup_destination_file}"
                    )
                execute_shell_command(cmd)
            except BackupException as e:
                print(f"pg_dump failed: {e}")
                print(f"Falling back to pg_dumpall for instance '{instance_name}'")
                fallback_pg_dumpall(
                    container,
                    database_username,
                    database_password,
                    cluster_file
                )
    print(f"Database backup for database {container} completed.")
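
DATABASES is the table read from databases.csv (semicolon-separated, header instance;database;username;password, the same header the unit test below writes). A minimal sketch of the lookup that backup_database performs, assuming the table is held in a pandas DataFrame:

import pandas as pd

# Inline stand-in for databases.csv; the real file uses the header
# instance;database;username;password and is parsed into DATABASES (assumption).
DATABASES = pd.DataFrame([
    {"instance": "central-mariadb", "database": "nextcloud",
     "username": "nc_user", "password": "secret"},
])

entries = DATABASES.loc[DATABASES["instance"] == "central-mariadb"]
for entry in entries.iloc:          # one dump per matching row
    print(entry["database"], entry["username"])
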

def get_last_backup_dir(volume_name, current_backup_dir):
    """Get the most recent backup directory for the specified volume."""
    versions = sorted(os.listdir(VERSIONS_DIR), reverse=True)
    for version in versions:
        backup_dir = os.path.join(
            VERSIONS_DIR, version, volume_name, "files", ""
        )
        if backup_dir != current_backup_dir and os.path.isdir(backup_dir):
            return backup_dir
    print(f"No previous backups available for volume: {volume_name}")
    return None

def getStoragePath(volume_name):
    path = execute_shell_command(
        f"docker volume inspect --format '{{{{ .Mountpoint }}}}' {volume_name}"
    )[0]
    return f"{path}/"

def getFileRsyncDestinationPath(volume_dir):
@@ -176,133 +189,110 @@ def getFileRsyncDestinationPath(volume_dir):

def fallback_pg_dumpall(container, username, password, backup_destination_file):
    """Fallback function to run pg_dumpall if pg_dump fails or no DB is defined."""
    print(f"Running pg_dumpall for container '{container}'...")
    cmd = (
        f"PGPASSWORD={password} docker exec -i {container} "
        f"pg_dumpall -U {username} -h localhost > {backup_destination_file}"
    )
    execute_shell_command(cmd)

def backup_volume(volume_name, volume_dir):
    """Perform incremental file backup of a Docker volume."""
    try:
        print(f"Starting backup routine for volume: {volume_name}")
        dest = getFileRsyncDestinationPath(volume_dir)
        pathlib.Path(dest).mkdir(parents=True, exist_ok=True)
        last = get_last_backup_dir(volume_name, dest)
        link_dest = f"--link-dest='{last}'" if last else ""
        source = getStoragePath(volume_name)
        cmd = (
            f"rsync -abP --delete --delete-excluded "
            f"{link_dest} {source} {dest}"
        )
        execute_shell_command(cmd)
    except BackupException as e:
        if "file has vanished" in str(e):
            print("Warning: Some files vanished before transfer. Continuing.")
        else:
            raise
    print(f"Backup routine for volume: {volume_name} completed.")
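
The rsync flags are what make each version directory an incremental snapshot: -a preserves attributes, -b keeps backup copies of files rsync would overwrite, --delete and --delete-excluded mirror removals, and --link-dest hard-links files that are unchanged relative to the previous version, so only new or changed data consumes space. A minimal sketch of the two command shapes, with purely illustrative paths:

# Illustrative paths; the script derives them from the Docker volume
# mountpoint and the current VERSION_DIR.
source = "/var/lib/docker/volumes/example_volume/_data/"
dest = "/backups/20250714/example_volume/files/"
previous = "/backups/20250713/example_volume/files/"

# First backup of a volume: no previous version directory, plain copy.
print(f"rsync -abP --delete --delete-excluded {source} {dest}")
# Later backups: unchanged files are hard-linked against the last version.
print(f"rsync -abP --delete --delete-excluded --link-dest='{previous}' {source} {dest}")
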

def get_image_info(container):
    return execute_shell_command(
        f"docker inspect --format '{{{{.Config.Image}}}}' {container}"
    )

def has_image(container, image):
    """Check if the container is using the image"""
    info = get_image_info(container)[0]
    return image in info

def change_containers_status(containers, status):
    """Stop or start a list of containers."""
    if containers:
        names = ' '.join(containers)
        print(f"{status.capitalize()} containers: {names}...")
        execute_shell_command(f"docker {status} {names}")
    else:
        print(f"No containers to {status}.")

def is_image_whitelisted(container, images):
    info = get_image_info(container)[0]
    return any(img in info for img in images)

def is_container_stop_required(containers):
    """Check if any of the containers are using images that are not whitelisted."""
    return any(
        not is_image_whitelisted(c, IMAGES_NO_STOP_REQUIRED)
        for c in containers
    )

def create_volume_directory(volume_name):
    """Create necessary directories for backup."""
    path = os.path.join(VERSION_DIR, volume_name)
    pathlib.Path(path).mkdir(parents=True, exist_ok=True)
    return path

def is_image_ignored(container):
    """Check if the container's image is one of the ignored images."""
    return any(has_image(container, img) for img in IMAGES_NO_BACKUP_REQUIRED)

def backup_with_containers_paused(volume_name, volume_dir, containers, shutdown):
    change_containers_status(containers, 'stop')
    backup_volume(volume_name, volume_dir)
    if not shutdown:
        change_containers_status(containers, 'start')

def backup_mariadb_or_postgres(container, volume_dir):
    """Performs database image specific backup procedures"""
    for img in ['mariadb', 'postgres']:
        if has_image(container, img):
            backup_database(container, volume_dir, img)
            return True
    return False

def default_backup_routine_for_volume(volume_name, containers, shutdown):
    """Perform backup routine for a given volume."""
    vol_dir = ""
    for c in containers:
        if is_image_ignored(c):
            print(f"Ignoring volume '{volume_name}' linked to container '{c}'.")
            continue
        vol_dir = create_volume_directory(volume_name)
        if backup_mariadb_or_postgres(c, vol_dir):
            return
    if vol_dir:
        backup_volume(volume_name, vol_dir)
        if is_container_stop_required(containers):
            backup_with_containers_paused(volume_name, vol_dir, containers, shutdown)

def backup_everything(volume_name, containers, shutdown):
    """Perform file backup routine for a given volume."""
    vol_dir = create_volume_directory(volume_name)
    for c in containers:
        backup_mariadb_or_postgres(c, vol_dir)
    backup_volume(volume_name, vol_dir)
    backup_with_containers_paused(volume_name, vol_dir, containers, shutdown)

def hard_restart_docker_services(dir_path):
    """Perform a hard restart of docker-compose services in the given directory."""
    try:
@@ -316,18 +306,16 @@ def hard_restart_docker_services(dir_path):

def handle_docker_compose_services(parent_directory):
    """Iterate through directories and restart or hard restart services as needed."""
    for entry in os.scandir(parent_directory):
        if entry.is_dir():
            dir_path = entry.path
            name = os.path.basename(dir_path)
            print(f"Checking directory: {dir_path}")
            compose_file = os.path.join(dir_path, "docker-compose.yml")
            if os.path.isfile(compose_file):
                print(f"Found docker-compose.yml in {dir_path}.")
                if name in DOCKER_COMPOSE_HARD_RESTART_REQUIRED:
                    print(f"Directory {name} detected. Performing hard restart...")
                    hard_restart_docker_services(dir_path)
                else:
                    print(f"No restart required for services in {dir_path}...")
@@ -335,28 +323,45 @@ def handle_docker_compose_services(parent_directory):
                print(f"No docker-compose.yml found in {dir_path}. Skipping.")

def main():
    global SPECIAL_INSTANCES, IMAGES_NO_STOP_REQUIRED
    parser = argparse.ArgumentParser(description='Backup Docker volumes.')
    parser.add_argument('--everything', action='store_true',
                        help='Force file backup for all volumes and additionally execute database dumps')
    parser.add_argument('--shutdown', action='store_true',
                        help='Doesn\'t restart containers after backup')
    parser.add_argument('--compose-dir', type=str, required=True,
                        help='Path to the parent directory containing docker-compose setups')
    parser.add_argument(
        '--special-instances',
        nargs='+',
        default=SPECIAL_INSTANCES,
        help='List of container names treated as special instances for database backups'
    )
    parser.add_argument(
        '--images-no-stop-required',
        nargs='+',
        default=IMAGES_NO_STOP_REQUIRED,
        help='List of image names for which containers should not be stopped during file backup'
    )
    args = parser.parse_args()

    SPECIAL_INSTANCES = args.special_instances
    IMAGES_NO_STOP_REQUIRED = args.images_no_stop_required

    print('Start volume backups...')
    volume_names = execute_shell_command("docker volume ls --format '{{.Name}}'")
    for volume_name in volume_names:
        print(f'Start backup routine for volume: {volume_name}')
        containers = execute_shell_command(
            f"docker ps --filter volume=\"{volume_name}\" --format '{{{{.Names}}}}'"
        )
        if args.everything:
            backup_everything(volume_name, containers, args.shutdown)
        else:
            default_backup_routine_for_volume(volume_name, containers, args.shutdown)

    stamp_directory()
    print('Finished volume backups.')

    print('Handling Docker Compose services...')
    handle_docker_compose_services(args.compose_dir)
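
A self-contained sketch of how the two new nargs='+' options behave; the option names match the diff, while the images default below is abbreviated and the value my-own-db is purely illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--special-instances', nargs='+',
                    default=['central-mariadb', 'central-postgres'])
parser.add_argument('--images-no-stop-required', nargs='+',
                    default=['akaunting', 'wordpress'])

args = parser.parse_args(['--special-instances', 'central-mariadb', 'my-own-db'])
print(args.special_instances)        # ['central-mariadb', 'my-own-db']
print(args.images_no_stop_required)  # default list, left untouched
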

tests/__init__.py (new, empty file)

tests/unit/__init__.py (new, empty file)

tests/unit/test_backup.py (new file): 64 lines

@@ -0,0 +1,64 @@
# tests/unit/test_backup.py
import unittest
from unittest.mock import patch
import importlib.util
import sys
import os
import pathlib

# Prevent actual directory creation in backup script import
dummy_mkdir = lambda self, *args, **kwargs: None
original_mkdir = pathlib.Path.mkdir
pathlib.Path.mkdir = dummy_mkdir

# Create a virtual databases.csv in the project root for the module import
test_dir = os.path.dirname(__file__)
project_root = os.path.abspath(os.path.join(test_dir, '../../'))
sys.path.insert(0, project_root)
db_csv_path = os.path.join(project_root, 'databases.csv')
with open(db_csv_path, 'w') as f:
    f.write('instance;database;username;password\n')

# Dynamically load the hyphenated script as module 'backup'
script_path = os.path.join(project_root, 'backup-docker-to-local.py')
spec = importlib.util.spec_from_file_location('backup', script_path)
backup = importlib.util.module_from_spec(spec)
sys.modules['backup'] = backup
spec.loader.exec_module(backup)

# Restore original mkdir
pathlib.Path.mkdir = original_mkdir


class TestIsImageWhitelisted(unittest.TestCase):

    @patch('backup.get_image_info')
    def test_returns_true_when_image_matches(self, mock_get_image_info):
        # Simulate a container image containing 'mastodon'
        mock_get_image_info.return_value = ['repo/mastodon:v4']
        images = ['mastodon', 'wordpress']
        self.assertTrue(
            backup.is_image_whitelisted('any_container', images),
            "Should return True when at least one image substring matches"
        )

    @patch('backup.get_image_info')
    def test_returns_false_when_no_image_matches(self, mock_get_image_info):
        # Simulate a container image without matching substrings
        mock_get_image_info.return_value = ['repo/nginx:latest']
        images = ['mastodon', 'wordpress']
        self.assertFalse(
            backup.is_image_whitelisted('any_container', images),
            "Should return False when no image substring matches"
        )

    @patch('backup.get_image_info')
    def test_returns_false_with_empty_image_list(self, mock_get_image_info):
        # Even if get_image_info returns something, an empty list yields False
        mock_get_image_info.return_value = ['repo/element:1.0']
        self.assertFalse(
            backup.is_image_whitelisted('any_container', []),
            "Should return False when the images list is empty"
        )


if __name__ == '__main__':
    unittest.main()