2022-01-23 13:01:49 +01:00
|
|
|
#!/bin/python
|
|
|
|
# Backs up the volumes of running containers
|
2023-12-25 20:31:56 +01:00
|
|
|
|
2022-03-29 19:20:35 +02:00
|
|
|
import subprocess
|
|
|
|
import os
|
2022-04-04 11:22:26 +02:00
|
|
|
import re
|
2022-03-29 19:20:35 +02:00
|
|
|
import pathlib
|
|
|
|
import pandas
|
2022-01-23 13:01:49 +01:00
|
|
|
from datetime import datetime
|
|
|
|
|
2023-12-25 20:31:56 +01:00
|
|
|
class BackupException(Exception):
    """Error raised when a step of the backup routine fails."""
|
2022-03-28 16:37:59 +02:00
|
|
|
|
2023-12-25 20:31:56 +01:00
|
|
|
def execute_shell_command(command):
    """Run ``command`` through the shell and return its stdout as lines.

    The command is echoed to stdout before it is executed.

    Args:
        command: Shell command line; pipes and redirections are allowed
            because the string is handed to the shell unchanged.

    Returns:
        list[str]: The UTF-8 decoded lines of the command's stdout, without
        line terminators.

    Raises:
        BackupException: If the command exits with a non-zero status. The
            message carries the decoded stdout/stderr and the exit code.
    """
    print(command)
    # With shell=True the command has to be one string; a one-element list
    # only happens to behave the same on POSIX.
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        # Decode leniently so that building the error report can never fail
        # itself and the text is readable instead of a bytes repr.
        out = completed.stdout.decode("utf-8", errors="replace")
        err = completed.stderr.decode("utf-8", errors="replace")
        raise BackupException(
            f"Error in command: {command}\nOutput: {out}\nError: {err}\nExit code: {completed.returncode}"
        )
    # Split the raw bytes first so that only real line breaks separate lines.
    return [line.decode("utf-8") for line in completed.stdout.splitlines()]
|
2022-01-23 14:04:38 +01:00
|
|
|
|
2023-12-25 20:31:56 +01:00
|
|
|
def get_machine_id():
    """Return the SHA-256 hex digest of /etc/machine-id as machine identifier."""
    checksum_line = execute_shell_command("sha256sum /etc/machine-id")[0]
    # Only the first 64 characters (the hex digest) of the output line are kept.
    return checksum_line[:64]
|
2023-12-13 08:55:02 +01:00
|
|
|
|
2023-12-26 14:34:32 +01:00
|
|
|
def create_version_directory(base_dir, machine_id, repository_name, backup_time):
    """Ensure the directory of the current backup version exists.

    The path is ``<base_dir>/<machine_id>/<repository_name>/<backup_time>``;
    missing parents are created and an existing directory is accepted.

    Returns:
        str: The path of the version directory.
    """
    segments = [base_dir, machine_id, repository_name, backup_time]
    version_dir = os.path.join(*segments)
    target = pathlib.Path(version_dir)
    target.mkdir(parents=True, exist_ok=True)
    return version_dir
|
2022-03-28 16:37:59 +02:00
|
|
|
|
2023-12-26 00:27:27 +01:00
|
|
|
def get_instance(container):
    """Derive the instance name from a container name.

    Everything before the first ``_``/``-`` that is followed by ``database``,
    ``db`` or ``postgres`` is the instance name; a name without such a
    marker is returned unchanged. The result is also printed.
    """
    marker = re.search("(_|-)(database|db|postgres)", container)
    if marker is None:
        instance_name = container
    else:
        instance_name = container[:marker.start()]
    print(f"Extracted instance name: {instance_name}")
    return instance_name
|
2022-01-23 13:01:49 +01:00
|
|
|
|
2023-12-26 14:34:32 +01:00
|
|
|
def backup_database(container, databases, volume_dir, db_type):
    """Dump the database of ``container`` to ``<volume_dir>/sql/backup.sql``.

    Args:
        container: Name of the database container to dump from.
        databases: pandas DataFrame with the columns ``instance``,
            ``username``, ``password`` and ``database``. Exactly one row must
            match the instance name derived from ``container``.
        volume_dir: Backup directory of the volume; the dump is written to
            its ``sql`` sub-directory, which is created when missing.
        db_type: ``'mariadb'`` or ``'postgres'``.

    Raises:
        BackupException: If ``db_type`` is unsupported, if zero or several
            rows match the instance, or if the dump command fails.
    """
    # Imported locally so the block does not depend on a file-level import.
    import shlex

    print(f"Starting database backup for {container} using {db_type}...")
    if db_type not in ('mariadb', 'postgres'):
        # Previously an unknown type crashed later with UnboundLocalError.
        raise BackupException(f"Unsupported database type '{db_type}' for container '{container}'")

    instance_name = get_instance(container)

    # Exactly one credentials row has to exist for the instance.
    database_entries = databases.loc[databases['instance'] == instance_name]
    if len(database_entries) > 1:
        raise BackupException(f"More than one entry found for instance '{instance_name}'")
    if database_entries.empty:
        raise BackupException(f"No entry found for instance '{instance_name}'")
    database_entry = database_entries.iloc[0]

    backup_destination_dir = os.path.join(volume_dir, "sql")
    pathlib.Path(backup_destination_dir).mkdir(parents=True, exist_ok=True)
    backup_destination_file = os.path.join(backup_destination_dir, "backup.sql")

    username = str(database_entry['username'])
    database = str(database_entry['database'])
    password = database_entry['password']
    # An empty CSV cell is read as NaN, which is truthy; test for it explicitly.
    has_password = not pandas.isna(password) and str(password) != ""

    if db_type == 'mariadb':
        arguments = ["docker", "exec", container, "/usr/bin/mariadb-dump", "-u", username]
        if has_password:
            arguments.append(f"-p{password}")
        arguments.append(database)
    else:
        arguments = ["docker", "exec", "-i"]
        if has_password:
            # The variable must be injected into the container with -e; an
            # assignment in front of `docker exec` only reaches the docker
            # client process on the host.
            arguments += ["-e", f"PGPASSWORD={password}"]
        arguments += [container, "pg_dump", "-U", username, "-d", database, "-h", "localhost"]
        if not has_password:
            # Never prompt: fail instead of waiting for interactive input.
            arguments.append("--no-password")

    # Quote every argument so special characters in credentials or paths
    # cannot break or alter the shell command; the redirection stays a
    # shell construct and writes the dump on the host.
    backup_command = (
        " ".join(shlex.quote(argument) for argument in arguments)
        + " > "
        + shlex.quote(backup_destination_file)
    )

    execute_shell_command(backup_command)
    print(f"Database backup for {container} completed.")
|
2023-12-25 21:39:50 +01:00
|
|
|
|
2023-12-26 18:22:06 +01:00
|
|
|
def get_last_backup_dir(versions_dir, volume_name, current_backup_dir=None):
    """Return the most recent existing ``files`` backup directory of a volume.

    Args:
        versions_dir: Directory whose entries are the backup versions. Entry
            names are compared as strings, newest is the one sorting last.
        volume_name: Name of the volume to look up inside each version.
        current_backup_dir: Optional directory to skip, typically the one
            that is being written right now, so that a backup is never
            selected as its own predecessor.

    Returns:
        str | None: ``<versions_dir>/<version>/<volume_name>/files`` of the
        newest matching version, or ``None`` when there is none (including
        the case that ``versions_dir`` does not exist).
    """
    try:
        versions = sorted(os.listdir(versions_dir), reverse=True)
    except FileNotFoundError:
        # Nothing has been backed up below this directory yet.
        return None

    skipped = os.path.realpath(current_backup_dir) if current_backup_dir else None
    for version in versions:
        backup_dir = os.path.join(versions_dir, version, volume_name, "files")
        if skipped is not None and os.path.realpath(backup_dir) == skipped:
            continue
        if os.path.isdir(backup_dir):
            return backup_dir
    return None


def backup_volume(volume_name, volume_dir, versions_dir):
    """Copy the files of a Docker volume with rsync, incrementally if possible.

    The data is copied from ``/var/lib/docker/volumes/<volume_name>/_data/``
    to ``<volume_dir>/files``. When an earlier backup of the volume is found
    below ``versions_dir`` it is passed to rsync as ``--link-dest``.

    Args:
        volume_name: Name of the Docker volume.
        volume_dir: Backup directory of the volume in the current version.
        versions_dir: Directory that is searched for earlier versions.

    Raises:
        BackupException: If the rsync command fails.
    """
    # Imported locally so the block does not depend on a file-level import.
    import shlex

    print(f"Starting backup routine for volume: {volume_name}")
    files_rsync_destination_path = os.path.join(volume_dir, "files")
    pathlib.Path(files_rsync_destination_path).mkdir(parents=True, exist_ok=True)

    # The destination was just created, so it must be excluded from the
    # search or it would be picked as its own reference.
    last_backup_dir = get_last_backup_dir(versions_dir, volume_name, files_rsync_destination_path)

    source_dir = f"/var/lib/docker/volumes/{volume_name}/_data/"
    rsync_arguments = ["rsync", "-abP", "--delete", "--delete-excluded"]
    if last_backup_dir:
        # rsync resolves a relative --link-dest against the destination
        # directory, so always hand over an absolute path.
        rsync_arguments.append(f"--link-dest={os.path.abspath(last_backup_dir)}")
    rsync_arguments += [source_dir, files_rsync_destination_path]

    rsync_command = " ".join(shlex.quote(argument) for argument in rsync_arguments)
    execute_shell_command(rsync_command)
    print(f"Backup routine for volume: {volume_name} completed.")
|
2022-03-28 16:37:59 +02:00
|
|
|
|
2023-12-26 16:09:18 +01:00
|
|
|
def get_image_info(container):
    """Return the configured image of ``container`` as a list of output lines.

    Uses ``docker inspect --format`` instead of piping the JSON through
    ``jq``: in a pipeline the shell reports only the exit status of the last
    command, so a failing ``docker inspect`` went unnoticed and yielded an
    empty list.

    Args:
        container: Name or ID of the container to inspect.

    Returns:
        list[str]: The stdout lines of the command; the first line is the
        image name.

    Raises:
        BackupException: If ``docker inspect`` fails.
    """
    return execute_shell_command(
        f"docker inspect --format '{{{{.Config.Image}}}}' {container}"
    )
|
|
|
|
|
2023-12-25 21:49:06 +01:00
|
|
|
def has_image(container, image):
    """Tell whether ``image`` occurs as a substring in the container's image name."""
    container_image = get_image_info(container)[0]
    return image in container_image
|
|
|
|
|
|
|
|
def stop_containers(containers):
    """Stop every given container, one ``docker stop`` call per container."""
    for name in containers:
        print(f"Stopping container {name}...")
        stop_command = f"docker stop {name}"
        execute_shell_command(stop_command)
|
2023-12-26 02:56:12 +01:00
|
|
|
|
2023-12-25 21:39:50 +01:00
|
|
|
def start_containers(containers):
    """Start every given container, one ``docker start`` call per container."""
    for name in containers:
        print(f"Starting container {name}...")
        start_command = f"docker start {name}"
        execute_shell_command(start_command)
|
|
|
|
|
2023-12-25 21:49:06 +01:00
|
|
|
def get_container_with_image(containers, image):
    """Return the first container whose image matches ``image``, else ``False``."""
    matching = (candidate for candidate in containers if has_image(candidate, image))
    return next(matching, False)
|
|
|
|
|
2023-12-25 21:49:06 +01:00
|
|
|
def is_image_whitelisted(container, images):
    """Tell whether the container's image name contains any of ``images``.

    Matching is by substring against the first line returned by
    ``get_image_info``.
    """
    container_image = get_image_info(container)[0]
    return any(candidate in container_image for candidate in images)
|
|
|
|
|
2023-12-25 21:49:06 +01:00
|
|
|
def is_any_image_not_whitelisted(containers, images):
    """Tell whether at least one container runs an image outside the whitelist."""
    every_image_whitelisted = all(
        is_image_whitelisted(name, images) for name in containers
    )
    return not every_image_whitelisted
|
2023-12-25 21:39:50 +01:00
|
|
|
|
2023-12-26 14:34:32 +01:00
|
|
|
def create_volume_directory(version_dir, volume_name):
    """Ensure ``<version_dir>/<volume_name>`` exists and return its path.

    Missing parent directories are created; an existing directory is kept.
    """
    volume_dir = os.path.join(version_dir, volume_name)
    target = pathlib.Path(volume_dir)
    target.mkdir(parents=True, exist_ok=True)
    return volume_dir
|
|
|
|
|
2023-12-25 21:57:23 +01:00
|
|
|
def backup_routine_for_volume(volume_name, containers, databases, version_dir, whitelisted_images):
    """Perform the backup routine for one volume.

    For every container that uses the volume:

    * a MariaDB or PostgreSQL container gets an SQL dump of its database;
    * for any other container the volume files are copied while everything
      keeps running. If at least one container of the volume runs an image
      that is not whitelisted, all containers are then stopped, the copy is
      repeated and the containers are started again.

    Args:
        volume_name: Name of the Docker volume.
        containers: Names of the running containers that use the volume.
        databases: Database credentials table handed to ``backup_database``.
        version_dir: Directory of the backup version being created.
        whitelisted_images: Image name fragments of containers that do not
            have to be stopped for a file backup.

    Raises:
        BackupException: If one of the executed commands fails.
    """
    # Earlier versions are siblings of the current version directory, so the
    # search for a previous backup has to start at its parent.
    versions_dir = str(pathlib.Path(version_dir).parent)

    for container in containers:
        volume_dir = create_volume_directory(version_dir, volume_name)
        if has_image(container, 'mariadb'):
            backup_database(container, databases, volume_dir, 'mariadb')
        elif has_image(container, 'postgres'):
            backup_database(container, databases, volume_dir, 'postgres')
        else:
            # First copy without stopping anything to keep the downtime low.
            backup_volume(volume_name, volume_dir, versions_dir)
            if is_any_image_not_whitelisted(containers, whitelisted_images):
                try:
                    stop_containers(containers)
                    backup_volume(volume_name, volume_dir, versions_dir)
                finally:
                    # Bring the services back even when stopping or copying
                    # failed; otherwise they would stay down.
                    start_containers(containers)
|
|
|
|
|
2023-12-25 20:31:56 +01:00
|
|
|
def main():
    """Back up every Docker volume that is used by a running container.

    A new version directory is created below ``/Backups/``, the database
    credentials are read from ``databases.csv`` next to this script, and the
    backup routine runs for each volume that has running containers.
    """
    # Image name fragments of containers that are not stopped and restarted
    # for the file backup. The list is configured for
    # https://github.com/kevinveenbirkenbach/backup-docker-to-local
    stop_and_restart_not_needed = [
        # 'baserow', Doesn't use an extra database
        'element',
        'gitea',
        'listmonk',
        'mastodon',
        'matomo',
        'memcached',
        'nextcloud',
        'openproject',
        'pixelfed',
        'redis',
        'wordpress',
    ]

    print('Start backup routine...')
    script_dir = os.path.dirname(__file__)
    repository_name = os.path.basename(script_dir)
    machine_id = get_machine_id()
    version_dir = create_version_directory(
        '/Backups/',
        machine_id,
        repository_name,
        datetime.now().strftime("%Y%m%d%H%M%S"),
    )

    print('Start volume backups...')
    credentials_file = os.path.join(script_dir, "databases.csv")
    databases = pandas.read_csv(credentials_file, sep=";")

    for volume_name in execute_shell_command("docker volume ls --format '{{.Name}}'"):
        print(f'Start backup routine for volume: {volume_name}')
        containers = execute_shell_command(
            f"docker ps --filter volume=\"{volume_name}\" --format '{{{{.Names}}}}'"
        )
        if containers:
            backup_routine_for_volume(
                volume_name,
                containers,
                databases,
                version_dir,
                stop_and_restart_not_needed,
            )
        else:
            print('Skipped due to no running containers using this volume.')

    print('Finished volume backups.')
|
2022-03-29 21:29:25 +02:00
|
|
|
|
2023-12-25 20:31:56 +01:00
|
|
|
# Entry point: run the backup only when this file is executed as a script.
if __name__ == "__main__":
    main()
|