docker-volume-backup/backup-docker-to-local.py

293 lines
11 KiB
Python
Raw Normal View History

2022-01-23 13:01:49 +01:00
#!/bin/python
# Backs up the volumes of running containers
2023-12-25 20:31:56 +01:00
2022-03-29 19:20:35 +02:00
import subprocess
import os
2022-04-04 11:22:26 +02:00
import re
2022-03-29 19:20:35 +02:00
import pathlib
import pandas
2022-01-23 13:01:49 +01:00
from datetime import datetime
2023-12-27 20:46:56 +01:00
import argparse
2022-01-23 13:01:49 +01:00
2023-12-27 23:31:21 +01:00
class BackupException(Exception):
    """Error raised when a step of the backup procedure fails.

    The class adds no state of its own; the message passed to the
    constructor describes what went wrong.
    """
def execute_shell_command(command):
    """Run ``command`` through the shell and return its stdout lines.

    The command line is echoed to stdout before it is executed.

    Args:
        command: Complete shell command line as a single string.

    Returns:
        List with one UTF-8 decoded string per line the command wrote to
        stdout, without line terminators.

    Raises:
        BackupException: If the command exits with a non-zero status. The
            message contains the command, its decoded stdout and stderr,
            and the exit code.
    """
    print(command)
    # With shell=True the subprocess documentation recommends passing the
    # command as one string rather than as a sequence.
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    out, err = process.communicate()
    if process.returncode != 0:
        # Decode leniently so the message shows readable text instead of a
        # bytes repr, and never fails on undecodable output.
        stdout_text = out.decode("utf-8", errors="replace")
        stderr_text = err.decode("utf-8", errors="replace")
        raise BackupException(
            f"Error in command: {command}\n"
            f"Output: {stdout_text}\n"
            f"Error: {stderr_text}\n"
            f"Exit code: {process.returncode}"
        )
    return [line.decode("utf-8") for line in out.splitlines()]
def create_version_directory():
    """Make sure the directory of the current backup version exists.

    The directory is BACKUP_TIME below VERSIONS_DIR. Missing parent
    directories are created and an already existing directory is accepted.

    Returns:
        The path of the version directory as a string.
    """
    target = os.path.join(VERSIONS_DIR, BACKUP_TIME)
    os.makedirs(target, exist_ok=True)
    return target
def get_machine_id():
    """Return an identifier for this machine.

    The identifier is the first 64 characters of the first line printed by
    ``sha256sum /etc/machine-id``, i.e. the hexadecimal SHA-256 digest of
    that file.
    """
    checksum_line = execute_shell_command("sha256sum /etc/machine-id")[0]
    return checksum_line[:64]
### GLOBAL CONFIGURATION ###
# Image name fragments of containers that may keep running while their
# volume is copied (checked by is_container_stop_required).
IMAGES_NO_STOP_REQUIRED = [
    'akaunting',
    'baserow',
    'discourse',
    'element',
    'gitea',
    'listmonk',
    'mastodon',
    'matomo',
    'nextcloud',
    'openproject',
    'peertube',
    'pixelfed',
    'wordpress'
]
# Image name fragments of containers that the default backup routine skips
# (checked by is_image_ignored).
IMAGES_NO_BACKUP_REQUIRED = [
    'redis',
    'memcached'
]
# Resolve __file__ to an absolute path first: when it is relative (e.g. the
# script was started as "python backup-docker-to-local.py" on an interpreter
# that keeps the path as given), dirname() yields '' or '.', which would make
# REPOSITORY_NAME empty or '.' and silently move the backup location.
DIRNAME = os.path.dirname(os.path.abspath(__file__))
# Database credentials, read from the ';'-separated databases.csv located
# next to this script. Columns used in this file: instance, database,
# username, password.
DATABASES = pandas.read_csv(os.path.join(DIRNAME, "databases.csv"), sep=";")
# Name of the directory that contains this script.
REPOSITORY_NAME = os.path.basename(DIRNAME)
MACHINE_ID = get_machine_id()
BACKUPS_DIR = '/Backups/'
# Parent directory of all backup versions of this machine and repository.
VERSIONS_DIR = os.path.join(BACKUPS_DIR, MACHINE_ID, REPOSITORY_NAME)
# Timestamp (YYYYmmddHHMMSS) that names the backup version of this run.
BACKUP_TIME = datetime.now().strftime("%Y%m%d%H%M%S")
# Created as a side effect of loading the module.
VERSION_DIR = create_version_directory()
2023-12-26 00:27:27 +01:00
def get_instance(container):
    """Derive the instance name that belongs to a database container.

    The two central database containers keep their full name. Every other
    name is cut at the first "_" or "-" that is directly followed by
    "database", "db" or "postgres"; a name without such a marker is
    returned unchanged. The result is also printed.

    Args:
        container: Container name as a string.

    Returns:
        The extracted instance name, e.g. "nextcloud" for "nextcloud-db".
    """
    central_containers = ('central-mariadb', 'central-postgres')
    if container in central_containers:
        instance_name = container
    else:
        # Because the pattern contains capture groups, re.split also returns
        # the matched separator and keyword (e.g. "central-db" gives
        # ["central", "-", "db", ""]); only the leading part is of interest.
        instance_name, *_ = re.split("(_|-)(database|db|postgres)", container)
    print(f"Extracted instance name: {instance_name}")
    return instance_name
2022-01-23 13:01:49 +01:00
def backup_database(container, volume_dir, db_type):
    """Dump the databases registered for a container's instance to SQL files.

    The instance name is derived from the container name with get_instance
    and looked up in the DATABASES table. Every matching row produces one
    dump at ``<volume_dir>/sql/<database>.backup.sql``.

    NOTE: the dump command, including the database password, is echoed to
    stdout by execute_shell_command.

    Args:
        container: Name of the database container the dump tool runs in.
        volume_dir: Backup directory of the volume; the ``sql``
            sub-directory is created inside it.
        db_type: ``'mariadb'`` or ``'postgres'``.

    Raises:
        BackupException: If ``db_type`` is not supported, if DATABASES has
            no row for the instance, or if a dump command fails.
    """
    # Imported locally so this function does not rely on a module-level import.
    import shlex

    print(f"Starting database backup for {container} using {db_type}...")
    if db_type not in ('mariadb', 'postgres'):
        # Fail with a clear message instead of an unbound-variable error later.
        raise BackupException(
            f"Unsupported database type '{db_type}' for container '{container}'"
        )
    instance_name = get_instance(container)
    # Select the rows of the credentials table that belong to this instance
    database_entries = DATABASES.loc[DATABASES['instance'] == instance_name]
    if database_entries.empty:
        raise BackupException(f"No entry found for instance '{instance_name}'")

    backup_destination_dir = os.path.join(volume_dir, "sql")
    pathlib.Path(backup_destination_dir).mkdir(parents=True, exist_ok=True)
    quoted_container = shlex.quote(container)

    # One dump per matching row
    for _, database_entry in database_entries.iterrows():
        database_name = str(database_entry['database'])
        raw_password = database_entry['password']
        # pandas.read_csv turns an empty field into NaN, and NaN is truthy,
        # so a plain truth test would treat "no password" as the password "nan".
        has_password = not pandas.isna(raw_password) and str(raw_password) != ""

        # Quote every value that is interpolated into the shell command so
        # that special characters (e.g. in passwords) cannot break it.
        quoted_name = shlex.quote(database_name)
        quoted_user = shlex.quote(str(database_entry['username']))
        quoted_password = shlex.quote(str(raw_password)) if has_password else ""
        quoted_file = shlex.quote(
            os.path.join(backup_destination_dir, f"{database_name}.backup.sql")
        )

        if db_type == 'mariadb':
            password_option = f" -p{quoted_password}" if has_password else ""
            backup_command = (
                f"docker exec {quoted_container} /usr/bin/mariadb-dump "
                f"-u {quoted_user}{password_option} {quoted_name} > {quoted_file}"
            )
        elif has_password:
            # "-e PGPASSWORD" forwards the variable from the docker client's
            # environment into the exec'd process; setting it only on the
            # host side would never reach pg_dump inside the container.
            backup_command = (
                f"PGPASSWORD={quoted_password} docker exec -i -e PGPASSWORD "
                f"{quoted_container} pg_dump -U {quoted_user} -d {quoted_name} "
                f"-h localhost > {quoted_file}"
            )
        else:
            # No password configured: forbid the interactive password prompt
            backup_command = (
                f"docker exec -i {quoted_container} pg_dump -U {quoted_user} "
                f"-d {quoted_name} -h localhost --no-password "
                f"> {quoted_file}"
            )
        execute_shell_command(backup_command)
    print(f"Database backup for database {container} completed.")
def get_last_backup_dir(volume_name, current_backup_dir):
    """Locate the newest earlier file backup of a volume.

    The entries of VERSIONS_DIR are visited in descending sort order. The
    first one that contains ``<volume_name>/files/`` as a directory and is
    not the current backup directory wins.

    Args:
        volume_name: Name of the volume.
        current_backup_dir: Files directory of the running backup (with a
            trailing separator); it is never returned.

    Returns:
        Path of the previous files directory with a trailing separator, or
        None (after printing a notice) if there is none.
    """
    version_names = os.listdir(VERSIONS_DIR)
    version_names.sort(reverse=True)
    for version_name in version_names:
        candidate = os.path.join(VERSIONS_DIR, version_name, volume_name, "files", "")
        # The backup that is being written right now is not a valid reference
        if candidate == current_backup_dir:
            continue
        if not os.path.isdir(candidate):
            continue
        return candidate
    print(f"No previous backups available for volume: {volume_name}")
    return None
def getStoragePath(volume_name):
    """Return the mount point of a Docker volume, followed by a slash.

    The mount point is the first output line of ``docker volume inspect``
    for the given volume name.
    """
    inspect_command = f"docker volume inspect --format '{{{{ .Mountpoint }}}}' {volume_name}"
    mountpoint = execute_shell_command(inspect_command)[0]
    return mountpoint + "/"
def getFileRsyncDestinationPath(volume_dir):
    """Return the ``files`` directory inside ``volume_dir``, followed by a slash."""
    return os.path.join(volume_dir, "files") + "/"
def backup_volume(volume_name, volume_dir):
    """Copy the files of a volume into its backup directory with rsync.

    The destination ``<volume_dir>/files/`` is created if needed. When an
    earlier backup of the same volume exists, it is handed to rsync as
    ``--link-dest``.

    Args:
        volume_name: Name of the Docker volume to copy.
        volume_dir: Backup directory of the volume for the current version.

    Raises:
        BackupException: If a shell command run along the way fails.
    """
    print(f"Starting backup routine for volume: {volume_name}")
    destination = getFileRsyncDestinationPath(volume_dir)
    pathlib.Path(destination).mkdir(parents=True, exist_ok=True)

    previous_backup = get_last_backup_dir(volume_name, destination)
    if previous_backup:
        link_dest_option = f"--link-dest='{previous_backup}'"
    else:
        link_dest_option = ""

    source = getStoragePath(volume_name)
    execute_shell_command(
        f"rsync -abP --delete --delete-excluded {link_dest_option} {source} {destination}"
    )
    print(f"Backup routine for volume: {volume_name} completed.")
2022-03-28 16:37:59 +02:00
2023-12-26 16:09:18 +01:00
def get_image_info(container):
    """Return the output lines of ``docker inspect`` for the container's configured image."""
    inspect_command = f"docker inspect --format '{{{{.Config.Image}}}}' {container}"
    return execute_shell_command(inspect_command)
2023-12-26 16:09:18 +01:00
2023-12-25 21:49:06 +01:00
def has_image(container,image):
    """Tell whether ``image`` occurs as a substring in the container's image name."""
    container_image = get_image_info(container)[0]
    return image in container_image
2024-01-09 12:59:53 +01:00
def stop_containers(containers):
    """Stop the given containers with a single ``docker stop`` call.

    Args:
        containers: List of container names; an empty list is a no-op.

    Raises:
        BackupException: If the ``docker stop`` command fails.
    """
    # "docker stop" without any container name exits with an error, which
    # would abort the whole backup for volumes that have no running container.
    if not containers:
        print("No containers to stop.")
        return
    container_list = ' '.join(containers)
    print(f"Stopping containers {container_list}...")
    execute_shell_command(f"docker stop {container_list}")
def start_containers(containers):
    """Start the given containers with a single ``docker start`` call.

    Args:
        containers: List of container names; an empty list is a no-op.

    Raises:
        BackupException: If the ``docker start`` command fails.
    """
    # "docker start" without any container name exits with an error, so
    # there is nothing to run for an empty list.
    if not containers:
        print("No containers to start.")
        return
    container_list = ' '.join(containers)
    print(f"Start containers {container_list}...")
    execute_shell_command(f"docker start {container_list}")
2023-12-25 21:49:06 +01:00
def get_container_with_image(containers,image):
    """Return the first container whose image name contains ``image``.

    Returns:
        The name of the first matching container, or False if none matches.
    """
    matches = (candidate for candidate in containers if has_image(candidate, image))
    return next(matches, False)
2023-12-25 21:49:06 +01:00
def is_image_whitelisted(container, images):
    """Tell whether the container's image name contains any of ``images``.

    Args:
        container: Name of the container to inspect.
        images: Iterable of image name fragments to look for.

    Returns:
        True if at least one fragment is a substring of the image name,
        otherwise False.
    """
    container_image = get_image_info(container)[0]
    return any(fragment in container_image for fragment in images)
def is_container_stop_required(containers):
    """Tell whether at least one container must be stopped for the backup.

    A container has to be stopped when its image matches none of the
    entries in IMAGES_NO_STOP_REQUIRED. An empty list yields False.
    """
    for container in containers:
        if not is_image_whitelisted(container, IMAGES_NO_STOP_REQUIRED):
            return True
    return False
def create_volume_directory(volume_name):
    """Make sure the backup directory of a volume exists in the current version.

    The directory is ``volume_name`` below VERSION_DIR. Missing parents are
    created and an already existing directory is accepted.

    Returns:
        The path of the volume's backup directory as a string.
    """
    target = os.path.join(VERSION_DIR, volume_name)
    os.makedirs(target, exist_ok=True)
    return target
def is_image_ignored(container):
    """Tell whether the container uses an image listed in IMAGES_NO_BACKUP_REQUIRED."""
    return any(has_image(container, ignored) for ignored in IMAGES_NO_BACKUP_REQUIRED)
2024-01-08 19:48:50 +01:00
def backup_with_containers_paused(volume_name, volume_dir, containers, shutdown):
    """Back up a volume's files while its containers are stopped.

    Args:
        volume_name: Name of the Docker volume to back up.
        volume_dir: Backup directory of the volume for the current version.
        containers: Names of the containers to stop for the backup.
        shutdown: If true, the containers are left stopped afterwards.

    Raises:
        BackupException: If stopping, backing up or starting fails.
    """
    stop_containers(containers)
    try:
        backup_volume(volume_name, volume_dir)
    finally:
        # Bring the containers back even when the file backup failed, so a
        # broken backup does not leave the services down. They stay stopped
        # only when a shutdown was requested explicitly.
        if not shutdown:
            start_containers(containers)
2023-12-27 20:46:56 +01:00
def backup_mariadb_or_postgres(container, volume_dir):
    """Run a database dump if the container uses a MariaDB or PostgreSQL image.

    The image name is checked for 'mariadb' first, then for 'postgres'; the
    dump is made for the first one that matches.

    Returns:
        True if a dump was made, False if neither image name matched.
    """
    for db_type in ('mariadb', 'postgres'):
        if not has_image(container, db_type):
            continue
        backup_database(container, volume_dir, db_type)
        return True
    return False
2024-01-08 19:48:50 +01:00
def default_backup_routine_for_volume(volume_name, containers, shutdown):
    """Run the default backup for one volume.

    Containers with an ignored image are skipped. As soon as one of the
    remaining containers turns out to be a database container, its dump is
    the whole backup and the routine ends. Otherwise the files of the
    volume are copied, and copied once more with the containers stopped if
    any of them requires a stop.

    Args:
        volume_name: Name of the Docker volume.
        containers: Names of the containers that use the volume.
        shutdown: Passed on to backup_with_containers_paused; if true, the
            containers are not restarted.
    """
    # Stays empty when every container is ignored (or there is none)
    volume_dir = ""
    for container in containers:
        if not is_image_ignored(container):
            # Directory that receives the files and SQL dumps of the volume
            volume_dir = create_volume_directory(volume_name)
            # A successful database dump replaces the file backup
            if backup_mariadb_or_postgres(container, volume_dir):
                return
        else:
            print(f"Ignoring volume '{volume_name}' linked to container '{container}' with ignored image.")

    # Nothing to copy if no container qualified for a backup
    if not volume_dir:
        return
    backup_volume(volume_name, volume_dir)
    # NOTE(review): the stop check is assumed to belong to the file-backup
    # branch; the nesting was reconstructed from flattened source - confirm.
    if is_container_stop_required(containers):
        backup_with_containers_paused(volume_name, volume_dir, containers, shutdown)
2023-12-26 20:07:49 +01:00
2024-01-08 19:48:50 +01:00
def backup_everything(volume_name, containers, shutdown):
    """Run database dumps and file backups for one volume.

    Every container is offered to the database dump first. Afterwards the
    files of the volume are copied, and copied again while the containers
    are stopped.

    Args:
        volume_name: Name of the Docker volume.
        containers: Names of the containers that use the volume.
        shutdown: If true, the containers are not restarted at the end.
    """
    target_dir = create_volume_directory(volume_name)

    # SQL dumps for all database containers
    for current in containers:
        backup_mariadb_or_postgres(current, target_dir)

    # File backup, first as is and then with the containers stopped
    backup_volume(volume_name, target_dir)
    backup_with_containers_paused(volume_name, target_dir, containers, shutdown)
2023-12-25 21:57:23 +01:00
2023-12-25 20:31:56 +01:00
def main():
    """Parse the command line and back up every Docker volume.

    Options:
        --everything: use backup_everything instead of the default routine.
        --shutdown: do not restart containers after the backup.
    """
    parser = argparse.ArgumentParser(description='Backup Docker volumes.')
    parser.add_argument(
        '--everything',
        action='store_true',
        help='Force file backup for all volumes and additional execute database dumps',
    )
    parser.add_argument(
        '--shutdown',
        action='store_true',
        help='Doesn\'t restart containers after backup',
    )
    options = parser.parse_args()

    # The routine is the same for all volumes, so pick it once
    routine = backup_everything if options.everything else default_backup_routine_for_volume

    print('Start volume backups...')
    for volume_name in execute_shell_command("docker volume ls --format '{{.Name}}'"):
        print(f'Start backup routine for volume: {volume_name}')
        # Names of the running containers that use this volume
        containers = execute_shell_command(
            f"docker ps --filter volume=\"{volume_name}\" --format '{{{{.Names}}}}'"
        )
        routine(volume_name, containers, options.shutdown)
    print('Finished volume backups.')
2023-12-25 20:31:56 +01:00
# Start the backup only when the file is executed as a script.
if __name__ == "__main__":
    main()