mirror of
https://github.com/kevinveenbirkenbach/docker-volume-backup.git
synced 2024-11-29 11:31:03 +01:00
Compare commits
No commits in common. "7853283ef3c5c4cc0e760d0f8f6f7fc70e685d9a" and "b83e481d017cd74061d52efd0a2f0213ba73721b" have entirely different histories.
7853283ef3
...
b83e481d01
@ -1,6 +1,6 @@
|
|||||||
#!/bin/python
|
#!/bin/python
|
||||||
# Backups volumes of running containers
|
# Backups volumes of running containers
|
||||||
|
#
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -8,179 +8,114 @@ import pathlib
|
|||||||
import pandas
|
import pandas
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
class BackupException(Exception):
|
class RsyncCode24Exception(Exception):
|
||||||
"""Generic exception for backup errors."""
|
"""Exception for rsync error code 24."""
|
||||||
|
"""rsync warning: some files vanished before they could be transferred"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def execute_shell_command(command):
|
def bash(command):
|
||||||
"""Execute a shell command and return its output."""
|
|
||||||
print(command)
|
print(command)
|
||||||
process = subprocess.Popen([command], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
process = subprocess.Popen([command], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
|
||||||
out, err = process.communicate()
|
out, err = process.communicate()
|
||||||
if process.returncode != 0:
|
stdout = out.splitlines()
|
||||||
raise BackupException(f"Error in command: {command}\nOutput: {out}\nError: {err}\nExit code: {process.returncode}")
|
stderr = err.decode("utf-8")
|
||||||
return [line.decode("utf-8") for line in out.splitlines()]
|
output = [line.decode("utf-8") for line in stdout]
|
||||||
|
|
||||||
def get_machine_id():
|
exitcode = process.wait()
|
||||||
"""Get the machine identifier."""
|
if exitcode != 0:
|
||||||
return execute_shell_command("sha256sum /etc/machine-id")[0][0:64]
|
print(f"Error in command: {command}\nOutput: {out}\nError: {err}\nExit code: {exitcode}")
|
||||||
|
|
||||||
def create_backup_directories(base_dir, machine_id, repository_name, backup_time):
|
if "rsync" in command and exitcode == 24:
|
||||||
"""Create necessary directories for backup."""
|
raise RsyncCode24Exception(f"rsync error code 24 encountered: {stderr}")
|
||||||
version_dir = os.path.join(base_dir, machine_id, repository_name, backup_time)
|
|
||||||
pathlib.Path(version_dir).mkdir(parents=True, exist_ok=True)
|
|
||||||
return version_dir
|
|
||||||
|
|
||||||
def get_instance(container):
|
raise Exception("Exit code is greater than 0")
|
||||||
instance_name = re.split("(_|-)(database|db|postgres)", container)[0]
|
|
||||||
print(f"Extracted instance name: {instance_name}")
|
|
||||||
return instance_name
|
|
||||||
|
|
||||||
def backup_database(container, databases, version_dir, db_type):
|
return output
|
||||||
"""Backup database (MariaDB or PostgreSQL) if applicable."""
|
|
||||||
print(f"Starting database backup for {container} using {db_type}...")
|
|
||||||
instance_name = get_instance(container)
|
|
||||||
|
|
||||||
# Filter the DataFrame for the given instance_name
|
def print_bash(command):
|
||||||
database_entries = databases.loc[databases['instance'] == instance_name]
|
output = bash(command)
|
||||||
|
print(list_to_string(output))
|
||||||
|
return output
|
||||||
|
|
||||||
# Check if there are more than one entries
|
|
||||||
if len(database_entries) > 1:
|
|
||||||
raise BackupException(f"More than one entry found for instance '{instance_name}'")
|
|
||||||
|
|
||||||
# Check if there is no entry
|
def list_to_string(list):
|
||||||
if database_entries.empty:
|
return str(' '.join(list))
|
||||||
raise BackupException(f"No entry found for instance '{instance_name}'")
|
|
||||||
|
|
||||||
# Get the first (and only) entry
|
|
||||||
database_entry = database_entries.iloc[0]
|
|
||||||
|
|
||||||
backup_destination_dir = os.path.join(version_dir, "sql")
|
print('start backup routine...')
|
||||||
pathlib.Path(backup_destination_dir).mkdir(parents=True, exist_ok=True)
|
|
||||||
backup_destination_file = os.path.join(backup_destination_dir, f"backup.sql")
|
|
||||||
|
|
||||||
if db_type == 'mariadb':
|
dirname = os.path.dirname(__file__)
|
||||||
backup_command = f"docker exec {container} /usr/bin/mariadb-dump -u {database_entry['username']} -p{database_entry['password']} {database_entry['database']} > {backup_destination_file}"
|
repository_name = os.path.basename(dirname)
|
||||||
elif db_type == 'postgres':
|
# identifier of this backup
|
||||||
if database_entry['password']:
|
machine_id = bash("sha256sum /etc/machine-id")[0][0:64]
|
||||||
# Include PGPASSWORD in the command when a password is provided
|
# Folder in which all Backups are stored
|
||||||
backup_command = (
|
backups_dir = '/Backups/'
|
||||||
f"PGPASSWORD={database_entry['password']} docker exec -i {container} "
|
# Folder in which the versions of docker volume backups are stored
|
||||||
f"pg_dump -U {database_entry['username']} -d {database_entry['database']} "
|
versions_dir = backups_dir + machine_id + "/" + repository_name + "/"
|
||||||
f"-h localhost > {backup_destination_file}"
|
# Time when the backup started
|
||||||
)
|
backup_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
else:
|
# Folder containing the current version
|
||||||
# Exclude PGPASSWORD and use --no-password when the password is empty
|
version_dir = versions_dir + backup_time + "/"
|
||||||
backup_command = (
|
|
||||||
f"docker exec -i {container} pg_dump -U {database_entry['username']} "
|
|
||||||
f"-d {database_entry['database']} -h localhost --no-password "
|
|
||||||
f"> {backup_destination_file}"
|
|
||||||
)
|
|
||||||
|
|
||||||
execute_shell_command(backup_command)
|
# Create folder to store version in
|
||||||
print(f"Database backup for {container} completed.")
|
pathlib.Path(version_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
def backup_volume(volume_name, version_dir):
|
print('start volume backups...')
|
||||||
"""Backup files of a volume."""
|
print('load connection data...')
|
||||||
print(f"Starting backup routine for volume: {volume_name}")
|
databases = pandas.read_csv(dirname + "/databases.csv", sep=";")
|
||||||
files_rsync_destination_path = os.path.join(version_dir, volume_name, "files")
|
volume_names = bash("docker volume ls --format '{{.Name}}'")
|
||||||
pathlib.Path(files_rsync_destination_path).mkdir(parents=True, exist_ok=True)
|
for volume_name in volume_names:
|
||||||
source_dir = f"/var/lib/docker/volumes/{volume_name}/_data/"
|
print('start backup routine for volume: ' + volume_name)
|
||||||
rsync_command = f"rsync -abP --delete --delete-excluded {source_dir} {files_rsync_destination_path}"
|
containers = bash("docker ps --filter volume=\"" + volume_name + "\" --format '{{.Names}}'")
|
||||||
execute_shell_command(rsync_command)
|
if len(containers) == 0:
|
||||||
print(f"Backup routine for volume: {volume_name} completed.")
|
print('skipped due to no running containers using this volume.')
|
||||||
|
else:
|
||||||
def has_image(container,image):
|
container = containers[0]
|
||||||
"""Check if the container is using the image"""
|
# Folder to which the volumes are copied
|
||||||
image_info = execute_shell_command(f"docker inspect {container} | jq -r '.[].Config.Image'")
|
volume_destination_dir = version_dir + volume_name
|
||||||
return image in image_info[0]
|
# Database name
|
||||||
|
database_name = re.split("(_|-)(database|db)", container)[0]
|
||||||
def stop_containers(containers):
|
# Entries with database login data concerning this container
|
||||||
"""Stop a list of containers."""
|
databases_entries = databases.loc[databases['database'] == database_name]
|
||||||
for container in containers:
|
# Exception for akaunting due to fast implementation
|
||||||
print(f"Stopping container {container}...")
|
if len(databases_entries) == 1 and container != 'akaunting':
|
||||||
execute_shell_command(f"docker stop {container}")
|
print("Backup database...")
|
||||||
|
mysqldump_destination_dir = volume_destination_dir + "/sql"
|
||||||
def start_containers(containers):
|
mysqldump_destination_file = mysqldump_destination_dir + "/backup.sql"
|
||||||
"""Start a list of stopped containers."""
|
pathlib.Path(mysqldump_destination_dir).mkdir(parents=True, exist_ok=True)
|
||||||
for container in containers:
|
database_entry = databases_entries.iloc[0]
|
||||||
print(f"Starting container {container}...")
|
database_backup_command = "docker exec " + container + " /usr/bin/mariadb-dump -u " + database_entry["username"] + " -p" + database_entry["password"] + " " + database_entry["database"] + " > " + mysqldump_destination_file
|
||||||
execute_shell_command(f"docker start {container}")
|
print_bash(database_backup_command)
|
||||||
|
print("Backup files...")
|
||||||
def get_container_with_image(containers,image):
|
files_rsync_destination_path = volume_destination_dir + "/files"
|
||||||
for container in containers:
|
pathlib.Path(files_rsync_destination_path).mkdir(parents=True, exist_ok=True)
|
||||||
if has_image(container,image):
|
versions = os.listdir(versions_dir)
|
||||||
return container
|
versions.sort(reverse=True)
|
||||||
return False
|
if len(versions) > 1:
|
||||||
|
last_version = versions[1]
|
||||||
def is_image_whitelisted(container, images):
|
last_version_files_dir = versions_dir + last_version + "/" + volume_name + "/files"
|
||||||
"""Check if the container's image is one of the whitelisted images."""
|
if os.path.isdir(last_version_files_dir):
|
||||||
image_info = execute_shell_command(f"docker inspect {container} | jq -r '.[].Config.Image'")
|
link_dest_parameter="--link-dest='" + last_version_files_dir + "' "
|
||||||
container_image = image_info[0]
|
|
||||||
|
|
||||||
for image in images:
|
|
||||||
if image in container_image:
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
def is_any_image_not_whitelisted(containers, images):
    """Check if any of the containers are using images that are not whitelisted.

    Args:
        containers: Iterable of container names to inspect.
        images: Iterable of whitelisted image name fragments, passed through
            unchanged to ``is_image_whitelisted``.

    Returns:
        True as soon as one container is reported as not whitelisted by
        ``is_image_whitelisted``; False otherwise, including when
        ``containers`` is empty.
    """
    return any(not is_image_whitelisted(container, images) for container in containers)
|
|
||||||
|
|
||||||
def backup_routine_for_volume(volume_name, containers, databases, version_dir, whitelisted_images):
|
|
||||||
"""Perform backup routine for a given volume."""
|
|
||||||
for container in containers:
|
|
||||||
if has_image(container, 'mariadb'):
|
|
||||||
backup_database(container, databases, version_dir, 'mariadb')
|
|
||||||
elif has_image(container, 'postgres'):
|
|
||||||
backup_database(container, databases, version_dir, 'postgres')
|
|
||||||
else:
|
|
||||||
if is_any_image_not_whitelisted(containers, whitelisted_images):
|
|
||||||
stop_containers(containers)
|
|
||||||
backup_volume(volume_name, version_dir)
|
|
||||||
start_containers(containers)
|
|
||||||
else:
|
else:
|
||||||
backup_volume(volume_name, version_dir)
|
print("No previous version exists in path "+ last_version_files_dir + ".")
|
||||||
|
link_dest_parameter=""
|
||||||
def main():
|
else:
|
||||||
print('Start backup routine...')
|
print("No previous version exists in path "+ last_version_files_dir + ".")
|
||||||
dirname = os.path.dirname(__file__)
|
link_dest_parameter=""
|
||||||
repository_name = os.path.basename(dirname)
|
source_dir = "/var/lib/docker/volumes/" + volume_name + "/_data/"
|
||||||
machine_id = get_machine_id()
|
rsync_command = "rsync -abP --delete --delete-excluded " + link_dest_parameter + source_dir + " " + files_rsync_destination_path
|
||||||
backups_dir = '/Backups/'
|
try:
|
||||||
backup_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
print_bash(rsync_command)
|
||||||
version_dir = create_backup_directories(backups_dir, machine_id, repository_name, backup_time)
|
except RsyncCode24Exception:
|
||||||
|
print("Ignoring rsync error code 24, proceeding with the next command.")
|
||||||
print('Start volume backups...')
|
print("stop containers...")
|
||||||
databases = pandas.read_csv(os.path.join(dirname, "databases.csv"), sep=";")
|
print("Backup data after container is stopped...")
|
||||||
volume_names = execute_shell_command("docker volume ls --format '{{.Name}}'")
|
print_bash("docker stop " + list_to_string(containers))
|
||||||
|
print_bash(rsync_command)
|
||||||
# This whitelist is configured for https://github.com/kevinveenbirkenbach/backup-docker-to-local
|
print("start containers...")
|
||||||
stop_and_restart_not_needed = [
|
print_bash("docker start " + list_to_string(containers))
|
||||||
# 'baserow', Doesn't use an extra database
|
print("end backup routine for volume:" + volume_name)
|
||||||
'element',
|
print('finished volume backups.')
|
||||||
'gitea',
|
print('restart docker service...')
|
||||||
'listmonk',
|
print_bash("systemctl restart docker")
|
||||||
'mastodon',
|
print('finished backup routine.')
|
||||||
'matomo',
|
|
||||||
'memcached',
|
|
||||||
'nextcloud',
|
|
||||||
'openproject',
|
|
||||||
'pixelfed',
|
|
||||||
'redis',
|
|
||||||
'wordpress'
|
|
||||||
]
|
|
||||||
|
|
||||||
for volume_name in volume_names:
|
|
||||||
print(f'Start backup routine for volume: {volume_name}')
|
|
||||||
containers = execute_shell_command(f"docker ps --filter volume=\"{volume_name}\" --format '{{{{.Names}}}}'")
|
|
||||||
if not containers:
|
|
||||||
print('Skipped due to no running containers using this volume.')
|
|
||||||
continue
|
|
||||||
|
|
||||||
backup_routine_for_volume(volume_name, containers, databases, version_dir, stop_and_restart_not_needed)
|
|
||||||
|
|
||||||
print('Finished volume backups.')
|
|
||||||
|
|
||||||
# Run the backup routine only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
||||||
|
@ -1,45 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
|
|
||||||
def check_and_add_entry(file_path, instance, host, database, username, password):
    """Insert or replace one database credential row in a ';'-separated CSV.

    A row is identified by the combination of ``instance``, ``host``,
    ``database`` and ``username``. If such a row already exists it is removed
    and the new row (with the given ``password``) is appended; otherwise the
    new row is simply appended. The file is created when it does not exist
    or is empty.

    Args:
        file_path: Path of the CSV file to read and rewrite.
        instance: Value for the ``instance`` column.
        host: Value for the ``host`` column.
        database: Value for the ``database`` column.
        username: Value for the ``username`` column.
        password: Value for the ``password`` column.
    """
    columns = ['instance', 'host', 'database', 'username', 'password']

    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        # Read every cell as plain text. With pandas' default type inference
        # a password such as "000123" is parsed as the integer 123 and then
        # written back corrupted, numeric-looking keys never compare equal to
        # the string arguments, and empty fields become NaN -- so existing
        # rows would be duplicated instead of replaced.
        df = pd.read_csv(file_path, sep=';', dtype=str, keep_default_na=False)
    else:
        # No usable file yet: start from an empty table with the expected header.
        df = pd.DataFrame(columns=columns)

    # Rows that describe the same login; the password is deliberately not
    # part of the key so that it can be updated.
    mask = (
        (df['instance'] == instance)
        & (df['host'] == host)
        & (df['database'] == database)
        & (df['username'] == username)
    )

    if mask.any():
        print("Replacing existing entry.")
        df = df[~mask]
    else:
        print("Adding new entry.")

    new_entry = pd.DataFrame([{
        'instance': instance,
        'host': host,
        'database': database,
        'username': username,
        'password': password,
    }])

    # Append the new row (after any matching old row has been dropped).
    df = pd.concat([df, new_entry], ignore_index=True)

    # Persist the updated table using the same ';' separator it was read with.
    df.to_csv(file_path, sep=';', index=False)
|
|
||||||
|
|
||||||
def main(argv=None):
    """Parse the command line and add or replace one entry in the CSV file.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv``, which matches the
            behavior of calling ``main()`` without arguments.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Check and replace (or add) a database entry in a CSV file.")
    parser.add_argument("file_path", help="Path to the CSV file")
    parser.add_argument("instance", help="Database instance")
    parser.add_argument("host", help="Database host")
    parser.add_argument("database", help="Database name")
    parser.add_argument("username", help="Username")
    parser.add_argument("password", help="Password")

    args = parser.parse_args(argv)

    check_and_add_entry(args.file_path, args.instance, args.host, args.database, args.username, args.password)
|
|
||||||
|
|
||||||
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
Loading…
Reference in New Issue
Block a user