Compare commits

...

15 Commits

16 changed files with 222 additions and 104 deletions

View File

@ -1,15 +1,18 @@
# Server Tact Variables
## Hours in which the server is 100% working. The rest of the time is reserved for maintenance
hours_server_awake: "{{ range(9, 24) | list + range(0, 3) | list }}"
## Random delay for systemd timers to avoid peak loads.
randomized_delay_sec: "15min"
randomized_delay_sec: "5min"
## Schedule for Health Checks
on_calendar_health_btrfs: "*-*-* 00:00:00"
on_calendar_health_journalctl: "*-*-* 00:00:00"
on_calendar_health_disc_space: "*-*-* 06,12,18,00:00:00"
on_calendar_health_docker_container: "*-*-* 09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,00,01,02:00:00"
on_calendar_health_docker_volumes: "*-*-* 09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,00,01,02:15:00"
on_calendar_health_nginx: "*-*-* 09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,00,01,02:45:00"
on_calendar_health_docker_container: "*-*-* {{ hours_server_awake | join(',') }}:00:00"
on_calendar_health_docker_volumes: "*-*-* {{ hours_server_awake | join(',') }}:15:00"
on_calendar_health_nginx: "*-*-* {{ hours_server_awake | join(',') }}:45:00"
## Schedule for Cleanup Tasks
on_calendar_cleanup_backups: "*-*-* 06,12,18,00:30:00"
@ -20,7 +23,8 @@ on_calendar_backup_docker_to_local: "*-*-* 03:30:00"
on_calendar_backup_remote_to_local: "*-*-* 21:30:00"
## Schedule for Maintenance Tasks
on_calendar_heal_docker: "*-*-* 09,10,11,12,13,14,15,16,17,18,19,20,21,22,23,00,01:30:00"
on_calendar_heal_docker: "*-*-* {{ hours_server_awake | join(',') }}:30:00"
on_calendar_defrost: "*-*-* 00:00:00"
on_calendar_renew_lets_encrypt_certificates: "*-*-* 12,00:30:00"
on_calendar_deploy_mailu_certificates: "*-*-* 13,01:30:00"
on_calendar_msi_keyboard_color: "*-*-* *:*:00"
@ -47,27 +51,40 @@ execute_updates: true # Executes updates
force_backup_before_update: true # Activates the backup before the update procedure
# System Maintanance Services
# System maintenance Services
## Defined Services for Backup Tasks
system_maintanance_backup_services:
## Timeouts to wait for other services to stop
system_maintenance_timeout_cleanup_services: "15min"
system_maintenance_timeout_backup_services: "1h"
system_maintenance_timeout_heal_docker: "30min"
system_maintenance_timeout_update_docker: "5min"
system_maintenance_timeout_defroster: "10min"
## Services
### Defined Services for Backup Tasks
system_maintenance_backup_services:
- "backup-docker-to-local"
- "backup-remote-to-local"
- "backup-data-to-usb"
## Defined Services for System Cleanup
system_maintanance_cleanup_services:
### Defined Services for System Cleanup
system_maintenance_cleanup_services:
- "cleanup-backups"
- "cleanup-disc-space"
- "cleanup-failed-docker-backups"
## Services that Manipulate the System
system_maintanance_manipulation_services:
### Freeze services (wait until they are finished to be sure that nobody else is doing stuff in the fridge)
- "system-maintenance-service-freeze"
- "system-maintenance-service-defrost"
### Services that Manipulate the System
system_maintenance_manipulation_services:
- "heal-docker"
- "update-docker"
## Total System Maintenance Services
system_maintenance_services: "{{ system_maintanance_backup_services + system_maintanance_cleanup_services + system_maintanance_manipulation_services }}"
system_maintenance_services: "{{ system_maintenance_backup_services + system_maintenance_cleanup_services + system_maintenance_manipulation_services }}"
## First default freezer action to apply when the freezer service gets triggered during play
system_maintenance_service_freeze_action: 'freeze' # Valid Values: freeze, defrost

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service cleanup-failed-docker-backups.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services }}" --ignore "backup-docker-to-local,backup-remote-to-local,backup-data-to-usb" --max_attempts 600'
ExecStart=/usr/bin/python {{backup_docker_to_local_folder}}backup-docker-to-local.py
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{backup_docker_to_local_folder}}backup-docker-to-local.py && systemctl start system-maintenance-service-defrost.service'

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service cleanup-failed-docker-backups.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services }}" --ignore "backup-docker-to-local,backup-remote-to-local,backup-data-to-usb" --max_attempts 600'
ExecStart=/usr/bin/bash {{docker_backup_remote_to_local_folder}}backups-remote-to-local.sh
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/bash {{docker_backup_remote_to_local_folder}}backups-remote-to-local.sh && systemctl start system-maintenance-service-defrost.service'

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services | reject('equalto', "cleanup-backups") | join(',') }}"'
ExecStart=/usr/bin/python {{docker_cleanup_backups}}cleanup-backups.py --backups-folder-path {{backups_folder_path}} --maximum-backup-size-percent {{size_percent_maximum_backup}}
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
# Wait for the other maintenance services, using the timeout defined for cleanup services.
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_cleanup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{docker_cleanup_backups}}cleanup-backups.py --backups-folder-path {{backups_folder_path}} --maximum-backup-size-percent {{size_percent_maximum_backup}} && systemctl start system-maintenance-service-defrost.service'

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services | reject('equalto', "cleanup-disc-space") | join(',') }}"'
ExecStart=/bin/bash {{cleanup_disc_space_folder}}cleanup-disc-space.sh {{size_percent_cleanup_disc_space}}
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
# Wait for the other maintenance services, using the timeout defined for cleanup services.
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_cleanup_services}}"'
ExecStart=/bin/sh -c '/bin/bash {{cleanup_disc_space_folder}}cleanup-disc-space.sh {{size_percent_cleanup_disc_space}} && systemctl start system-maintenance-service-defrost.service'

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services | reject('equalto', "cleanup-failed-docker-backups") | join(',') }}"'
ExecStart=/bin/sh -c '/usr/bin/yes | /usr/bin/bash {{backup_docker_to_local_cleanup_folder}}cleanup.sh {{backup_docker_to_local_cleanup_machine_id}} {{backup_docker_to_local_cleanup_trigger_directory}}'
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
# Wait for the other maintenance services, using the timeout defined for cleanup services.
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_cleanup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/yes | /usr/bin/bash {{backup_docker_to_local_cleanup_folder}}cleanup.sh {{backup_docker_to_local_cleanup_machine_id}} {{backup_docker_to_local_cleanup_trigger_directory}} && systemctl start system-maintenance-service-defrost.service'

View File

@ -24,7 +24,6 @@
- name: set correct folder permissions
command:
cmd: "docker run --rm --mount type=volume,src=matrix_data,dst=/data -e SYNAPSE_SERVER_NAME={{domain}} -e SYNAPSE_REPORT_STATS=no --entrypoint /bin/sh matrixdotorg/synapse:latest -c 'chown -vR 991:991 /data'"
chdir: "{{path_docker_compose_instances}}baserow/"
- name: add docker-compose.yml
template:

View File

@ -1,6 +1,6 @@
# role docker
## maintanance
## maintenance
### list unused volumes
```bash

View File

@ -1,7 +1,6 @@
- name: "reload heal-docker.service"
systemd:
name: heal-docker.service
enabled: yes
daemon_reload: yes
- name: "restart heal-docker.timer"
systemd:

View File

@ -4,6 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services | reject('equalto', "heal-docker") | join(',') }}"'
ExecStart=/bin/python {{heal_docker}}heal-docker.py
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} heal-docker --timeout "{{system_maintenance_timeout_heal_docker}}"'
ExecStart=/bin/sh -c '/bin/python {{heal_docker}}heal-docker.py && systemctl start system-maintenance-service-defrost.service'

View File

@ -2,89 +2,151 @@ import argparse
import subprocess
import time
import os
from datetime import datetime
def parse_time_to_seconds(time_str):
    """
    Convert a time string (e.g., '1h', '30min', '45s') to seconds.

    Args:
        time_str: Integer amount followed by one of the units 's', 'min'
            or 'h'. Leading/trailing whitespace and whitespace between
            the amount and the unit are tolerated.

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: If the unit is not one of 's', 'min', 'h', or if the
            amount in front of the unit is not an integer. The message
            names the offending input so a misconfigured timeout is easy
            to trace.
    """
    units = {"s": 1, "min": 60, "h": 3600}
    text = time_str.strip()
    # Try the longest suffix first, mirroring the 3/2/1 character lookup order.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            number = text[:-len(unit)].strip()
            break
    else:
        raise ValueError(
            f"Invalid time unit in {time_str!r}; expected one of: {', '.join(units)}"
        )
    try:
        amount = int(number)
    except ValueError:
        raise ValueError(
            f"Invalid time value in {time_str!r}; expected an integer followed by a unit"
        ) from None
    return amount * units[unit]
def service_file_exists(service_name, service_type="service"):
"""Check if a systemd service file exists."""
# Paths where service files can be stored
"""
Check if a systemd service file of a given type exists for a service.
"""
path = "/etc/systemd/system/"
service_file_name = service_name + "." + service_type
service_file_name = f"{service_name}.{service_type}"
full_path = os.path.join(path, service_file_name)
print(f"Checking {full_path}") # Added debug output
if os.path.isfile(full_path):
return True
else:
print(f"File not found.") # Debug output if file is not found
# Debug output for checking the service file existence
print(f"Checking {full_path}")
return os.path.isfile(full_path)
def check_service_active(service_name):
"""Check if a service is active or activating."""
"""
Check if a systemd service is currently active or activating.
"""
result = subprocess.run(['systemctl', 'is-active', service_name], stdout=subprocess.PIPE)
service_status = result.stdout.decode('utf-8').strip()
return service_status in ['active', 'activating']
is_active = service_status in ['active', 'activating']
print(f"Service {service_name} is {'active' if is_active else 'not active'}.")
return is_active
def freeze(services_to_wait_for, ignored_services, max_attempts):
# Filter services that exist and are not in the ignored list
for service in services_to_wait_for:
print(f"\nFreezing: {service}")
if service in ignored_services:
print(f"{service} will be ignored.")
else:
attempt=0
break_time_sec=5
while check_service_active(service):
attempt += 1
print(f"({attempt}/{max_attempts}) Waiting for {break_time_sec} seconds for {service} to stop...")
time.sleep(break_time_sec)
if attempt > max_attempts:
raise Exception(f"Error: Maximum attempts ({max_attempts}) reached. Exit.")
# Stop and disable the corresponding timer, if it exists
if service_file_exists(service,"timer"):
timer_name = service + ".timer"
subprocess.run(['systemctl', 'stop', timer_name])
subprocess.run(['systemctl', 'disable', timer_name])
print(f"{timer_name} stopped and disabled.")
else:
print(f"Skipped.")
print("\nAll required services have stopped.")
def check_any_service_active(services):
    """
    Report whether at least one of the given services is active or activating.

    Services are probed in order with check_service_active(); probing stops
    at the first one that is reported active.
    """
    for candidate in services:
        if check_service_active(candidate):
            return True
    return False
def defrost(services_to_wait_for, ignored_services):
for service in services_to_wait_for:
print(f"\nUnfreezing: {service}")
if service in ignored_services:
print(f"{service} will be ignored.")
elif service_file_exists(service,"timer"):
# Start and enable the corresponding timer, if it exists
timer_name = service + ".timer"
def stop_timer(service):
    """
    Stop and disable a systemd timer for a service if it exists.

    The timer of "system-maintenance-service-defrost" is deliberately left
    untouched: the function returns right after logging that it is ignored.

    Args:
        service: Service name without the ".timer" / ".service" suffix.
    """
    if service == "system-maintenance-service-defrost":
        print(f"Ignoring {service}. It's the initializer of freezer.")
        # Return here; without it the code below would still stop and
        # disable the defrost timer despite the "Ignoring" message.
        return
    if service_file_exists(service, "timer"):
        timer_name = f"{service}.timer"
        subprocess.run(['systemctl', 'stop', timer_name])
        subprocess.run(['systemctl', 'disable', timer_name])
        print(f"{timer_name} stopped and disabled.")
    else:
        print("No timer to stop for service.")
def filter_services(services, ignored_services):
    """
    Return the services that are not listed in ignored_services.

    The relative order of the remaining services is preserved and a new
    list is returned; neither argument is modified.
    """
    kept = []
    for name in services:
        if name in ignored_services:
            continue
        kept.append(name)
    return kept
def stop_all_timers(services):
    """
    Call stop_timer() once for every service in the given list, in order.
    """
    for service_name in services:
        stop_timer(service_name)
def wait_for_all_services_to_stop(filtered_services, max_attempts, attempt, break_time_sec):
    """
    Wait until all services in the list have stopped, with a maximum number of attempts.

    Each service is polled in turn with check_service_active(). The attempt
    counter is shared across all services (it is not reset per service), so
    max_attempts bounds the total number of polls that found a service
    still active.

    Args:
        filtered_services: Names of the services to wait for.
        max_attempts: Upper bound for the attempt counter.
        attempt: Current value of the attempt counter, carried in from the caller.
        break_time_sec: Seconds to sleep between two polls.

    Returns:
        The updated attempt counter.

    Raises:
        Exception: When the attempt counter exceeds max_attempts.
    """
    for service in filtered_services:
        while check_service_active(service):
            attempt += 1
            # Give up before sleeping again once the attempt budget is used up.
            if attempt > max_attempts:
                raise Exception(f"Maximum attempts ({max_attempts}) reached. Exiting.")
            print(f"{datetime.now().isoformat()}#{attempt}/{max_attempts}: Waiting for {break_time_sec} seconds for {service} to stop...")
            time.sleep(break_time_sec)
    return attempt
def freeze(filtered_services, timeout_sec):
    """
    Freeze services by stopping their timers and waiting, up to a timeout,
    until none of them is active any more.

    The services themselves are not stopped here; the function only waits
    for them to finish.

    Args:
        filtered_services: Names of the services to freeze.
        timeout_sec: Overall waiting budget in seconds.
    """
    break_time_sec = 5
    attempt = 0
    # True division: max_attempts is a float (e.g. 720.0 for a one hour timeout).
    max_attempts = timeout_sec / break_time_sec
    # NOTE(review): timers are only stopped inside this loop, i.e. only when
    # at least one service is active on entry. If nothing is running, no
    # timer is stopped at all - confirm that this is intended.
    while check_any_service_active(filtered_services):
        stop_all_timers(filtered_services)
        # The attempt counter is carried across iterations, so the budget is global.
        attempt = wait_for_all_services_to_stop(filtered_services, max_attempts, attempt, break_time_sec)
    print("All required services have stopped.")
def defrost(filtered_services):
"""
Defrost services by starting and enabling their timers.
"""
for service in filtered_services:
print(f"Unfreezing: {service}")
if service_file_exists(service, "timer"):
timer_name = f"{service}.timer"
subprocess.run(['systemctl', 'start', timer_name])
subprocess.run(['systemctl', 'enable', timer_name])
print(f"{timer_name} started and enabled.")
else:
print(f"Skipped.")
print("\nAll required services are started.")
print("No timer to activate for service.")
print("All required services are started.")
def main(services_to_wait_for, ignored_services, action, max_attempts):
print(f"Services to wait for: {services_to_wait_for}")
def main(services, ignored_services, action, timeout_sec):
"""
Main function to process the command-line arguments and perform actions.
"""
# Ignoring the current running service
running_service=f"system-maintenance-service-{action}"
if running_service not in ignored_services:
ignored_services.append(running_service)
filtered_services = filter_services(services, ignored_services)
print(f"Services to handle: {services}")
print(f"Services to ignore: {ignored_services}")
print(f"Services filtered: {filtered_services}")
if action == 'freeze':
print("Freezing services.");
freeze(services_to_wait_for, ignored_services, max_attempts)
print("Freezing services.")
freeze(filtered_services, timeout_sec)
elif action == 'defrost':
print("Unfreezing services.");
defrost(services_to_wait_for, ignored_services)
print('\nOverview:')
print("Unfreezing services.")
defrost(filtered_services)
print("Overview:")
subprocess.run(['systemctl', 'list-timers'])
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='freezes and defrost systemctl services and timers')
parser = argparse.ArgumentParser(description='Freezes and defrosts systemd services and timers.')
parser.add_argument('action', choices=['freeze', 'defrost'], help='Action to perform: freeze or defrost services.')
parser.add_argument('services', help='Comma-separated list of services to apply the action to')
parser.add_argument('--ignore', help='Comma-separated list of services to ignore in the action', default='')
parser.add_argument('--max_attempts', type=int, default=60, help='Maximum number of attempts for freezing services')
parser.add_argument('services', nargs='+', help='List of services to apply the action to.')
parser.add_argument('--ignore', nargs='*', help='List of services to ignore in the action.', default=[])
parser.add_argument('--timeout', help='Timeout for freezing services (e.g., 1h, 30min, 45s).', default='1h')
args = parser.parse_args()
services_to_wait_for = args.services.split(',')
ignored_services = args.ignore.split(',') if args.ignore else []
max_attempts = args.max_attempts
main(services_to_wait_for, ignored_services,args.action,max_attempts)
services = args.services
ignored_services = args.ignore if args.ignore else []
timeout_seconds = parse_time_to_seconds(args.timeout)
main(services, ignored_services, args.action, timeout_seconds)

View File

@ -0,0 +1,16 @@
- name: "restart system-maintenance-service-defrost.timer"
systemd:
name: system-maintenance-service-defrost.timer
state: restarted
enabled: yes
daemon_reload: yes
- name: "reload system-maintenance-service-freeze.service"
systemd:
name: system-maintenance-service-freeze.service
daemon_reload: yes
- name: "reload system-maintenance-service-defrost.service"
systemd:
name: system-maintenance-service-defrost.service
daemon_reload: yes

View File

@ -5,9 +5,14 @@
dest: "{{path_system_maintenance_service_freezer_script}}"
when: run_once_system_maintenance_service_freeze is not defined
- name: run the system_maintenance_service_freezer tasks once
set_fact:
run_once_system_maintenance_service_freeze: true
- name: Configure system-maintenance-service for each action
loop:
- freeze
- defrost
template:
src: system-maintenance-service-freezer.service.j2
dest: "/etc/systemd/system/system-maintenance-service-{{ item }}.service"
notify: "reload system-maintenance-service-{{ item }}.service"
when: run_once_system_maintenance_service_freeze is not defined
- name: "restart system-maintenance-service.service"
@ -18,6 +23,22 @@
daemon_reload: yes
when: maintenance_service_freeze_action_last is not defined or maintenance_service_freeze_action_last != system_maintenance_service_freeze_action
- name: create system-maintenance-service-defrost.timer
template:
src: system-maintenance-service-defrost.timer.j2
dest: "/etc/systemd/system/system-maintenance-service-defrost.timer"
register: system_maintenance_service_defrost_timer
changed_when: system_maintenance_service_defrost_timer.changed or activate_all_timers | bool
notify: restart system-maintenance-service-defrost.timer
when: run_once_system_maintenance_service_freeze is not defined
## Runtime Variable Setting
- name: run the system_maintenance_service_freezer tasks once
set_fact:
run_once_system_maintenance_service_freeze: true
when: run_once_system_maintenance_service_freeze is not defined
- name: "set variable to prevent loading when action status didn't change"
set_fact:
maintenance_service_freeze_action_last: "{{system_maintenance_service_freeze_action}}"
maintenance_service_freeze_action_last: "{{system_maintenance_service_freeze_action}}"

View File

@ -0,0 +1,10 @@
[Unit]
Description=starts system-maintenance-service-defrost.service
[Timer]
OnCalendar={{on_calendar_defrost}}
RandomizedDelaySec={{randomized_delay_sec}}
Persistent=false
[Install]
WantedBy=timers.target

View File

@ -4,4 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} {{item}} {{ system_maintenance_services | join(",") }}'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} {{item}} {{ system_maintenance_services | join(' ') }} --timeout "{{system_maintenance_timeout_defroster}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} {{item}} {{ system_maintenance_services | join(' ') }}'

View File

@ -5,6 +5,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
{% if force_backup_before_update | bool %}ExecStartPre=/bin/sh -c 'systemctl start backup-docker-to-local.service'{% endif %}
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze "{{ system_maintenance_services | reject('equalto', "update-docker") | join(',') }}"'
ExecStart=/bin/sh -c '/usr/bin/python {{update_docker_script}} {{path_docker_compose_instances}}'
ExecStartPost=/bin/sh -c 'systemctl start system-maintenance-service-defrost.service'
# Wait for the other maintenance services, using the timeout defined for update-docker.
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services | join(' ') }} update-docker --timeout "{{system_maintenance_timeout_update_docker}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{update_docker_script}} {{path_docker_compose_instances}} && systemctl start system-maintenance-service-defrost.service'