Implemented system-maintenance-lock and reduced unnecessary complexity

This commit is contained in:
2023-12-16 20:37:40 +01:00
parent 89ffc7fb70
commit 9c21d052c4
30 changed files with 152 additions and 333 deletions

View File

@@ -1,4 +1,4 @@
---
dependencies:
- role: cleanup-backups-service
- role: system-maintenance-service-freezer
- role: system-maintenance-lock

View File

@@ -3,4 +3,4 @@ dependencies:
- backups-provider
- systemd-notifier
- cleanup-failed-docker-backups
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service cleanup-failed-docker-backups.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{sytem_maintenance_lock_timeoutbackup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{backup_docker_to_local_folder}}backup-docker-to-local.py'

View File

@@ -3,4 +3,4 @@ dependencies:
- systemd-notifier
- cleanup-backups-timer
- cleanup-failed-docker-backups
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service cleanup-failed-docker-backups.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_backup_services| join(' ') }} --timeout "{{sytem_maintenance_lock_timeoutbackup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/bash {{docker_backup_remote_to_local_folder}}backups-remote-to-local.sh'

View File

@@ -1,4 +1,4 @@
dependencies:
- python-pip
- systemd-notifier
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{sytem_maintenance_lock_timeoutbackup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{docker_cleanup_backups}}cleanup-backups.py --backups-folder-path {{backups_folder_path}} --maximum-backup-size-percent {{size_percent_maximum_backup}}'

View File

@@ -1,3 +1,3 @@
dependencies:
- systemd-notifier
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{sytem_maintenance_lock_timeoutbackup_services}}"'
ExecStart=/bin/sh -c '/bin/bash {{cleanup_disc_space_folder}}cleanup-disc-space.sh {{size_percent_cleanup_disc_space}}'

View File

@@ -1,4 +1,4 @@
dependencies:
- git
- systemd-notifier
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{system_maintenance_timeout_backup_services}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} --timeout "{{sytem_maintenance_lock_timeoutbackup_services}}"'
ExecStart=/bin/sh -c '/usr/bin/yes | /usr/bin/bash {{backup_docker_to_local_cleanup_folder}}cleanup.sh {{backup_docker_to_local_cleanup_machine_id}} {{backup_docker_to_local_cleanup_trigger_directory}}'

View File

@@ -1,2 +1,2 @@
dependencies:
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} heal-docker --timeout "{{system_maintenance_timeout_heal_docker}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} heal-docker --timeout "{{sytem_maintenance_lock_timeoutheal_docker}}"'
ExecStart=/bin/sh -c '/bin/python {{heal_docker}}heal-docker.py'

View File

@@ -0,0 +1,10 @@
# Role: System-Maintenance-Lock
## Overview
The `system-maintenance-lock` role is a critical part of maintaining the integrity and performance of a system. It ensures that specific services are neither interrupted by nor in conflict with other system processes. This role is particularly vital during system updates, backups, or other maintenance activities where conflicting processes could cause issues.
## Usage
This role is used in scenarios where system stability and integrity are paramount, such as during system upgrades, backup processes, or when applying critical patches.
## Created with AI
Created with ChatGPT. The conversation is available [here](https://chat.openai.com/share/a886b86b-8de6-4eca-9fba-e36c9f20d536).

View File

@@ -0,0 +1,96 @@
import argparse
import subprocess
import time
import os
from datetime import datetime
# Polling interval in seconds: the pause between two consecutive service status checks.
# It is also the divisor that converts a timeout into a maximum number of attempts.
BREAK_TIME_SECONDS = 5
class AttemptException(Exception):
    """Signals that the maximum number of polling attempts has been exceeded."""
def parse_time_to_seconds(time_str):
    """
    Convert a time string (e.g., '1h', '30min', '45s') to seconds.

    Supported units are 's' (seconds), 'min' (minutes) and 'h' (hours).
    Leading and trailing whitespace is ignored.

    Args:
        time_str: Duration written as ``<integer><unit>``.

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: If the unit is unknown, the numeric part is missing or
            not an integer, or the duration is negative.
    """
    units = {"s": 1, "min": 60, "h": 3600}
    text = time_str.strip()
    # Probe the longest suffix first so a multi-character unit is matched as a whole.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            number = text[:-len(unit)]
            break
    else:
        raise ValueError(f"Invalid time unit in {time_str!r}; expected one of: s, min, h")
    try:
        value = int(number)
    except ValueError:
        # Covers inputs such as 'min' (no number) or '1.5h' (not an integer).
        raise ValueError(f"Invalid time value {time_str!r}; expected <integer><unit>, e.g. '30min'") from None
    if value < 0:
        raise ValueError(f"Negative durations are not allowed: {time_str!r}")
    return value * units[unit]
def check_service_active(service_name):
    """
    Ask ``systemctl is-active`` for the state of a unit and report it.

    Returns True when the reported state is 'active' or 'activating',
    otherwise False. The outcome is also printed.
    """
    completed = subprocess.run(
        ['systemctl', 'is-active', service_name],
        stdout=subprocess.PIPE,
    )
    state = completed.stdout.decode('utf-8').strip()
    running = state == 'active' or state == 'activating'
    label = 'active' if running else 'not active'
    print(f"Service {service_name} is {label}.")
    return running
def check_any_service_active(services):
    """
    Return True as soon as one of the given services is active or activating.

    Services are checked in order and checking stops at the first active one.
    """
    for name in services:
        if check_service_active(name):
            return True
    return False
def filter_services(services, ignored_services):
    """
    Return a new list with every entry of ``services`` that is not listed in
    ``ignored_services``; the original order is kept.
    """
    kept = []
    for name in services:
        if name in ignored_services:
            continue
        kept.append(name)
    return kept
def wait_for_all_services_to_stop(filtered_services, max_attempts, attempt):
    """
    Poll each service in turn until it is no longer active.

    ``attempt`` is the number of polls already consumed; it is incremented for
    every poll that still finds a service active, and the updated count is
    returned. Raises AttemptException once the count exceeds ``max_attempts``.
    """
    for service in filtered_services:
        while True:
            if not check_service_active(service):
                break
            attempt += 1
            if attempt > max_attempts:
                raise AttemptException(f"Maximum attempts ({max_attempts}) reached. Exiting.")
            print(f"{datetime.now().isoformat()}#{attempt}/{max_attempts}: Waiting for {BREAK_TIME_SECONDS} seconds for {service} to stop...")
            time.sleep(BREAK_TIME_SECONDS)
    return attempt
def get_max_attempts(timeout_sec):
    """
    Return how many polling intervals fit completely into ``timeout_sec``
    (floor division by BREAK_TIME_SECONDS).
    """
    attempts = timeout_sec // BREAK_TIME_SECONDS
    return attempts
def main(services, ignored_services, timeout_sec):
    """
    Block until none of the non-ignored services is active.

    Prints the handled, ignored and resulting service lists, then keeps
    polling. AttemptException from the wait helper propagates when the
    attempt budget derived from ``timeout_sec`` is used up.
    """
    filtered_services = filter_services(services, ignored_services)
    print(f"Services to handle: {services}")
    print(f"Services to ignore: {ignored_services}")
    print(f"Services filtered: {filtered_services}")
    print("Waiting for services to stop.")
    attempt_limit = get_max_attempts(timeout_sec)
    attempts_used = 0
    while True:
        if not check_any_service_active(filtered_services):
            break
        attempts_used = wait_for_all_services_to_stop(filtered_services, attempt_limit, attempts_used)
    print("All required services have stopped.")
if __name__ == "__main__":
    # Command-line entry point: positional service names, optional ignore list and timeout.
    cli = argparse.ArgumentParser(description='Blocks the code execution as long as defined services are running. Terminates with 0 when all services stopped')
    cli.add_argument('services', nargs='+', help='List of services to apply the action to.')
    cli.add_argument('--ignore', nargs='*', help='List of services to ignore in the action.', default=[])
    cli.add_argument('--timeout', help='Timeout for lock actions (e.g., 1h, 30min, 45s).', default='1min')
    parsed = cli.parse_args()
    # The timeout is converted before main() is invoked, so an invalid value fails early.
    timeout_in_seconds = parse_time_to_seconds(parsed.timeout)
    main(parsed.services, parsed.ignore or [], timeout_in_seconds)

View File

@@ -0,0 +1,20 @@
---
# Installs the lock script and its systemd unit. All tasks are guarded by the
# run_once_system_maintenance_lock fact so they run only once per play, even
# when several roles depend on this one.
# The guard name must not contain a hyphen: Ansible variable names allow only
# letters, digits and underscores, and Jinja would read "a-b" as a subtraction.
- name: "create {{ path_system_lock_script }}"
  copy:
    src: system-maintenance-lock.py
    dest: "{{ path_system_lock_script }}"
  when: run_once_system_maintenance_lock is not defined

- name: configure system-maintenance-lock.service
  template:
    src: system-maintenance-lock.service.j2
    dest: "/etc/systemd/system/system-maintenance-lock.service"
  notify: "reload system-maintenance-lock.service"
  when: run_once_system_maintenance_lock is not defined

## Runtime Variable Setting
- name: run the system-maintenance-lock tasks once
  set_fact:
    run_once_system_maintenance_lock: true
  when: run_once_system_maintenance_lock is not defined

View File

@@ -1,22 +0,0 @@
# System Maintenance Service Freezer
## Overview
This Ansible role is designed to manage system services through freezing (disabling) and defrosting (enabling) actions. It automates the process of managing crucial system services, especially useful for maintenance tasks like backups, cleanups, and updates.
## Monitoring
To monitor the success of the script and the status of the systemctl timers, execute:
```bash
watch -n 2 systemctl list-timers
```
## Role Variables
- `system_maintenance_services`: List of services to be managed by this role.
## Usage
Configure the role by defining the required variables. The role creates systemd service files that control the specified services based on the `freeze` or `defrost` actions.
For further details and usage examples, refer to the chat conversation with ChatGPT: [Link to ChatGPT Conversation](https://chat.openai.com/share/212af169-1b57-41df-bd2d-c3d32eb1331b).
## Dependencies
- `systemd-notifier`: Ensure this role is present for handling service failures.

View File

@@ -1,187 +0,0 @@
import argparse
import subprocess
import time
import os
from datetime import datetime
# Module-level constants.
# Polling interval in seconds between two service status checks; it is also the
# divisor that converts a timeout into a maximum number of attempts.
BREAK_TIME_SECONDS = 5
# Common name prefix of the freezer's own units; the action name ('freeze' or
# 'defrost') is appended to it.
FREEZER_SERVICES_PREFIX="system-maintenance-service-"
class AttemptException(Exception):
    """Signals that the maximum number of polling attempts has been exceeded."""
def parse_time_to_seconds(time_str):
    """
    Convert a time string (e.g., '1h', '30min', '45s') to seconds.

    Supported units are 's' (seconds), 'min' (minutes) and 'h' (hours).
    Leading and trailing whitespace is ignored.

    Args:
        time_str: Duration written as ``<integer><unit>``.

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: If the unit is unknown, the numeric part is missing or
            not an integer, or the duration is negative.
    """
    units = {"s": 1, "min": 60, "h": 3600}
    text = time_str.strip()
    # Probe the longest suffix first so a multi-character unit is matched as a whole.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            number = text[:-len(unit)]
            break
    else:
        raise ValueError(f"Invalid time unit in {time_str!r}; expected one of: s, min, h")
    try:
        value = int(number)
    except ValueError:
        # Covers inputs such as 'min' (no number) or '1.5h' (not an integer).
        raise ValueError(f"Invalid time value {time_str!r}; expected <integer><unit>, e.g. '30min'") from None
    if value < 0:
        raise ValueError(f"Negative durations are not allowed: {time_str!r}")
    return value * units[unit]
def service_file_exists(service_name, service_type="service"):
    """
    Tell whether ``/etc/systemd/system/<service_name>.<service_type>`` is an
    existing regular file. The probed path is printed for debugging.
    """
    unit_path = os.path.join("/etc/systemd/system/", f"{service_name}.{service_type}")
    # Debug output for checking the service file existence
    print(f"Checking {unit_path}")
    return os.path.isfile(unit_path)
def check_service_active(service_name):
    """
    Ask ``systemctl is-active`` for the state of a unit and report it.

    Returns True when the reported state is 'active' or 'activating',
    otherwise False. The outcome is also printed.
    """
    completed = subprocess.run(
        ['systemctl', 'is-active', service_name],
        stdout=subprocess.PIPE,
    )
    state = completed.stdout.decode('utf-8').strip()
    running = state == 'active' or state == 'activating'
    label = 'active' if running else 'not active'
    print(f"Service {service_name} is {label}.")
    return running
def check_any_service_active(services):
    """
    Return True as soon as one of the given services is active or activating.

    Services are checked in order and checking stops at the first active one.
    """
    for name in services:
        if check_service_active(name):
            return True
    return False
def manage_timer(service, action):
    """
    Start-and-enable or stop-and-disable the systemd timer of a service.

    Args:
        service: Unit name without suffix; ``.timer`` is appended.
        action: 'start' (followed by ``systemctl enable``) or 'stop'
            (followed by ``systemctl disable``).

    Raises:
        ValueError: If ``action`` is neither 'start' nor 'stop'.
        SystemExit: With exit code 1 when a ``systemctl`` call returns a
            non-zero status.
    """
    if action not in ['start', 'stop']:
        raise ValueError("Invalid action specified for manage_timer")
    # Follow-up systemctl verb plus the past-tense words for the log line.
    # Spelled out explicitly because "stop" + "ed" would read "stoped".
    if action == 'start':
        persist_verb, done, persisted = 'enable', 'started', 'enabled'
    else:
        persist_verb, done, persisted = 'disable', 'stopped', 'disabled'
    timer_name = f"{service}.timer"
    try:
        subprocess.run(['systemctl', action, timer_name], check=True)
        subprocess.run(['systemctl', persist_verb, timer_name], check=True)
        print(f"{timer_name} {done} and {persisted}.")
    except subprocess.CalledProcessError as e:
        print(f"Error managing timer {timer_name}: {e}")
        # SystemExit does not depend on the site-provided exit() helper.
        raise SystemExit(1)
def stop_timer(service):
    """
    Stop and disable a systemd timer for a service if it exists.

    The freezer's own defrost unit is skipped: the function reports that it
    is ignored and returns without touching its timer.
    """
    if service == f"{FREEZER_SERVICES_PREFIX}defrost":
        print(f"Ignoring {service}. It's the initializer of freezer.")
        # Without this return the "ignored" timer would still be stopped below.
        return
    if service_file_exists(service, "timer"):
        manage_timer(service, 'stop')
    else:
        print(f"Timer {service}.timer does not exist.")
def filter_services(services, ignored_services):
    """
    Return a new list with every entry of ``services`` that is not listed in
    ``ignored_services``; the original order is kept.
    """
    kept = []
    for name in services:
        if name in ignored_services:
            continue
        kept.append(name)
    return kept
def stop_all_timers(services):
    """
    Call stop_timer for each service in the given list, in order.
    """
    index = 0
    while index < len(services):
        stop_timer(services[index])
        index += 1
def wait_for_all_services_to_stop(filtered_services, max_attempts, attempt):
    """
    Poll each service in turn until it is no longer active.

    ``attempt`` is the number of polls already consumed; it is incremented for
    every poll that still finds a service active, and the updated count is
    returned. Raises AttemptException once the count exceeds ``max_attempts``.
    """
    for service in filtered_services:
        while True:
            if not check_service_active(service):
                break
            attempt += 1
            if attempt > max_attempts:
                raise AttemptException(f"Maximum attempts ({max_attempts}) reached. Exiting.")
            print(f"{datetime.now().isoformat()}#{attempt}/{max_attempts}: Waiting for {BREAK_TIME_SECONDS} seconds for {service} to stop...")
            time.sleep(BREAK_TIME_SECONDS)
    return attempt
def freeze(filtered_services, timeout_sec):
    """
    Stop the timers of the given services and wait until none of the services
    is active any more.

    The attempt budget is derived from ``timeout_sec``; AttemptException from
    the wait helper propagates when it is exhausted.
    """
    attempt_limit = get_max_attempts(timeout_sec)
    attempts_used = 0
    while True:
        if not check_any_service_active(filtered_services):
            break
        stop_all_timers(filtered_services)
        attempts_used = wait_for_all_services_to_stop(filtered_services, attempt_limit, attempts_used)
    print("All required services have stopped.")
def get_max_attempts(timeout_sec):
    """
    Return how many polling intervals fit completely into ``timeout_sec``
    (floor division by BREAK_TIME_SECONDS).
    """
    attempts = timeout_sec // BREAK_TIME_SECONDS
    return attempts
def defrost(filtered_services, timeout_sec):
    """
    Start and enable the timers of the given services plus the defrost unit's
    own timer, after waiting for the services to be inactive.

    If the services do not stop within the attempt budget, the defrost timer
    is stopped and the process exits with status 0.
    """
    own_unit = f"{FREEZER_SERVICES_PREFIX}defrost"
    attempt_limit = get_max_attempts(timeout_sec)
    try:
        wait_for_all_services_to_stop(filtered_services, attempt_limit, 0)
    except AttemptException as error:
        print(error)
        print("Defrosting was not possible. The execution of other services took to long.")
        manage_timer(own_unit, "stop")
        exit(0)
    # The defrost unit itself is handled last, after all regular services.
    for unit in [*filtered_services, own_unit]:
        print(f"Unfreezing: {unit}")
        if not service_file_exists(unit, "timer"):
            print("No timer to activate for service.")
            continue
        manage_timer(unit, "start")
    print("All required services are started.")
def main(services, ignored_services, action, timeout_sec):
    """
    Main function to process the command-line arguments and perform actions.

    Args:
        services: Names of all services to handle.
        ignored_services: Names to exclude; the caller's list is not modified.
        action: 'freeze' or 'defrost'. Any other value only prints the lists
            and the timer overview.
        timeout_sec: Timeout in seconds handed to the selected action.
    """
    # Ignoring the current running service
    running_service = f"{FREEZER_SERVICES_PREFIX}{action}"
    # Work on a copy so the caller's list is not mutated as a side effect.
    ignored_services = list(ignored_services)
    if running_service not in ignored_services:
        ignored_services.append(running_service)
    filtered_services = filter_services(services, ignored_services)
    print(f"Services to handle: {services}")
    print(f"Services to ignore: {ignored_services}")
    print(f"Services filtered: {filtered_services}")
    if action == 'freeze':
        print("Freezing services.")
        freeze(filtered_services, timeout_sec)
    elif action == 'defrost':
        print("Unfreezing services.")
        defrost(filtered_services, timeout_sec)
    print("Overview:")
    subprocess.run(['systemctl', 'list-timers'])
if __name__ == "__main__":
    # Command-line entry point: action, positional service names, optional ignore list and timeout.
    cli = argparse.ArgumentParser(description='Freezes and defrosts systemd services and timers.')
    cli.add_argument('action', choices=['freeze', 'defrost'], help='Action to perform: freeze or defrost services.')
    cli.add_argument('services', nargs='+', help='List of services to apply the action to.')
    cli.add_argument('--ignore', nargs='*', help='List of services to ignore in the action.', default=[])
    cli.add_argument('--timeout', help='Timeout for freezer actions (e.g., 1h, 30min, 45s).', default='1min')
    parsed = cli.parse_args()
    # The timeout is converted before main() is invoked, so an invalid value fails early.
    timeout_in_seconds = parse_time_to_seconds(parsed.timeout)
    main(parsed.services, parsed.ignore or [], parsed.action, timeout_in_seconds)

View File

@@ -1,16 +0,0 @@
# Handlers of the freezer role; each one also reloads the systemd daemon.

# Restarts and enables the defrost timer.
- name: restart system-maintenance-service-defrost.timer
  systemd:
    daemon_reload: true
    name: system-maintenance-service-defrost.timer
    enabled: true
    state: restarted

# Daemon reload only; no state change is requested for the freeze service.
- name: reload system-maintenance-service-freeze.service
  systemd:
    daemon_reload: true
    name: system-maintenance-service-freeze.service

# Daemon reload only; no state change is requested for the defrost service.
- name: reload system-maintenance-service-defrost.service
  systemd:
    daemon_reload: true
    name: system-maintenance-service-defrost.service

View File

@@ -1,3 +0,0 @@
---
# Role dependencies of the freezer role.
dependencies:
  - role: systemd-notifier

View File

@@ -1,44 +0,0 @@
---
# The setup tasks are guarded by run_once_system_maintenance_service_freeze
# so they are executed only once per play.
- name: "create {{ path_system_maintenance_service_freezer_script }}"
  when: run_once_system_maintenance_service_freeze is not defined
  copy:
    src: system-maintenance-service-freezer.py
    dest: "{{ path_system_maintenance_service_freezer_script }}"

# One unit file per action, rendered from the same template.
- name: Configure system-maintenance-service for each action
  when: run_once_system_maintenance_service_freeze is not defined
  template:
    src: system-maintenance-service-freezer.service.j2
    dest: "/etc/systemd/system/system-maintenance-service-{{ item }}.service"
  loop:
    - freeze
    - defrost
  notify: "reload system-maintenance-service-{{ item }}.service"

# Runs only when the requested action differs from the last one applied.
- name: restart system-maintenance-service.service
  when: >-
    maintenance_service_freeze_action_last is not defined
    or maintenance_service_freeze_action_last != system_maintenance_service_freeze_action
  systemd:
    name: "system-maintenance-service-{{ system_maintenance_service_freeze_action }}.service"
    state: restarted
    enabled: true
    daemon_reload: true

- name: create system-maintenance-service-defrost.timer
  when: run_once_system_maintenance_service_freeze is not defined
  template:
    src: system-maintenance-service-defrost.timer.j2
    dest: /etc/systemd/system/system-maintenance-service-defrost.timer
  register: system_maintenance_service_defrost_timer
  # Also reported as changed when activate_all_timers is set, which triggers the handler.
  changed_when: system_maintenance_service_defrost_timer.changed or activate_all_timers | bool
  notify: restart system-maintenance-service-defrost.timer

## Runtime Variable Setting
- name: run the system_maintenance_service_freezer tasks once
  when: run_once_system_maintenance_service_freeze is not defined
  set_fact:
    run_once_system_maintenance_service_freeze: true

- name: set variable to prevent loading when action status didn't change
  set_fact:
    maintenance_service_freeze_action_last: "{{ system_maintenance_service_freeze_action }}"

View File

@@ -1,10 +0,0 @@
# Timer unit template for the defrost service of the freezer role.
[Unit]
Description=starts system-maintenance-service-defrost.service
[Timer]
# Schedule and random delay are filled in from Ansible variables at render time.
OnCalendar={{on_calendar_defrost}}
RandomizedDelaySec={{randomized_delay_sec}}
Persistent=false
[Install]
WantedBy=timers.target

View File

@@ -1,7 +0,0 @@
# Service unit template rendered once per loop item (freeze, defrost) by the role's tasks.
[Unit]
Description={{item}} systemctl maintenance services
# On failure the systemd-notifier template unit is started with this unit's name as instance.
OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
# Calls the freezer script with the action, the space-separated service list and the timeout.
ExecStart=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} {{item}} {{ system_maintenance_services | join(' ') }} --timeout "{{system_maintenance_timeout_freezer_action}}"'

View File

@@ -1,2 +1,2 @@
dependencies:
- system-maintenance-service-freezer
- system-maintenance-lock

View File

@@ -4,5 +4,5 @@ OnFailure=systemd-notifier@%n.service
[Service]
Type=oneshot
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_maintenance_service_freezer_script }} freeze {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services | join(' ') }} update-docker --timeout "{{system_maintenance_timeout_heal_docker}}"'
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services | join(' ') }} update-docker --timeout "{{sytem_maintenance_lock_timeoutheal_docker}}"'
ExecStart=/bin/sh -c '/usr/bin/python {{update_docker_script}} {{path_docker_compose_instances}}'