mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-08-29 15:06:26 +02:00
Shortened maintenance- to maint-
This commit is contained in:
25
roles/maint-lock/README.md
Normal file
25
roles/maint-lock/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# System Maintenance Lock
|
||||
|
||||
## Description
|
||||
|
||||
This role provides a locking mechanism to ensure that critical services are not interrupted during maintenance activities such as updates, backups, or patch applications. It waits for specified services to stop and prevents conflicting operations.
|
||||
|
||||
## Overview
|
||||
|
||||
The role performs the following:
|
||||
- Blocks execution until specified services have stopped.
|
||||
- Implements retry logic with a configurable timeout.
|
||||
- Ensures that maintenance tasks are executed only when the system is in a safe state.
|
||||
|
||||
## Purpose
|
||||
|
||||
The primary purpose of this role is to safeguard system stability during maintenance by preventing conflicts with running services. It ensures that maintenance operations proceed only when the environment is ready.
|
||||
|
||||
## Features
|
||||
|
||||
- **Service Locking:** Blocks maintenance tasks until critical services are stopped.
|
||||
- **Timeout and Retry Logic:** Polls service state at a fixed interval; the maximum number of attempts is derived from a configurable timeout.
|
||||
- **Conflict Avoidance:** Prevents interference between maintenance operations and running services.
|
||||
|
||||
## Credits 📝
|
||||
Created with ChatGPT. The conversation is available [here](https://chat.openai.com/share/a886b86b-8de6-4eca-9fba-e36c9f20d536).
|
102
roles/maint-lock/files/maint-lock.py
Normal file
102
roles/maint-lock/files/maint-lock.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import argparse
|
||||
import subprocess
|
||||
import time
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Seconds to sleep between two consecutive service status checks
|
||||
BREAK_TIME_SECONDS = 5
|
||||
|
||||
class AttemptException(Exception):
    """Signal that the maximum number of waiting attempts has been exceeded."""
|
||||
|
||||
def parse_time_to_seconds(time_str):
    """
    Convert a time string (e.g., '1h', '30min', '45s') to seconds.

    Args:
        time_str: An integer immediately followed by one of the units
            's' (seconds), 'min' (minutes) or 'h' (hours). Leading and
            trailing whitespace is ignored.

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: If the unit is missing or unknown, or if the part in
            front of the unit is not an integer.
    """
    units = {"s": 1, "min": 60, "h": 3600}
    text = time_str.strip()

    # Probe the longest unit first so a multi-character unit such as "min"
    # is matched as a whole before any shorter suffix is considered.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            number = text[:-len(unit)]
            break
    else:
        raise ValueError(
            f"Invalid time unit in {time_str!r}; expected one of: s, min, h"
        )

    try:
        return int(number) * units[unit]
    except ValueError:
        # Replace int()'s generic message with one that names the bad input.
        raise ValueError(
            f"Invalid time value in {time_str!r}; expected an integer "
            f"followed by s, min or h"
        ) from None
|
||||
|
||||
def check_service_active(service_name):
    """
    Report whether ``systemctl is-active`` describes a unit as running.

    The command's standard output is captured and compared against the
    states 'active' and 'activating'; the outcome is also printed.

    Returns:
        True if the reported state is 'active' or 'activating', else False.
    """
    completed = subprocess.run(
        ['systemctl', 'is-active', service_name],
        stdout=subprocess.PIPE,
    )
    reported_state = completed.stdout.decode('utf-8').strip()
    is_active = reported_state == 'active' or reported_state == 'activating'
    print(f"Service {service_name} is {'active' if is_active else 'not active'}.")
    return is_active
|
||||
|
||||
def check_any_service_active(services):
    """
    Tell whether at least one of the given services is active or activating.

    Services are checked in order and checking stops at the first active one.
    An empty list yields False.
    """
    for name in services:
        if check_service_active(name):
            return True
    return False
|
||||
|
||||
def filter_services(services, ignored_services):
    """
    Return the entries of ``services`` that are absent from ``ignored_services``.

    The original order of ``services`` is kept and duplicates are preserved.
    """
    kept = []
    for name in services:
        if name in ignored_services:
            continue
        kept.append(name)
    return kept
|
||||
|
||||
def wait_for_all_services_to_stop(filtered_services, max_attempts, attempt):
    """
    Poll each service in turn until it is no longer active.

    Every poll that still finds a service active increments the shared
    attempt counter and then sleeps for BREAK_TIME_SECONDS.

    Args:
        filtered_services: Service names to wait for, handled one by one.
        max_attempts: Upper bound for the attempt counter.
        attempt: Counter value to start from.

    Returns:
        The attempt counter after all services were seen as not active.

    Raises:
        AttemptException: If the counter exceeds ``max_attempts``.
    """
    for service in filtered_services:
        while True:
            if not check_service_active(service):
                break
            attempt = attempt + 1
            if attempt > max_attempts:
                raise AttemptException(f"Maximum attempts ({max_attempts}) reached. Exiting.")
            print(f"{datetime.now().isoformat()}#{attempt}/{max_attempts}: Waiting for {BREAK_TIME_SECONDS} seconds for {service} to stop...")
            time.sleep(BREAK_TIME_SECONDS)
    return attempt
|
||||
|
||||
|
||||
def get_max_attempts(timeout_sec):
    """
    Return the number of whole BREAK_TIME_SECONDS intervals in ``timeout_sec``.

    The division is floored, so a timeout shorter than one interval yields 0.
    """
    whole_intervals, _remainder = divmod(timeout_sec, BREAK_TIME_SECONDS)
    return whole_intervals
|
||||
|
||||
def append_suffix_to_services(services, suffix=".cymais"):
    """
    Build a new list in which ``suffix`` is appended to every service name.

    The input list is left untouched.
    """
    suffixed = []
    for name in services:
        suffixed.append(name + suffix)
    return suffixed
|
||||
|
||||
def main(services, ignored_services, timeout_sec):
    """
    Block until none of the requested services is active any more.

    Both name lists get the default suffix appended, the ignored names are
    removed from the requested ones, and the remaining services are polled
    until all have stopped. The attempt counter is carried across polling
    rounds, so the attempt limit derived from ``timeout_sec`` applies to the
    whole wait.

    Args:
        services: Service names (without suffix) to wait for.
        ignored_services: Service names (without suffix) to skip.
        timeout_sec: Overall timeout in seconds.
    """
    services_with_suffix = append_suffix_to_services(services)
    ignored_services_with_suffix = append_suffix_to_services(ignored_services)
    filtered_services = filter_services(
        services_with_suffix,
        ignored_services_with_suffix,
    )

    print(f"Services to handle: {services_with_suffix}")
    print(f"Services to ignore: {ignored_services_with_suffix}")
    print(f"Services filtered: {filtered_services}")

    print("Waiting for services to stop.")

    max_attempts = get_max_attempts(timeout_sec)
    attempt = 0
    while True:
        # Re-check the whole set: a service may have come back while
        # another one was being waited for.
        if not check_any_service_active(filtered_services):
            break
        attempt = wait_for_all_services_to_stop(filtered_services, max_attempts, attempt)

    print("All required services have stopped.")
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(
        description='Blocks the code execution as long as defined services are running. Terminates with 0 when all services stopped',
    )
    parser.add_argument(
        'services',
        nargs='+',
        help='List of services to apply the action to.',
    )
    parser.add_argument(
        '--ignore',
        nargs='*',
        help='List of services to ignore in the action.',
        default=[],
    )
    parser.add_argument(
        '--timeout',
        help='Timeout for lock actions (e.g., 1h, 30min, 45s).',
        default='1min',
    )
    cli_args = parser.parse_args()

    main(
        cli_args.services,
        cli_args.ignore or [],  # a falsy value is passed on as an empty list
        parse_time_to_seconds(cli_args.timeout),
    )
|
24
roles/maint-lock/meta/main.yml
Normal file
24
roles/maint-lock/meta/main.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
---
|
||||
galaxy_info:
|
||||
author: "Kevin Veen-Birkenbach"
|
||||
description: "Ensures system integrity during maintenance activities by blocking execution until critical services have stopped, using a locking mechanism with timeout and retry logic."
|
||||
license: "CyMaIS NonCommercial License (CNCL)"
|
||||
license_url: "https://s.veen.world/cncl"
|
||||
company: |
|
||||
Kevin Veen-Birkenbach
|
||||
Consulting & Coaching Solutions
|
||||
https://www.veen.world
|
||||
min_ansible_version: "2.9"
|
||||
platforms:
|
||||
- name: Linux
|
||||
versions:
|
||||
- all
|
||||
galaxy_tags:
|
||||
- maintenance
|
||||
- lock
|
||||
- system
|
||||
- administration
|
||||
repository: "https://s.veen.world/cymais"
|
||||
issue_tracker_url: "https://s.veen.world/cymaisissues"
|
||||
documentation: "https://s.veen.world/cymais"
|
||||
dependencies: []
|
13
roles/maint-lock/tasks/main.yml
Normal file
13
roles/maint-lock/tasks/main.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
---
|
||||
- name: create {{path_system_lock_script}}
|
||||
copy:
|
||||
src: maint-lock.py
|
||||
dest: "{{path_system_lock_script}}"
|
||||
when: run_once_system_maintenance_lock is not defined
|
||||
|
||||
## Runtime Variable Setting
|
||||
|
||||
- name: run the system_maintenance_lock tasks once
|
||||
set_fact:
|
||||
run_once_system_maintenance_lock: true
|
||||
when: run_once_system_maintenance_lock is not defined
|
Reference in New Issue
Block a user