Shortened maintenance- to maint-

This commit is contained in:
2025-07-09 03:25:03 +02:00
parent ae5f021b8d
commit d0bd33fee3
63 changed files with 96 additions and 96 deletions

View File

@@ -0,0 +1,25 @@
# System Maintenance Lock
## Description
This role provides a locking mechanism to ensure that critical services are not interrupted during maintenance activities such as updates, backups, or patch applications. It waits for specified services to stop and prevents conflicting operations.
## Overview
The role performs the following:
- Blocks execution until specified services have stopped.
- Implements retry logic with a configurable timeout.
- Ensures that maintenance tasks are executed only when the system is in a safe state.
## Purpose
The primary purpose of this role is to safeguard system stability during maintenance by preventing conflicts with running services. It ensures that maintenance operations proceed only when the environment is ready.
## Features
- **Service Locking:** Blocks maintenance tasks until critical services are stopped.
- **Timeout and Retry Logic:** Configurable wait times and maximum attempts.
- **Conflict Avoidance:** Prevents interference between maintenance operations and running services.
## Credits 📝
Created with ChatGPT. The conversation is available [here](https://chat.openai.com/share/a886b86b-8de6-4eca-9fba-e36c9f20d536).

View File

@@ -0,0 +1,102 @@
import argparse
import subprocess
import time
import os
from datetime import datetime
# Pause between two consecutive service status polls, in seconds.
# The timeout is divided by this value to obtain the number of attempts.
BREAK_TIME_SECONDS = 5
class AttemptException(Exception):
    """Signals that the permitted number of wait attempts was exceeded."""
def parse_time_to_seconds(time_str):
    """
    Convert a time string (e.g., '1h', '30min', '45s') to seconds.

    Supported units are 's' (seconds), 'min' (minutes) and 'h' (hours).
    Surrounding whitespace, whitespace between number and unit, and the
    letter case of the unit are ignored.

    Args:
        time_str: Duration as a non-negative integer followed by a unit.

    Returns:
        The duration in seconds as an int.

    Raises:
        ValueError: If the unit is unknown, the number is missing or not an
            integer, or the number is negative.
    """
    units = {"s": 1, "min": 60, "h": 3600}
    text = time_str.strip().lower()

    # Try the longest suffix first so a longer unit is never shadowed by a
    # shorter one that happens to share its last character.
    for unit in sorted(units, key=len, reverse=True):
        if text.endswith(unit):
            number = text[:-len(unit)].strip()
            break
    else:
        raise ValueError(
            f"Invalid time unit in {time_str!r}; expected one of: "
            + ", ".join(units)
        )

    try:
        value = int(number)
    except ValueError:
        raise ValueError(
            f"Invalid time value in {time_str!r}; expected an integer before the unit"
        ) from None

    # A negative timeout would silently turn into a negative attempt budget.
    if value < 0:
        raise ValueError(f"Time value must not be negative: {time_str!r}")

    return value * units[unit]
def check_service_active(service_name):
    """
    Report whether systemd considers a unit 'active' or 'activating'.

    Runs ``systemctl is-active <service_name>`` and inspects only the text
    printed on stdout; the command's exit status is not evaluated. The
    outcome is also printed for the operator.

    Args:
        service_name: Name of the systemd unit to query.

    Returns:
        True if the reported state is 'active' or 'activating', else False.
    """
    completed = subprocess.run(
        ['systemctl', 'is-active', service_name],
        stdout=subprocess.PIPE,
    )
    state = completed.stdout.decode('utf-8').strip()
    running = state in ('active', 'activating')
    label = 'active' if running else 'not active'
    print(f"Service {service_name} is {label}.")
    return running
def check_any_service_active(services):
    """
    Tell whether at least one of the given services is active or activating.

    Services are queried in order and the scan stops at the first one that
    is found active.

    Args:
        services: Iterable of systemd unit names.

    Returns:
        True as soon as one service is active, False if none is.
    """
    for candidate in services:
        if check_service_active(candidate):
            return True
    return False
def filter_services(services, ignored_services):
    """
    Return the services that are not listed in ignored_services.

    The relative order of the remaining services is preserved.

    Args:
        services: List of service names to filter.
        ignored_services: Collection of service names to drop.

    Returns:
        A new list containing every entry of services that is not ignored.
    """
    kept = []
    for candidate in services:
        if candidate in ignored_services:
            continue
        kept.append(candidate)
    return kept
def wait_for_all_services_to_stop(filtered_services, max_attempts, attempt):
    """
    Poll each service in turn until it is no longer active.

    The attempt counter is shared across all services: every poll that finds
    a service still active consumes one attempt and is followed by a pause
    of BREAK_TIME_SECONDS.

    Args:
        filtered_services: Service names to wait for, processed in order.
        max_attempts: Upper bound for the shared attempt counter.
        attempt: Number of attempts already consumed before this call.

    Returns:
        The updated attempt counter.

    Raises:
        AttemptException: If the counter would exceed max_attempts.
    """
    for service in filtered_services:
        while True:
            if not check_service_active(service):
                break  # this service is down; move on to the next one
            attempt = attempt + 1
            if attempt > max_attempts:
                raise AttemptException(f"Maximum attempts ({max_attempts}) reached. Exiting.")
            print(f"{datetime.now().isoformat()}#{attempt}/{max_attempts}: Waiting for {BREAK_TIME_SECONDS} seconds for {service} to stop...")
            time.sleep(BREAK_TIME_SECONDS)
    return attempt
def get_max_attempts(timeout_sec):
    """
    Translate a timeout in seconds into a number of polling attempts.

    The result is the timeout divided by BREAK_TIME_SECONDS, rounded down.

    Args:
        timeout_sec: Total time budget in seconds.

    Returns:
        The number of whole polling intervals that fit into the timeout.
    """
    attempts = timeout_sec // BREAK_TIME_SECONDS
    return attempts
def append_suffix_to_services(services, suffix=".cymais"):
    """
    Build a list of service names with the given suffix attached to each.

    Args:
        services: Iterable of service names.
        suffix: Text appended to every name; defaults to '.cymais'.

    Returns:
        A new list of suffixed names in the original order.
    """
    suffixed = []
    for name in services:
        suffixed.append(name + suffix)
    return suffixed
def main(services, ignored_services, timeout_sec):
    """
    Block until none of the requested services is active any more.

    Both name lists get the default service suffix appended, the ignored
    names are removed from the requested ones, and the remaining services
    are polled until all of them have stopped.

    Args:
        services: Service names (without suffix) to wait for.
        ignored_services: Service names (without suffix) to leave out.
        timeout_sec: Time budget in seconds, converted into polling attempts.

    Raises:
        AttemptException: Propagated from the wait loop when the attempt
            budget is used up.
    """
    attempt_limit = get_max_attempts(timeout_sec)
    attempts_used = 0

    services_with_suffix = append_suffix_to_services(services)
    ignored_services_with_suffix = append_suffix_to_services(ignored_services)
    filtered_services = filter_services(
        services_with_suffix,
        ignored_services_with_suffix,
    )

    print(f"Services to handle: {services_with_suffix}")
    print(f"Services to ignore: {ignored_services_with_suffix}")
    print(f"Services filtered: {filtered_services}")
    print("Waiting for services to stop.")

    # Re-check the whole set after each pass: a service that was already
    # visited may have become active again in the meantime.
    while check_any_service_active(filtered_services):
        attempts_used = wait_for_all_services_to_stop(
            filtered_services, attempt_limit, attempts_used
        )

    print("All required services have stopped.")
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, then block until the
    # requested services have stopped or the timeout is exhausted.
    parser = argparse.ArgumentParser(description='Blocks the code execution as long as defined services are running. Terminates with 0 when all services stopped')
    parser.add_argument('services', nargs='+', help='List of services to apply the action to.')
    parser.add_argument('--ignore', nargs='*', help='List of services to ignore in the action.', default=[])
    parser.add_argument('--timeout', help='Timeout for lock actions (e.g., 1h, 30min, 45s).', default='1min')
    args = parser.parse_args()

    services = args.services
    ignored_services = args.ignore or []

    try:
        timeout_seconds = parse_time_to_seconds(args.timeout)
    except ValueError as error:
        # Report a malformed --timeout as a usage error instead of a traceback.
        parser.error(f"invalid --timeout value {args.timeout!r}: {error}")

    try:
        main(services, ignored_services, timeout_seconds)
    except AttemptException as error:
        # Running out of attempts is an expected outcome: print the message
        # on stderr and exit with status 1, without a traceback.
        raise SystemExit(str(error))

View File

@@ -0,0 +1,24 @@
---
galaxy_info:
author: "Kevin Veen-Birkenbach"
description: "Ensures system integrity during maintenance activities by blocking execution until critical services have stopped, using a locking mechanism with timeout and retry logic."
license: "CyMaIS NonCommercial License (CNCL)"
license_url: "https://s.veen.world/cncl"
company: |
Kevin Veen-Birkenbach
Consulting & Coaching Solutions
https://www.veen.world
min_ansible_version: "2.9"
platforms:
- name: Linux
versions:
- all
galaxy_tags:
- maintenance
- lock
- system
- administration
repository: "https://s.veen.world/cymais"
issue_tracker_url: "https://s.veen.world/cymaisissues"
documentation: "https://s.veen.world/cymais"
dependencies: []

View File

@@ -0,0 +1,13 @@
---
# Copy the lock script to the configured destination. Skipped once the
# run-once flag set by the last task in this file is defined.
- name: create {{path_system_lock_script}}
copy:
src: maint-lock.py
dest: "{{path_system_lock_script}}"
when: run_once_system_maintenance_lock is not defined
## Runtime Variable Setting
# Define the run-once flag so that the `when` guards in this file evaluate
# to false the next time these tasks are processed.
- name: run the system_maintenance_service_freezer tasks once
set_fact:
run_once_system_maintenance_lock: true
when: run_once_system_maintenance_lock is not defined