mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-08-26 21:45:20 +02:00
Refactor sys-ctl-rpr-docker-soft role to use standalone Python script with argparse and unittests
- Replace Jinja2 template (script.py.j2) with raw Python script (files/script.py) - Add argparse options: --manipulation, --manipulation-string, --timeout - Implement timeout handling in wait_while_manipulation_running - Update systemd ExecStart/ExecStartPre handling in tasks/01_core.yml - Remove obsolete systemctl.service.j2 and script.py.j2 templates - Add unittest suite under tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py - Mock docker and systemctl calls in tests for safe execution Reference: ChatGPT conversation (see https://chatgpt.com/share/68ad770b-ea84-800f-b378-559cb61fc43a)
This commit is contained in:
parent
7ad14673e1
commit
e417bc19bd
166
roles/sys-ctl-rpr-docker-soft/files/script.py
Normal file
166
roles/sys-ctl-rpr-docker-soft/files/script.py
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Restart Docker-Compose configurations with exited or unhealthy containers.
|
||||||
|
This version receives the *manipulation services* via argparse (no Jinja).
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
def bash(command: str) -> List[str]:
    """
    Run *command* through the shell and return its stdout as a list of lines.

    The command is echoed to stdout before it is executed. Both output
    streams are decoded as UTF-8; undecodable bytes are replaced.

    Args:
        command: Shell command line to execute.

    Returns:
        The decoded stdout lines of the command.

    Raises:
        Exception: If the command finishes with any non-zero return code.
            The message is the stripped stderr text, or a generic message
            containing the return code when stderr is empty.
    """
    print(command)
    process = subprocess.Popen(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
    )
    out, err = process.communicate()
    stderr = err.decode("utf-8", errors="replace").strip()
    output = [line.decode("utf-8", errors="replace") for line in out.splitlines()]
    # Compare with `!= 0`, not `> 0`: a process terminated by a signal reports
    # a negative return code and must be treated as a failure as well.
    if process.returncode != 0:
        print(command, out, err)
        raise Exception(stderr or f"Command failed with code {process.returncode}")
    return output
|
||||||
|
|
||||||
|
|
||||||
|
def list_to_string(lst: List[str]) -> str:
    """Return the items of *lst* joined into one string by single spaces."""
    separator = " "
    return separator.join(lst)
|
||||||
|
|
||||||
|
|
||||||
|
def print_bash(command: str) -> List[str]:
    """
    Execute *command* via ``bash`` and echo its output on a single line.

    Nothing is printed when the command produced no stdout lines. The list
    of lines returned by ``bash`` is passed through unchanged.
    """
    lines = bash(command)
    if not lines:
        return lines
    print(list_to_string(lines))
    return lines
|
||||||
|
|
||||||
|
|
||||||
|
def find_docker_compose_file(directory: str) -> str | None:
|
||||||
|
for root, _, files in os.walk(directory):
|
||||||
|
if "docker-compose.yml" in files:
|
||||||
|
return os.path.join(root, "docker-compose.yml")
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[str]:
|
||||||
|
"""
|
||||||
|
Accept either:
|
||||||
|
- multiple --manipulation SERVICE flags (nargs='*')
|
||||||
|
- a single --manipulation-string "svc1 svc2 ..." (space or comma separated)
|
||||||
|
"""
|
||||||
|
if raw:
|
||||||
|
return [s for s in raw if s.strip()]
|
||||||
|
if raw_str:
|
||||||
|
# split on comma or whitespace
|
||||||
|
parts = [p.strip() for chunk in raw_str.split(",") for p in chunk.split()]
|
||||||
|
return [p for p in parts if p]
|
||||||
|
return []
|
||||||
|
|
||||||
|
def wait_while_manipulation_running(
|
||||||
|
services: List[str],
|
||||||
|
waiting_time: int = 600,
|
||||||
|
timeout: int | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Wait until none of the given services are active anymore.
|
||||||
|
Stops waiting if timeout (in seconds) is reached.
|
||||||
|
"""
|
||||||
|
if not services:
|
||||||
|
print("No manipulation services provided. Continuing without wait.")
|
||||||
|
return
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
while True:
|
||||||
|
any_active = False
|
||||||
|
for svc in services:
|
||||||
|
res = subprocess.run(f"systemctl is-active --quiet {svc}", shell=True)
|
||||||
|
if res.returncode == 0:
|
||||||
|
any_active = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if any_active:
|
||||||
|
# Check timeout
|
||||||
|
elapsed = time.time() - start
|
||||||
|
if timeout and elapsed >= timeout:
|
||||||
|
print(f"Timeout ({timeout}s) reached while waiting for services. Continuing anyway.")
|
||||||
|
break
|
||||||
|
print(f"Manipulation service is running. Trying again in {waiting_time} seconds.")
|
||||||
|
time.sleep(waiting_time)
|
||||||
|
else:
|
||||||
|
print("No blocking service is running.")
|
||||||
|
break
|
||||||
|
|
||||||
|
def main(base_directory: str, manipulation_services: List[str], timeout: int | None) -> int:
    """
    Restart the Compose projects that own unhealthy or exited containers.

    Steps:
      1. Wait while any manipulation service is active (bounded by *timeout*).
      2. Collect the names of unhealthy and exited containers via ``docker ps``.
      3. Take the text before the first ``-`` of every container name as the
         project/repository name and de-duplicate, keeping first-seen order.
      4. For every repository, locate its compose file below
         ``base_directory/<repo>`` and run ``docker-compose restart``.
         A "port is already allocated" failure triggers a recovery sequence
         (compose down, restart of the docker service, compose up).

    Args:
        base_directory: Directory holding one sub-directory per repository.
        manipulation_services: systemd units to wait for before acting.
        timeout: Maximum seconds to wait for those units (``None``: no limit).

    Returns:
        The number of repositories that could not be handled (compose file
        missing, restart failed, or recovery failed).
    """
    errors = 0
    wait_while_manipulation_running(manipulation_services, waiting_time=600, timeout=timeout)

    # These are plain strings (no f-string, no Jinja), so the Go template is
    # written with exactly two braces; doubling them breaks the template.
    unhealthy_container_names = print_bash(
        "docker ps --filter health=unhealthy --format '{{.Names}}'"
    )
    exited_container_names = print_bash(
        "docker ps --filter status=exited --format '{{.Names}}'"
    )
    failed_containers = unhealthy_container_names + exited_container_names

    # dict.fromkeys de-duplicates while preserving first-seen order.
    # NOTE(review): assumes the project name itself contains no "-" - confirm.
    repositories = list(
        dict.fromkeys(container.split("-")[0] for container in failed_containers if container)
    )

    for repo in repositories:
        compose_file_path = find_docker_compose_file(os.path.join(base_directory, repo))
        if not compose_file_path:
            print("Error: Docker Compose file not found for:", repo)
            errors += 1
            continue

        print("Restarting unhealthy container in:", compose_file_path)
        project_path = os.path.dirname(compose_file_path)
        try:
            print_bash(f'cd "{project_path}" && docker-compose -p "{repo}" restart')
        except Exception as e:
            if "port is already allocated" not in str(e):
                print("Unhandled exception during restart:", e)
                errors += 1
                continue

            print("Detected port allocation problem. Executing recovery steps...")
            try:
                # Use the same project name as restart/up so "down" targets
                # the same Compose project.
                print_bash(f'cd "{project_path}" && docker-compose -p "{repo}" down')
                print_bash("systemctl restart docker")
                print_bash(f'cd "{project_path}" && docker-compose -p "{repo}" up -d')
            except Exception as recovery_error:
                # A failed recovery must not abort the remaining repositories.
                print("Recovery failed:", recovery_error)
                errors += 1

    print("Finished restart procedure.")
    return errors
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, run main() and report
    # the number of failed repositories through the process exit status.
    import sys

    parser = argparse.ArgumentParser(
        description="Restart Docker-Compose configurations with exited or unhealthy containers."
    )
    parser.add_argument(
        "--manipulation",
        metavar="SERVICE",
        nargs="*",
        # "extend" accumulates the values of every occurrence of the flag;
        # the default "store" action would keep only the last occurrence.
        action="extend",
        help="Blocking systemd services to wait for (can be specified multiple times).",
    )
    parser.add_argument(
        "--manipulation-string",
        type=str,
        help='Blocking services as a single string (space- or comma-separated), e.g. "svc1 svc2" or "svc1,svc2".',
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=60,
        help="Maximum time in seconds to wait for manipulation services before continuing.(Default 1min)",
    )
    parser.add_argument(
        "base_directory",
        type=str,
        help="Base directory where Docker Compose configurations are located.",
    )
    args = parser.parse_args()
    services = normalize_services_arg(args.manipulation, args.manipulation_string)
    error_count = main(args.base_directory, services, args.timeout)
    # Exit statuses are truncated to 8 bits; clamp so that e.g. 256 errors
    # cannot wrap around to 0 and be reported as success.
    sys.exit(min(error_count, 255))
|
@ -6,6 +6,10 @@
|
|||||||
- include_role:
|
- include_role:
|
||||||
name: sys-service
|
name: sys-service
|
||||||
vars:
|
vars:
|
||||||
system_service_on_calendar: "{{SYS_SCHEDULE_REPAIR_DOCKER_SOFT}}"
|
system_service_on_calendar: "{{ SYS_SCHEDULE_REPAIR_DOCKER_SOFT }}"
|
||||||
system_service_timer_enabled: true
|
system_service_timer_enabled: true
|
||||||
system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}"
|
system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}"
|
||||||
|
system_service_tpl_exec_start_pre: "/usr/bin/python {{ PATH_SYSTEM_LOCK_SCRIPT }} {{ SYS_SERVICE_GROUP_MANIPULATION | join(' ') }} --ignore {{ SYS_SERVICE_GROUP_CLEANUP| join(' ') }} {{ SYS_SERVICE_REPAIR_DOCKER_SOFT }} --timeout '{{ SYS_TIMEOUT_HEAL_DOCKER }}'"
|
||||||
|
system_service_tpl_exec_start: >
|
||||||
|
/bin/sh -c '{{ system_service_script_exec }} --manipulation-string "{{ SYS_SERVICE_GROUP_MANIPULATION | join(" ") }}" {{ PATH_DOCKER_COMPOSE_INSTANCES }}'
|
||||||
|
|
||||||
|
@ -1,90 +0,0 @@
|
|||||||
#!/bin/python
|
|
||||||
#
|
|
||||||
# Restart Docker-Compose configurations with exited or unhealthy containers
|
|
||||||
#
|
|
||||||
import subprocess
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
import argparse
|
|
||||||
|
|
||||||
def bash(command):
    """
    Run *command* through the shell and return its stdout lines as strings.

    The command is printed before execution. Raises ``Exception`` when the
    command ends with a non-zero return code; the message is the stripped
    stderr text, or a generic message with the return code if stderr is empty.
    """
    print(command)
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    out, err = process.communicate()
    stderr = err.decode("utf-8").strip()
    output = [line.decode("utf-8") for line in out.splitlines()]
    # `!= 0` also catches negative return codes (process killed by a signal),
    # which the former `> 0` comparison let through as success.
    if process.returncode != 0:
        print(command, out, err)
        raise Exception(stderr or f"Command failed with code {process.returncode}")
    return output
|
|
||||||
|
|
||||||
def list_to_string(lst):
    """Concatenate the strings in *lst*, separated by single spaces."""
    glue = ' '
    return glue.join(lst)
|
|
||||||
|
|
||||||
def print_bash(command):
    """Run *command* via ``bash``, print its output on one line, return the lines."""
    lines = bash(command)
    # Always prints, even when there is no output (yields an empty line).
    print(' '.join(lines))
    return lines
|
|
||||||
|
|
||||||
def find_docker_compose_file(directory):
    """Return the path of the first ``docker-compose.yml`` found at or below *directory*, else ``None``."""
    target = 'docker-compose.yml'
    hits = (
        os.path.join(folder, target)
        for folder, _subdirs, entries in os.walk(directory)
        if target in entries
    )
    # next() consumes the lazy walk only up to the first hit.
    return next(hits, None)
|
|
||||||
|
|
||||||
def main(base_directory):
|
|
||||||
errors = 0
|
|
||||||
waiting_time = 600
|
|
||||||
blocker_running = True
|
|
||||||
|
|
||||||
while blocker_running:
|
|
||||||
try:
|
|
||||||
{% for manipulation_service in SYS_SERVICE_GROUP_MANIPULATION %}
|
|
||||||
bash("systemctl is-active --quiet {{ manipulation_service }}")
|
|
||||||
{% endfor %}
|
|
||||||
print("Manipulation service is running.")
|
|
||||||
print(f"Trying again in {waiting_time} seconds.")
|
|
||||||
time.sleep(waiting_time)
|
|
||||||
except:
|
|
||||||
blocker_running = False
|
|
||||||
print("No blocking service is running.")
|
|
||||||
|
|
||||||
unhealthy_container_names = print_bash("docker ps --filter health=unhealthy --format '{% raw %}{{.Names}}{% endraw %}'")
|
|
||||||
exited_container_names = print_bash("docker ps --filter status=exited --format '{% raw %}{{.Names}}{% endraw %}'")
|
|
||||||
failed_containers = unhealthy_container_names + exited_container_names
|
|
||||||
|
|
||||||
unfiltered_failed_docker_compose_repositories = [container.split('-')[0] for container in failed_containers]
|
|
||||||
filtered_failed_docker_compose_repositories = list(dict.fromkeys(unfiltered_failed_docker_compose_repositories))
|
|
||||||
|
|
||||||
for repo in filtered_failed_docker_compose_repositories:
|
|
||||||
compose_file_path = find_docker_compose_file(os.path.join(base_directory, repo))
|
|
||||||
|
|
||||||
if compose_file_path:
|
|
||||||
print("Restarting unhealthy container in:", compose_file_path)
|
|
||||||
project_path = os.path.dirname(compose_file_path)
|
|
||||||
try:
|
|
||||||
print_bash(f'cd {project_path} && docker-compose -p "{repo}" restart')
|
|
||||||
except Exception as e:
|
|
||||||
if "port is already allocated" in str(e):
|
|
||||||
print("Detected port allocation problem. Executing recovery steps...")
|
|
||||||
print_bash(f'cd {project_path} && docker-compose down')
|
|
||||||
print_bash('systemctl restart docker')
|
|
||||||
print_bash(f'cd {project_path} && docker-compose -p "{repo}" up -d')
|
|
||||||
else:
|
|
||||||
print("Unhandled exception during restart:", e)
|
|
||||||
errors += 1
|
|
||||||
else:
|
|
||||||
print("Error: Docker Compose file not found for:", repo)
|
|
||||||
errors += 1
|
|
||||||
|
|
||||||
|
|
||||||
print("Finished restart procedure.")
|
|
||||||
exit(errors)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: the only argument is the base directory.
    cli = argparse.ArgumentParser(
        description="Restart Docker-Compose configurations with exited or unhealthy containers."
    )
    cli.add_argument(
        "base_directory",
        type=str,
        help="Base directory where Docker Compose configurations are located.",
    )
    parsed = cli.parse_args()
    main(parsed.base_directory)
|
|
@ -1,8 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=restart unhealthy docker containers
|
|
||||||
OnFailure={{ SYS_SERVICE_ON_FAILURE_COMPOSE }}
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=oneshot
|
|
||||||
ExecStartPre=/usr/bin/python {{ PATH_SYSTEM_LOCK_SCRIPT }} {{ SYS_SERVICE_GROUP_MANIPULATION | join(' ') }} --ignore {{ SYS_SERVICE_GROUP_CLEANUP| join(' ') }} {{ SYS_SERVICE_REPAIR_DOCKER_SOFT }} --timeout "{{ SYS_TIMEOUT_HEAL_DOCKER }}"
|
|
||||||
ExecStart=/bin/sh -c '{{ system_service_script_exec }} {{ PATH_DOCKER_COMPOSE_INSTANCES }}'
|
|
116
tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py
Normal file
116
tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
import unittest
|
||||||
|
import types
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from importlib.util import spec_from_file_location, module_from_spec
|
||||||
|
|
||||||
|
|
||||||
|
def load_script_module():
    """
    Load roles/sys-ctl-rpr-docker-soft/files/script.py as a module object.

    The script is located relative to this test file and executed under the
    module name ``rpr_soft_script``. Raises ``FileNotFoundError`` when the
    script does not exist at the expected location.
    """
    # parents[5]: files -> sys-ctl-rpr-docker-soft -> roles -> unit -> tests -> repo root
    repo_root = Path(__file__).resolve().parents[5]
    script_path = repo_root.joinpath("roles", "sys-ctl-rpr-docker-soft", "files", "script.py")
    if not script_path.exists():
        raise FileNotFoundError(f"script.py not found at {script_path}")

    spec = spec_from_file_location("rpr_soft_script", str(script_path))
    module = module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(module)  # type: ignore[attr-defined]
    return module
|
||||||
|
|
||||||
|
|
||||||
|
class TestRepairDockerSoft(unittest.TestCase):
    """Unit tests for the repair script with docker/systemctl access stubbed out."""

    @classmethod
    def setUpClass(cls):
        cls.script = load_script_module()

    def _stub(self, owner, attribute, replacement):
        """Swap ``owner.attribute`` for *replacement*; the original is restored on cleanup."""
        self.addCleanup(setattr, owner, attribute, getattr(owner, attribute))
        setattr(owner, attribute, replacement)

    def test_normalize_services_arg(self):
        normalize = self.script.normalize_services_arg
        cases = [
            ((["svc-a.service", " ", "svc-b.service"], None),
             ["svc-a.service", "svc-b.service"]),
            ((None, "svc-a.service svc-b.service"),
             ["svc-a.service", "svc-b.service"]),
            ((None, "svc-a.service, svc-b.service, svc-c.service"),
             ["svc-a.service", "svc-b.service", "svc-c.service"]),
            (([], ""), []),
        ]
        for arguments, expected in cases:
            self.assertEqual(normalize(*arguments), expected)

    def test_wait_while_manipulation_running_respects_timeout(self):
        script = self.script
        counters = {"checks": 0, "sleeps": 0}
        clock = {"now": 0}

        def fake_run(cmd, shell):
            # Every probed service is reported as active.
            self.assertIn("systemctl is-active --quiet", cmd)
            counters["checks"] += 1
            return types.SimpleNamespace(returncode=0)

        def fake_sleep(_secs):
            counters["sleeps"] += 1

        def fake_time():
            # each call advances time by 610s
            clock["now"] += 610
            return clock["now"]

        self._stub(script.subprocess, "run", fake_run)
        self._stub(script.time, "sleep", fake_sleep)
        self._stub(script.time, "time", fake_time)

        script.wait_while_manipulation_running(["svc-a", "svc-b"], waiting_time=600, timeout=1200)

        self.assertGreaterEqual(counters["sleeps"], 1)
        self.assertGreaterEqual(counters["checks"], 1)

    def test_main_restarts_and_counts_errors(self):
        script = self.script
        issued = []

        def fake_print_bash(cmd):
            issued.append(cmd)
            if cmd.startswith("docker ps --filter health=unhealthy"):
                return ["app1-web-1", "db-1"]
            if cmd.startswith("docker ps --filter status=exited"):
                return ["app1-worker-1", "other-2"]
            # docker-compose calls and anything else produce no output.
            return []

        def fake_find_docker_compose(path):
            # Only "app1" and "db" have a compose file; "other" is missing.
            if path.endswith(("/app1", "/db")):
                return str(Path(path) / "docker-compose.yml")
            return None

        self._stub(script, "print_bash", fake_print_bash)
        # Must use the same name as the function defined in the script.
        self._stub(script, "find_docker_compose_file", fake_find_docker_compose)

        errors = script.main("/BASE", manipulation_services=[], timeout=None)
        self.assertEqual(errors, 1)

        restart_cmds = [c for c in issued if "docker-compose -p" in c and " restart" in c]
        self.assertTrue(any('cd "/BASE/app1"' in c and 'docker-compose -p "app1" restart' in c for c in restart_cmds))
        self.assertTrue(any('cd "/BASE/db"' in c and 'docker-compose -p "db" restart' in c for c in restart_cmds))
|
||||||
|
|
||||||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
x
Reference in New Issue
Block a user