mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-08-29 15:06:26 +02:00
Another big round of refactoring and cleaning...
This commit is contained in:
28
roles/sys-rpr-docker-soft/README.md
Normal file
28
roles/sys-rpr-docker-soft/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Docker Healer 🩺
|
||||
|
||||
## Description
|
||||
|
||||
This Ansible role automatically restarts Docker Compose configurations with exited or unhealthy containers on Arch Linux systems. It ensures the stability of containerized workloads by recovering from common error conditions like port binding issues.
|
||||
|
||||
## Overview
|
||||
|
||||
Tailored for Arch Linux, this role monitors containers for failure states and initiates a controlled restart of affected Compose configurations. If port conflicts prevent recovery, the role stops the affected stack, restarts Docker, and recreates the container environment.
|
||||
|
||||
## Purpose
|
||||
|
||||
The purpose of this role is to provide automated healing for Docker Compose environments, minimizing manual recovery effort and reducing downtime.
|
||||
|
||||
## Features
|
||||
|
||||
- **Container Health Monitoring:** Detects unhealthy or exited containers.
|
||||
- **Automated Recovery:** Restarts failed containers and resolves port binding issues.
|
||||
- **Run-once Setup Logic:** Ensures idempotent execution by controlling task flow with internal flags.
|
||||
- **System Role Integration:** Seamlessly integrates with CyMaIS system maintenance logic.
|
||||
|
||||
## Credits 📝
|
||||
|
||||
Developed and maintained by **Kevin Veen-Birkenbach**.
|
||||
Learn more at [www.veen.world](https://www.veen.world)
|
||||
|
||||
Part of the [CyMaIS Project](https://github.com/kevinveenbirkenbach/cymais)
|
||||
License: [CyMaIS NonCommercial License (CNCL)](https://s.veen.world/cncl)
|
89
roles/sys-rpr-docker-soft/files/sys-rpr-docker-soft.py
Normal file
89
roles/sys-rpr-docker-soft/files/sys-rpr-docker-soft.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/bin/python
|
||||
#
|
||||
# Restart Docker-Compose configurations with exited or unhealthy containers
|
||||
#
|
||||
import subprocess
|
||||
import time
|
||||
import os
|
||||
import argparse
|
||||
|
||||
def bash(command):
    """Run *command* through the shell and return its stdout as a list of lines.

    The command is echoed to stdout before it runs. Both output streams are
    captured; stdout is split into lines and every line is decoded as UTF-8.

    Args:
        command: Shell command line. It is executed with ``shell=True``, so
            shell syntax such as ``&&`` or ``;`` is interpreted.

    Returns:
        list: The decoded stdout lines, without line terminators.

    Raises:
        Exception: If the process finishes with any non-zero return code. The
            exception message is the stripped, UTF-8 decoded stderr text.
    """
    print(command)
    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=True,
    )
    out, err = process.communicate()
    stderr = err.decode("utf-8").strip()
    output = [line.decode("utf-8") for line in out.splitlines()]
    # Popen reports a process killed by a signal with a NEGATIVE return code.
    # The former ``> 0`` comparison treated such a crash as success.
    if process.returncode != 0:
        print(command, out, err)
        raise Exception(stderr)  # pass the actual error text to the caller
    return output
|
||||
|
||||
def list_to_string(lst):
    """Return the strings in *lst* joined into one string by single spaces."""
    separator = ' '
    joined = separator.join(lst)
    return joined
|
||||
|
||||
def print_bash(command):
    """Run *command* with ``bash()``, print its output, and return it.

    The stdout lines returned by ``bash()`` are printed on a single line,
    separated by spaces, and then handed back unchanged to the caller.
    Any exception raised by ``bash()`` propagates.
    """
    lines = bash(command)
    # Same single-space join that list_to_string() performs.
    print(' '.join(lines))
    return lines
|
||||
|
||||
def find_docker_compose_file(directory):
    """Locate the first ``docker-compose.yml`` at or below *directory*.

    The tree is traversed with ``os.walk``; the search stops at the first
    directory whose file list contains ``docker-compose.yml``.

    Returns:
        The path of that file (directory joined with the file name), or
        ``None`` when no such file is found.
    """
    target = 'docker-compose.yml'
    matches = (
        os.path.join(folder, target)
        for folder, _subdirs, filenames in os.walk(directory)
        if target in filenames
    )
    # next() consumes the lazy walk only up to the first hit.
    return next(matches, None)
|
||||
|
||||
def main(base_directory):
    """Restart every Compose project that has unhealthy or exited containers.

    Procedure:
      1. Poll every 600 seconds until none of the blocking systemd services
         is active.
      2. Ask ``docker ps`` for the names of unhealthy and exited containers.
      3. Take the text before the first ``-`` of each container name as the
         project name and de-duplicate while keeping the order.
      4. For each project, look for a ``docker-compose.yml`` below
         ``base_directory/<project>`` and run ``docker-compose restart``.
         If that fails with "port is already allocated", take the stack
         down, restart the Docker daemon and bring the stack up again.

    Args:
        base_directory: Directory containing one sub-directory per project.

    Raises:
        SystemExit: Always raised at the end; the exit status is the number
            of projects that could not be handled (0 means success).
        Exception: Propagated from ``print_bash`` if listing the containers
            with ``docker ps`` fails.
    """
    # Local import: only needed here to quote paths handed to the shell.
    import shlex

    blocking_services = (
        "sys-bkp-docker-to-local.cymais.service",
        "update-docker.cymais.service",
    )
    waiting_time = 600
    errors = 0

    # Wait while ANY blocking service is active. Checking each service on its
    # own matters: running both checks in a single try block ended the wait
    # as soon as the first service was inactive, even if the second one was
    # still running.
    while True:
        active = [name for name in blocking_services if _is_service_active(name)]
        if not active:
            break
        print("Blocking service is running:", ", ".join(active))
        print(f"Trying again in {waiting_time} seconds.")
        time.sleep(waiting_time)
    print("No blocking service is running.")

    unhealthy_container_names = print_bash("docker ps --filter health=unhealthy --format '{{.Names}}'")
    exited_container_names = print_bash("docker ps --filter status=exited --format '{{.Names}}'")
    failed_containers = unhealthy_container_names + exited_container_names

    # dict.fromkeys() removes duplicates while preserving first-seen order.
    failed_projects = list(
        dict.fromkeys(container.split('-')[0] for container in failed_containers)
    )

    for repo in failed_projects:
        compose_file_path = find_docker_compose_file(os.path.join(base_directory, repo))
        if not compose_file_path:
            print("Error: Docker Compose file not found for:", repo)
            errors += 1
            continue

        print("Restarting unhealthy container in:", compose_file_path)
        project_path = os.path.dirname(compose_file_path)
        # Quote the path so directories containing spaces or shell
        # metacharacters survive the trip through the shell. The same
        # project name is passed to every compose call (including ``down``)
        # so all three act on the same stack.
        compose = f'cd {shlex.quote(project_path)} && docker-compose -p "{repo}"'
        try:
            print_bash(f'{compose} restart')
        except Exception as e:
            if "port is already allocated" not in str(e):
                print("Unhandled exception during restart:", e)
                errors += 1
                continue
            print("Detected port allocation problem. Executing recovery steps...")
            try:
                print_bash(f'{compose} down')
                print_bash('systemctl restart docker')
                print_bash(f'{compose} up -d')
            except Exception as recovery_error:
                # Count the failure but keep healing the remaining projects;
                # the non-zero exit status still reports the problem.
                print("Recovery failed:", recovery_error)
                errors += 1

    print("Finished restart procedure.")
    # Equivalent to exit(errors) but does not depend on the site-provided
    # ``exit`` helper being installed.
    raise SystemExit(errors)


def _is_service_active(service):
    """Return True when ``systemctl is-active --quiet`` succeeds for *service*."""
    try:
        bash(f"systemctl is-active --quiet {service}")
    except Exception:
        # bash() raises when systemctl exits non-zero, i.e. the unit is not active.
        return False
    return True
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: takes the base directory as the single
    # positional argument and passes it to main().
    cli = argparse.ArgumentParser(
        description="Restart Docker-Compose configurations with exited or unhealthy containers."
    )
    cli.add_argument(
        "base_directory",
        type=str,
        help="Base directory where Docker Compose configurations are located.",
    )
    main(cli.parse_args().base_directory)
|
5
roles/sys-rpr-docker-soft/handlers/main.yml
Normal file
5
roles/sys-rpr-docker-soft/handlers/main.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
- name: restart sys-rpr-docker-soft.cymais.service
|
||||
systemd:
|
||||
name: sys-rpr-docker-soft.cymais.service
|
||||
state: restarted
|
||||
daemon_reload: yes
|
26
roles/sys-rpr-docker-soft/meta/main.yml
Normal file
26
roles/sys-rpr-docker-soft/meta/main.yml
Normal file
@@ -0,0 +1,26 @@
|
||||
---
|
||||
galaxy_info:
|
||||
author: "Kevin Veen-Birkenbach"
|
||||
description: "Automated recovery for unhealthy or exited Docker Compose containers."
|
||||
license: "CyMaIS NonCommercial License (CNCL)"
|
||||
license_url: "https://s.veen.world/cncl"
|
||||
company: |
|
||||
Kevin Veen-Birkenbach
|
||||
Consulting & Coaching Solutions
|
||||
https://www.veen.world
|
||||
min_ansible_version: "2.9"
|
||||
platforms:
|
||||
- name: Archlinux
|
||||
versions:
|
||||
- rolling
|
||||
galaxy_tags:
|
||||
- docker
|
||||
- docker-compose
|
||||
- systemd
|
||||
- automation
|
||||
- archlinux
|
||||
repository: https://s.veen.world/cymais
|
||||
issue_tracker_url: https://s.veen.world/cymaisissues
|
||||
documentation: https://s.veen.world/cymais
|
||||
dependencies:
|
||||
- sys-lock
|
37
roles/sys-rpr-docker-soft/tasks/main.yml
Normal file
37
roles/sys-rpr-docker-soft/tasks/main.yml
Normal file
@@ -0,0 +1,37 @@
|
||||
- name: "create {{heal_docker}}"
|
||||
file:
|
||||
path: "{{heal_docker}}"
|
||||
state: directory
|
||||
mode: 0755
|
||||
when: run_once_heal_docker is not defined
|
||||
|
||||
- name: create sys-rpr-docker-soft.py
|
||||
copy:
|
||||
src: sys-rpr-docker-soft.py
|
||||
dest: "{{heal_docker}}sys-rpr-docker-soft.py"
|
||||
notify: restart sys-rpr-docker-soft.cymais.service
|
||||
when: run_once_heal_docker is not defined
|
||||
|
||||
- name: create sys-rpr-docker-soft.cymais.service
|
||||
template:
|
||||
src: sys-rpr-docker-soft.service.j2
|
||||
dest: /etc/systemd/system/sys-rpr-docker-soft.cymais.service
|
||||
notify: restart sys-rpr-docker-soft.cymais.service
|
||||
when: run_once_heal_docker is not defined
|
||||
|
||||
- name: "set 'service_name' to '{{ role_name }}'"
|
||||
set_fact:
|
||||
service_name: "{{ role_name }}"
|
||||
when: run_once_heal_docker is not defined
|
||||
|
||||
- name: "include role for sys-timer for {{service_name}}"
|
||||
include_role:
|
||||
name: sys-timer
|
||||
vars:
|
||||
on_calendar: "{{on_calendar_heal_docker}}"
|
||||
when: run_once_heal_docker is not defined
|
||||
|
||||
- name: run the heal_docker tasks once
|
||||
set_fact:
|
||||
run_once_heal_docker: true
|
||||
when: run_once_heal_docker is not defined
|
@@ -0,0 +1,8 @@
|
||||
[Unit]
|
||||
Description=restart unhealthy docker containers
|
||||
OnFailure=sys-alm-compose.cymais@%n.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStartPre=/bin/sh -c '/usr/bin/python {{ path_system_lock_script }} {{ system_maintenance_services | join(' ') }} --ignore {{system_maintenance_cleanup_services| join(' ') }} sys-rpr-docker-soft --timeout "{{system_maintenance_lock_timeout_heal_docker}}"'
|
||||
ExecStart=/bin/sh -c '/bin/python {{heal_docker}}sys-rpr-docker-soft.py {{path_docker_compose_instances}}'
|
2
roles/sys-rpr-docker-soft/vars/main.yml
Normal file
2
roles/sys-rpr-docker-soft/vars/main.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
heal_docker: '{{path_administrator_scripts}}sys-rpr-docker-soft/'
|
||||
|
Reference in New Issue
Block a user