Another big round of refactoring and cleaning...

This commit is contained in:
2025-07-11 17:55:26 +02:00
parent aa61bf2a44
commit 168c5c0da6
323 changed files with 761 additions and 811 deletions

View File

@@ -0,0 +1,21 @@
# Health Check for Docker Containers
## Description
This Ansible role is designed to ensure the health of Docker containers running on a system. It includes a script that checks for unhealthy or exited Docker containers and sets up a systemd service and timer to regularly execute this check.
## Files
- `vars/main.yml`: Variable definitions for the script's directory.
- `handlers/main.yml`: Handler that reloads the systemd daemon and enables the health check service.
- `files/sys-hlth-docker-container.sh`: The script that checks the container health.
- `tasks/main.yml`: Tasks to create necessary directories, copy scripts, and create systemd service and timer.
- `templates/sys-hlth-docker-container.cymais.service.j2`: Systemd service template.
- `templates/sys-hlth-docker-container.cymais.timer.j2`: Systemd timer template.
- `meta/main.yml`: Meta information declaring dependencies for the role.
## Usage
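To use this role, include it in your playbook and set the `path_administrator_scripts` variable to the desired base path for the health check scripts. The path must end with a trailing slash, because the role appends `sys-hlth-docker-container/` directly to it. Also set `on_calendar_health_docker_container`, which is passed to the `sys-timer` role as the schedule for the check.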
Ensure that the `sys-alm-compose` dependency is satisfied for error notifications.

View File

@@ -0,0 +1,30 @@
#!/bin/sh
# Health check for Docker containers.
#
# Reports containers whose health status is "unhealthy" and containers that
# exited with a non-zero exit code.
#
# Exit codes:
#   0 - no unhealthy containers and no container exited with an error
#   1 - at least one container is unhealthy
#   2 - at least one container exited with a non-zero exit code
#       (takes precedence over 1)
#   3 - Docker could not be queried at all

# Query Docker up front and fail loudly if that is not possible. Without this
# check an unreachable daemon yields two empty lists, which would be reported
# as "All docker containers are healthy."
if ! docker_ps_grep_unhealthy="$(docker ps --filter health=unhealthy --format '{{.Names}}')"; then
  echo "Could not query docker for unhealthy containers." >&2
  exit 3
fi
if ! docker_ps_grep_exited="$(docker ps --filter status=exited --format '{{.ID}}')"; then
  echo "Could not query docker for exited containers." >&2
  exit 3
fi

exitcode=0

if [ -n "$docker_ps_grep_unhealthy" ]; then
  echo "Some docker containers are unhealthy: $docker_ps_grep_unhealthy"
  exitcode=1
fi

# Container IDs contain no whitespace, so word splitting of the list is safe.
for container_id in $docker_ps_grep_exited; do
  # Fetch exit code and name with a single inspect call: "<code> /<name>".
  if ! container_state="$(docker inspect --format '{{.State.ExitCode}} {{.Name}}' "$container_id")"; then
    # The container may have been removed since it was listed; skip it
    # instead of comparing an empty exit code.
    echo "Could not inspect container $container_id, skipping." >&2
    continue
  fi
  container_exit_code="${container_state%% *}"
  container_name="${container_state#* }"
  container_name="${container_name#/}" # Strip the leading '/'
  if [ "$container_exit_code" -ne 0 ]; then
    echo "Container $container_name exited with code $container_exit_code"
    exitcode=2
  fi
done

if [ "$exitcode" -ne 0 ]; then
  exit "$exitcode"
fi

echo "All docker containers are healthy."
exit 0

View File

@@ -0,0 +1,5 @@
# Handler notified when the service unit file is written: makes systemd
# re-read its unit files and ensures the health check service is enabled.
- name: "reload sys-hlth-docker-container.cymais.service"
  systemd:
    name: sys-hlth-docker-container.cymais.service
    enabled: true
    daemon_reload: true

View File

@@ -0,0 +1,24 @@
---
# Ansible Galaxy metadata for the Docker container health check role.
galaxy_info:
  author: 'Kevin Veen-Birkenbach'
  description: 'Checks Docker containers for unhealthy or exited states and alerts on any issues.'
  # Literal block scalar: the three lines are kept as separate lines.
  company: |
    Kevin Veen-Birkenbach
    Consulting & Coaching Solutions
    https://www.veen.world
  license: 'CyMaIS NonCommercial License (CNCL)'
  license_url: 'https://s.veen.world/cncl'
  # Quoted so the version is read as a string, not a float.
  min_ansible_version: '2.9'
  platforms:
    - name: Archlinux
      versions:
        - 'rolling'
  galaxy_tags:
    - monitor
    - docker
    - containers
    - health
    - systemd
  repository: 'https://s.veen.world/cymais'
  documentation: 'https://s.veen.world/cymais'

# sys-alm-compose supplies the alarm unit that the health check service
# references in its OnFailure= setting.
dependencies:
  - sys-alm-compose

View File

@@ -0,0 +1,36 @@
# Installs the Docker container health check: script directory, script,
# systemd service unit and (via the sys-timer role) its timer.
#
# The whole block is guarded by the run_once_health_docker_container fact,
# which the final task sets, so the tasks are skipped when the role is applied
# again to a host that already has the fact.
- name: "set up sys-hlth-docker-container"
  when: run_once_health_docker_container is not defined
  block:
    - name: "create {{ health_docker_container_folder }}"
      file:
        path: "{{ health_docker_container_folder }}"
        state: directory
        # Quoted so the mode reaches Ansible as a string, not a YAML integer.
        mode: "0755"

    - name: create sys-hlth-docker-container.sh
      copy:
        src: sys-hlth-docker-container.sh
        dest: "{{ health_docker_container_folder }}sys-hlth-docker-container.sh"

    - name: create sys-hlth-docker-container.cymais.service
      template:
        src: sys-hlth-docker-container.service.j2
        dest: /etc/systemd/system/sys-hlth-docker-container.cymais.service
      notify: reload sys-hlth-docker-container.cymais.service

    # service_name is set right before sys-timer is included, which
    # presumably reads it to name the timer (confirm in the sys-timer role).
    - name: "set 'service_name' to '{{ role_name }}'"
      set_fact:
        service_name: "{{ role_name }}"

    - name: "include role for sys-timer for {{ service_name }}"
      include_role:
        name: sys-timer
      vars:
        on_calendar: "{{ on_calendar_health_docker_container }}"

    # Must stay last: once this fact exists the guard above is false.
    - name: run the health_docker_container tasks once
      set_fact:
        run_once_health_docker_container: true

View File

@@ -0,0 +1,7 @@
# systemd unit that runs the Docker container health check script once per
# activation.
[Unit]
Description=Checking docker health
# When this unit fails (the script exits non-zero if it finds a problem), start
# the sys-alm-compose alarm template unit. %n expands to this unit's full name,
# which becomes the instance name of the alarm unit.
OnFailure=sys-alm-compose.cymais@%n.service
[Service]
# The script runs to completion and exits; it is not a long-running daemon.
Type=oneshot
# The script is passed to bash as an argument, so it is interpreted by bash
# regardless of its own shebang line.
ExecStart=/bin/bash {{health_docker_container_folder}}sys-hlth-docker-container.sh

View File

@@ -0,0 +1 @@
# Directory the health check script is installed into. The value is built by
# plain string concatenation, so path_administrator_scripts is expected to end
# with a trailing '/'.
health_docker_container_folder: "{{ path_administrator_scripts }}sys-hlth-docker-container/"