Refactor systemctl services and categories due to alarm bugs

This commit restructures systemctl service definitions and category mappings.

Motivation: Alarm-related bugs revealed inconsistencies in service and role handling.

Preparation step: lays the groundwork for fixing the alarm issues by aligning categories, roles, and service templates.
This commit is contained in:
2025-08-18 13:35:43 +02:00
parent 29f50da226
commit 3a839cfe37
289 changed files with 975 additions and 948 deletions

View File

@@ -0,0 +1,31 @@
# Docker Volumes Health Check
## Description
This role detects unused **anonymous Docker volumes** that are not bound to any running container.
It can optionally exclude specific volumes from the check using a configurable whitelist.
## Overview
The role installs a script and a `systemd` service with a timer to periodically scan for leftover anonymous volumes.
This helps prevent wasted disk space and leftover resources from old deployments.
## Purpose
The main purpose of this role is to keep Docker environments clean by identifying and reporting orphaned anonymous volumes.
It supports a whitelist mechanism to avoid alerting on known or intentional volumes.
## Features
- **Anonymous Volume Detection:** Identifies anonymous volumes (volumes whose name is a 64-character hexadecimal ID) that are not attached to any container.
- **Whitelist Support:** Skips volumes listed in `DOCKER_WHITELISTET_ANON_VOLUMES`.
- **Bootstrap Volume Exclusion:** Ignores known bootstrap volumes (e.g., `/var/www/bootstrap`).
- **Systemd Integration:** Installs a one-shot service and timer to automate checks.
- **Alerting Support:** Works with the [`sys-ctl-alm-compose`](../sys-ctl-alm-compose/README.md) role for failure notifications.
## Further Resources
- [Docker Volumes Documentation](https://docs.docker.com/storage/volumes/)
- [Systemd Timers Documentation](https://www.freedesktop.org/software/systemd/man/systemd.timer.html)
- [ChatGPT Conversation on the Creation of this Role](https://chat.openai.com/share/1fa829f1-f001-4111-b1d4-1b2e3d583da2)

View File

@@ -0,0 +1,51 @@
#!/bin/bash
#
# Health check for anonymous Docker volumes.
#
# Lists every Docker volume whose name is a 64-character hexadecimal ID,
# skips volumes that are whitelisted or mounted at /var/www/bootstrap in a
# running container, and reports every remaining volume together with the
# containers that reference it.
#
# Usage: sys-ctl-hlth-docker-volumes.sh "<id1> <id2> ..."
#   $1   Space-separated list of whitelisted volume IDs (may be empty).
#
# Exit status: 0 when nothing was reported, otherwise the number of reported
# volumes, capped at 255.

status=0

# The first argument is a space-separated list of whitelisted volume IDs
whitelist=$1
whitelisted_volumes=($whitelist) # Split into an array

anonymous_volumes=$(docker volume ls --format "{{.Name}}" | grep -E '^[a-f0-9]{64}$')

if [ -z "$anonymous_volumes" ]; then
  echo "No anonymous volumes found."
  exit 0
fi

echo "Anonymous volumes found:"

for volume in $anonymous_volumes; do
  # Check if the volume is in the whitelist (fixed-string, whole-line match)
  if printf '%s\n' "${whitelisted_volumes[@]}" | grep -Fxq -- "$volume"; then
    echo "Volume $volume is whitelisted and will be skipped."
    continue
  fi

  # Destination of this volume inside the running containers. Each container
  # contributes one output line (empty when it does not mount the volume).
  # 'sort -u' collapses duplicates, so a volume mounted at the same path by
  # several containers still compares equal to the bootstrap path below.
  container_mount_path=$(docker ps -q \
    | xargs -I {} docker inspect {} --format="{{range .Mounts}}{{if eq .Name \"$volume\"}}{{.Destination}}{{end}}{{end}}" \
    | sort -u | tr -d '\n' | xargs)

  if [ "$container_mount_path" == "/var/www/bootstrap" ]; then
    echo "Volume $volume is a bootstrap volume and will be skipped."
    continue
  fi

  # Every volume reaching this point counts as a finding. Plain arithmetic
  # expansion is used because '((status++))' itself returns a non-zero status
  # on the first increment.
  status=$((status + 1))

  # NOTE(review): '-a' also lists stopped containers, while the message below
  # speaks of "running containers" - confirm which of the two is intended.
  container_ids=$(docker ps -aq --filter "volume=$volume")

  if [ -z "$container_ids" ]; then
    echo "Volume $volume is not used by any running containers."
    continue
  fi

  for container_id in $container_ids; do
    container_name=$(docker inspect --format '{{ .Name }}' "$container_id" | sed 's#^/##')
    mount_path=$(docker inspect --format "{{ range .Mounts }}{{ if eq .Name \"$volume\" }}{{ .Destination }}{{ end }}{{ end }}" "$container_id")

    if [ -n "$mount_path" ]; then
      echo "Volume $volume is used by container $container_name at mount path $mount_path"
    else
      echo "Volume $volume is used by container $container_name, but mount path could not be determined."
    fi
  done
done

# Exit codes are taken modulo 256: without the clamp, exactly 256 findings
# would be reported to systemd as success.
if [ "$status" -gt 255 ]; then
  status=255
fi

exit "$status"

View File

@@ -0,0 +1,5 @@
# Handler: makes systemd re-read its unit files and enables the health-check unit.
- name: "reload sys-ctl-hlth-docker-volumes service"
  systemd:
    name: "sys-ctl-hlth-docker-volumes{{ SYS_SERVICE_SUFFIX }}"
    daemon_reload: true
    enabled: true

View File

@@ -0,0 +1,21 @@
# Ansible Galaxy metadata for this role.
galaxy_info:
  author: 'Kevin Veen-Birkenbach'
  description: 'Detects anonymous Docker volumes not bound to containers (unless whitelisted) and alerts.'
  company: |
    Kevin Veen-Birkenbach
    Consulting & Coaching Solutions
    https://www.veen.world
  license: 'Infinito.Nexus NonCommercial License'
  license_url: 'https://s.infinito.nexus/license'
  min_ansible_version: '2.9'
  platforms:
    - name: Archlinux
      versions:
        - 'rolling'
  galaxy_tags: [monitor, docker, volumes, health, systemd]
  repository: 'https://s.infinito.nexus/code'
  documentation: 'https://docs.infinito.nexus'

View File

@@ -0,0 +1,31 @@
# Pull in the alarm role unless its run-once guard variable is already defined.
- name: Include dependency 'sys-ctl-alm-compose'
  when: run_once_sys_ctl_alm_compose is not defined
  include_role:
    name: sys-ctl-alm-compose

# Directory that will hold the health-check script.
- name: "create {{health_docker_volumes_folder}}"
  file:
    state: directory
    path: "{{ health_docker_volumes_folder }}"
    mode: "0755"

# The folder variable is concatenated directly with the script file name.
- name: create sys-ctl-hlth-docker-volumes.sh
  copy:
    src: sys-ctl-hlth-docker-volumes.sh
    dest: "{{ health_docker_volumes_folder }}sys-ctl-hlth-docker-volumes.sh"

# Render the systemd unit; the handler is notified when the task reports a change.
- name: create sys-ctl-hlth-docker-volumes{{ SYS_SERVICE_SUFFIX }}
  notify: reload sys-ctl-hlth-docker-volumes service
  template:
    src: sys-ctl-hlth-docker-volumes.service.j2
    dest: "/etc/systemd/system/sys-ctl-hlth-docker-volumes{{ SYS_SERVICE_SUFFIX }}"

# 'service_name' is set right before including sys-timer.
# NOTE(review): presumably sys-timer reads 'service_name' - confirm in that role.
- name: "set 'service_name' to '{{ role_name }}'"
  set_fact:
    service_name: "{{ role_name }}"

- name: "include role for sys-timer for {{ service_name }}"
  include_role:
    name: sys-timer
  vars:
    on_calendar: "{{ SYS_SCHEDULE_HEALTH_DOCKER_VOLUMES }}"

View File

@@ -0,0 +1,4 @@
# Execute the core tasks only while the run-once guard variable is undefined.
# NOTE(review): presumably utils/run_once.yml defines that guard - confirm.
- when: run_once_sys_ctl_hlth_docker_volumes is not defined
  block:
    - include_tasks: 01_core.yml
    - include_tasks: utils/run_once.yml

View File

@@ -0,0 +1,7 @@
# One-shot unit that runs the anonymous Docker volume health-check script.
[Unit]
Description=Checking docker health
# When this unit fails, systemd starts the alarm template unit, instantiated
# with this unit's full name (the %n specifier).
OnFailure=sys-ctl-alm-compose.{{ SOFTWARE_NAME }}@%n.service
[Service]
Type=oneshot
# The whitelist entries are joined with single spaces and handed to the script
# as one quoted argument.
ExecStart=/bin/bash {{ health_docker_volumes_folder }}sys-ctl-hlth-docker-volumes.sh "{{ DOCKER_WHITELISTET_ANON_VOLUMES | join(' ') }}"

View File

@@ -0,0 +1 @@
# Folder for the health-check script; keeps its trailing slash so a file name can be appended directly.
health_docker_volumes_folder: "{{ PATH_ADMINISTRATOR_SCRIPTS }}sys-ctl-hlth-docker-volumes/"