Refactor systemctl services and categories due to alarm bugs

This commit restructures systemctl service definitions and category mappings.

Motivation: Alarm-related bugs revealed inconsistencies in service and role handling.

Preparation step: lays the groundwork for fixing the alarm issues by aligning categories, roles, and service templates.
This commit is contained in:
2025-08-18 13:35:43 +02:00
parent 29f50da226
commit 3a839cfe37
289 changed files with 975 additions and 948 deletions

View File

@@ -0,0 +1,16 @@
# sys-ctl-hlth-btrfs
## Description
Checks the health of all mounted Btrfs filesystems by inspecting device error counters.
## Features
- Iterates over every Btrfs filesystem.
- Runs `btrfs device stats` and alerts if any error counters are non-zero.
- Runs as a systemd service that is triggered by a timer for regular checks.
- On failure, calls `sys-ctl-alm-compose.infinito@…` for notification.
## Usage
Just include this role in your playbook; it will:
1. Deploy a small shell script under `{{ PATH_ADMINISTRATOR_SCRIPTS }}sys-ctl-hlth-btrfs/`.
2. Install a `.service` and `.timer` unit.
3. Send alerts via `sys-ctl-alm-compose` if any filesystem shows errors.

View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Health check for Btrfs devices.
#
# Prints `btrfs device stats` for every device path listed by
# `btrfs filesystem show` and exits with status 1 when any "*_errs" counter is
# non-zero or when the stats of a device cannot be read; otherwise exits 0.
exit_code=0

# Matches a stats line whose counter name ends in "_errs" and whose value
# starts with a digit 1-9. [[:space:]] is the POSIX spelling of \s.
regex='\.(.*)_errs([[:space:]]*)[1-9]'

# Read one path per line so a path is never word-split or glob-expanded.
# Process substitution keeps the loop in the current shell, so exit_code
# assignments made inside the loop are still visible after it.
while IFS= read -r path
do
    [[ -n "$path" ]] || continue
    echo "Checking health for $path..."
    # A health check that cannot read the counters must not report success.
    if ! result=$(btrfs device stats "$path"); then
        echo "Could not read device stats for $path!"
        exit_code=1
        continue
    fi
    echo "$result"
    if [[ "$result" =~ $regex ]]; then
        echo "Errors found!"
        exit_code=1
    fi
done < <(btrfs filesystem show | awk '/ path /{print $NF}')
exit $exit_code

View File

@@ -0,0 +1,5 @@
# Handler: makes systemd re-read its unit files and keeps the health-check
# unit enabled. The name must stay in sync with the `notify:` that references it.
- name: "reload sys-ctl-hlth-btrfs service"
  systemd:
    name: sys-ctl-hlth-btrfs{{ SYS_SERVICE_SUFFIX }}
    # Canonical booleans instead of the YAML 1.1 `yes` spelling.
    enabled: true
    daemon_reload: true

View File

@@ -0,0 +1,21 @@
# Ansible Galaxy metadata for the sys-ctl-hlth-btrfs role.
galaxy_info:
  author: 'Kevin Veen-Birkenbach'
  description: 'Health-check for Btrfs filesystems, alerts on any device error counters.'
  # Literal block scalar: the three lines below stay separate lines.
  company: |
    Kevin Veen-Birkenbach
    Consulting & Coaching Solutions
    https://www.veen.world
  license: 'Infinito.Nexus NonCommercial License'
  license_url: 'https://s.infinito.nexus/license'
  # Quoted so the version is read as a string rather than a float.
  min_ansible_version: '2.9'
  platforms:
    - name: Archlinux
      versions:
        - 'rolling'
  galaxy_tags:
    - monitor
    - btrfs
    - health
    - systemd
    - filesystem
  repository: 'https://s.infinito.nexus/code'
  documentation: 'https://docs.infinito.nexus'

View File

@@ -0,0 +1,36 @@
# One-time setup, skipped once 'run_once_sys_ctl_hlth_btrfs' is defined.
# NOTE(review): utils/run_once.yml is presumably what defines that variable;
# confirm against the helper.
- when: run_once_sys_ctl_hlth_btrfs is not defined
  block:
    # Pull in the alarm role unless its own run-once marker is already defined.
    - name: Include dependency 'sys-ctl-alm-compose'
      when: run_once_sys_ctl_alm_compose is not defined
      include_role:
        name: sys-ctl-alm-compose
    - include_tasks: utils/run_once.yml
# Ensure the directory that will hold the health-check script exists.
- name: "create {{docker_health_btrfs_folder}}"
  file:
    state: directory
    path: '{{docker_health_btrfs_folder}}'
    # Quoted so the mode is passed as a string, not parsed as a number.
    mode: '0755'
# Install the health-check script from the role's files.
- name: create sys-ctl-hlth-btrfs.sh
  copy:
    src: sys-ctl-hlth-btrfs.sh
    dest: "{{docker_health_btrfs_folder}}sys-ctl-hlth-btrfs.sh"
    # Explicit permissions so the result does not depend on the remote umask.
    mode: "0755"
# Render the systemd unit and notify the handler so systemd picks up changes.
- name: create sys-ctl-hlth-btrfs{{ SYS_SERVICE_SUFFIX }}
  template:
    src: sys-ctl-hlth-btrfs.service.j2
    dest: /etc/systemd/system/sys-ctl-hlth-btrfs{{ SYS_SERVICE_SUFFIX }}
    # Explicit permissions so the result does not depend on the remote umask.
    mode: "0644"
  notify: reload sys-ctl-hlth-btrfs service
# Store the current role's name in the 'service_name' fact.
# NOTE(review): presumably consumed by the sys-timer role included afterwards;
# verify there.
- name: "set 'service_name' to '{{ role_name }}'"
  set_fact:
    service_name: '{{ role_name }}'
# Include the sys-timer role, passing the Btrfs health schedule as 'on_calendar'.
- name: "include role for sys-timer for {{ service_name }}"
  vars:
    on_calendar: '{{SYS_SCHEDULE_HEALTH_BTRFS}}'
  include_role:
    name: sys-timer

View File

@@ -0,0 +1,7 @@
# Oneshot unit that runs the Btrfs health-check script.
[Unit]
Description=Check btrfs status
# When this unit fails, systemd starts the alarm template unit below;
# %n expands to this unit's full name and becomes the instance name.
OnFailure=sys-ctl-alm-compose.{{ SOFTWARE_NAME }}@%n.service
[Service]
Type=oneshot
# The script is passed to bash explicitly rather than executed directly.
ExecStart=/bin/bash {{docker_health_btrfs_folder}}sys-ctl-hlth-btrfs.sh

View File

@@ -0,0 +1 @@
# Directory for the Btrfs health-check script. The value ends with '/' and is
# concatenated directly with file names by its users.
docker_health_btrfs_folder: "{{ PATH_ADMINISTRATOR_SCRIPTS }}sys-ctl-hlth-btrfs/"