Refactor systemctl services and categories due to alarm bugs

This commit restructures systemctl service definitions and category mappings.

Motivation: Alarm-related bugs revealed inconsistencies in service and role handling.

Preparation step: lays the groundwork for fixing the alarm issues by aligning categories, roles, and service templates.
This commit is contained in:
2025-08-18 13:35:43 +02:00
parent 29f50da226
commit 3a839cfe37
289 changed files with 975 additions and 948 deletions

View File

@@ -0,0 +1,13 @@
# sys-ctl-hlth-journalctl
## Description
Scans `journalctl` over the last day for “error” entries and alerts if any are found.
## Features
- Runs `journalctl --since '1 day ago' --no-pager | grep -i 'error'`.
- Exits non-zero on matches.
- Scheduled via systemd timer.
- Alerts via `sys-ctl-alm-compose` on detection.
## Usage
Include the role; set `SYS_SCHEDULE_HEALTH_JOURNALCTL` to your preferred schedule (it is passed to the `sys-timer` role as `on_calendar`).

View File

@@ -0,0 +1,10 @@
#!/bin/sh
# Health check: fail when the journal of the last day contains "error".
#
# Exit status:
#   0 - no journal line matched
#   1 - at least one line matched (case-insensitive); the matches are printed
#
# NOTE(review): the messages printed below themselves contain the word
# "error". If this script's stdout ends up in the journal, later runs may
# match their own output - confirm, and filter this unit out if so.

echo "Checking journalctl for error messages..."

# grep exits non-zero when nothing matches; only the captured text is used.
journalctl_errors="$(journalctl --since '1 day ago' --no-pager | grep -i 'error')"

# Any captured text means at least one journal line matched.
if [ -n "$journalctl_errors" ]; then
    echo "Some errors were found: $journalctl_errors"
    exit 1
fi

echo "No errors found in journalctl."
exit 0

View File

@@ -0,0 +1,5 @@
# Handler: enable the journalctl health unit and have systemd re-read its
# unit files. Runs when a task notifies this exact handler name.
- name: "reload sys-ctl-hlth-journalctl service"
  systemd:
    # Quoted so the templated suffix can never change how YAML types the value.
    name: "sys-ctl-hlth-journalctl{{ SYS_SERVICE_SUFFIX }}"
    enabled: true
    daemon_reload: true

View File

@@ -0,0 +1,21 @@
# Ansible Galaxy metadata for this role.
galaxy_info:
  author: "Kevin Veen-Birkenbach"
  description: "Searches the systemd journal for errors over the past day and alerts if any are found."
  # Literal block scalar: the three lines below stay separate lines.
  company: |
    Kevin Veen-Birkenbach
    Consulting & Coaching Solutions
    https://www.veen.world
  license: "Infinito.Nexus NonCommercial License"
  license_url: "https://s.infinito.nexus/license"
  # Quoted so the version is read as a string, not a float.
  min_ansible_version: "2.9"
  platforms:
    - name: Archlinux
      versions:
        - "rolling"
  galaxy_tags:
    - monitor
    - journalctl
    - logs
    - health
    - systemd
  repository: "https://s.infinito.nexus/code"
  documentation: "https://docs.infinito.nexus"

View File

@@ -0,0 +1,31 @@
# Pull in the alarm role this health check depends on. Skipped when
# `run_once_sys_ctl_alm_compose` is already defined (presumably set by that
# role after its first run - confirm).
- name: Include dependency 'sys-ctl-alm-compose'
  when: run_once_sys_ctl_alm_compose is not defined
  include_role:
    name: sys-ctl-alm-compose
# Make sure the directory that will hold the health-check script exists.
- name: "create {{health_journalctl_folder}}"
  file:
    state: directory
    path: "{{health_journalctl_folder}}"
    mode: "0755"
# Install the health-check script into the role's script folder.
# The file name is appended directly to the folder variable, so that
# variable must end with a "/".
- name: create sys-ctl-hlth-journalctl.sh
  copy:
    src: sys-ctl-hlth-journalctl.sh
    dest: "{{ health_journalctl_folder }}sys-ctl-hlth-journalctl.sh"
    # Explicit mode so the permissions do not depend on the umask.
    mode: "0755"
# Render the systemd unit for the health check, then notify the handler
# named in `notify` (the string must match the handler name exactly).
- name: create sys-ctl-hlth-journalctl{{ SYS_SERVICE_SUFFIX }}
  template:
    src: sys-ctl-hlth-journalctl.service.j2
    dest: "/etc/systemd/system/sys-ctl-hlth-journalctl{{ SYS_SERVICE_SUFFIX }}"
    # Explicit mode so the permissions do not depend on the umask.
    mode: "0644"
  notify: reload sys-ctl-hlth-journalctl service
# Store this role's name in the `service_name` fact.
- name: "set 'service_name' to '{{ role_name }}'"
  set_fact:
    service_name: '{{ role_name }}'
# Schedule the health check by including the sys-timer role with the
# configured calendar expression.
# NOTE(review): sys-timer presumably reads `service_name` - confirm.
- name: "include role for sys-timer for {{ service_name }}"
  vars:
    on_calendar: "{{SYS_SCHEDULE_HEALTH_JOURNALCTL}}"
  include_role:
    name: sys-timer

View File

@@ -0,0 +1,4 @@
# Role entry point: both includes are skipped when
# `run_once_sys_ctl_hlth_journalctl` is already defined.
- when: run_once_sys_ctl_hlth_journalctl is not defined
  block:
    - include_tasks: 01_core.yml
    # NOTE(review): presumably defines the marker tested above - confirm.
    - include_tasks: utils/run_once.yml

View File

@@ -0,0 +1,7 @@
# systemd unit that runs the journalctl health-check script once per start.
[Unit]
Description=checking journalctl health
# On failure, start the alarm template unit with this unit's full name (%n)
# as its instance.
OnFailure=sys-ctl-alm-compose.{{ SOFTWARE_NAME }}@%n.service

[Service]
# The script is passed to bash explicitly rather than executed directly.
ExecStart=/bin/bash {{health_journalctl_folder}}sys-ctl-hlth-journalctl.sh
Type=oneshot

View File

@@ -0,0 +1,2 @@
# Folder that holds the health-check script. Keep the trailing "/": the
# tasks and the unit template append the file name directly to this value.
health_journalctl_folder: "{{ PATH_ADMINISTRATOR_SCRIPTS }}sys-ctl-hlth-journalctl/"