Refactor systemctl services and categories due to alarm bugs

This commit restructures systemctl service definitions and category mappings.

Motivation: Alarm-related bugs revealed inconsistencies in service and role handling.

Preparation step: lays the groundwork for fixing the alarm issues by aligning categories, roles, and service templates.
This commit is contained in:
2025-08-18 13:35:43 +02:00
parent 29f50da226
commit 3a839cfe37
289 changed files with 975 additions and 948 deletions

View File

@@ -1,6 +1,8 @@
SOFTWARE_NAME: "Infinito.Nexus" # Name of the software
# Deployment
ENVIRONMENT: "production" # Possible values: production, development
DEPLOYMENT_MODE: "single" # Use single, if you deploy on one server. Use cluster if you setup in cluster mode.
# If true, sensitive credentials will be masked or hidden from all Ansible task logs
# Recommendet to set to true
@@ -20,9 +22,6 @@ HOST_TIME_FORMAT: "HH:mm"
HOST_THOUSAND_SEPARATOR: "."
HOST_DECIMAL_MARK: ","
# Deployment mode
DEPLOYMENT_MODE: "single" # Use single, if you deploy on one server. Use cluster if you setup in cluster mode.
# Web
WEB_PROTOCOL: "https" # Web protocol type. Use https or http. If you run local you need to change it to http
WEB_PORT: "{{ 443 if WEB_PROTOCOL == 'https' else 80 }}" # Default port web applications will listen to
@@ -30,17 +29,6 @@ WEB_PORT: "{{ 443 if WEB_PROTOCOL == 'https' else 80 }}" # Defaul
# Domain
PRIMARY_DOMAIN: "localhost" # Primary Domain of the server
# Server Tact Variables
## Ours in which the server is "awake" (100% working). Rest of the time is reserved for maintanance
HOURS_SERVER_AWAKE: "0..23"
## Random delay for systemd timers to avoid peak loads.
RANDOMIZED_DELAY_SEC: "5min"
# Runtime Variables for Process Control
ACTIVATE_ALL_TIMERS: false # Activates all timers, independend if the handlers had been triggered
DNS_PROVIDER: cloudflare # The DNS Provider\Registrar for the domain
HOSTING_PROVIDER: hetzner # Provider which hosts the server
@@ -52,18 +40,15 @@ CERTBOT_CREDENTIALS_FILE: "{{ CERTBOT_CREDENTIALS_DIR }}/{{ CERT
CERTBOT_DNS_PROPAGATION_WAIT_SECONDS: 300 # How long should the script wait for DNS propagation before continuing
CERTBOT_FLAVOR: san # Possible options: san (recommended, with a dns flavor like cloudflare, or hetzner), wildcard(doesn't function with www redirect), dedicated
# Path where Certbot stores challenge webroot files
LETSENCRYPT_WEBROOT_PATH: "/var/lib/letsencrypt/"
# Letsencrypt
LETSENCRYPT_WEBROOT_PATH: "/var/lib/letsencrypt/" # Path where Certbot stores challenge webroot files
LETSENCRYPT_BASE_PATH: "/etc/letsencrypt/" # Base directory containing Certbot configuration, account data, and archives
LETSENCRYPT_LIVE_PATH: "{{ LETSENCRYPT_BASE_PATH }}live/" # Symlink directory for the current active certificate and private key
# Base directory containing Certbot configuration, account data, and archives
LETSENCRYPT_BASE_PATH: "/etc/letsencrypt/"
# Symlink directory for the current active certificate and private key
LETSENCRYPT_LIVE_PATH: "{{ LETSENCRYPT_BASE_PATH }}live/"
## Docker Role Specific Parameters
DOCKER_RESTART_POLICY: "unless-stopped"
DOCKER_VARS_FILE: "{{ playbook_dir }}/roles/docker-compose/vars/docker-compose.yml"
## Docker
DOCKER_RESTART_POLICY: "unless-stopped" # Default restart parameter for docker containers
DOCKER_VARS_FILE: "{{ playbook_dir }}/roles/docker-compose/vars/docker-compose.yml" # File containing docker compose variables used by other services
DOCKER_WHITELISTET_ANON_VOLUMES: [] # Volumes which should be ignored during docker anonymous health check
# Asyn Confitguration
ASYNC_ENABLED: "{{ not MODE_DEBUG | bool }}" # Activate async, deactivated for debugging
@@ -88,10 +73,6 @@ _applications_nextcloud_oidc_flavor: >-
)
}}
# Systemctl
SYS_TIMER_SUFFIX: ".{{ SOFTWARE_NAME | lower }}.timer"
SYS_SERVICE_SUFFIX: ".{{ SOFTWARE_NAME | lower }}.service"
# Role-based access control
# @See https://en.wikipedia.org/wiki/Role-based_access_control
RBAC:

View File

@@ -1,38 +0,0 @@
# System maintenance Services
## Timeouts to wait for other services to stop
system_maintenance_lock_timeout_cleanup_services: "15min"
system_maintenance_lock_timeout_storage_optimizer: "10min"
system_maintenance_lock_timeout_backup_services: "1h"
system_maintenance_lock_timeout_heal_docker: "30min"
system_maintenance_lock_timeout_update_docker: "2min"
system_maintenance_lock_timeout_restart_docker: "{{system_maintenance_lock_timeout_update_docker}}"
## Services
### Defined Services for Backup Tasks
system_maintenance_backup_services:
- "sys-bkp-docker-2-loc"
- "svc-bkp-rmt-2-loc"
- "svc-bkp-loc-2-usb"
- "sys-bkp-docker-2-loc-everything"
### Defined Services for System Cleanup
system_maintenance_cleanup_services:
- "sys-cln-backups"
- "sys-cln-disc-space"
- "sys-cln-faild-bkps"
### Services that Manipulate the System
system_maintenance_manipulation_services:
- "sys-rpr-docker-soft"
- "update-docker"
- "svc-opt-ssd-hdd"
- "sys-rpr-docker-hard"
## Total System Maintenance Services
system_maintenance_services: "{{ system_maintenance_backup_services + system_maintenance_cleanup_services + system_maintenance_manipulation_services }}"
### Define Variables for Docker Volume Health services
whitelisted_anonymous_docker_volumes: []

View File

@@ -0,0 +1,40 @@
# Services
## Meta
SYS_SERVICE_SUFFIX: ".{{ SOFTWARE_NAME | lower }}.service"
## Names
SYS_SERVICE_ALARM_CMP: "sys-ctl-alm-compose.{{ SOFTWARE_NAME }}@.service"
## Groups
SYS_SERVICE_GROUP_BACKUPS: >
{{ (('sys-ctl-bkp-' | get_category_entries) + ('svc-bkp-' | get_category_entries))
| map('regex_replace', '$', SYS_SERVICE_SUFFIX) | list }}
SYS_SERVICE_GROUP_CLEANUP: >
{{ ('sys-ctl-cln-' | get_category_entries)
| map('regex_replace', '$', SYS_SERVICE_SUFFIX) | list }}
SYS_SERVICE_GROUP_REPAIR: >
{{ ('sys-ctl-rpr-' | get_category_entries)
| map('regex_replace', '$', SYS_SERVICE_SUFFIX) | list }}
SYS_SERVICE_GROUP_OPTIMIZATION: >
{{ ('svc-opt-' | get_category_entries)
| map('regex_replace', '$', SYS_SERVICE_SUFFIX) | list }}
SYS_SERVICE_GROUP_MAINTANANCE: >
{{ ('svc-mtn-' | get_category_entries)
| map('regex_replace', '$', SYS_SERVICE_SUFFIX) | list }}
## Collection of services to manipulate the system
SYS_SERVICE_GROUP_MANIPULATION: >
{{
SYS_SERVICE_GROUP_BACKUPS +
SYS_SERVICE_GROUP_CLEANUP +
SYS_SERVICE_GROUP_REPAIR +
SYS_SERVICE_GROUP_OPTIMIZATION +
SYS_SERVICE_GROUP_MAINTANANCE +
[ 'update-docker' ]
}}

View File

@@ -1,29 +0,0 @@
## Schedule for Health Checks
on_calendar_health_btrfs: "*-*-* 00:00:00" # Check once per day the btrfs for errors
on_calendar_health_journalctl: "*-*-* 00:00:00" # Check once per day the journalctl for errors
on_calendar_health_disc_space: "*-*-* 06,12,18,00:00:00" # Check four times per day if there is sufficient disc space
on_calendar_health_docker_container: "*-*-* {{ HOURS_SERVER_AWAKE }}:00:00" # Check once per hour if the docker containers are healthy
on_calendar_health_docker_volumes: "*-*-* {{ HOURS_SERVER_AWAKE }}:15:00" # Check once per hour if the docker volumes are healthy
on_calendar_health_csp_crawler: "*-*-* {{ HOURS_SERVER_AWAKE }}:30:00" # Check once per hour if all CSP are fullfilled available
on_calendar_health_nginx: "*-*-* {{ HOURS_SERVER_AWAKE }}:45:00" # Check once per hour if all webservices are available
on_calendar_health_msmtp: "*-*-* 00:00:00" # Check once per day SMTP Server
## Schedule for Cleanup Tasks
on_calendar_cleanup_backups: "*-*-* 00,06,12,18:30:00" # Cleanup backups every 6 hours, MUST be called before disc space cleanup
on_calendar_cleanup_disc_space: "*-*-* 07,13,19,01:30:00" # Cleanup disc space every 6 hours
on_calendar_cleanup_certs: "*-*-* 12,00:45:00" # Deletes and revokes unused certs
## Schedule for Backup Tasks
on_calendar_backup_docker_to_local: "*-*-* 03:30:00"
on_calendar_backup_remote_to_local: "*-*-* 21:30:00"
## Schedule for Maintenance Tasks
on_calendar_heal_docker: "*-*-* {{ HOURS_SERVER_AWAKE }}:30:00" # Heal unhealthy docker instances once per hour
on_calendar_renew_lets_encrypt_certificates: "*-*-* 12,00:30:00" # Renew Mailu certificates twice per day
on_calendar_deploy_certificates: "*-*-* 13,01:30:00" # Deploy letsencrypt certificates twice per day to docker containers
on_calendar_msi_keyboard_color: "*-*-* *:*:00" # Change the keyboard color every minute
on_calendar_cleanup_failed_docker: "*-*-* 12:00:00" # Clean up failed docker backups every noon
on_calendar_btrfs_auto_balancer: "Sat *-*-01..07 00:00:00" # Execute btrfs auto balancer every first Saturday of a month
on_calendar_restart_docker: "Sun *-*-* 08:00:00" # Restart docker instances every Sunday at 8:00 AM
on_calendar_nextcloud: "22" # Do nextcloud maintanace between 22:00 and 02:00

View File

@@ -0,0 +1,54 @@
# Service Timers
## Meta
SYS_TIMER_SUFFIX: ".{{ SOFTWARE_NAME | lower }}.timer"
## Server Tact Variables
HOURS_SERVER_AWAKE: "0..23" # Ours in which the server is "awake" (100% working). Rest of the time is reserved for maintanance
RANDOMIZED_DELAY_SEC: "5min" # Random delay for systemd timers to avoid peak loads.
ACTIVATE_ALL_TIMERS: false # Runtime Variables for Process Control - Activates all timers, independend if the handlers had been triggered
## Timeouts for all services
SYS_TIMEOUT_CLEANUP_SERVICES: "15min"
SYS_TIMEOUT_STORAGE_OPTIMIZER: "10min"
SYS_TIMEOUT_BACKUP_SERVICES: "1h"
SYS_TIMEOUT_HEAL_DOCKER: "30min"
SYS_TIMEOUT_UPDATE_DOCKER: "2min"
SYS_TIMEOUT_RESTART_DOCKER: "{{ SYS_TIMEOUT_UPDATE_DOCKER }}"
## On Calendar
### Schedule for health checks
SYS_SCHEDULE_HEALTH_BTRFS: "*-*-* 00:00:00" # Check once per day the btrfs for errors
SYS_SCHEDULE_HEALTH_JOURNALCTL: "*-*-* 00:00:00" # Check once per day the journalctl for errors
SYS_SCHEDULE_HEALTH_DISC_SPACE: "*-*-* 06,12,18,00:00:00" # Check four times per day if there is sufficient disc space
SYS_SCHEDULE_HEALTH_DOCKER_CONTAINER: "*-*-* {{ HOURS_SERVER_AWAKE }}:00:00" # Check once per hour if the docker containers are healthy
SYS_SCHEDULE_HEALTH_DOCKER_VOLUMES: "*-*-* {{ HOURS_SERVER_AWAKE }}:15:00" # Check once per hour if the docker volumes are healthy
SYS_SCHEDULE_HEALTH_CSP_CRAWLER: "*-*-* {{ HOURS_SERVER_AWAKE }}:30:00" # Check once per hour if all CSP are fullfilled available
SYS_SCHEDULE_HEALTH_NGINX: "*-*-* {{ HOURS_SERVER_AWAKE }}:45:00" # Check once per hour if all webservices are available
SYS_SCHEDULE_HEALTH_MSMTP: "*-*-* 00:00:00" # Check once per day SMTP Server
### Schedule for cleanup tasks
SYS_SCHEDULE_CLEANUP_BACKUPS: "*-*-* 00,06,12,18:30:00" # Cleanup backups every 6 hours, MUST be called before disc space cleanup
SYS_SCHEDULE_CLEANUP_DISC_SPACE: "*-*-* 07,13,19,01:30:00" # Cleanup disc space every 6 hours
SYS_SCHEDULE_CLEANUP_CERTS: "*-*-* 12,00:45:00" # Deletes and revokes unused certs
SYS_SCHEDULE_CLEANUP_FAILED_BACKUPS: "*-*-* 12:00:00" # Clean up failed docker backups every noon
### Schedule for repair services
SYS_SCHEDULE_REPAIR_BTRFS_AUTO_BALANCER: "Sat *-*-01..07 00:00:00" # Execute btrfs auto balancer every first Saturday of a month
SYS_SCHEDULE_REPAIR_DOCKER_SOFT: "*-*-* {{ HOURS_SERVER_AWAKE }}:30:00" # Heal unhealthy docker instances once per hour
SYS_SCHEDULE_REPAIR_DOCKER_HARD: "Sun *-*-* 08:00:00" # Restart docker instances every Sunday at 8:00 AM
### Schedule for backup tasks
SYS_SCHEDULE_BACKUP_DOCKER_TO_LOCAL: "*-*-* 03:30:00"
SYS_SCHEDULE_BACKUP_REMOTE_TO_LOCAL: "*-*-* 21:30:00"
### Schedule for Maintenance Tasks
SYS_SCHEDULE_MAINTANANCE_LETSENCRYPT_RENEW: "*-*-* 12,00:30:00" # Renew Mailu certificates twice per day
SYS_SCHEDULE_MAINTANANCE_LETSENCRYPT_DEPLOY: "*-*-* 13,01:30:00" # Deploy letsencrypt certificates twice per day to docker containers
SYS_SCHEDULE_MAINTANANCE_NEXTCLOUD: "22" # Do nextcloud maintanace between 22:00 and 02:00
### Animation
SYS_SCHEDULE_ANIMATION_KEYBOARD_COLOR: "*-*-* *:*:00" # Change the keyboard color every minute