refactor: improve service handling and introduce MODE_ASSERT

- Improved get_service_name filter plugin (clearer suffix handling, consistent var names).
- Added MODE_ASSERT flag to optionally execute validation/assertion tasks.
- Fixed systemd unit handling: consistent use of %I instead of %i, correct escaping of instance names.
- Unified on_failure behavior and alarm composer scripts.
- Cleaned up redundant logging, handlers, and debug config.
- Strengthened sys-service template resolution with assert (only active when MODE_ASSERT).
- Simplified timer and suffix handling with get_service_name filter.
- Hardened sensitive tasks with no_log.
- Added conditional asserts across roles (Keycloak, DNS, Mailu, Discourse, etc.).

These changes improve consistency, safety, and validation across the automation stack.

Conversation: https://chatgpt.com/share/68a4ae28-483c-800f-b2f7-f64c7124c274
This commit is contained in:
Kevin Veen-Birkenbach 2025-08-19 19:02:52 +02:00
parent 6e538eabc8
commit a10dd402b8
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
30 changed files with 82 additions and 55 deletions

View File

@ -15,8 +15,8 @@ Suffix handling:
"""
def get_service_name(systemctl_id, software_name, suffix=""):
sid = str(systemctl_id).strip().lower()
sw = str(software_name).strip().lower()
sid = str(systemctl_id).strip().lower()
software_name = str(software_name).strip().lower()
# Determine suffix
if suffix is False:
@ -24,14 +24,13 @@ def get_service_name(systemctl_id, software_name, suffix=""):
elif suffix == "" or suffix is None:
sfx = ".service"
else:
sfx = "." + str(suffix).strip().lower()
sfx = str(suffix).strip().lower()
if sid.endswith("@"):
base = sid[:-1] # drop the trailing '@'
return f"{base}.{sw}@{sfx}"
return f"{base}.{software_name}@{sfx}"
else:
return f"{sid}.{sw}{sfx}"
return f"{sid}.{software_name}{sfx}"
class FilterModule(object):
def filters(self):

View File

@ -7,3 +7,4 @@ MODE_BACKUP: true # Activates the backup before the update procedure
MODE_CLEANUP: true # Cleanup unused files and configurations
MODE_DEBUG: false # Enables debugging in Ansible and in the apps. You SHOULD NOT enable this on production servers.
MODE_RESET: false # Cleans up all Infinito.Nexus files. It's necessary to run the whole playbook, not partial roles, when using this function.
MODE_ASSERT: false # Executes validation tasks during the run.

View File

@ -14,7 +14,7 @@ SYS_SERVICE_REPAIR_DOCKER_HARD: "{{ 'sys-ctl-rpr-docker-hard' | get_servic
SYS_SERVICE_UPDATE_DOCKER: "{{ 'update-docker' | get_service_name(SOFTWARE_NAME) }}"
## On Failure
SYS_SERVICE_ON_FAILURE_COMPOSE: "{{ 'sys-ctl-alm-compose' | get_service_name(SOFTWARE_NAME,'%i.service') }}"
SYS_SERVICE_ON_FAILURE_COMPOSE: "{{ ('sys-ctl-alm-compose@') | get_service_name(SOFTWARE_NAME, False) }}%n.service"
## Groups
SYS_SERVICE_GROUP_BACKUPS: >

View File

@ -2,7 +2,6 @@
# Service Timers
## Meta
SYS_TIMER_SUFFIX: ".{{ SOFTWARE_NAME | lower }}.timer"
SYS_TIMER_ALL_ENABLED: "{{ not MODE_DEBUG }}" # Runtime variable for process control - activates all timers, regardless of whether the handlers have been triggered
## Server Tact Variables

View File

@ -12,6 +12,7 @@
name: github.com
key: "{{ lookup('pipe', 'ssh-keyscan -t ed25519 github.com | grep -v \"^#\"') }}"
become: true
no_log: "{{ MASK_CREDENTIALS_IN_LOGS | bool }}"
- name: Create installation directory for Kevin's Package Manager
file:
@ -37,7 +38,7 @@
- name: create config.yaml
template:
src: config.yaml.j2
dest: "{{pkgmgr_config_path}}"
dest: "{{ pkgmgr_config_path }}"
become: true
- name: Run the Package Manager install command to create an alias for Kevins package manager

View File

@ -36,8 +36,6 @@ http
'"X-Forwarded-For: $http_x_forwarded_for" '
'"Scheme: $scheme" "Protocol: $server_protocol" "ServerName: $server_name"';
access_log /dev/stdout debug;
{% else %}
access_log /dev/stdout debug;
{% endif %}
error_log /dev/stderr info;

View File

@ -21,6 +21,7 @@
- target
- source
to nonempty values in your configuration file.
when: MODE_ASSERT | bool
- include_role:
name: sys-service

View File

@ -1,13 +1,4 @@
---
- name: Wait until OpenResty container is running
command: docker inspect -f '{{.State.Running}}' {{ OPENRESTY_CONTAINER }}
register: openresty_status
retries: 10
delay: 3
until: openresty_status.stdout.strip() == "true"
changed_when: false
listen: restart openresty
- name: Validate OpenResty configuration
command: >
docker exec {{ OPENRESTY_CONTAINER }} openresty -t -q

View File

@ -6,20 +6,30 @@
- sys-ctl-alm-email
vars:
flush_handlers: true
systemctl_timer_enabled: false
systemctl_copy_files: true
system_service_timer_enabled: false
system_service_copy_files: true
- name: "Include core service for '{{ system_service_id }}'"
include_role:
name: sys-service
vars:
flush_handlers: true
systemctl_timer_enabled: false
systemctl_copy_files: true
systemctl_tpl_exec_start: "{{ system_service_script_exec }} %i"
systemctl_tpl_on_failure: "" # No on failure needed, because it's anyhow the default on failure procedure
system_service_timer_enabled: false
system_service_copy_files: true
system_service_tpl_exec_start: "{{ system_service_script_exec }} %I"
system_service_tpl_on_failure: "" # No on failure needed, because it's anyhow the default on failure procedure
- name: "Send message to test service."
systemd:
name: "sys-ctl-alm-compose@{{ SYSTEMCTL_ALARM_COMPOSER_DUMMY_MESSAGE }}.service"
state: started
- block:
- name: Escape instance name for systemctl call
ansible.builtin.command:
argv:
- systemd-escape
- "{{ SYSTEMCTL_ALARM_COMPOSER_DUMMY_MESSAGE }}"
register: escaped_name
changed_when: false
- name: Start sys-ctl-alm-compose instance
ansible.builtin.systemd:
name: "{{ ('sys-ctl-alm-compose@') | get_service_name(SOFTWARE_NAME, False) ~ escaped_name.stdout ~ '.service' }}"
state: started
when: MODE_ASSERT | bool

View File

@ -1,10 +1,11 @@
#!/bin/bash
err=0
set -u
{% for alarm_service in SYSTEMCTL_ALARM_COMPOSER_SUBSERVICES %}
{% set alarm_service_full_name = alarm_service | get_service_name(SOFTWARE_NAME, '"$1".service') %}
if ! /usr/bin/systemctl start {{ alarm_service_full_name }}; then
echo "ERROR: Failed to start {{ alarm_service_full_name }}" >&2
{% for alarm in SYSTEMCTL_ALARM_COMPOSER_SUBSERVICES %}
# sys-ctl-alm-email.infinito.nexus@<escaped>.service (no extra dot!)
unit="{{ (alarm ~ '@') | get_service_name(SOFTWARE_NAME, False) }}$(systemd-escape "$1").service"
if ! /usr/bin/systemctl start -- "$unit"; then
echo "ERROR: Failed to start $unit" >&2
err=1
fi
{% endfor %}

View File

@ -1,4 +1,10 @@
#!/bin/bash
set -u
STATUS_OUT="$(systemctl status --full "$1" 2>/dev/null | head -n 30)"
if [ -z "$STATUS_OUT" ]; then
STATUS_OUT="(no matching systemd unit found for: $1)"
fi
/usr/bin/sendmail -t <<ERRMAIL
To: {{ users.administrator.email }}
@ -7,9 +13,8 @@ Subject: $1
Content-Transfer-Encoding: 8bit
Content-Type: text/plain; charset=UTF-8
A problem with the service $1 occured:
$(systemctl status --full "$1" | head -n 30)
A problem with the service $1 occurred:
$STATUS_OUT
ERRMAIL

View File

@ -3,6 +3,6 @@ Description=status email for %i to user
[Service]
Type=oneshot
ExecStart={{ system_service_script_exec }} %i
ExecStart={{ system_service_script_exec }} %I
User=root
Group=systemd-journal

View File

@ -8,6 +8,7 @@
Please provide nonempty values for:
- telegram_bot_token # Your Telegram bots API token
- telegram_chat_id # The Telegram chat ID to send messages to
when: MODE_ASSERT | bool
- include_role:
name: sys-service

View File

@ -3,6 +3,6 @@ Description=status Telegram message for %i to user
[Service]
Type=oneshot
ExecStart={{ system_service_script_exec }} %i
ExecStart={{ system_service_script_exec }} %I
User=root
Group=systemd-journal

View File

@ -7,7 +7,9 @@
that:
- SYSTEMD_MANAGER_CONF_DIR | regex_search('^/etc/systemd/system\.conf\.d/?$')
fail_msg: "SYSTEMD_MANAGER_CONF_DIR must be /etc/systemd/system.conf.d"
when: SYSTEMD_MANAGER_RESET_PURGE | bool
when:
- SYSTEMD_MANAGER_RESET_PURGE | bool
- MODE_ASSERT | bool
- name: "Purge manager drop-in directory (remove)"
file:

View File

@ -5,6 +5,7 @@
ansible.builtin.assert:
that: [ "CLOUDFLARE_API_TOKEN | length > 0" ]
no_log: "{{ cloudflare_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Apply A/AAAA
community.general.cloudflare_dns:

View File

@ -14,6 +14,7 @@
that: [ "_hz_token | length > 0" ]
fail_msg: "HETZNER_API_TOKEN is required for the Cloud flavor."
no_log: "{{ hetzner_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Collect hcloud servers if needed (server records without identifier)
hetzner.hcloud.server_info:
@ -61,6 +62,7 @@
)
fail_msg: "Could not resolve hcloud server by IPv4 for one or more records."
no_log: "{{ hetzner_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Validate records (cloud)
ansible.builtin.assert:
@ -74,6 +76,7 @@
+ ((_rdns_records | default(rdns_records)) | rejectattr('resource','equalto','server') | list | length)
) == ((_rdns_records | default(rdns_records)) | length)
no_log: "{{ hetzner_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Apply rDNS via hcloud
hetzner.hcloud.hcloud_rdns:

View File

@ -7,6 +7,7 @@
- (HETZNER_ROBOT_PASSWORD | default('') | length) > 0
fail_msg: "Robot credentials required: HETZNER_ROBOT_USER / HETZNER_ROBOT_PASSWORD."
no_log: "{{ hetzner_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Validate records (robot)
ansible.builtin.assert:
@ -16,6 +17,7 @@
- (rdns_records | selectattr('dns_ptr','defined') | list | length) == (rdns_records | length)
fail_msg: "Each record must have ip_address and dns_ptr for Robot rDNS."
no_log: "{{ hetzner_no_log | bool }}"
when: MODE_ASSERT | bool
- name: Apply rDNS via Hetzner Robot API
vars:

View File

@ -1,4 +1,3 @@
# 1) Find the template (prefer target role, then fall back to this role)
- name: Resolve systemctl template source
set_fact:
system_service_template_src: >-
@ -17,31 +16,29 @@
errors='strict'
) }}
# Optional: sanity check with a clear error if truly nothing found
- name: Ensure a systemctl template was found
assert:
that: system_service_template_src | length > 0
fail_msg: >-
Could not resolve any systemctl template. Looked in:
{{ system_service_role_dir }}/templates/ and {{ role_path }}/templates/.
when: MODE_ASSERT | bool
# 2) Now we may safely derive whether its the “@” variant
- name: Flag whether @-template is used
set_fact:
system_service_uses_at: "{{ (system_service_template_src | basename) is search('@\\.service\\.j2$') }}"
system_service_uses_at: "{{ system_service_id.endswith('@') }}"
# 3) Use it
- name: "setup systemctl '{{ system_service_id }}'"
template:
src: "{{ system_service_template_src }}"
dest: "{{ [ PATH_SYSTEM_SERVICE_DIR, system_service_id | get_service_name(SOFTWARE_NAME) ] | path_join }}"
notify: "{{ 'reload system daemon' if system_service_uses_at else 'refresh systemctl service' }}"
- name: refresh systemctl service when SYS_SERVICE_ALL_ENABLED
command: /bin/true
notify:
- reload system daemon
- refresh systemctl service
when:
- SYS_SERVICE_ALL_ENABLED | bool
- not system_service_uses_at
- name: refresh systemctl service when SYS_SERVICE_ALL_ENABLE
block:
- command: /bin/true
notify: reload system daemon
- command: /bin/true
notify: refresh systemctl service
when: not system_service_uses_at
when: SYS_SERVICE_ALL_ENABLED | bool

View File

@ -3,6 +3,7 @@
that:
- "'@' not in system_service_id"
fail_msg: "Invalid system_service_id '{{ system_service_id }}' → must not contain '@'."
when: MODE_ASSERT | bool
- name: "Make '{{ system_service_id }}' available for sys-timer"
set_fact:

View File

@ -1,6 +1,8 @@
[Unit]
Description={{ SOFTWARE_NAME }} - Service for role '{{ system_service_id }}'
{% if system_service_tpl_on_failure |length > 0 %}
OnFailure={{ system_service_tpl_on_failure }}
{% endif %}
[Service]
Type={{ system_service_tpl_type }}

View File

@ -1 +1 @@
sys_timer_file: "{{ system_service_timer_service }}{{ SYS_TIMER_SUFFIX }}"
sys_timer_file: "{{ system_service_timer_service | get_service_name(SOFTWARE_NAME,'.timer') }}"

View File

@ -12,6 +12,7 @@
generate_ssh_key: yes
ssh_key_type: rsa
ssh_key_bits: 8192
no_log: "{{ MASK_CREDENTIALS_IN_LOGS | bool }}"
- name: "set correct rights for {{ PATH_ADMINISTRATOR_HOME }}"
file:

View File

@ -16,6 +16,7 @@
- docker_compose is defined
- ports is defined
fail_msg: "Load roles/docker-compose/vars/docker-compose.yml and set `database_type` first."
when: MODE_ASSERT | bool
- name: "Disconnect DB container from Discourse networks"
ansible.builtin.command:

View File

@ -46,6 +46,7 @@
- scope_id_rbac | length > 0
- (app_client_id_cmd.stdout | trim) is match('^[0-9a-f-]+$')
fail_msg: "Could not determine client or scope ID."
when: MODE_ASSERT | bool
- name: Get current optional client scopes
shell: >

View File

@ -26,6 +26,7 @@
assert:
that: [ "(ldap_cmp_id.stdout | trim) not in ['', 'null']" ]
fail_msg: "LDAP component '{{ KEYCLOAK_LDAP_CMP_NAME }}' not found in Keycloak."
when: MODE_ASSERT | bool
- name: Pull LDAP component from dictionary (by name)
set_fact:
@ -42,6 +43,7 @@
- ldap_component_tpl | length > 0
- (ldap_component_tpl.subComponents | default({})) | length > 0
fail_msg: "LDAP component '{{ KEYCLOAK_LDAP_CMP_NAME }}' not found in KEYCLOAK_DICTIONARY_REALM."
when: MODE_ASSERT | bool
- name: Extract mapper 'ldap-roles' from template (raw)
set_fact:
@ -59,6 +61,7 @@
assert:
that: [ "desired_group_mapper_raw | length > 0" ]
fail_msg: "'ldap-roles' mapper not found in dictionary under LDAP component."
when: MODE_ASSERT | bool
- name: Build clean mapper payload
set_fact:

View File

@ -18,6 +18,7 @@
- kc_lookup_value is defined
- kc_desired is defined
fail_msg: "kc_object_kind, kc_lookup_value, kc_desired are required."
when: MODE_ASSERT | bool
- name: Derive API endpoint and lookup field
set_fact:
@ -67,6 +68,7 @@
- (kc_obj_id | trim) != ''
- (kc_obj_id | trim) != 'null'
fail_msg: "{{ kc_object_kind | capitalize }} '{{ kc_lookup_value }}' not found."
when: MODE_ASSERT | bool
- name: Read current object
shell: >
@ -85,6 +87,7 @@
when:
- kc_object_kind == 'component'
- (kc_desired.providerId is defined)
- MODE_ASSERT | bool
assert:
that:
- cur_obj.providerId == kc_desired.providerId

View File

@ -6,6 +6,7 @@
- MAILU_HOSTNAMES | length <= 1
fail_msg: "MAILU_HOSTNAMES must be a list with at most one entry (only one host is supported). You can set the other ones as alias."
success_msg: "MAILU_HOSTNAMES is valid."
when: MODE_ASSERT | bool
- name: "Mailu Docker and Webserver Setup"
block:

View File

@ -1,5 +1,6 @@
- name: "Validate configuration"
include_tasks: "02_validate.yml"
when: MODE_ASSERT | bool
- name: "load docker, proxy for '{{ application_id }}'"
include_role:

View File

@ -28,6 +28,7 @@
fail_msg: "Group '{{ item }}' has no entry in 'applications'"
success_msg: "Group '{{ item }}' is defined in 'applications'"
loop: "{{ group_names }}"
when: MODE_ASSERT | bool
- name: Merge current play applications
set_fact: