Implement dynamic TimeoutStartSec filter for domains and update roles

- Added new filter plugin 'timeout_start_sec_for_domains' to calculate TimeoutStartSec based on number of domains.
- Updated sys-ctl-hlth-csp and sys-ctl-hlth-webserver tasks to use the filter.
- Removed obsolete systemctl.service.j2 in sys-ctl-hlth-csp.
- Renamed play-scoped variables to uppercase (e.g. current_play_domains_all → CURRENT_PLAY_DOMAINS_ALL) in multiple roles.
- Updated srv-letsencrypt and sys-svc-certs to use uppercase vars.
- Switched pretix role to sys-stk-full-stateful and removed leftover javascript.js.
- Added unit tests for the new filter under tests/unit/filter_plugins.

See conversation: https://chatgpt.com/share/68b1ae9a-1ac0-800f-b49d-2915386a1a23
This commit is contained in:
2025-08-29 15:44:31 +02:00
parent 009bee531b
commit f36c7831b1
11 changed files with 181 additions and 25 deletions

View File

@@ -0,0 +1,75 @@
from ansible.errors import AnsibleFilterError
class FilterModule(object):
    """
    Compute a max TimeoutStartSec for systemd services that iterate over many domains.

    The timeout scales linearly with the number of unique domains (optionally
    including generated ``www.*`` variants) and is clamped between configurable
    min/max bounds.
    """

    def filters(self):
        """Return the mapping of filter names to the callables implementing them."""
        return {
            "timeout_start_sec_for_domains": self.timeout_start_sec_for_domains,
        }

    @staticmethod
    def _to_number(value):
        """Convert a numeric string to int (or float); return other values unchanged."""
        if isinstance(value, str):
            try:
                return int(value)
            except ValueError:
                # Not an integer literal; float() raises ValueError for garbage,
                # which the caller wraps into an AnsibleFilterError.
                return float(value)
        return value

    @staticmethod
    def _flatten(domains):
        """Collect all domain entries from str, list and dict values into one list."""
        flat = []
        for value in (domains or {}).values():
            if isinstance(value, str):
                flat.append(value)
            elif isinstance(value, list):
                flat.extend(value)
            elif isinstance(value, dict):
                flat.extend(value.values())
            # Values of any other type are ignored.
        return flat

    def timeout_start_sec_for_domains(
        self,
        domains_dict,
        include_www=True,
        per_domain_seconds=25,
        overhead_seconds=30,
        min_seconds=120,
        max_seconds=3600,
    ):
        """
        Compute ``overhead_seconds + per_domain_seconds * <unique domain count>``
        and clamp the result to ``[min_seconds, max_seconds]``.

        Args:
            domains_dict (dict): Mapping whose values are str | list[str] | dict[str, str].
            include_www (bool): If true, also count a "www.<domain>" variant for
                every domain that does not already start with "www.".
            per_domain_seconds (int): Budget per domain (default 25s).
            overhead_seconds (int): Fixed overhead on top (default 30s).
            min_seconds (int): Lower clamp (default 120s).
            max_seconds (int): Upper clamp (default 3600s).

            The four numeric options may also be given as numeric strings
            (e.g. "25"); they are converted before the arithmetic so that
            string concatenation/repetition can never leak into the result.

        Returns:
            int: TimeoutStartSec in seconds. If ``min_seconds`` is larger than
            ``max_seconds``, ``min_seconds`` wins.

        Raises:
            AnsibleFilterError: On invalid input types or unexpected failures.
        """
        try:
            # NOTE(review): a flat list of domains is rejected here; confirm that
            # every variable piped into this filter is the domains mapping.
            if not isinstance(domains_dict, dict):
                raise AnsibleFilterError("Expected 'domains_dict' to be a dict.")

            # Templated variables may arrive as strings; normalise them first.
            per_domain_seconds = FilterModule._to_number(per_domain_seconds)
            overhead_seconds = FilterModule._to_number(overhead_seconds)
            min_seconds = FilterModule._to_number(min_seconds)
            max_seconds = FilterModule._to_number(max_seconds)

            unique_domains = set(FilterModule._flatten(domains_dict))

            if include_www:
                # Derive the variants from the deduplicated set so that no
                # duplicate www-variants are generated.
                www_variants = {
                    f"www.{domain}"
                    for domain in unique_domains
                    if not str(domain).startswith("www.")
                }
                unique_domains |= www_variants

            count = len(unique_domains)

            # Compute and clamp
            raw = overhead_seconds + per_domain_seconds * count
            return max(min_seconds, min(max_seconds, int(raw)))
        except AnsibleFilterError:
            raise
        except Exception as exc:
            # Chain the cause so the original traceback stays available.
            raise AnsibleFilterError(
                f"timeout_start_sec_for_domains failed: {exc}"
            ) from exc

View File

@@ -1,4 +1,4 @@
caa_entries:
- tag: issue
value: letsencrypt.org
base_sld_domains: '{{ current_play_domains_all | generate_base_sld_domains }}'
base_sld_domains: '{{ CURRENT_PLAY_DOMAINS_ALL | generate_base_sld_domains }}'

View File

@@ -16,4 +16,5 @@
system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_CSP_CRAWLER }}"
system_service_timer_enabled: true
system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}"
system_service_tpl_timeout_start_sec: 15min
system_service_tpl_timeout_start_sec: "{{ CURRENT_PLAY_DOMAINS_ALL | timeout_start_sec_for_domains }}"
system_service_tpl_exec_start: "{{ system_service_script_exec }} --nginx-config-dir={{ NGINX.DIRECTORIES.HTTP.SERVERS }}"

View File

@@ -1,7 +0,0 @@
[Unit]
Description=Check for CSP-blocked resources via Puppeteer
OnFailure={{ SYS_SERVICE_ON_FAILURE_COMPOSE }}
[Service]
Type=oneshot
ExecStart={{ system_service_script_exec }} --nginx-config-dir={{ NGINX.DIRECTORIES.HTTP.SERVERS }}

View File

@@ -16,6 +16,7 @@
- include_role:
name: sys-service
vars:
system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_NGINX }}"
system_service_timer_enabled: true
system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}"
system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_NGINX }}"
system_service_timer_enabled: true
system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}"
system_service_tpl_timeout_start_sec: "{{ CURRENT_PLAY_DOMAINS_ALL | timeout_start_sec_for_domains }}"

View File

@@ -8,7 +8,7 @@
- name: Generate SAN certificate with certbundle
command: >-
certbundle
--domains "{{ current_play_domains_all | join(',') }}"
--domains "{{ CURRENT_PLAY_DOMAINS_ALL | join(',') }}"
--certbot-email "{{ users.administrator.email }}"
--certbot-acme-challenge-method "{{ CERTBOT_ACME_CHALLENGE_METHOD }}"
--chunk-size 100

View File

@@ -1,7 +1,7 @@
---
- block:
- name: "load docker and db for {{ application_id }}"
- name: "load docker, db and proxy for {{ application_id }}"
include_role:
name: sys-stk-back-stateful
name: sys-stk-full-stateful
- include_tasks: utils/run_once.yml
when: run_once_web_app_pretix is not defined

View File

@@ -1 +0,0 @@
alert('Custom JS loaded');

View File

@@ -3,4 +3,4 @@ application_id: "web-opt-rdr-www"
# Redirect WWW
REDIRECT_WWW_FLAVOR: "{{ applications | get_app_conf(application_id, 'prefered_flavor') if DNS_PROVIDER == 'cloudflare' else 'origin' }}"
REDIRECT_WWW_DOMAINS: "{{ current_play_domains_all | select('match', '^www\\.') | list }}"
REDIRECT_WWW_DOMAINS: "{{ CURRENT_PLAY_DOMAINS_ALL | select('match', '^www\\.') | list }}"

View File

@@ -6,7 +6,7 @@
- name: Merge system_email definitions
set_fact:
SYSTEM_EMAIL: "{{ DEFAULT_SYSTEM_EMAIL | combine(system_email | default({}, true), recursive=True) }}"
SYSTEM_EMAIL: "{{ DEFAULT_SYSTEM_EMAIL | combine(SYSTEM_EMAIL | default({}, true), recursive=True) }}"
no_log: "{{ MASK_CREDENTIALS_IN_LOGS | bool }}"
- name: Merge variables
@@ -32,7 +32,7 @@
- name: Merge current play applications
set_fact:
current_play_applications: >-
CURRENT_PLAY_APPLICATIONS: >-
{{
applications |
applications_if_group_and_deps(group_names)
@@ -41,14 +41,14 @@
- name: Merge current play domain definitions
set_fact:
current_play_domains: >-
CURRENT_PLAY_DOMAINS: >-
{{
applications
| canonical_domains_map(
PRIMARY_DOMAIN,
recursive=True,
roles_base_dir=([ playbook_dir, 'roles' ] | path_join),
seed=(current_play_applications | dict2items | map(attribute='key') | list)
seed=(CURRENT_PLAY_APPLICATIONS | dict2items | map(attribute='key') | list)
)
| combine(domains | default({}, true), recursive=True)
}}
@@ -59,7 +59,7 @@
{{
defaults_applications |
canonical_domains_map(PRIMARY_DOMAIN) |
combine(current_play_domains, recursive=True)
combine(CURRENT_PLAY_DOMAINS, recursive=True)
}}
- name: Merge redirect_domain_mappings
set_fact:
@@ -74,16 +74,16 @@
set_fact:
redirect_domain_mappings: >-
{{
current_play_applications |
CURRENT_PLAY_APPLICATIONS |
domain_mappings(PRIMARY_DOMAIN) |
merge_mapping(redirect_domain_mappings, 'source')
}}
- name: Set current play all domains incl. www redirect if enabled
set_fact:
current_play_domains_all: >-
CURRENT_PLAY_DOMAINS_ALL: >-
{{
(current_play_domains |
(CURRENT_PLAY_DOMAINS |
combine(
redirect_domain_mappings | default([]) |
items2dict(key_name='source', value_name='source'),

View File

@@ -0,0 +1,87 @@
# tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py
import unittest
from ansible.errors import AnsibleFilterError
from filter_plugins.timeout_start_sec_for_domains import FilterModule
def _f():
    """Return the filter callable under test, looked up on a fresh FilterModule."""
    registered = FilterModule().filters()
    return registered["timeout_start_sec_for_domains"]
class TestTimeoutStartSecForDomains(unittest.TestCase):
    """Unit tests for the ``timeout_start_sec_for_domains`` filter."""

    def _timeout(self, domains, include_www, min_seconds=120, max_seconds=3600):
        """Call the filter with the fixed 25s-per-domain / 30s-overhead budget."""
        return _f()(
            domains,
            include_www=include_www,
            per_domain_seconds=25,
            overhead_seconds=30,
            min_seconds=min_seconds,
            max_seconds=max_seconds,
        )

    def test_basic_calculation_with_www(self):
        # Three unique base domains; each gains a www.* variant, giving six.
        domains = {
            "canonical": ["example.com", "foo.bar"],
            "api": {"a": "api.example.com"},
        }
        # 30 + 25 * 6 = 180, which lies inside the clamp range.
        self.assertEqual(self._timeout(domains, include_www=True), 180)

    def test_no_www_min_clamp_applies(self):
        # Three unique domains without www.*: 30 + 25 * 3 = 105, raised to 120.
        domains = {
            "canonical": ["example.com", "foo.bar"],
            "api": {"a": "api.example.com"},
        }
        self.assertEqual(self._timeout(domains, include_www=False), 120)

    def test_max_clamp_applies(self):
        # 150 domains give 30 + 25 * 150 = 3780, which is capped at 3600.
        hosts = ["host{}.example.com".format(index) for index in range(150)]
        domains = {"canonical": hosts}
        self.assertEqual(self._timeout(domains, include_www=False), 3600)

    def test_deduplication_of_domains(self):
        # Every entry is "x.com", so exactly one unique domain is counted.
        domains = {
            "a": ["x.com", "x.com"],
            "b": "x.com",
            "c": {"k": "x.com"},
        }
        # 30 + 25 * 1 = 55, raised to the lower bound of 120.
        self.assertEqual(self._timeout(domains, include_www=False), 120)

    def test_deduplication_with_www_variants(self):
        # Two base domains plus an explicit "www.a.com" that must not be doubled.
        domains = {
            "canonical": ["a.com", "b.com", "www.a.com"],
            "extra": {"x": "a.com"},
        }
        result = self._timeout(
            domains, include_www=True, min_seconds=1, max_seconds=10000
        )
        # Unique set: a.com, b.com, www.a.com, www.b.com -> 30 + 25 * 4 = 130.
        self.assertEqual(result, 130)

    def test_raises_on_non_dict_input(self):
        # A list instead of a mapping must be rejected.
        with self.assertRaises(AnsibleFilterError):
            _f()(["not-a-dict"])
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()