From f36c7831b15dcbc0b4294451c8b746981d41f0f3 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Fri, 29 Aug 2025 15:44:31 +0200 Subject: [PATCH] Implement dynamic TimeoutStartSec filter for domains and update roles - Added new filter plugin 'timeout_start_sec_for_domains' to calculate TimeoutStartSec based on number of domains. - Updated sys-ctl-hlth-csp and sys-ctl-hlth-webserver tasks to use the filter. - Removed obsolete systemctl.service.j2 in sys-ctl-hlth-csp. - Adjusted variable naming (CURRENT_PLAY_DOMAINS_ALL etc.) in multiple roles. - Updated srv-letsencrypt and sys-svc-certs to use uppercase vars. - Switched pretix role to sys-stk-full-stateful and removed leftover javascript.js. - Added unittests for the new filter under tests/unit/filter_plugins. See conversation: https://chatgpt.com/share/68b1ae9a-1ac0-800f-b49d-2915386a1a23 --- .../timeout_start_sec_for_domains.py | 75 ++++++++++++++++ roles/srv-letsencrypt/vars/main.yml | 2 +- roles/sys-ctl-hlth-csp/tasks/01_core.yml | 3 +- .../templates/systemctl.service.j2 | 7 -- .../sys-ctl-hlth-webserver/tasks/01_core.yml | 7 +- roles/sys-svc-certs/tasks/flavors/_san.yml | 2 +- roles/web-app-pretix/tasks/main.yml | 4 +- roles/web-app-pretix/templates/javascript.js | 1 - roles/web-opt-rdr-www/vars/main.yml | 2 +- tasks/stages/01_constructor.yml | 16 ++-- .../test_timeout_start_sec_for_domains.py | 87 +++++++++++++++++++ 11 files changed, 181 insertions(+), 25 deletions(-) create mode 100644 filter_plugins/timeout_start_sec_for_domains.py delete mode 100644 roles/sys-ctl-hlth-csp/templates/systemctl.service.j2 delete mode 100644 roles/web-app-pretix/templates/javascript.js create mode 100644 tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py diff --git a/filter_plugins/timeout_start_sec_for_domains.py b/filter_plugins/timeout_start_sec_for_domains.py new file mode 100644 index 00000000..fba98dc2 --- /dev/null +++ b/filter_plugins/timeout_start_sec_for_domains.py @@ -0,0 +1,75 @@ +from ansible.errors import AnsibleFilterError + +class FilterModule(object): + """ + Compute a max TimeoutStartSec for systemd services that iterate over many domains. + The timeout scales with the number of unique domains (optionally including www.* clones) + and is clamped between configurable min/max bounds. + """ + + def filters(self): + return { + "timeout_start_sec_for_domains": self.timeout_start_sec_for_domains, + } + + def timeout_start_sec_for_domains( + self, + domains_dict, + include_www=True, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=120, + max_seconds=3600, + ): + """ + Args: + domains_dict (dict): Same structure you pass to generate_all_domains + (values can be str | list[str] | dict[str,str]). + include_www (bool): If true, also count "www." variants. + per_domain_seconds (int): Budget per domain (default 25s). + overhead_seconds (int): Fixed overhead on top (default 30s). + min_seconds (int): Lower clamp (default 120s). + max_seconds (int): Upper clamp (default 3600s). + + Returns: + int: TimeoutStartSec in seconds (integer). + + Raises: + AnsibleFilterError: On invalid input types or unexpected failures. + """ + try: + if not isinstance(domains_dict, dict): + raise AnsibleFilterError("Expected 'domains_dict' to be a dict.") + + # Local flatten similar to your generate_all_domains + def _flatten(domains): + flat = [] + for v in (domains or {}).values(): + if isinstance(v, str): + flat.append(v) + elif isinstance(v, list): + flat.extend(v) + elif isinstance(v, dict): + flat.extend(v.values()) + return flat + + flat = _flatten(domains_dict) + + if include_www: + # dedupe first so we don't generate duplicate www-variants + base_unique = sorted(set(flat)) + www_variants = [f"www.{d}" for d in base_unique if not str(d).startswith("www.")] + flat.extend(www_variants) + + unique_domains = sorted(set(flat)) + count = len(unique_domains) + + # Compute and clamp + raw = overhead_seconds + per_domain_seconds * count + clamped = max(min_seconds, min(max_seconds, int(raw))) + return clamped + + except AnsibleFilterError: + raise + except Exception as exc: + raise AnsibleFilterError(f"timeout_start_sec_for_domains failed: {exc}") diff --git a/roles/srv-letsencrypt/vars/main.yml b/roles/srv-letsencrypt/vars/main.yml index b68ea536..03343533 100644 --- a/roles/srv-letsencrypt/vars/main.yml +++ b/roles/srv-letsencrypt/vars/main.yml @@ -1,4 +1,4 @@ caa_entries: - tag: issue value: letsencrypt.org -base_sld_domains: '{{ current_play_domains_all | generate_base_sld_domains }}' +base_sld_domains: '{{ CURRENT_PLAY_DOMAINS_ALL | generate_base_sld_domains }}' diff --git a/roles/sys-ctl-hlth-csp/tasks/01_core.yml b/roles/sys-ctl-hlth-csp/tasks/01_core.yml index effb3d55..24433baa 100644 --- a/roles/sys-ctl-hlth-csp/tasks/01_core.yml +++ b/roles/sys-ctl-hlth-csp/tasks/01_core.yml @@ -16,4 +16,5 @@ system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_CSP_CRAWLER }}" system_service_timer_enabled: true system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}" - system_service_tpl_timeout_start_sec: 15min + system_service_tpl_timeout_start_sec: "{{ CURRENT_PLAY_DOMAINS_ALL | timeout_start_sec_for_domains }}" + system_service_tpl_exec_start: "{{ system_service_script_exec }} --nginx-config-dir={{ NGINX.DIRECTORIES.HTTP.SERVERS }}" diff --git a/roles/sys-ctl-hlth-csp/templates/systemctl.service.j2 b/roles/sys-ctl-hlth-csp/templates/systemctl.service.j2 deleted file mode 100644 index c9de1d18..00000000 --- a/roles/sys-ctl-hlth-csp/templates/systemctl.service.j2 +++ /dev/null @@ -1,7 +0,0 @@ -[Unit] -Description=Check for CSP-blocked resources via Puppeteer -OnFailure={{ SYS_SERVICE_ON_FAILURE_COMPOSE }} - -[Service] -Type=oneshot -ExecStart={{ system_service_script_exec }} --nginx-config-dir={{ NGINX.DIRECTORIES.HTTP.SERVERS }} \ No newline at end of file diff --git a/roles/sys-ctl-hlth-webserver/tasks/01_core.yml b/roles/sys-ctl-hlth-webserver/tasks/01_core.yml index cf1b7127..f2367f26 100644 --- a/roles/sys-ctl-hlth-webserver/tasks/01_core.yml +++ b/roles/sys-ctl-hlth-webserver/tasks/01_core.yml @@ -16,6 +16,7 @@ - include_role: name: sys-service vars: - system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_NGINX }}" - system_service_timer_enabled: true - system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}" + system_service_on_calendar: "{{ SYS_SCHEDULE_HEALTH_NGINX }}" + system_service_timer_enabled: true + system_service_tpl_on_failure: "{{ SYS_SERVICE_ON_FAILURE_COMPOSE }}" + system_service_tpl_timeout_start_sec: "{{ CURRENT_PLAY_DOMAINS_ALL | timeout_start_sec_for_domains }}" diff --git a/roles/sys-svc-certs/tasks/flavors/_san.yml b/roles/sys-svc-certs/tasks/flavors/_san.yml index ca796dbf..30261752 100644 --- a/roles/sys-svc-certs/tasks/flavors/_san.yml +++ b/roles/sys-svc-certs/tasks/flavors/_san.yml @@ -8,7 +8,7 @@ - name: Generate SAN certificate with certbundle command: >- certbundle - --domains "{{ current_play_domains_all | join(',') }}" + --domains "{{ CURRENT_PLAY_DOMAINS_ALL | join(',') }}" --certbot-email "{{ users.administrator.email }}" --certbot-acme-challenge-method "{{ CERTBOT_ACME_CHALLENGE_METHOD }}" --chunk-size 100 diff --git a/roles/web-app-pretix/tasks/main.yml b/roles/web-app-pretix/tasks/main.yml index 3d520d16..805c4773 100644 --- a/roles/web-app-pretix/tasks/main.yml +++ b/roles/web-app-pretix/tasks/main.yml @@ -1,7 +1,7 @@ --- - block: - - name: "load docker and db for {{ application_id }}" + - name: "load docker, db and proxy for {{ application_id }}" include_role: - name: sys-stk-back-stateful + name: sys-stk-full-stateful - include_tasks: utils/run_once.yml when: run_once_web_app_pretix is not defined diff --git a/roles/web-app-pretix/templates/javascript.js b/roles/web-app-pretix/templates/javascript.js deleted file mode 100644 index 5a1bd867..00000000 --- a/roles/web-app-pretix/templates/javascript.js +++ /dev/null @@ -1 +0,0 @@ -alert('Custom JS loaded'); \ No newline at end of file diff --git a/roles/web-opt-rdr-www/vars/main.yml b/roles/web-opt-rdr-www/vars/main.yml index 8fe08189..9d465c61 100644 --- a/roles/web-opt-rdr-www/vars/main.yml +++ b/roles/web-opt-rdr-www/vars/main.yml @@ -3,4 +3,4 @@ application_id: "web-opt-rdr-www" # Redirect WWW REDIRECT_WWW_FLAVOR: "{{ applications | get_app_conf(application_id, 'prefered_flavor') if DNS_PROVIDER == 'cloudflare' else 'origin' }}" -REDIRECT_WWW_DOMAINS: "{{ current_play_domains_all | select('match', '^www\\.') | list }}" \ No newline at end of file +REDIRECT_WWW_DOMAINS: "{{ CURRENT_PLAY_DOMAINS_ALL | select('match', '^www\\.') | list }}" \ No newline at end of file diff --git a/tasks/stages/01_constructor.yml b/tasks/stages/01_constructor.yml index e514ec1a..0a6c3fa8 100644 --- a/tasks/stages/01_constructor.yml +++ b/tasks/stages/01_constructor.yml @@ -6,7 +6,7 @@ - name: Merge system_email definitions set_fact: - SYSTEM_EMAIL: "{{ DEFAULT_SYSTEM_EMAIL | combine(system_email | default({}, true), recursive=True) }}" + SYSTEM_EMAIL: "{{ DEFAULT_SYSTEM_EMAIL | combine(SYSTEM_EMAIL | default({}, true), recursive=True) }}" no_log: "{{ MASK_CREDENTIALS_IN_LOGS | bool }}" - name: Merge variables @@ -32,7 +32,7 @@ - name: Merge current play applications set_fact: - current_play_applications: >- + CURRENT_PLAY_APPLICATIONS: >- {{ applications | applications_if_group_and_deps(group_names) @@ -41,14 +41,14 @@ - name: Merge current play domain definitions set_fact: - current_play_domains: >- + CURRENT_PLAY_DOMAINS: >- {{ applications | canonical_domains_map( PRIMARY_DOMAIN, recursive=True, roles_base_dir=([ playbook_dir, 'roles' ] | path_join), - seed=(current_play_applications | dict2items | map(attribute='key') | list) + seed=(CURRENT_PLAY_APPLICATIONS | dict2items | map(attribute='key') | list) ) | combine(domains | default({}, true), recursive=True) }} @@ -59,7 +59,7 @@ {{ defaults_applications | canonical_domains_map(PRIMARY_DOMAIN) | - combine(current_play_domains, recursive=True) + combine(CURRENT_PLAY_DOMAINS, recursive=True) }} - name: Merge redirect_domain_mappings set_fact: @@ -74,16 +74,16 @@ set_fact: redirect_domain_mappings: >- {{ - current_play_applications | + CURRENT_PLAY_APPLICATIONS | domain_mappings(PRIMARY_DOMAIN) | merge_mapping(redirect_domain_mappings, 'source') }} - name: Set current play all domains incl. www redirect if enabled set_fact: - current_play_domains_all: >- + CURRENT_PLAY_DOMAINS_ALL: >- {{ - (current_play_domains | + (CURRENT_PLAY_DOMAINS | combine( redirect_domain_mappings | default([]) | items2dict(key_name='source', value_name='source'), diff --git a/tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py b/tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py new file mode 100644 index 00000000..d9a1c6ad --- /dev/null +++ b/tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py @@ -0,0 +1,87 @@ +# tests/unit/filter_plugins/test_timeout_start_sec_for_domains.py +import unittest +from ansible.errors import AnsibleFilterError +from filter_plugins.timeout_start_sec_for_domains import FilterModule + + +def _f(): + return FilterModule().filters()["timeout_start_sec_for_domains"] + + +class TestTimeoutStartSecForDomains(unittest.TestCase): + + def test_basic_calculation_with_www(self): + # 3 unique base domains → + www.* = 6 domains + domains = { + "canonical": ["example.com", "foo.bar"], + "api": {"a": "api.example.com"}, + } + result = _f()(domains, include_www=True, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=120, + max_seconds=3600) + # raw = 30 + 25 * 6 = 180 + self.assertEqual(result, 180) + + def test_no_www_min_clamp_applies(self): + # 3 unique domains, no www.* → raw = 30 + 25*3 = 105 → clamped to min=120 + domains = { + "canonical": ["example.com", "foo.bar"], + "api": {"a": "api.example.com"}, + } + result = _f()(domains, include_www=False, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=120, + max_seconds=3600) + self.assertEqual(result, 120) + + def test_max_clamp_applies(self): + # >143 domains needed to exceed 3600 (25s each + 30 overhead) + many = [f"host{i}.example.com" for i in range(150)] + domains = {"canonical": many} + result = _f()(domains, include_www=False, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=120, + max_seconds=3600) + self.assertEqual(result, 3600) + + def test_deduplication_of_domains(self): + # All entries resolve to "x.com" → only 1 unique domain + domains = { + "a": ["x.com", "x.com"], + "b": "x.com", + "c": {"k": "x.com"}, + } + result = _f()(domains, include_www=False, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=120, + max_seconds=3600) + # raw = 30 + 25 * 1 = 55 → clamped to 120 + self.assertEqual(result, 120) + + def test_deduplication_with_www_variants(self): + # 2 unique base domains, one already includes a "www.a.com" + domains = { + "canonical": ["a.com", "b.com", "www.a.com"], + "extra": {"x": "a.com"}, + } + result = _f()(domains, include_www=True, + per_domain_seconds=25, + overhead_seconds=30, + min_seconds=1, + max_seconds=10000) + # Unique: {"a.com","b.com","www.a.com","www.b.com"} → 4 + # raw = 30 + 25*4 = 130 + self.assertEqual(result, 130) + + def test_raises_on_non_dict_input(self): + with self.assertRaises(AnsibleFilterError): + _f()(["not-a-dict"]) + + +if __name__ == "__main__": + unittest.main()