mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-11-10 15:16:31 +00:00
feat(filters): enforce safe Node.js heap sizing via reusable filter
- Add node_autosize filter (node_max_old_space_size) using get_app_conf - Raise error when mem_limit < min_mb to prevent OOM-kill misconfigurations - Wire Whiteboard NODE_OPTIONS and increase mem_limit to 1g; set cpus=1 - Refactor PeerTube to use the same filter; simplify vars - Add unit tests; keep integration filters usage green Context: https://chatgpt.com/share/690e0499-6a94-800f-b8ed-2c5124690103
This commit is contained in:
141
filter_plugins/node_autosize.py
Normal file
141
filter_plugins/node_autosize.py
Normal file
@@ -0,0 +1,141 @@
|
||||
# filter_plugins/node_autosize.py
|
||||
# Reuse app config to derive sensible Node.js heap sizes for containers.
|
||||
#
|
||||
# Usage example (Jinja):
|
||||
# {{ applications | node_max_old_space_size('web-app-nextcloud', 'whiteboard') }}
|
||||
#
|
||||
# Heuristics (defaults):
|
||||
# - candidate = 35% of mem_limit
|
||||
# - min = 768 MB (required minimum)
|
||||
# - cap = min(3072 MB, 60% of mem_limit)
|
||||
#
|
||||
# NEW: If mem_limit (container cgroup RAM) is smaller than min_mb, we raise an
|
||||
# exception — to prevent a misconfiguration where Node's heap could exceed the cgroup
|
||||
# and be OOM-killed.
|
||||
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from ansible.errors import AnsibleFilterError
|
||||
|
||||
# Import the shared config resolver from module_utils
|
||||
try:
|
||||
from module_utils.config_utils import get_app_conf, AppConfigKeyError
|
||||
except Exception as e:
|
||||
raise AnsibleFilterError(
|
||||
f"Failed to import get_app_conf from module_utils.config_utils: {e}"
|
||||
)
|
||||
|
||||
_SIZE_RE = re.compile(r"^\s*(\d+(?:\.\d+)?)\s*([kmgtp]?i?b?)?\s*$", re.IGNORECASE)
|
||||
_MULT = {
|
||||
"": 1,
|
||||
"b": 1,
|
||||
"k": 10**3, "kb": 10**3,
|
||||
"m": 10**6, "mb": 10**6,
|
||||
"g": 10**9, "gb": 10**9,
|
||||
"t": 10**12, "tb": 10**12,
|
||||
"p": 10**15, "pb": 10**15,
|
||||
"kib": 1024,
|
||||
"mib": 1024**2,
|
||||
"gib": 1024**3,
|
||||
"tib": 1024**4,
|
||||
"pib": 1024**5,
|
||||
}
|
||||
|
||||
|
||||
def _to_bytes(val):
|
||||
"""Convert numeric or string memory limits (e.g. '512m', '2GiB') to bytes."""
|
||||
if val is None or val == "":
|
||||
return None
|
||||
if isinstance(val, (int, float)):
|
||||
return int(val)
|
||||
if not isinstance(val, str):
|
||||
raise AnsibleFilterError(f"Unsupported mem_limit type: {type(val).__name__}")
|
||||
m = _SIZE_RE.match(val)
|
||||
if not m:
|
||||
raise AnsibleFilterError(f"Unrecognized mem_limit string: {val!r}")
|
||||
num = float(m.group(1))
|
||||
unit = (m.group(2) or "").lower()
|
||||
if unit not in _MULT:
|
||||
raise AnsibleFilterError(f"Unknown unit in mem_limit: {unit!r}")
|
||||
return int(num * _MULT[unit])
|
||||
|
||||
|
||||
def _mb(bytes_val: int) -> int:
|
||||
"""Return decimal MB (10^6) as integer — Node expects MB units."""
|
||||
return int(round(bytes_val / 10**6))
|
||||
|
||||
|
||||
def _compute_old_space_mb(
|
||||
total_mb: int, pct: float, min_mb: int, hardcap_mb: int, safety_cap_pct: float
|
||||
) -> int:
|
||||
"""
|
||||
Compute Node.js old-space heap (MB) with safe minimum and cap handling.
|
||||
|
||||
NOTE: The calling function ensures total_mb >= min_mb; here we only
|
||||
apply the sizing heuristics and caps.
|
||||
"""
|
||||
candidate = int(total_mb * float(pct))
|
||||
safety_cap = int(total_mb * float(safety_cap_pct))
|
||||
final_cap = min(int(hardcap_mb), safety_cap)
|
||||
|
||||
# Enforce minimum first; only apply cap if it's above the minimum
|
||||
candidate = max(candidate, int(min_mb))
|
||||
if final_cap >= int(min_mb):
|
||||
candidate = min(candidate, final_cap)
|
||||
|
||||
# Never below a tiny hard floor
|
||||
return max(candidate, 128)
|
||||
|
||||
|
||||
def node_max_old_space_size(
|
||||
applications: dict,
|
||||
application_id: str,
|
||||
service_name: str,
|
||||
pct: float = 0.35,
|
||||
min_mb: int = 768,
|
||||
hardcap_mb: int = 3072,
|
||||
safety_cap_pct: float = 0.60,
|
||||
) -> int:
|
||||
"""
|
||||
Derive Node.js --max-old-space-size (MB) from the service's mem_limit in app config.
|
||||
|
||||
Looks up: docker.services.<service_name>.mem_limit for the given application_id.
|
||||
|
||||
Raises:
|
||||
AnsibleFilterError if mem_limit is missing/invalid OR if mem_limit (MB) < min_mb.
|
||||
"""
|
||||
try:
|
||||
mem_limit = get_app_conf(
|
||||
applications=applications,
|
||||
application_id=application_id,
|
||||
config_path=f"docker.services.{service_name}.mem_limit",
|
||||
strict=True,
|
||||
default=None,
|
||||
)
|
||||
except AppConfigKeyError as e:
|
||||
raise AnsibleFilterError(str(e))
|
||||
|
||||
if mem_limit in (None, False, ""):
|
||||
raise AnsibleFilterError(
|
||||
f"mem_limit not set for application '{application_id}', service '{service_name}'"
|
||||
)
|
||||
|
||||
total_bytes = _to_bytes(mem_limit)
|
||||
total_mb = _mb(total_bytes)
|
||||
|
||||
# NEW: guardrail — refuse to size a heap larger than the cgroup limit
|
||||
if total_mb < int(min_mb):
|
||||
raise AnsibleFilterError(
|
||||
f"mem_limit ({total_mb} MB) is below the required minimum heap ({int(min_mb)} MB) "
|
||||
f"for application '{application_id}', service '{service_name}'. "
|
||||
f"Increase mem_limit or lower min_mb."
|
||||
)
|
||||
|
||||
return _compute_old_space_mb(total_mb, pct, min_mb, hardcap_mb, safety_cap_pct)
|
||||
|
||||
|
||||
class FilterModule(object):
|
||||
def filters(self):
|
||||
return {
|
||||
"node_max_old_space_size": node_max_old_space_size,
|
||||
}
|
||||
@@ -93,9 +93,9 @@ docker:
|
||||
version: "latest"
|
||||
backup:
|
||||
no_stop_required: true
|
||||
cpus: "0.25"
|
||||
cpus: "1"
|
||||
mem_reservation: "128m"
|
||||
mem_limit: "512m"
|
||||
mem_limit: "1g"
|
||||
pids_limit: 1024
|
||||
enabled: "{{ applications | get_app_conf('web-app-nextcloud', 'features.oidc', False, True, True) }}" # Activate OIDC for Nextcloud
|
||||
# floavor decides which OICD plugin should be used.
|
||||
|
||||
@@ -77,7 +77,8 @@
|
||||
volumes:
|
||||
- whiteboard_tmp:/tmp
|
||||
- whiteboard_fontcache:/var/cache/fontconfig
|
||||
|
||||
environment:
|
||||
- NODE_OPTIONS=--max-old-space-size={{ NEXTCLOUD_WHITEBOARD_MAX_OLD_SPACE_SIZE }}
|
||||
expose:
|
||||
- "{{ container_port }}"
|
||||
shm_size: 1g
|
||||
|
||||
@@ -130,6 +130,7 @@ NEXTCLOUD_WHITEBOARD_TMP_VOLUME: "{{ applications | get_app_conf(applic
|
||||
NEXTCLOUD_WHITEBOARD_FRONTCACHE_VOLUME: "{{ applications | get_app_conf(application_id, 'docker.volumes.whiteboard_fontcache') }}"
|
||||
NEXTCLOUD_WHITEBOARD_SERVICE_DIRECTORY: "{{ [ docker_compose.directories.services, 'whiteboard' ] | path_join }}"
|
||||
NEXTCLOUD_WHITEBOARD_SERVICE_DOCKERFILE: "{{ [ NEXTCLOUD_WHITEBOARD_SERVICE_DIRECTORY, 'Dockerfile' ] | path_join }}"
|
||||
NEXTCLOUD_WHITEBOARD_MAX_OLD_SPACE_SIZE: "{{ applications | node_max_old_space_size(application_id, NEXTCLOUD_WHITEBOARD_SERVICE) }}"
|
||||
|
||||
### Collabora
|
||||
NEXTCLOUD_COLLABORA_URL: "{{ domains | get_url('web-svc-collabora', WEB_PROTOCOL) }}"
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# General
|
||||
application_id: "web-app-peertube"
|
||||
database_type: "postgres"
|
||||
entity_name: "{{ application_id | get_entity_name }}"
|
||||
|
||||
# Docker
|
||||
docker_compose_flush_handlers: true
|
||||
@@ -16,32 +17,8 @@ PEERTUBE_CONFIG_VOLUME: "{{ applications | get_app_conf(application_id
|
||||
PEERTUBE_OIDC_PLUGIN: "peertube-plugin-auth-openid-connect"
|
||||
PEERTUBE_OIDC_ENABLED: "{{ applications | get_app_conf(application_id, 'features.oidc') }}"
|
||||
|
||||
# === Dynamic performance defaults ==========================================
|
||||
|
||||
# Raw Docker configuration values (with sane fallbacks)
|
||||
peertube_cpus: "{{ applications | get_app_conf(application_id, 'docker.services.peertube.cpus') | float }}"
|
||||
peertube_mem_limit_raw: "{{ applications | get_app_conf(application_id, 'docker.services.peertube.mem_limit') }}"
|
||||
peertube_mem_bytes: "{{ peertube_mem_limit_raw | human_to_bytes }}"
|
||||
peertube_mem_mb: "{{ ((peertube_mem_bytes | int) // (1024 * 1024)) | int }}"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Node heap size:
|
||||
# ~35% of total RAM, but at least 768 MB, at most 3072 MB,
|
||||
# and never more than 60% of total memory (safety cap for small containers)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_peertube_heap_candidate_mb: "{{ ((peertube_mem_mb | float) * 0.35) | round(0, 'floor') | int }}"
|
||||
_peertube_heap_cap_mb: "{{ ((peertube_mem_mb | float) * 0.60) | round(0, 'floor') | int }}"
|
||||
|
||||
# Step 1: enforce minimum (≥768 MB)
|
||||
_peertube_heap_min_applied: "{{ [ (_peertube_heap_candidate_mb | int), 768 ] | max }}"
|
||||
|
||||
# Step 2: determine hard cap (min of 3072 MB and 60% of total memory)
|
||||
_peertube_heap_hardcap: "{{ [ 3072, (_peertube_heap_cap_mb | int) ] | min }}"
|
||||
|
||||
# Step 3: final heap = min(min-applied, hardcap)
|
||||
PEERTUBE_MAX_OLD_SPACE_SIZE: "{{ [ (_peertube_heap_min_applied | int), (_peertube_heap_hardcap | int) ] | min }}"
|
||||
|
||||
# Transcoding concurrency: half the vCPUs; min 1, max 8
|
||||
_peertube_concurrency_candidate: "{{ ((peertube_cpus | float) * 0.5) | round(0, 'floor') | int }}"
|
||||
# Performance
|
||||
PEERTUBE_CPUS: "{{ applications | get_app_conf(application_id, 'docker.services.peertube.cpus') | float }}"
|
||||
PEERTUBE_MAX_OLD_SPACE_SIZE: "{{ applications | node_max_old_space_size(application_id, entity_name) }}"
|
||||
_peertube_concurrency_candidate: "{{ ((PEERTUBE_CPUS | float) * 0.5) | round(0, 'floor') | int }}"
|
||||
PEERTUBE_TRANSCODING_CONCURRENCY: "{{ [ ( [ (_peertube_concurrency_candidate | int), 1 ] | max ), 8 ] | min }}"
|
||||
|
||||
80
tests/unit/filter_plugins/test_node_autosize.py
Normal file
80
tests/unit/filter_plugins/test_node_autosize.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# tests/unit/filter_plugins/test_node_autosize.py
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
# Module under test
|
||||
import filter_plugins.node_autosize as na
|
||||
|
||||
try:
|
||||
from ansible.errors import AnsibleFilterError # type: ignore
|
||||
except Exception:
|
||||
AnsibleFilterError = Exception
|
||||
|
||||
|
||||
class TestNodeAutosizeFilter(unittest.TestCase):
|
||||
"""Unit tests for the node_autosize filter plugin."""
|
||||
|
||||
def setUp(self):
|
||||
# Default parameters used by all tests
|
||||
self.applications = {"web-app-nextcloud": {"docker": {"services": {"whiteboard": {}}}}}
|
||||
self.application_id = "web-app-nextcloud"
|
||||
self.service_name = "whiteboard"
|
||||
|
||||
# Patch get_app_conf (imported from module_utils.config_utils) inside the filter plugin
|
||||
self.patcher = patch("filter_plugins.node_autosize.get_app_conf")
|
||||
self.mock_get_app_conf = self.patcher.start()
|
||||
|
||||
def tearDown(self):
|
||||
self.patcher.stop()
|
||||
|
||||
def _set_mem_limit(self, value):
|
||||
"""Helper: mock get_app_conf to return a specific mem_limit value."""
|
||||
def _fake_get_app_conf(applications, application_id, config_path, strict=True, default=None, **_kwargs):
|
||||
assert application_id == self.application_id
|
||||
assert config_path == f"docker.services.{self.service_name}.mem_limit"
|
||||
return value
|
||||
self.mock_get_app_conf.side_effect = _fake_get_app_conf
|
||||
|
||||
# --- Tests for node_max_old_space_size (MB) ---
|
||||
|
||||
def test_512m_below_minimum_raises(self):
|
||||
# mem_limit=512 MB < min_mb=768 -> must raise
|
||||
self._set_mem_limit("512m")
|
||||
with self.assertRaises(AnsibleFilterError):
|
||||
na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
|
||||
def test_2g_caps_to_minimum_768(self):
|
||||
self._set_mem_limit("2g")
|
||||
mb = na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
self.assertEqual(mb, 768) # 35% of 2g = 700 < 768 -> min wins
|
||||
|
||||
def test_8g_uses_35_percent_without_hitting_hardcap(self):
|
||||
self._set_mem_limit("8g")
|
||||
mb = na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
self.assertEqual(mb, 2800) # 8g -> 8000 MB * 0.35 = 2800
|
||||
|
||||
def test_16g_hits_hardcap_3072(self):
|
||||
self._set_mem_limit("16g")
|
||||
mb = na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
self.assertEqual(mb, 3072) # 35% of 16g = 5600, hardcap=3072
|
||||
|
||||
def test_numeric_bytes_input(self):
|
||||
# 2 GiB in bytes (IEC): 2 * 1024 ** 3 = 2147483648
|
||||
self._set_mem_limit(2147483648)
|
||||
mb = na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
# 2 GiB ≈ 2147 MB; 35% => ~751, min 768 => 768
|
||||
self.assertEqual(mb, 768)
|
||||
|
||||
def test_invalid_unit_raises_error(self):
|
||||
self._set_mem_limit("12x") # invalid unit
|
||||
with self.assertRaises(AnsibleFilterError):
|
||||
na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
|
||||
def test_missing_mem_limit_raises_error(self):
|
||||
self._set_mem_limit(None)
|
||||
with self.assertRaises(AnsibleFilterError):
|
||||
na.node_max_old_space_size(self.applications, self.application_id, self.service_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user