From 22f1e24773e29fea87d5c37a00a60a03732db473 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Tue, 16 Dec 2025 21:25:34 +0100 Subject: [PATCH] split tests into lint/unit/integration and enhance vars-usage lint - Introduce separate test-lint, test-unit, and test-integration targets - Add TEST_PATTERN support to filter unittest discovery per test category - Move vars-usage check to tests/lint and classify it as a static lint test - Enhance vars-usage lint to report defining files and line numbers - Keep test-messy for backwards compatibility and Ansible syntax check https://chatgpt.com/share/6941c04c-b8d0-800f-9fe8-a5c01a1e1032 --- Makefile | 54 ++++- tests/integration/test_vars_usage_in_yaml.py | 107 +++++++-- tests/lint/__init__.py | 0 tests/lint/test_vars_usage_in_yaml.py | 233 +++++++++++++++++++ 4 files changed, 360 insertions(+), 34 deletions(-) create mode 100644 tests/lint/__init__.py create mode 100644 tests/lint/test_vars_usage_in_yaml.py diff --git a/Makefile b/Makefile index a263f14a..a9af5f0f 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ SHELL := /usr/bin/env bash VENV ?= .venv PYTHON := $(VENV)/bin/python PIP := $(PYTHON) -m pip + ROLES_DIR := ./roles APPLICATIONS_OUT := ./group_vars/all/04_applications.yml APPLICATIONS_SCRIPT := ./cli/setup/applications.py @@ -12,6 +13,12 @@ INCLUDES_SCRIPT := ./cli/build/role_include.py # Directory where these include-files will be written INCLUDES_OUT_DIR := ./tasks/groups +# --- Test filtering (unittest discover) --- +TEST_PATTERN ?= test*.py +LINT_TESTS_DIR ?= tests/lint +UNIT_TESTS_DIR ?= tests/unit +INTEGRATION_TESTS_DIR ?= tests/integration + # Compute extra users as before RESERVED_USERNAMES := $(shell \ find $(ROLES_DIR) -maxdepth 1 -type d -printf '%f\n' \ @@ -21,7 +28,10 @@ RESERVED_USERNAMES := $(shell \ | paste -sd, - \ ) -.PHONY: deps setup setup-clean test-messy test install +.PHONY: \ + deps setup setup-clean install \ + test test-messy test-lint test-unit test-integration \ + clean clean-keep-logs list tree mig dockerignore clean-keep-logs: @echo "🧹 Cleaning ignored files but keeping logs/…" @@ -32,20 +42,20 @@ clean: git clean -fdX list: - @echo Generating the roles list + @echo "Generating the roles list" $(PYTHON) main.py build roles_list tree: - @echo Generating Tree + @echo "Generating Tree" $(PYTHON) main.py build tree -D 2 --no-signal mig: list tree - @echo Creating meta data for meta infinity graph + @echo "Creating meta data for meta infinity graph" dockerignore: - @echo Create dockerignore + @echo "Create dockerignore" cat .gitignore > .dockerignore - echo ".git" >> .dockerignore + echo ".git" >> .dockerignore setup: deps dockerignore @echo "🔧 Generating users defaults → $(USERS_OUT)…" @@ -74,9 +84,34 @@ setup: deps dockerignore setup-clean: clean setup @echo "Full build with cleanup before was executed." -test-messy: - @echo "🧪 Running Python tests…" - PYTHONPATH=. $(PYTHON) -m unittest discover -s tests +# --- Tests (separated) --- + +test-lint: + @if [ ! -d "$(LINT_TESTS_DIR)" ]; then \ + echo "ℹ️ No lint tests directory found at $(LINT_TESTS_DIR) (skipping)."; \ + exit 0; \ + fi + @echo "🔎 Running lint tests (dir: $(LINT_TESTS_DIR), pattern: $(TEST_PATTERN))…" + PYTHONPATH=. $(PYTHON) -m unittest discover -s $(LINT_TESTS_DIR) -p "$(TEST_PATTERN)" + +test-unit: + @if [ ! -d "$(UNIT_TESTS_DIR)" ]; then \ + echo "ℹ️ No unit tests directory found at $(UNIT_TESTS_DIR) (skipping)."; \ + exit 0; \ + fi + @echo "🧪 Running unit tests (dir: $(UNIT_TESTS_DIR), pattern: $(TEST_PATTERN))…" + PYTHONPATH=. $(PYTHON) -m unittest discover -s $(UNIT_TESTS_DIR) -p "$(TEST_PATTERN)" + +test-integration: + @if [ ! -d "$(INTEGRATION_TESTS_DIR)" ]; then \ + echo "ℹ️ No integration tests directory found at $(INTEGRATION_TESTS_DIR) (skipping)."; \ + exit 0; \ + fi + @echo "🧪 Running integration tests (dir: $(INTEGRATION_TESTS_DIR), pattern: $(TEST_PATTERN))…" + PYTHONPATH=. $(PYTHON) -m unittest discover -s $(INTEGRATION_TESTS_DIR) -p "$(TEST_PATTERN)" + +# Backwards compatible target (kept) +test-messy: test-lint test-unit test-integration @echo "📑 Checking Ansible syntax…" ansible-playbook -i localhost, -c local $(foreach f,$(wildcard group_vars/all/*.yml),-e @$(f)) playbook.yml --syntax-check @@ -94,4 +129,3 @@ deps: install: deps @echo "✅ Python environment installed (editable)." - diff --git a/tests/integration/test_vars_usage_in_yaml.py b/tests/integration/test_vars_usage_in_yaml.py index c39f0080..8f355f87 100644 --- a/tests/integration/test_vars_usage_in_yaml.py +++ b/tests/integration/test_vars_usage_in_yaml.py @@ -1,7 +1,7 @@ import unittest from pathlib import Path import re -from typing import Any, Iterable, Set, List +from typing import Any, Iterable, Set, List, Dict, Tuple import yaml @@ -10,6 +10,7 @@ class TestVarsPassedAreUsed(unittest.TestCase): Integration test: - Walk all *.yml/*.yaml and *.j2 files - Collect variable names passed via task-level `vars:` + AND remember where they were defined (file + line) - Consider a var "used" if it appears in ANY of: • Jinja output blocks: {{ ... var_name ... }} • Jinja statement blocks: {% ... var_name ... %} @@ -52,19 +53,64 @@ class TestVarsPassedAreUsed(unittest.TestCase): for item in node: yield from self._walk_mapping(item) - # ---------- Collect vars passed via `vars:` ---------- + # ---------- Collect vars passed via `vars:` (with locations) ---------- - def _collect_vars_passed(self) -> Set[str]: + def _collect_vars_passed_with_locations(self) -> Tuple[Set[str], Dict[str, Set[Tuple[Path, int]]]]: + """ + Returns: + - a set of all var names passed via `vars:` + - a mapping var_name -> set of (path, line_number) where that var is defined under a vars: block + + Line numbers are best-effort based on raw text scanning (not YAML AST), + because PyYAML doesn't preserve line info. + """ collected: Set[str] = set() + locations: Dict[str, Set[Tuple[Path, int]]] = {} + + # Regex-based scan for: + # vars: + # key: + vars_block_re = re.compile(r"^(\s*)vars:\s*$") + key_re = re.compile(r"^(\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:") + for yml in self._iter_files(self.YAML_EXTENSIONS): - docs = self._load_yaml_documents(yml) - for doc in docs: - for mapping in self._walk_mapping(doc): - if "vars" in mapping and isinstance(mapping["vars"], dict): - for k in mapping["vars"].keys(): - if isinstance(k, str) and k.strip(): - collected.add(k.strip()) - return collected + try: + lines = yml.read_text(encoding="utf-8").splitlines() + except Exception: + continue + + i = 0 + while i < len(lines): + m = vars_block_re.match(lines[i]) + if not m: + i += 1 + continue + + base_indent = len(m.group(1)) + i += 1 + + while i < len(lines): + line = lines[i] + + # allow blank lines inside vars block + if not line.strip(): + i += 1 + continue + + indent = len(line) - len(line.lstrip(" ")) + # end of vars block when indentation drops back + if indent <= base_indent: + break + + km = key_re.match(line) + if km: + key = km.group(2).strip() + if key: + collected.add(key) + locations.setdefault(key, set()).add((yml, i + 1)) # 1-based line number + i += 1 + + return collected, locations # ---------- Gather text for Jinja usage scanning ---------- @@ -114,15 +160,12 @@ class TestVarsPassedAreUsed(unittest.TestCase): We use a tempered regex to avoid stopping at the first '}}'/'%}' and a negative lookahead `(?!\\s*\\()` after the token. """ - # Word token not followed by '(' → real variable usage token = r"\b" + re.escape(var_name) + r"\b(?!\s*\()" - # Output blocks: {{ ... }} pat_output = re.compile( r"{{(?:(?!}}).)*" + token + r"(?:(?!}}).)*}}", re.DOTALL, ) - # Statement blocks: {% ... %} pat_stmt = re.compile( r"{%(?:(?!%}).)*" + token + r"(?:(?!%}).)*%}", re.DOTALL, @@ -140,7 +183,7 @@ class TestVarsPassedAreUsed(unittest.TestCase): # ---------- Test ---------- def test_vars_passed_are_used_in_yaml_or_jinja(self): - vars_passed = self._collect_vars_passed() + vars_passed, vars_locations = self._collect_vars_passed_with_locations() self.assertTrue( vars_passed, "No variables passed via `vars:` were found. " @@ -157,18 +200,34 @@ class TestVarsPassedAreUsed(unittest.TestCase): or self._used_in_ansible_exprs(var_name, ansible_exprs) ) if not used: - if var_name not in ['ansible_python_interpreter']: + if var_name not in ["ansible_python_interpreter"]: unused.append(var_name) if unused: - msg = ( + lines: List[str] = [] + lines.append( "The following variables are passed via `vars:` but never referenced in:\n" " • Jinja output/statement blocks ({{ ... }} / {% ... %}) OR\n" - " • Ansible expressions (when/loop/with_*)\n\n" - + "\n".join(f" - {v}" for v in unused) - + "\n\nNotes:\n" - " • Function-like tokens (name followed by '(') are ignored intentionally.\n" - " • If a var is only used in Python code or other file types, extend the test accordingly\n" - " or remove the var if it's truly unused." + " • Ansible expressions (when/loop/with_*)\n" ) - self.fail(msg) + + for v in unused: + lines.append(f"- {v}") + locs = sorted( + vars_locations.get(v, set()), + key=lambda t: (str(t[0]), t[1]), + ) + if locs: + for path, lineno in locs: + rel = path.relative_to(self.REPO_ROOT) + lines.append(f" • {rel}:{lineno}") + else: + lines.append(" • (location unknown)") + + lines.append( + "\nNotes:\n" + " • Function-like tokens (name followed by '(') are ignored intentionally.\n" + " • If a var is only used in Python code or other file types, extend the test accordingly\n" + " or remove the var if it's truly unused." + ) + self.fail("\n".join(lines)) diff --git a/tests/lint/__init__.py b/tests/lint/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/lint/test_vars_usage_in_yaml.py b/tests/lint/test_vars_usage_in_yaml.py new file mode 100644 index 00000000..8f355f87 --- /dev/null +++ b/tests/lint/test_vars_usage_in_yaml.py @@ -0,0 +1,233 @@ +import unittest +from pathlib import Path +import re +from typing import Any, Iterable, Set, List, Dict, Tuple +import yaml + + +class TestVarsPassedAreUsed(unittest.TestCase): + """ + Integration test: + - Walk all *.yml/*.yaml and *.j2 files + - Collect variable names passed via task-level `vars:` + AND remember where they were defined (file + line) + - Consider a var "used" if it appears in ANY of: + • Jinja output blocks: {{ ... var_name ... }} + • Jinja statement blocks: {% ... var_name ... %} + (robust against inner '}' / '%' via tempered regex) + • Ansible expressions in YAML: + - when: (string or list of strings) + - loop: + - with_*: + + Additional rule: + - Do NOT count as used if the token is immediately followed by '(' (optionally with whitespace), + i.e. treat `var_name(` as a function/macro call, not a variable usage. + """ + + REPO_ROOT = Path(__file__).resolve().parents[2] + YAML_EXTENSIONS = {".yml", ".yaml"} + JINJA_EXTENSIONS = {".j2"} + + # ---------- File iteration & YAML loading ---------- + + def _iter_files(self, extensions: set[str]) -> Iterable[Path]: + for p in self.REPO_ROOT.rglob("*"): + if p.is_file() and p.suffix in extensions: + yield p + + def _load_yaml_documents(self, path: Path) -> List[Any]: + try: + with path.open("r", encoding="utf-8") as f: + return list(yaml.safe_load_all(f)) or [] + except Exception: + # File may contain heavy templating or anchors; skip structural parse + return [] + + def _walk_mapping(self, node: Any) -> Iterable[dict]: + if isinstance(node, dict): + yield node + for v in node.values(): + yield from self._walk_mapping(v) + elif isinstance(node, list): + for item in node: + yield from self._walk_mapping(item) + + # ---------- Collect vars passed via `vars:` (with locations) ---------- + + def _collect_vars_passed_with_locations(self) -> Tuple[Set[str], Dict[str, Set[Tuple[Path, int]]]]: + """ + Returns: + - a set of all var names passed via `vars:` + - a mapping var_name -> set of (path, line_number) where that var is defined under a vars: block + + Line numbers are best-effort based on raw text scanning (not YAML AST), + because PyYAML doesn't preserve line info. + """ + collected: Set[str] = set() + locations: Dict[str, Set[Tuple[Path, int]]] = {} + + # Regex-based scan for: + # vars: + # key: + vars_block_re = re.compile(r"^(\s*)vars:\s*$") + key_re = re.compile(r"^(\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:") + + for yml in self._iter_files(self.YAML_EXTENSIONS): + try: + lines = yml.read_text(encoding="utf-8").splitlines() + except Exception: + continue + + i = 0 + while i < len(lines): + m = vars_block_re.match(lines[i]) + if not m: + i += 1 + continue + + base_indent = len(m.group(1)) + i += 1 + + while i < len(lines): + line = lines[i] + + # allow blank lines inside vars block + if not line.strip(): + i += 1 + continue + + indent = len(line) - len(line.lstrip(" ")) + # end of vars block when indentation drops back + if indent <= base_indent: + break + + km = key_re.match(line) + if km: + key = km.group(2).strip() + if key: + collected.add(key) + locations.setdefault(key, set()).add((yml, i + 1)) # 1-based line number + i += 1 + + return collected, locations + + # ---------- Gather text for Jinja usage scanning ---------- + + def _concat_texts(self) -> str: + parts: List[str] = [] + for f in self._iter_files(self.YAML_EXTENSIONS | self.JINJA_EXTENSIONS): + try: + parts.append(f.read_text(encoding="utf-8")) + except Exception: + # Non-UTF8 or unreadable — ignore + pass + return "\n".join(parts) + + # ---------- Extract Ansible expression strings from YAML ---------- + + def _collect_ansible_expressions(self) -> List[str]: + """ + Return a flat list of strings taken from Ansible expression-bearing fields: + - when: or when: [, , ...] + - loop: + - with_*: + """ + exprs: List[str] = [] + for yml in self._iter_files(self.YAML_EXTENSIONS): + docs = self._load_yaml_documents(yml) + for doc in docs: + for mapping in self._walk_mapping(doc): + for key, val in list(mapping.items()): + if key == "when": + if isinstance(val, str): + exprs.append(val) + elif isinstance(val, list): + exprs.extend([x for x in val if isinstance(x, str)]) + elif key == "loop": + if isinstance(val, str): + exprs.append(val) + elif isinstance(key, str) and key.startswith("with_"): + if isinstance(val, str): + exprs.append(val) + return exprs + + # ---------- Usage checks ---------- + + def _used_in_jinja_blocks(self, var_name: str, text: str) -> bool: + """ + Detect var usage inside Jinja blocks, excluding function/macro calls like `var_name(...)`. + We use a tempered regex to avoid stopping at the first '}}'/'%}' and a negative lookahead + `(?!\\s*\\()` after the token. + """ + token = r"\b" + re.escape(var_name) + r"\b(?!\s*\()" + + pat_output = re.compile( + r"{{(?:(?!}}).)*" + token + r"(?:(?!}}).)*}}", + re.DOTALL, + ) + pat_stmt = re.compile( + r"{%(?:(?!%}).)*" + token + r"(?:(?!%}).)*%}", + re.DOTALL, + ) + return pat_output.search(text) is not None or pat_stmt.search(text) is not None + + def _used_in_ansible_exprs(self, var_name: str, exprs: List[str]) -> bool: + """ + Detect var usage in Ansible expressions (when/loop/with_*), + excluding function/macro calls like `var_name(...)`. + """ + pat = re.compile(r"\b" + re.escape(var_name) + r"\b(?!\s*\()") + return any(pat.search(e) for e in exprs) + + # ---------- Test ---------- + + def test_vars_passed_are_used_in_yaml_or_jinja(self): + vars_passed, vars_locations = self._collect_vars_passed_with_locations() + self.assertTrue( + vars_passed, + "No variables passed via `vars:` were found. " + "Check the repo root path in this test." + ) + + all_text = self._concat_texts() + ansible_exprs = self._collect_ansible_expressions() + + unused: List[str] = [] + for var_name in sorted(vars_passed): + used = ( + self._used_in_jinja_blocks(var_name, all_text) + or self._used_in_ansible_exprs(var_name, ansible_exprs) + ) + if not used: + if var_name not in ["ansible_python_interpreter"]: + unused.append(var_name) + + if unused: + lines: List[str] = [] + lines.append( + "The following variables are passed via `vars:` but never referenced in:\n" + " • Jinja output/statement blocks ({{ ... }} / {% ... %}) OR\n" + " • Ansible expressions (when/loop/with_*)\n" + ) + + for v in unused: + lines.append(f"- {v}") + locs = sorted( + vars_locations.get(v, set()), + key=lambda t: (str(t[0]), t[1]), + ) + if locs: + for path, lineno in locs: + rel = path.relative_to(self.REPO_ROOT) + lines.append(f" • {rel}:{lineno}") + else: + lines.append(" • (location unknown)") + + lines.append( + "\nNotes:\n" + " • Function-like tokens (name followed by '(') are ignored intentionally.\n" + " • If a var is only used in Python code or other file types, extend the test accordingly\n" + " or remove the var if it's truly unused." + ) + self.fail("\n".join(lines))