split tests into lint/unit/integration and enhance vars-usage lint

- Introduce separate test-lint, test-unit, and test-integration targets
- Add TEST_PATTERN support to filter unittest discovery per test category (usage example below)
- Move vars-usage check to tests/lint and classify it as a static lint test
- Enhance vars-usage lint to report defining files and line numbers
- Keep test-messy for backwards compatibility and Ansible syntax check
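For example (illustrative invocations, with the pattern value only a placeholder): `make test-lint`, `make test-unit` and `make test-integration` each run one category, `make test-unit TEST_PATTERN="test_something*.py"` narrows unittest discovery within that category, and `make test-messy` still runs all three followed by the Ansible syntax check.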

https://chatgpt.com/share/6941c04c-b8d0-800f-9fe8-a5c01a1e1032
2025-12-16 21:25:34 +01:00
parent 14548cbc52
commit 22f1e24773
4 changed files with 360 additions and 34 deletions

Makefile

@@ -2,6 +2,7 @@ SHELL := /usr/bin/env bash
 VENV ?= .venv
 PYTHON := $(VENV)/bin/python
 PIP := $(PYTHON) -m pip
 ROLES_DIR := ./roles
 APPLICATIONS_OUT := ./group_vars/all/04_applications.yml
 APPLICATIONS_SCRIPT := ./cli/setup/applications.py
@@ -12,6 +13,12 @@ INCLUDES_SCRIPT := ./cli/build/role_include.py
 # Directory where these include-files will be written
 INCLUDES_OUT_DIR := ./tasks/groups
+# --- Test filtering (unittest discover) ---
+TEST_PATTERN ?= test*.py
+LINT_TESTS_DIR ?= tests/lint
+UNIT_TESTS_DIR ?= tests/unit
+INTEGRATION_TESTS_DIR ?= tests/integration
 # Compute extra users as before
 RESERVED_USERNAMES := $(shell \
   find $(ROLES_DIR) -maxdepth 1 -type d -printf '%f\n' \
@@ -21,7 +28,10 @@ RESERVED_USERNAMES := $(shell \
   | paste -sd, - \
 )
-.PHONY: deps setup setup-clean test-messy test install
+.PHONY: \
+	deps setup setup-clean install \
+	test test-messy test-lint test-unit test-integration \
+	clean clean-keep-logs list tree mig dockerignore
 clean-keep-logs:
 	@echo "🧹 Cleaning ignored files but keeping logs/…"
@@ -32,20 +42,20 @@ clean:
 	git clean -fdX
 list:
-	@echo Generating the roles list
+	@echo "Generating the roles list"
 	$(PYTHON) main.py build roles_list
 tree:
-	@echo Generating Tree
+	@echo "Generating Tree"
 	$(PYTHON) main.py build tree -D 2 --no-signal
 mig: list tree
-	@echo Creating meta data for meta infinity graph
+	@echo "Creating meta data for meta infinity graph"
 dockerignore:
-	@echo Create dockerignore
+	@echo "Create dockerignore"
 	cat .gitignore > .dockerignore
 	echo ".git" >> .dockerignore
 setup: deps dockerignore
 	@echo "🔧 Generating users defaults → $(USERS_OUT)"
@@ -74,9 +84,34 @@ setup: deps dockerignore
 setup-clean: clean setup
 	@echo "Full build with cleanup before was executed."
-test-messy:
-	@echo "🧪 Running Python tests…"
-	PYTHONPATH=. $(PYTHON) -m unittest discover -s tests
+# --- Tests (separated) ---
+test-lint:
+	@if [ ! -d "$(LINT_TESTS_DIR)" ]; then \
+		echo " No lint tests directory found at $(LINT_TESTS_DIR) (skipping)."; \
+		exit 0; \
+	fi
+	@echo "🔎 Running lint tests (dir: $(LINT_TESTS_DIR), pattern: $(TEST_PATTERN))…"
+	PYTHONPATH=. $(PYTHON) -m unittest discover -s $(LINT_TESTS_DIR) -p "$(TEST_PATTERN)"
+test-unit:
+	@if [ ! -d "$(UNIT_TESTS_DIR)" ]; then \
+		echo " No unit tests directory found at $(UNIT_TESTS_DIR) (skipping)."; \
+		exit 0; \
+	fi
+	@echo "🧪 Running unit tests (dir: $(UNIT_TESTS_DIR), pattern: $(TEST_PATTERN))…"
+	PYTHONPATH=. $(PYTHON) -m unittest discover -s $(UNIT_TESTS_DIR) -p "$(TEST_PATTERN)"
+test-integration:
+	@if [ ! -d "$(INTEGRATION_TESTS_DIR)" ]; then \
+		echo " No integration tests directory found at $(INTEGRATION_TESTS_DIR) (skipping)."; \
+		exit 0; \
+	fi
+	@echo "🧪 Running integration tests (dir: $(INTEGRATION_TESTS_DIR), pattern: $(TEST_PATTERN))…"
+	PYTHONPATH=. $(PYTHON) -m unittest discover -s $(INTEGRATION_TESTS_DIR) -p "$(TEST_PATTERN)"
+# Backwards compatible target (kept)
+test-messy: test-lint test-unit test-integration
 	@echo "📑 Checking Ansible syntax…"
 	ansible-playbook -i localhost, -c local $(foreach f,$(wildcard group_vars/all/*.yml),-e @$(f)) playbook.yml --syntax-check
@@ -94,4 +129,3 @@ deps:
 install: deps
 	@echo "✅ Python environment installed (editable)."

vars-usage test (modified file)

@@ -1,7 +1,7 @@
 import unittest
 from pathlib import Path
 import re
-from typing import Any, Iterable, Set, List
+from typing import Any, Iterable, Set, List, Dict, Tuple
 import yaml
@@ -10,6 +10,7 @@ class TestVarsPassedAreUsed(unittest.TestCase):
     Integration test:
     - Walk all *.yml/*.yaml and *.j2 files
     - Collect variable names passed via task-level `vars:`
+      AND remember where they were defined (file + line)
     - Consider a var "used" if it appears in ANY of:
       • Jinja output blocks: {{ ... var_name ... }}
       • Jinja statement blocks: {% ... var_name ... %}
@@ -52,19 +53,64 @@ class TestVarsPassedAreUsed(unittest.TestCase):
             for item in node:
                 yield from self._walk_mapping(item)
-    # ---------- Collect vars passed via `vars:` ----------
-    def _collect_vars_passed(self) -> Set[str]:
+    # ---------- Collect vars passed via `vars:` (with locations) ----------
+    def _collect_vars_passed_with_locations(self) -> Tuple[Set[str], Dict[str, Set[Tuple[Path, int]]]]:
+        """
+        Returns:
+        - a set of all var names passed via `vars:`
+        - a mapping var_name -> set of (path, line_number) where that var is defined under a vars: block
+        Line numbers are best-effort based on raw text scanning (not YAML AST),
+        because PyYAML doesn't preserve line info.
+        """
         collected: Set[str] = set()
+        locations: Dict[str, Set[Tuple[Path, int]]] = {}
+        # Regex-based scan for:
+        # <indent>vars:
+        # <more-indent>key:
+        vars_block_re = re.compile(r"^(\s*)vars:\s*$")
+        key_re = re.compile(r"^(\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:")
         for yml in self._iter_files(self.YAML_EXTENSIONS):
-            docs = self._load_yaml_documents(yml)
-            for doc in docs:
-                for mapping in self._walk_mapping(doc):
-                    if "vars" in mapping and isinstance(mapping["vars"], dict):
-                        for k in mapping["vars"].keys():
-                            if isinstance(k, str) and k.strip():
-                                collected.add(k.strip())
-        return collected
+            try:
+                lines = yml.read_text(encoding="utf-8").splitlines()
+            except Exception:
+                continue
+            i = 0
+            while i < len(lines):
+                m = vars_block_re.match(lines[i])
+                if not m:
+                    i += 1
+                    continue
+                base_indent = len(m.group(1))
+                i += 1
+                while i < len(lines):
+                    line = lines[i]
+                    # allow blank lines inside vars block
+                    if not line.strip():
+                        i += 1
+                        continue
+                    indent = len(line) - len(line.lstrip(" "))
+                    # end of vars block when indentation drops back
+                    if indent <= base_indent:
+                        break
+                    km = key_re.match(line)
+                    if km:
+                        key = km.group(2).strip()
+                        if key:
+                            collected.add(key)
+                            locations.setdefault(key, set()).add((yml, i + 1))  # 1-based line number
+                    i += 1
+        return collected, locations
     # ---------- Gather text for Jinja usage scanning ----------
@@ -114,15 +160,12 @@ class TestVarsPassedAreUsed(unittest.TestCase):
         We use a tempered regex to avoid stopping at the first '}}'/'%}' and a negative lookahead
         `(?!\\s*\\()` after the token.
         """
-        # Word token not followed by '(' → real variable usage
         token = r"\b" + re.escape(var_name) + r"\b(?!\s*\()"
-        # Output blocks: {{ ... }}
         pat_output = re.compile(
             r"{{(?:(?!}}).)*" + token + r"(?:(?!}}).)*}}",
             re.DOTALL,
         )
-        # Statement blocks: {% ... %}
         pat_stmt = re.compile(
             r"{%(?:(?!%}).)*" + token + r"(?:(?!%}).)*%}",
             re.DOTALL,
@@ -140,7 +183,7 @@ class TestVarsPassedAreUsed(unittest.TestCase):
     # ---------- Test ----------
     def test_vars_passed_are_used_in_yaml_or_jinja(self):
-        vars_passed = self._collect_vars_passed()
+        vars_passed, vars_locations = self._collect_vars_passed_with_locations()
         self.assertTrue(
             vars_passed,
             "No variables passed via `vars:` were found. "
@@ -157,18 +200,34 @@ class TestVarsPassedAreUsed(unittest.TestCase):
                 or self._used_in_ansible_exprs(var_name, ansible_exprs)
             )
             if not used:
-                if var_name not in ['ansible_python_interpreter']:
+                if var_name not in ["ansible_python_interpreter"]:
                     unused.append(var_name)
         if unused:
-            msg = (
+            lines: List[str] = []
+            lines.append(
                 "The following variables are passed via `vars:` but never referenced in:\n"
                 " • Jinja output/statement blocks ({{ ... }} / {% ... %}) OR\n"
-                " • Ansible expressions (when/loop/with_*)\n\n"
-                + "\n".join(f" - {v}" for v in unused)
-                + "\n\nNotes:\n"
-                " • Function-like tokens (name followed by '(') are ignored intentionally.\n"
-                " • If a var is only used in Python code or other file types, extend the test accordingly\n"
-                " or remove the var if it's truly unused."
+                " • Ansible expressions (when/loop/with_*)\n"
             )
-            self.fail(msg)
+            for v in unused:
+                lines.append(f"- {v}")
+                locs = sorted(
+                    vars_locations.get(v, set()),
+                    key=lambda t: (str(t[0]), t[1]),
+                )
+                if locs:
+                    for path, lineno in locs:
+                        rel = path.relative_to(self.REPO_ROOT)
+                        lines.append(f"{rel}:{lineno}")
+                else:
+                    lines.append(" • (location unknown)")
+            lines.append(
+                "\nNotes:\n"
+                " • Function-like tokens (name followed by '(') are ignored intentionally.\n"
+                " • If a var is only used in Python code or other file types, extend the test accordingly\n"
+                " or remove the var if it's truly unused."
+            )
+            self.fail("\n".join(lines))
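To see what the new location tracking reports, here is a small, self-contained sketch (not part of this commit) that applies the same two regexes from _collect_vars_passed_with_locations to an inline YAML snippet; the task and variable names are invented for the example:

import re

# Same patterns as in _collect_vars_passed_with_locations above.
vars_block_re = re.compile(r"^(\s*)vars:\s*$")
key_re = re.compile(r"^(\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:")

# Hypothetical task snippet; unused_var is never referenced anywhere else.
sample = """\
- name: Render config
  template:
    src: config.j2
    dest: /etc/demo/config
  vars:
    demo_domain: example.org
    unused_var: 42
"""

lines = sample.splitlines()
i = 0
while i < len(lines):
    m = vars_block_re.match(lines[i])
    if not m:
        i += 1
        continue
    base_indent = len(m.group(1))
    i += 1
    while i < len(lines):
        line = lines[i]
        if not line.strip():
            i += 1
            continue
        indent = len(line) - len(line.lstrip(" "))
        if indent <= base_indent:
            break
        km = key_re.match(line)
        if km:
            # 1-based line number, the same value the failure message prints as <path>:<line>
            print(f"{km.group(2)} defined at line {i + 1}")
        i += 1

# Prints:
#   demo_domain defined at line 6
#   unused_var defined at line 7

A var collected this way but never found in a Jinja block or a when/loop/with_* expression is what the failure message now lists together with its file:line locations.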

tests/lint/__init__.py (new, empty file)

vars-usage test under tests/lint (new file)

@@ -0,0 +1,233 @@
import unittest
from pathlib import Path
import re
from typing import Any, Iterable, Set, List, Dict, Tuple
import yaml


class TestVarsPassedAreUsed(unittest.TestCase):
    """
    Integration test:
    - Walk all *.yml/*.yaml and *.j2 files
    - Collect variable names passed via task-level `vars:`
      AND remember where they were defined (file + line)
    - Consider a var "used" if it appears in ANY of:
      • Jinja output blocks: {{ ... var_name ... }}
      • Jinja statement blocks: {% ... var_name ... %}
        (robust against inner '}' / '%' via tempered regex)
      • Ansible expressions in YAML:
        - when: <expr> (string or list of strings)
        - loop: <expr>
        - with_*: <expr>

    Additional rule:
    - Do NOT count as used if the token is immediately followed by '(' (optionally with whitespace),
      i.e. treat `var_name(` as a function/macro call, not a variable usage.
    """

    REPO_ROOT = Path(__file__).resolve().parents[2]
    YAML_EXTENSIONS = {".yml", ".yaml"}
    JINJA_EXTENSIONS = {".j2"}

    # ---------- File iteration & YAML loading ----------
    def _iter_files(self, extensions: set[str]) -> Iterable[Path]:
        for p in self.REPO_ROOT.rglob("*"):
            if p.is_file() and p.suffix in extensions:
                yield p

    def _load_yaml_documents(self, path: Path) -> List[Any]:
        try:
            with path.open("r", encoding="utf-8") as f:
                return list(yaml.safe_load_all(f)) or []
        except Exception:
            # File may contain heavy templating or anchors; skip structural parse
            return []

    def _walk_mapping(self, node: Any) -> Iterable[dict]:
        if isinstance(node, dict):
            yield node
            for v in node.values():
                yield from self._walk_mapping(v)
        elif isinstance(node, list):
            for item in node:
                yield from self._walk_mapping(item)

    # ---------- Collect vars passed via `vars:` (with locations) ----------
    def _collect_vars_passed_with_locations(self) -> Tuple[Set[str], Dict[str, Set[Tuple[Path, int]]]]:
        """
        Returns:
        - a set of all var names passed via `vars:`
        - a mapping var_name -> set of (path, line_number) where that var is defined under a vars: block

        Line numbers are best-effort based on raw text scanning (not YAML AST),
        because PyYAML doesn't preserve line info.
        """
        collected: Set[str] = set()
        locations: Dict[str, Set[Tuple[Path, int]]] = {}

        # Regex-based scan for:
        # <indent>vars:
        # <more-indent>key:
        vars_block_re = re.compile(r"^(\s*)vars:\s*$")
        key_re = re.compile(r"^(\s*)([A-Za-z_][A-Za-z0-9_]*)\s*:")

        for yml in self._iter_files(self.YAML_EXTENSIONS):
            try:
                lines = yml.read_text(encoding="utf-8").splitlines()
            except Exception:
                continue

            i = 0
            while i < len(lines):
                m = vars_block_re.match(lines[i])
                if not m:
                    i += 1
                    continue
                base_indent = len(m.group(1))
                i += 1
                while i < len(lines):
                    line = lines[i]
                    # allow blank lines inside vars block
                    if not line.strip():
                        i += 1
                        continue
                    indent = len(line) - len(line.lstrip(" "))
                    # end of vars block when indentation drops back
                    if indent <= base_indent:
                        break
                    km = key_re.match(line)
                    if km:
                        key = km.group(2).strip()
                        if key:
                            collected.add(key)
                            locations.setdefault(key, set()).add((yml, i + 1))  # 1-based line number
                    i += 1
        return collected, locations

    # ---------- Gather text for Jinja usage scanning ----------
    def _concat_texts(self) -> str:
        parts: List[str] = []
        for f in self._iter_files(self.YAML_EXTENSIONS | self.JINJA_EXTENSIONS):
            try:
                parts.append(f.read_text(encoding="utf-8"))
            except Exception:
                # Non-UTF8 or unreadable — ignore
                pass
        return "\n".join(parts)

    # ---------- Extract Ansible expression strings from YAML ----------
    def _collect_ansible_expressions(self) -> List[str]:
        """
        Return a flat list of strings taken from Ansible expression-bearing fields:
        - when: <str> or when: [<str>, <str>, ...]
        - loop: <str>
        - with_*: <str>
        """
        exprs: List[str] = []
        for yml in self._iter_files(self.YAML_EXTENSIONS):
            docs = self._load_yaml_documents(yml)
            for doc in docs:
                for mapping in self._walk_mapping(doc):
                    for key, val in list(mapping.items()):
                        if key == "when":
                            if isinstance(val, str):
                                exprs.append(val)
                            elif isinstance(val, list):
                                exprs.extend([x for x in val if isinstance(x, str)])
                        elif key == "loop":
                            if isinstance(val, str):
                                exprs.append(val)
                        elif isinstance(key, str) and key.startswith("with_"):
                            if isinstance(val, str):
                                exprs.append(val)
        return exprs

    # ---------- Usage checks ----------
    def _used_in_jinja_blocks(self, var_name: str, text: str) -> bool:
        """
        Detect var usage inside Jinja blocks, excluding function/macro calls like `var_name(...)`.
        We use a tempered regex to avoid stopping at the first '}}'/'%}' and a negative lookahead
        `(?!\\s*\\()` after the token.
        """
        token = r"\b" + re.escape(var_name) + r"\b(?!\s*\()"
        pat_output = re.compile(
            r"{{(?:(?!}}).)*" + token + r"(?:(?!}}).)*}}",
            re.DOTALL,
        )
        pat_stmt = re.compile(
            r"{%(?:(?!%}).)*" + token + r"(?:(?!%}).)*%}",
            re.DOTALL,
        )
        return pat_output.search(text) is not None or pat_stmt.search(text) is not None

    def _used_in_ansible_exprs(self, var_name: str, exprs: List[str]) -> bool:
        """
        Detect var usage in Ansible expressions (when/loop/with_*),
        excluding function/macro calls like `var_name(...)`.
        """
        pat = re.compile(r"\b" + re.escape(var_name) + r"\b(?!\s*\()")
        return any(pat.search(e) for e in exprs)

    # ---------- Test ----------
    def test_vars_passed_are_used_in_yaml_or_jinja(self):
        vars_passed, vars_locations = self._collect_vars_passed_with_locations()
        self.assertTrue(
            vars_passed,
            "No variables passed via `vars:` were found. "
            "Check the repo root path in this test."
        )

        all_text = self._concat_texts()
        ansible_exprs = self._collect_ansible_expressions()

        unused: List[str] = []
        for var_name in sorted(vars_passed):
            used = (
                self._used_in_jinja_blocks(var_name, all_text)
                or self._used_in_ansible_exprs(var_name, ansible_exprs)
            )
            if not used:
                if var_name not in ["ansible_python_interpreter"]:
                    unused.append(var_name)

        if unused:
            lines: List[str] = []
            lines.append(
                "The following variables are passed via `vars:` but never referenced in:\n"
                " • Jinja output/statement blocks ({{ ... }} / {% ... %}) OR\n"
                " • Ansible expressions (when/loop/with_*)\n"
            )
            for v in unused:
                lines.append(f"- {v}")
                locs = sorted(
                    vars_locations.get(v, set()),
                    key=lambda t: (str(t[0]), t[1]),
                )
                if locs:
                    for path, lineno in locs:
                        rel = path.relative_to(self.REPO_ROOT)
                        lines.append(f"{rel}:{lineno}")
                else:
                    lines.append(" • (location unknown)")
            lines.append(
                "\nNotes:\n"
                " • Function-like tokens (name followed by '(') are ignored intentionally.\n"
                " • If a var is only used in Python code or other file types, extend the test accordingly\n"
                " or remove the var if it's truly unused."
            )
            self.fail("\n".join(lines))
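As a quick check of the tempered-regex usage detection described in _used_in_jinja_blocks, here is a standalone sketch (again not part of the commit, with an invented variable name) showing that a bare reference counts as usage while a call-style token does not:

import re

def used_in_jinja_blocks(var_name: str, text: str) -> bool:
    # Same construction as _used_in_jinja_blocks above: a word-boundary token that is
    # not followed by '(' must occur inside a tempered {{ ... }} or {% ... %} scan.
    token = r"\b" + re.escape(var_name) + r"\b(?!\s*\()"
    pat_output = re.compile(r"{{(?:(?!}}).)*" + token + r"(?:(?!}}).)*}}", re.DOTALL)
    pat_stmt = re.compile(r"{%(?:(?!%}).)*" + token + r"(?:(?!%}).)*%}", re.DOTALL)
    return pat_output.search(text) is not None or pat_stmt.search(text) is not None

template = "{% if demo_domain %}server_name {{ demo_domain }};{% endif %}"
call_only = "{{ lookup('env', 'HOME') }} {{ demo_domain(1) }}"

print(used_in_jinja_blocks("demo_domain", template))   # True: referenced as a variable
print(used_in_jinja_blocks("demo_domain", call_only))  # False: only appears as a call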