Implement filter checks: ensure all defined filters are used and remove dead code

Integration tests added/updated:
- tests/integration/test_filters_usage.py: AST-based detection of filter definitions (FilterModule.filters), robust Jinja detection ({{ ... }}, {% ... %}, {% filter ... %}), plus Python call tracking; fails if a filter is used only under tests/.
- tests/integration/test_filters_are_defined.py: inverse check — every filter used in .yml/.yaml/.j2/.jinja2/.tmpl must be defined locally. Scans only inside Jinja blocks and ignores pipes inside strings (e.g., lookup('pipe', "... | grep ... | awk ...")) to avoid false positives like trusted_hosts, woff/woff2, etc.

Bug fixes & robustness:
- Build regexes without %-string formatting to avoid ValueError from literal '%' in Jinja tags.
- Strip quoted strings in usage analysis so sed/grep/awk pipes are not miscounted as filters.
- Prevent self-matches in the defining file.

Cleanup / removal of dead code:
- Removed unused filter plugins and related unit tests:
  * filter_plugins/alias_domains_map.py
  * filter_plugins/get_application_id.py
  * filter_plugins/load_configuration.py
  * filter_plugins/safe.py
  * filter_plugins/safe_join.py
  * roles/svc-db-openldap/filter_plugins/build_ldap_nested_group_entries.py
  * roles/sys-ctl-bkp-docker-2-loc/filter_plugins/dict_to_cli_args.py
  * corresponding tests under tests/unit/*
- roles/svc-db-postgres/filter_plugins/split_postgres_connections.py: dropped no-longer-needed list_postgres_roles API; adjusted tests.

Misc:
- sys-stk-front-proxy/defaults/main.yml: clarified valid vhost_flavour values (comma-separated).

Ref: https://chatgpt.com/share/68b56bac-c4f8-800f-aeef-6708dbb44199
This commit is contained in:
2025-09-01 11:47:51 +02:00
parent 34b3f3b0ad
commit 7791bd8c04
20 changed files with 514 additions and 993 deletions

View File

@@ -0,0 +1,252 @@
# tests/integration/test_filters_are_defined.py
import ast
import os
import re
import unittest
from typing import Dict, List, Set, Tuple
# Repository root, resolved relative to this file (tests/integration/ -> repo root).
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))

# Where filter definitions may exist
FILTER_PLUGIN_BASES = [
    os.path.join(PROJECT_ROOT, "filter_plugins"),
    os.path.join(PROJECT_ROOT, "roles"),  # includes roles/*/filter_plugins
]

# Where to search for usages (EXCLUDES tests/ by default)
SEARCH_BASES = [PROJECT_ROOT]
EXCLUDE_TESTS = True  # keep True to require real usage sites

# File extensions to scan for template usage
USAGE_EXTS = (".yml", ".yaml", ".j2", ".jinja2", ".tmpl")

# Built-in / common filters that shouldn't require local definitions.
# NOTE(review): "lower", "upper", "capitalize" and "unique" appear twice below;
# harmless in a set literal, but could be deduplicated for tidiness.
BUILTIN_FILTERS: Set[str] = {
    # Jinja2 core/common
    "abs", "attr", "batch", "capitalize", "center", "default", "d", "dictsort", "escape",
    "e", "filesizeformat", "first", "float", "forceescape", "format", "groupby", "indent",
    "int", "join", "last", "length", "list", "lower", "map", "min", "max", "random",
    "reject", "rejectattr", "replace", "reverse", "round", "safe", "select",
    "selectattr", "slice", "sort", "string", "striptags", "sum", "title", "trim",
    "truncate", "unique", "upper", "urlencode", "urlize", "wordcount", "xmlattr",
    # Common Ansible filters (subset, extend as needed)
    "b64decode", "b64encode", "basename", "dirname", "from_json", "to_json",
    "from_yaml", "to_yaml", "combine", "difference", "intersect",
    "flatten", "zip", "regex_search", "regex_replace", "bool",
    "type_debug", "json_query", "mandatory", "hash", "checksum",
    "lower", "upper", "capitalize", "unique", "dict2items", "items2dict", "password_hash", "path_join", "product", "quote", "split", "ternary", "to_nice_yaml", "tojson",
    # Date/time-ish
    "strftime",
}
def _iter_files(base: str, *, exts: Tuple[str, ...]):
    """Yield every file under *base* whose name ends in *exts*.

    When the module-level EXCLUDE_TESTS flag is set, anything inside a
    tests/ directory is skipped so that only real usage sites count.
    """
    tests_fragment = os.sep + "tests" + os.sep
    for root, _dirs, filenames in os.walk(base):
        if EXCLUDE_TESTS and tests_fragment in root + os.sep:
            continue
        yield from (
            os.path.join(root, fn) for fn in filenames if fn.endswith(exts)
        )
def _is_filter_plugins_dir(path: str) -> bool:
    """Return True when 'filter_plugins' is a path component of *path*."""
    components = os.path.normpath(path).split(os.sep)
    return any(part == "filter_plugins" for part in components)
def _read(path: str) -> str:
    """Return the text of *path* (UTF-8, decode errors ignored); '' on any failure."""
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            return handle.read()
    except Exception:
        # Unreadable files are simply treated as empty.
        return ""
# ---------------------------
# Collect defined filters (AST)
# ---------------------------
class _FiltersCollector(ast.NodeVisitor):
    """Collect (filter_name, callable_name) pairs from return statements.

    Only direct `return {...}` / `return dict(...)` forms are resolved here;
    `return some_variable` is handled by the caller via Assign/Call walking.
    """

    def __init__(self):
        # Accumulated (filter_name, callable_name) tuples, in source order.
        self.defs: List[Tuple[str, str]] = []

    def visit_Return(self, node: ast.Return):
        # Each returned mapping contributes its pairs directly.
        self.defs.extend(self._extract_mapping(node.value))

    def _extract_mapping(self, node) -> List[Tuple[str, str]]:
        """Resolve a dict literal or dict(...) call into name pairs."""
        found: List[Tuple[str, str]] = []
        if isinstance(node, ast.Dict):
            for key_node, val_node in zip(node.keys, node.values):
                # Only non-empty string keys count; `**spread` entries have
                # key_node=None and are skipped.
                if (
                    isinstance(key_node, ast.Constant)
                    and isinstance(key_node.value, str)
                    and key_node.value
                ):
                    found.append((key_node.value, self._name_of(val_node)))
            return found
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "dict":
            # dict(name=fn, ...) — `**spread` keywords have arg=None and are skipped.
            for keyword in node.keywords or []:
                if keyword.arg:
                    found.append((keyword.arg, self._name_of(keyword.value)))
            return found
        # Plain names (returned variables) cannot be resolved here.
        return []

    @staticmethod
    def _name_of(v) -> str:
        """Best-effort identifier of the mapped callable (right-most name)."""
        if isinstance(v, ast.Name):
            return v.id
        if isinstance(v, ast.Attribute):
            return v.attr
        return ""
def _collect_filters_from_filters_method(func: ast.FunctionDef) -> List[Tuple[str, str]]:
    """
    Extract (filter_name, callable_name) pairs from a FilterModule.filters() body.

    Handles direct `return {...}` / `return dict(...)` via _FiltersCollector,
    plus the common assemble-then-return pattern:
        d = {...}; d.update({...}); return d
    """
    c = _FiltersCollector()
    c.visit(func)
    # Map of local-variable name -> pairs gathered from its assignments/updates.
    name_dicts: Dict[str, List[Tuple[str, str]]] = {}
    returned_names: List[str] = []
    for n in ast.walk(func):
        if isinstance(n, ast.Assign):
            # X = { ... } or X = dict(...)
            if len(n.targets) == 1 and isinstance(n.targets[0], ast.Name):
                tgt = n.targets[0].id
                pairs = _FiltersCollector()._extract_mapping(n.value)
                if pairs:
                    name_dicts.setdefault(tgt, []).extend(pairs)
        elif isinstance(n, ast.Call):
            # X.update({ ... })
            if isinstance(n.func, ast.Attribute) and n.func.attr == "update":
                obj = n.func.value
                if isinstance(obj, ast.Name) and n.args:
                    add_pairs = _FiltersCollector()._extract_mapping(n.args[0])
                    if add_pairs:
                        name_dicts.setdefault(obj.id, []).extend(add_pairs)
        elif isinstance(n, ast.Return) and isinstance(n.value, ast.Name):
            # `return X` -> merge whatever was assembled into X.
            returned_names.append(n.value.id)
    for nm in returned_names:
        for p in name_dicts.get(nm, []):
            c.defs.append(p)
    # dedupe while preserving first-seen order
    seen = set()
    out: List[Tuple[str, str]] = []
    for k, v in c.defs:
        if (k, v) not in seen:
            seen.add((k, v))
            out.append((k, v))
    return out
def collect_defined_filters() -> Set[str]:
    """Collect every filter name defined via FilterModule.filters() under the plugin bases."""
    names: Set[str] = set()
    for base in FILTER_PLUGIN_BASES:
        for path in _iter_files(base, exts=(".py",)):
            # Only files living inside a filter_plugins/ directory count.
            if not _is_filter_plugins_dir(path):
                continue
            source = _read(path)
            if not source:
                continue
            try:
                module = ast.parse(source, filename=path)
            except Exception:
                continue  # unparseable files contribute nothing
            for node in module.body:
                if not (isinstance(node, ast.ClassDef) and node.name == "FilterModule"):
                    continue
                for item in node.body:
                    if isinstance(item, ast.FunctionDef) and item.name == "filters":
                        names.update(
                            fname for fname, _callable in _collect_filters_from_filters_method(item)
                        )
    return names
# ---------------------------
# Collect used filters (Jinja-only scanning with string stripping)
# ---------------------------
# Capture inner bodies of Jinja blocks
RE_JINJA_MUSTACHE = re.compile(r"\{\{(.*?)\}\}", re.DOTALL)  # {{ ... }} expression blocks
RE_JINJA_TAG = re.compile(r"\{%(.*?)%\}", re.DOTALL)  # {% ... %} statement tags (set/if/for/...)
# Within a Jinja body, capture "| filter_name" (with args or not)
RE_PIPE_IN_BODY = re.compile(r"\|\s*([A-Za-z_]\w*)\b")
# Matches "{% filter filter_name %}"
RE_BLOCK_FILTER = re.compile(r"\{%\s*filter\s+([A-Za-z_]\w*)\b", re.DOTALL)
def _strip_quoted(text: str) -> str:
    """
    Drop everything inside single/double-quoted strings (quotes included).

    This prevents pipes embedded in string literals from being miscounted
    as Jinja filters, e.g. lookup('pipe', "pacman ... | grep ... | awk ...").
    Simple backslash escapes (\\" and \\') inside a string are honored; an
    unterminated string swallows the rest of the text.
    """
    kept = []
    idx, length = 0, len(text)
    active_quote = None  # the quote char we are currently inside, or None
    while idx < length:
        ch = text[idx]
        if active_quote is not None:
            if ch == "\\" and idx + 1 < length:
                idx += 2  # skip the escaped character
            elif ch == active_quote:
                active_quote = None  # string closed; resume keeping chars
                idx += 1
            else:
                idx += 1  # still inside the string: discard
        elif ch in "'\"":
            active_quote = ch  # string opened; quote itself is discarded
            idx += 1
        else:
            kept.append(ch)
            idx += 1
    return "".join(kept)
def _extract_filters_from_jinja_body(body: str) -> Set[str]:
    """Return every filter name piped to within one Jinja block body."""
    # Remove string literals first so pipes inside them are ignored.
    cleaned = _strip_quoted(body)
    names: Set[str] = set()
    for match in RE_PIPE_IN_BODY.finditer(cleaned):
        names.add(match.group(1))
    return names
def collect_used_filters() -> Set[str]:
    """Scan template/YAML files under SEARCH_BASES and return every filter name applied via a pipe."""
    found: Set[str] = set()
    for base in SEARCH_BASES:
        for path in _iter_files(base, exts=USAGE_EXTS):
            content = _read(path)
            if not content:
                continue
            # {{ ... }} expressions and {% ... %} tags are analyzed the same way.
            for pattern in (RE_JINJA_MUSTACHE, RE_JINJA_TAG):
                for match in pattern.finditer(content):
                    found |= _extract_filters_from_jinja_body(match.group(1))
            # {% filter name %} ... {% endfilter %} names the filter directly.
            found.update(m.group(1) for m in RE_BLOCK_FILTER.finditer(content))
    return found
# ---------------------------
# Test
# ---------------------------
class TestAllUsedFiltersAreDefined(unittest.TestCase):
    """Every filter used in templates/YAML must be defined locally or be a known builtin."""

    def test_all_used_filters_have_definitions(self):
        defined = collect_defined_filters()
        used = collect_used_filters()
        # Remove built-ins and known-safe filters
        candidates = sorted(used - BUILTIN_FILTERS)
        # Unknown filters are those not defined locally
        unknown = [f for f in candidates if f not in defined]
        if unknown:
            lines = [
                "These filters are used in templates/YAML but have no local definition "
                "(and are not in BUILTIN_FILTERS):"
            ]
            # Fix: was `f"- " + f` — an f-string with no placeholder (ruff F541);
            # a plain literal concatenation produces the identical message.
            lines.extend("- " + name for name in unknown)
            self.fail("\n".join(lines))
# Allow running this test file directly (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,260 @@
import ast
import os
import re
import unittest
from typing import Dict, List, Tuple, Optional
# Repository root, resolved relative to this file (tests/integration/ -> repo root).
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))

# Directories that may contain filter plugin definitions.
FILTER_PLUGIN_BASES = [
    os.path.join(PROJECT_ROOT, "filter_plugins"),
    os.path.join(PROJECT_ROOT, "roles"),  # includes roles/*/filter_plugins
]

# Where to look for usages of each defined filter.
SEARCH_BASES = [PROJECT_ROOT]
# Templates/YAML plus .py so Python-side calls of the callable also count.
SEARCH_EXTS = (".yml", ".yaml", ".j2", ".jinja2", ".tmpl", ".py")
def _iter_files(base: str, *, py_only: bool = False):
    """Yield files under *base*: only .py when *py_only*, otherwise SEARCH_EXTS."""
    wanted = (".py",) if py_only else SEARCH_EXTS
    for root, _dirs, filenames in os.walk(base):
        yield from (
            os.path.join(root, fn) for fn in filenames if fn.endswith(wanted)
        )
def _is_filter_plugins_dir(path: str) -> bool:
    """Whether 'filter_plugins' occurs as a component of *path*."""
    return os.path.normpath(path).split(os.sep).count("filter_plugins") > 0
def _read(path: str) -> str:
    """Read *path* as UTF-8 text, ignoring decode errors; '' on any failure."""
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
            content = fh.read()
    except Exception:
        # Missing/unreadable files are treated as empty.
        return ""
    return content
# ---------------------------
# Filter definition extraction
# ---------------------------
class _FiltersCollector(ast.NodeVisitor):
    """
    Extract mappings returned by FilterModule.filters().
    Handles:
        return {'name': fn, "x": y}
        d = {'name': fn}; d.update({...}); return d
        return dict(name=fn, x=y)
    """
    def __init__(self):
        self.defs: List[Tuple[str, str]] = []  # (filter_name, callable_name)

    def visit_Return(self, node: ast.Return):
        # Direct `return <mapping>` statements contribute their pairs immediately.
        mapping = self._extract_mapping(node.value)
        for k, v in mapping:
            self.defs.append((k, v))

    def _extract_mapping(self, node) -> List[Tuple[str, str]]:
        """Resolve a dict literal or dict(...) call node into (name, callable) pairs."""
        pairs: List[Tuple[str, str]] = []
        # dict literal
        if isinstance(node, ast.Dict):
            for k, v in zip(node.keys, node.values):
                # Non-string keys — and `**spread` entries, where k is None — are skipped.
                key = k.value if isinstance(k, ast.Constant) and isinstance(k.value, str) else None
                val = self._name_of(v)
                if key:
                    pairs.append((key, val))
            return pairs
        # dict(...) call
        if isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "dict":
            # keywords: dict(name=fn); `**spread` keywords have arg=None and are skipped
            for kw in node.keywords or []:
                if kw.arg:
                    pairs.append((kw.arg, self._name_of(kw.value)))
            return pairs
        # Name (variable) that might be a dict assembled earlier in the function
        if isinstance(node, ast.Name):
            # Fallback: we can't easily dataflow-resolve here; handled elsewhere by walking Assign/Call
            return []
        return []

    @staticmethod
    def _name_of(v) -> str:
        """Best-effort identifier for the mapped value; '' if not a simple name."""
        if isinstance(v, ast.Name):
            return v.id
        if isinstance(v, ast.Attribute):
            return v.attr  # take right-most name
        return ""
def _collect_filters_from_filters_method(func: ast.FunctionDef) -> List[Tuple[str, str]]:
    """
    Walks the function to assemble any mapping that flows into the return.
    We capture direct return dicts and also a common pattern:
        d = {...}
        d.update({...})
        return d

    Returns deduplicated (filter_name, callable_name) pairs in first-seen order.
    """
    collector = _FiltersCollector()
    collector.visit(func)
    # additionally scan simple 'X = {...}' and 'X.update({...})' patterns,
    # and if 'return X' occurs, merge those dicts.
    name_dicts: Dict[str, List[Tuple[str, str]]] = {}
    returns: List[str] = []
    for n in ast.walk(func):
        if isinstance(n, ast.Assign):
            # X = { ... }
            if len(n.targets) == 1 and isinstance(n.targets[0], ast.Name):
                tgt = n.targets[0].id
                pairs = _FiltersCollector()._extract_mapping(n.value)
                if pairs:
                    name_dicts.setdefault(tgt, []).extend(pairs)
        elif isinstance(n, ast.Call):
            # X.update({ ... }) — a no-arg .update() passes None, which
            # _extract_mapping safely resolves to an empty list.
            if isinstance(n.func, ast.Attribute) and n.func.attr == "update":
                obj = n.func.value
                if isinstance(obj, ast.Name):
                    add_pairs = _FiltersCollector()._extract_mapping(n.args[0] if n.args else None)
                    if add_pairs:
                        name_dicts.setdefault(obj.id, []).extend(add_pairs)
        elif isinstance(n, ast.Return) and isinstance(n.value, ast.Name):
            # `return X` — remember the variable so its pairs get merged below.
            returns.append(n.value.id)
    for rname in returns:
        for p in name_dicts.get(rname, []):
            collector.defs.append(p)
    # dedupe
    seen = set()
    out: List[Tuple[str, str]] = []
    for k, v in collector.defs:
        if (k, v) not in seen:
            seen.add((k, v))
            out.append((k, v))
    return out
def _ast_collect_filters_from_file(path: str) -> List[Tuple[str, str, str]]:
    """Parse *path* and return (filter_name, callable_name, path) triples for FilterModule.filters()."""
    source = _read(path)
    if not source:
        return []
    try:
        tree = ast.parse(source, filename=path)
    except Exception:
        # Unparseable file: contributes no definitions.
        return []
    triples: List[Tuple[str, str, str]] = []
    filter_classes = [
        node for node in tree.body
        if isinstance(node, ast.ClassDef) and node.name == "FilterModule"
    ]
    for cls in filter_classes:
        for member in cls.body:
            if isinstance(member, ast.FunctionDef) and member.name == "filters":
                triples.extend(
                    (fname, callname, path)
                    for fname, callname in _collect_filters_from_filters_method(member)
                )
    return triples
def collect_defined_filters() -> List[Dict[str, str]]:
    """Return one record per defined filter: {'filter', 'callable', 'file'}."""
    records: List[Dict[str, str]] = []
    for base in FILTER_PLUGIN_BASES:
        for path in _iter_files(base, py_only=True):
            # Only files inside a filter_plugins/ directory can define filters.
            if not _is_filter_plugins_dir(path):
                continue
            records.extend(
                {"filter": fname, "callable": cname, "file": fpath}
                for fname, cname, fpath in _ast_collect_filters_from_file(path)
            )
    return records
# ---------------------------
# Usage detection
# ---------------------------
def _compile_jinja_patterns(name: str) -> list[re.Pattern]:
    """
    Compile the regexes that detect Jinja usage of *name*.

    Built by string concatenation (never '%' formatting) so the literal '%'
    characters in Jinja tags cannot trip up format handling. Covers:
      - {{ ... | name }}
      - {% ... | name %}
      - {% filter name %}...{% endfilter %}
      - bare YAML/Jinja like: when: x | name
    """
    escaped = re.escape(name)
    mustache = re.compile(r"\{\{[^}]*\|\s*" + escaped + r"\b", re.DOTALL)
    tag = re.compile(r"\{%\s*[^%]*\|\s*" + escaped + r"\b", re.DOTALL)
    block_form = re.compile(r"\{%\s*filter\s+" + escaped + r"\b")
    bare_pipe = re.compile(r"\|\s*" + escaped + r"\b")
    return [mustache, tag, block_form, bare_pipe]
def _python_call_pattern(callable_name: str) -> Optional[re.Pattern]:
    """
    Compile a pattern matching a direct Python call of *callable_name*, e.g. "name(".

    Returns None when no callable name is known. Fix: the pattern is now built
    by concatenation instead of '%'-string formatting, matching the approach
    in _compile_jinja_patterns (the commit explicitly avoids %-formatting when
    assembling regexes).
    """
    if not callable_name:
        return None
    return re.compile(r"\b" + re.escape(callable_name) + r"\s*\(")
def search_usage(filter_name: str, callable_name: str, *, skip_file: str) -> tuple[bool, bool]:
    """
    Search the repository for usages of a filter.

    Returns (used_anywhere, used_outside_tests):
      - used_anywhere: True if found anywhere in the repo (excluding the defining file)
      - used_outside_tests: True if found outside any tests/ directory

    Fix: the tests/ exclusion previously checked the hard-coded substring
    "/tests/" against the raw path, which never matches on Windows where the
    separator is '\\'; paths are now normalized to '/' before the check, and
    the endswith check requires a separator so a file merely named "...tests"
    is not misclassified.
    """
    jinja_pats = _compile_jinja_patterns(filter_name)
    py_pat = _python_call_pattern(callable_name)
    used_anywhere = False
    used_outside_tests = False
    for base in SEARCH_BASES:
        for path in _iter_files(base, py_only=False):
            try:
                # Never count the defining file itself as a usage site.
                if os.path.samefile(path, skip_file):
                    continue
            except Exception:
                pass  # samefile can fail on vanished files; scan anyway
            content = _read(path)
            if not content:
                continue
            hit = any(pat.search(content) for pat in jinja_pats)
            # Python-side call of the underlying callable also counts as usage.
            if not hit and py_pat and path.endswith(".py") and py_pat.search(content):
                hit = True
            if hit:
                used_anywhere = True
                normalized = path.replace(os.sep, "/")
                if "/tests/" not in normalized and not normalized.endswith("/tests"):
                    used_outside_tests = True
    return used_anywhere, used_outside_tests
class TestFilterDefinitionsAreUsed(unittest.TestCase):
    """Every locally defined filter must be used somewhere outside tests/."""

    def test_every_defined_filter_is_used(self):
        definitions = collect_defined_filters()
        if not definitions:
            self.skipTest("No filters found under filter_plugins/.")
        unused = []
        for d in definitions:
            f_name, c_name, f_path = d["filter"], d["callable"], d["file"]
            used_any, used_outside = search_usage(f_name, c_name, skip_file=f_path)
            if not used_any:
                unused.append((f_name, c_name, f_path, "not used anywhere"))
            elif not used_outside:
                unused.append((f_name, c_name, f_path, "only used in tests"))
        if unused:
            msg = ["The following filters are invalidly unused:"]
            for f, c, p, reason in sorted(unused):
                # Fix: the path and the reason were concatenated with no
                # separator ("{p}{reason}"), producing unreadable messages
                # like ".../foo.pynot used anywhere".
                msg.append(f"- '{f}' (callable '{c or 'unknown'}') defined in {p}: {reason}")
            self.fail("\n".join(msg))
# Allow running this test file directly (outside a pytest/CI runner).
if __name__ == "__main__":
    unittest.main()