mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-08-15 16:40:45 +02:00
Add integration test to detect excessive duplicate 'when' conditions in tasks files
This test scans all .yml/.yaml files under any tasks/ directory and flags cases where the same 'when' condition appears on more than 3 tasks in the same file. Excessive duplication of identical conditions can harm Ansible performance because the condition is re-evaluated for every task. The test suggests replacing repeated conditions with an include_tasks call or a block guarded by the condition to evaluate it only once. https://chatgpt.com/share/6899c605-6f40-800f-a954-ccb62f8bbcf1
This commit is contained in:
parent
6e04ac58d2
commit
6318611931
202
tests/integration/test_when_condition_duplication.py
Normal file
202
tests/integration/test_when_condition_duplication.py
Normal file
@ -0,0 +1,202 @@
|
|||||||
|
# tests/integration/test_when_condition_duplication.py
|
||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, Iterable, List, Tuple
|
||||||
|
|
||||||
|
try:
|
||||||
|
import yaml # PyYAML
|
||||||
|
except ImportError as e:
|
||||||
|
raise SystemExit(
|
||||||
|
"PyYAML is required to run this test. Install with: pip install pyyaml"
|
||||||
|
) from e
|
||||||
|
|
||||||
|
|
||||||
|
THRESHOLD = 3 # fail if the same when-condition occurs on more than this many tasks
|
||||||
|
|
||||||
|
|
||||||
|
def _find_repo_root_containing(marker_names: Iterable[str], max_up: int = 8) -> Path:
|
||||||
|
"""
|
||||||
|
Walk upwards from this file to find the repo root. We assume the project root
|
||||||
|
contains at least one of `marker_names` (e.g., 'roles', '.git', 'playbooks').
|
||||||
|
"""
|
||||||
|
here = Path(__file__).resolve().parent
|
||||||
|
cur = here
|
||||||
|
for _ in range(max_up):
|
||||||
|
for marker in marker_names:
|
||||||
|
if (cur / marker).exists():
|
||||||
|
return cur
|
||||||
|
if cur.parent == cur:
|
||||||
|
break
|
||||||
|
cur = cur.parent
|
||||||
|
# Fallback: repo root assumed 3 levels up from tests/integration
|
||||||
|
return Path(__file__).resolve().parents[3]
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_when(value: Any) -> str:
|
||||||
|
"""
|
||||||
|
Normalize Ansible 'when' to a comparable string:
|
||||||
|
- If it's a list, join with ' && ' preserving order (order matters in Ansible).
|
||||||
|
- If it's a scalar, strip leading/trailing whitespace.
|
||||||
|
- Represent everything as a single-line string for stable comparison.
|
||||||
|
"""
|
||||||
|
if value is None:
|
||||||
|
return ""
|
||||||
|
if isinstance(value, list):
|
||||||
|
parts = []
|
||||||
|
for v in value:
|
||||||
|
s = "" if v is None else str(v).strip()
|
||||||
|
# collapse internal whitespace runs to a single space for stability
|
||||||
|
s = " ".join(s.split())
|
||||||
|
parts.append(s)
|
||||||
|
return " && ".join(parts)
|
||||||
|
# scalar (str, int, bool, jinja template, etc.)
|
||||||
|
s = str(value).strip()
|
||||||
|
return " ".join(s.split())
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_tasks(node: Any) -> Iterable[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Yield task-like dicts (those which may contain 'when') from arbitrary YAML structures.
|
||||||
|
Handles:
|
||||||
|
- Top-level lists of tasks
|
||||||
|
- Dicts that contain keys like 'block', 'rescue', 'always' (Ansible blocks)
|
||||||
|
- Nested lists/dicts recursively
|
||||||
|
We only yield a dict once as a "task" (the one that has a 'when' or looks like a task).
|
||||||
|
"""
|
||||||
|
if isinstance(node, list):
|
||||||
|
for item in node:
|
||||||
|
yield from _iter_tasks(item)
|
||||||
|
elif isinstance(node, dict):
|
||||||
|
# If this dict itself looks like a task (has module keys or 'when'/'name'),
|
||||||
|
# yield it, but also traverse nested blocks.
|
||||||
|
is_task_like = any(
|
||||||
|
k in node
|
||||||
|
for k in (
|
||||||
|
"when",
|
||||||
|
"name",
|
||||||
|
"block",
|
||||||
|
"rescue",
|
||||||
|
"always",
|
||||||
|
"include_tasks",
|
||||||
|
"import_tasks",
|
||||||
|
"ansible.builtin.include_tasks",
|
||||||
|
"ansible.builtin.import_tasks",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if is_task_like:
|
||||||
|
yield node
|
||||||
|
|
||||||
|
# Recurse into Ansible block sections if present
|
||||||
|
for section in ("block", "rescue", "always"):
|
||||||
|
if section in node and isinstance(node[section], list):
|
||||||
|
for item in node[section]:
|
||||||
|
yield from _iter_tasks(item)
|
||||||
|
# Also traverse other nested structures conservatively
|
||||||
|
for k, v in node.items():
|
||||||
|
if k not in ("block", "rescue", "always"):
|
||||||
|
if isinstance(v, (list, dict)):
|
||||||
|
yield from _iter_tasks(v)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_yaml_documents(path: Path) -> List[Any]:
|
||||||
|
"""
|
||||||
|
Load all YAML documents from a file. Best-effort parsing:
|
||||||
|
- If YAML fails due to Jinja syntax, we still raise, because a broken file
|
||||||
|
should be fixed in the repo.
|
||||||
|
"""
|
||||||
|
text = path.read_text(encoding="utf-8")
|
||||||
|
return list(yaml.safe_load_all(text)) # may return [None] if empty
|
||||||
|
|
||||||
|
|
||||||
|
def _collect_when_counts(yaml_docs: List[Any]) -> Dict[str, List[Tuple[str, str]]]:
|
||||||
|
"""
|
||||||
|
Return a mapping: normalized_when -> list of (task_name, hint_location)
|
||||||
|
where each entry corresponds to a task that uses that 'when'.
|
||||||
|
"""
|
||||||
|
counts: Dict[str, List[Tuple[str, str]]] = {}
|
||||||
|
for doc in yaml_docs:
|
||||||
|
for task in _iter_tasks(doc):
|
||||||
|
if "when" not in task:
|
||||||
|
continue
|
||||||
|
normalized = _normalize_when(task.get("when"))
|
||||||
|
if not normalized:
|
||||||
|
continue
|
||||||
|
task_name = str(task.get("name") or "<unnamed task>")
|
||||||
|
# Provide a minimal hint for where this came from (e.g., module/inclusion used)
|
||||||
|
hint = None
|
||||||
|
for key in (
|
||||||
|
"include_tasks",
|
||||||
|
"import_tasks",
|
||||||
|
"ansible.builtin.include_tasks",
|
||||||
|
"ansible.builtin.import_tasks",
|
||||||
|
):
|
||||||
|
if key in task:
|
||||||
|
hint = f"{key}: {task[key]}"
|
||||||
|
break
|
||||||
|
hint_loc = hint or "task"
|
||||||
|
counts.setdefault(normalized, []).append((task_name, hint_loc))
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
class WhenConditionDuplicationTest(unittest.TestCase):
|
||||||
|
"""
|
||||||
|
Integration test that ensures we don't repeat the same 'when' condition
|
||||||
|
on too many tasks in a single tasks file.
|
||||||
|
|
||||||
|
Rationale:
|
||||||
|
Repeating identical 'when' across many tasks forces Ansible to evaluate
|
||||||
|
the same condition over and over, which is bad for performance. Prefer
|
||||||
|
factoring those tasks into a dedicated file and use `include_tasks`
|
||||||
|
(or a block with a single 'when') to evaluate once.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_excessive_repeated_when_in_tasks_files(self):
|
||||||
|
repo_root = _find_repo_root_containing(marker_names=(".git", "roles", "playbooks"))
|
||||||
|
tasks_globs = [
|
||||||
|
"**/tasks/**/*.yml",
|
||||||
|
"**/tasks/**/*.yaml",
|
||||||
|
]
|
||||||
|
|
||||||
|
violations: List[str] = []
|
||||||
|
|
||||||
|
for pattern in tasks_globs:
|
||||||
|
for path in repo_root.glob(pattern):
|
||||||
|
# Only scan files that are inside the project workspace
|
||||||
|
if not path.is_file():
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
docs = _load_yaml_documents(path)
|
||||||
|
except Exception as exc:
|
||||||
|
self.fail(f"Failed to parse YAML file: {path}\n{exc}")
|
||||||
|
|
||||||
|
counts = _collect_when_counts(docs)
|
||||||
|
for normalized_when, occurrences in counts.items():
|
||||||
|
if len(occurrences) > THRESHOLD:
|
||||||
|
# Build a helpful error message showing a few sample tasks with this condition
|
||||||
|
sample = "\n".join(
|
||||||
|
f" - {tname} ({hint})"
|
||||||
|
for tname, hint in occurrences[:5]
|
||||||
|
)
|
||||||
|
violations.append(
|
||||||
|
(
|
||||||
|
f"{path} uses the same 'when' condition more than {THRESHOLD} times "
|
||||||
|
f"({len(occurrences)} occurrences):\n"
|
||||||
|
f" WHEN: {normalized_when}\n"
|
||||||
|
f" Sample tasks:\n{sample}\n"
|
||||||
|
f"Suggestion: Group these tasks into a separate file and call it with "
|
||||||
|
f"`include_tasks`, or use a single `block` guarded by this 'when' to avoid "
|
||||||
|
f"re-evaluating the condition repeatedly."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if violations:
|
||||||
|
self.fail(
|
||||||
|
"Excessive duplicate 'when' conditions detected (hurts performance):\n\n"
|
||||||
|
+ "\n\n".join(violations)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user