Refine role dependency graph/tree builders and tests

- Refactor cli/build/graph.py to use cached metadata and dependency indices
  for faster graph generation and cleaner separation of concerns
- Refactor cli/build/tree.py to delegate per-role processing to process_role()
  and support parallel execution via ProcessPoolExecutor
- Add unit tests for graph helper functions and build_mappings()
  under tests/unit/cli/build/test_graph.py
- Add unit tests for find_roles() and process_role() behaviour
  under tests/unit/cli/build/test_tree.py
- Remove the old include_role dependency integration test which relied on the
  previous tree.json dependencies bucket

For details see ChatGPT conversation: https://chatgpt.com/share/6926b805-28a0-800f-a075-e5250aab5c4a
This commit is contained in:
2025-11-26 09:20:45 +01:00
parent aca2da885d
commit 9c65bd4839
5 changed files with 760 additions and 283 deletions

View File

@@ -6,168 +6,347 @@ import json
import re
from typing import List, Dict, Any, Set
from module_utils.role_dependency_resolver import RoleDependencyResolver
# Regex used to ignore Jinja expressions inside include/import statements
JINJA_PATTERN = re.compile(r'{{.*}}')

# All dependency types the graph builder supports
ALL_DEP_TYPES = [
    "run_after",
    "dependencies",
    "include_tasks",
    "import_tasks",
    "include_role",
    "import_role",
]

# Graph directions: outgoing edges ("to") vs incoming edges ("from")
ALL_DIRECTIONS = ["to", "from"]

# Combined keys: e.g. "include_role_to", "dependencies_from", etc.
ALL_KEYS = [f"{dep}_{direction}" for dep in ALL_DEP_TYPES for direction in ALL_DIRECTIONS]
# ------------------------------------------------------------
# Helpers for locating meta and task files
# ------------------------------------------------------------
def find_role_meta(roles_dir: str, role: str) -> str:
    """Return path to meta/main.yml of a role or raise FileNotFoundError."""
    path = os.path.join(roles_dir, role, "meta", "main.yml")
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Metadata not found for role: {role}")
    return path
def find_role_tasks(roles_dir: str, role: str) -> str:
    """Return path to tasks/main.yml of a role or raise FileNotFoundError."""
    path = os.path.join(roles_dir, role, "tasks", "main.yml")
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Tasks not found for role: {role}")
    return path
# ------------------------------------------------------------
# Parsers for meta and tasks
# ------------------------------------------------------------
def load_meta(path: str) -> Dict[str, Any]:
    """
    Load metadata from meta/main.yml.

    Returns a dict with:
      - galaxy_info:   the raw galaxy_info mapping ({} if absent)
      - run_after:     galaxy_info.run_after ([] if absent/None)
      - dependencies:  top-level dependencies ([] if absent/None)
    """
    with open(path, "r") as f:
        data = yaml.safe_load(f) or {}

    # Normalise "present but null" values to empty containers.
    galaxy_info = data.get("galaxy_info", {}) or {}
    return {
        "galaxy_info": galaxy_info,
        "run_after": galaxy_info.get("run_after", []) or [],
        "dependencies": data.get("dependencies", []) or [],
    }
def load_tasks(path: str, dep_type: str) -> List[str]:
    """
    Parse include_tasks/import_tasks entries from tasks/main.yml.

    Only simple string names are accepted: dict forms are reduced to their
    "name" key, and anything containing a Jinja expression is skipped.
    """
    with open(path, "r") as f:
        data = yaml.safe_load(f) or []

    roles: List[str] = []
    for task in data:
        if not isinstance(task, dict):
            continue
        if dep_type in task:
            entry = task[dep_type]
            if isinstance(entry, dict):
                entry = entry.get("name", "")
            # Reject non-strings, empty names, and Jinja-templated names.
            if isinstance(entry, str) and entry and not JINJA_PATTERN.search(entry):
                roles.append(entry)
    return roles
# ------------------------------------------------------------
# Graph builder using precomputed caches (fast)
# ------------------------------------------------------------
def build_single_graph(
    start_role: str,
    dep_type: str,
    direction: str,
    roles_dir: str,
    max_depth: int,
    caches: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Build a graph (nodes + links) for one role, one dep_type, one direction.

    Traversal uses only the precomputed in-memory caches; the filesystem is
    touched only as a fallback when a role's meta is missing from the cache.

    caches structure:
        caches["meta"][role]            -> meta information
        caches["deps"][dep_type][role]  -> outgoing targets
        caches["rev"][dep_type][target] -> set of source roles
    """
    nodes: Dict[str, Dict[str, Any]] = {}
    links: List[Dict[str, str]] = []

    meta_cache = caches["meta"]
    deps_cache = caches["deps"]
    rev_cache = caches["rev"]

    def ensure_node(role: str) -> None:
        """Register *role* as a node (idempotent), lazily loading meta if needed."""
        if role in nodes:
            return
        meta = meta_cache.get(role)
        if meta is None:
            # Fallback: lazy load from disk; roles without meta still get a node.
            try:
                meta = load_meta(find_role_meta(roles_dir, role))
                meta_cache[role] = meta
            except FileNotFoundError:
                meta = {"galaxy_info": {}}
        galaxy_info = meta.get("galaxy_info", {}) or {}
        nodes[role] = {
            "id": role,
            **galaxy_info,
            "doc_url": f"https://docs.infinito.nexus/roles/{role}/README.html",
            "source_url": f"https://github.com/kevinveenbirkenbach/infinito-nexus/tree/master/roles/{role}",
        }

    def outgoing(role: str) -> List[str]:
        """Targets reachable from *role* for the current dep_type."""
        return deps_cache.get(dep_type, {}).get(role, []) or []

    def incoming(role: str) -> Set[str]:
        """Sources pointing at *role* for the current dep_type."""
        return rev_cache.get(dep_type, {}).get(role, set())

    def traverse(role: str, depth: int, path: Set[str]) -> None:
        """DFS from *role*; *path* guards against cycles, max_depth>0 bounds depth."""
        ensure_node(role)
        if max_depth > 0 and depth >= max_depth:
            return

        if direction == "to":
            for tgt in outgoing(role):
                ensure_node(tgt)
                links.append({"source": role, "target": tgt, "type": dep_type})
                if tgt not in path:
                    traverse(tgt, depth + 1, path | {tgt})
        else:  # direction == "from"
            for src in incoming(role):
                ensure_node(src)
                links.append({"source": src, "target": role, "type": dep_type})
                if src not in path:
                    traverse(src, depth + 1, path | {src})

    traverse(start_role, 0, {start_role})
    return {"nodes": list(nodes.values()), "links": links}
# ------------------------------------------------------------
# Build all graph variants for one role
# ------------------------------------------------------------
def build_mappings(
    start_role: str,
    roles_dir: str,
    max_depth: int,
) -> Dict[str, Any]:
    """
    Build all 12 graph variants (6 dep types x 2 directions) for *start_role*.

    Accelerated version:
      - One-time scan of all metadata
      - One-time scan of all include_role/import_role
      - One-time scan of include_tasks/import_tasks
      - Build reverse-index tables
      - Then generate all graphs purely from memory
    """
    result: Dict[str, Any] = {}

    roles = [
        r for r in os.listdir(roles_dir)
        if os.path.isdir(os.path.join(roles_dir, r))
    ]

    # Pre-caches shared by every build_single_graph() call.
    meta_cache: Dict[str, Dict[str, Any]] = {}
    deps_cache: Dict[str, Dict[str, List[str]]] = {dep: {} for dep in ALL_DEP_TYPES}
    rev_cache: Dict[str, Dict[str, Set[str]]] = {dep: {} for dep in ALL_DEP_TYPES}

    resolver = RoleDependencyResolver(roles_dir)

    # --------------------------------------------------------
    # Step 1: Preload meta-based deps (run_after, dependencies)
    # --------------------------------------------------------
    for role in roles:
        try:
            meta = load_meta(find_role_meta(roles_dir, role))
        except FileNotFoundError:
            # Roles without meta/main.yml simply contribute no meta edges.
            continue
        meta_cache[role] = meta
        for dep_key in ["run_after", "dependencies"]:
            values = meta.get(dep_key, []) or []
            if isinstance(values, list) and values:
                deps_cache[dep_key][role] = values
                for tgt in values:
                    if isinstance(tgt, str) and tgt.strip():
                        rev_cache[dep_key].setdefault(tgt.strip(), set()).add(role)

    # --------------------------------------------------------
    # Step 2: Preload include_role/import_role (resolver)
    # --------------------------------------------------------
    for role in roles:
        role_path = os.path.join(roles_dir, role)
        inc, imp = resolver._scan_tasks(role_path)
        if inc:
            inc_list = sorted(inc)
            deps_cache["include_role"][role] = inc_list
            for tgt in inc_list:
                rev_cache["include_role"].setdefault(tgt, set()).add(role)
        if imp:
            imp_list = sorted(imp)
            deps_cache["import_role"][role] = imp_list
            for tgt in imp_list:
                rev_cache["import_role"].setdefault(tgt, set()).add(role)

    # --------------------------------------------------------
    # Step 3: Preload include_tasks/import_tasks
    # --------------------------------------------------------
    for role in roles:
        try:
            tasks_path = find_role_tasks(roles_dir, role)
        except FileNotFoundError:
            continue
        for dep_key in ["include_tasks", "import_tasks"]:
            values = load_tasks(tasks_path, dep_key)
            if values:
                deps_cache[dep_key][role] = values
                for tgt in values:
                    rev_cache[dep_key].setdefault(tgt, set()).add(role)

    caches = {
        "meta": meta_cache,
        "deps": deps_cache,
        "rev": rev_cache,
    }

    # --------------------------------------------------------
    # Step 4: Build all graphs from caches
    # --------------------------------------------------------
    for key in ALL_KEYS:
        dep_type, direction = key.rsplit("_", 1)
        try:
            result[key] = build_single_graph(
                start_role=start_role,
                dep_type=dep_type,
                direction=direction,
                roles_dir=roles_dir,
                max_depth=max_depth,
                caches=caches,
            )
        except Exception:
            # One broken variant must not abort the remaining graphs.
            result[key] = {"nodes": [], "links": []}

    return result
# ------------------------------------------------------------
# Output helper
# ------------------------------------------------------------
def output_graph(graph_data: Any, fmt: str, start: str, key: str):
    """
    Emit one graph either to the console or to a file named "{start}_{key}.{fmt}".

    fmt is expected to be "console", "yaml" or "json" (enforced upstream by
    argparse choices, so no explicit validation branch is needed here).
    """
    base = f"{start}_{key}"
    if fmt == "console":
        print(f"--- {base} ---")
        print(yaml.safe_dump(graph_data, sort_keys=False))
    else:
        path = f"{base}.{fmt}"
        with open(path, "w") as f:
            if fmt == "yaml":
                yaml.safe_dump(graph_data, f, sort_keys=False)
            else:
                json.dump(graph_data, f, indent=2)
        print(f"Wrote {path}")
# ------------------------------------------------------------
# CLI entrypoint
# ------------------------------------------------------------
def main():
    """CLI entrypoint: build and emit all dependency graphs for one role."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_roles_dir = os.path.abspath(os.path.join(script_dir, "..", "..", "roles"))

    parser = argparse.ArgumentParser(description="Generate dependency graphs")
    parser.add_argument("-r", "--role", required=True, help="Starting role name")
    parser.add_argument("-D", "--depth", type=int, default=0, help="Max recursion depth")
    parser.add_argument("-o", "--output", choices=["yaml", "json", "console"], default="console")
    parser.add_argument("--roles-dir", default=default_roles_dir, help="Roles directory")
    args = parser.parse_args()

    graphs = build_mappings(args.role, args.roles_dir, args.depth)
    for key in ALL_KEYS:
        # Missing variants degrade to an empty graph rather than failing.
        graph_data = graphs.get(key, {"nodes": [], "links": []})
        output_graph(graph_data, args.output, args.role, key)


if __name__ == "__main__":
    main()

View File

@@ -2,19 +2,76 @@
import os
import argparse
import json
from typing import Dict, Any
from typing import Dict, Any, Optional, Iterable, Tuple
from concurrent.futures import ProcessPoolExecutor, as_completed
from cli.build.graph import build_mappings, output_graph
from module_utils.role_dependency_resolver import RoleDependencyResolver
def find_roles(roles_dir: str) -> Iterable[Tuple[str, str]]:
    """
    Yield (role_name, role_path) for all roles in the given roles_dir.

    Only direct subdirectories count as roles; plain files are ignored.
    """
    for entry in os.listdir(roles_dir):
        path = os.path.join(roles_dir, entry)
        if os.path.isdir(path):
            yield entry, path
def process_role(
    role_name: str,
    roles_dir: str,
    depth: int,
    shadow_folder: Optional[str],
    output: str,
    preview: bool,
    verbose: bool,
    no_include_role: bool,   # currently unused, kept for CLI compatibility
    no_import_role: bool,    # currently unused, kept for CLI compatibility
    no_dependencies: bool,   # currently unused, kept for CLI compatibility
    no_run_after: bool,      # currently unused, kept for CLI compatibility
) -> None:
    """
    Worker function: build graphs and (optionally) write meta/tree.json for a single role.

    Note:
        This version no longer adds a custom top-level "dependencies" bucket.
        Only the graphs returned by build_mappings() are written.
    """
    role_path = os.path.join(roles_dir, role_name)

    if verbose:
        print(f"[worker] Processing role: {role_name}")

    # Build the full graph structure (all dep types / directions) for this role
    graphs: Dict[str, Any] = build_mappings(
        start_role=role_name,
        roles_dir=roles_dir,
        max_depth=depth,
    )

    # Preview mode: dump graphs to console instead of writing tree.json
    if preview:
        for key, data in graphs.items():
            if verbose:
                print(f"[worker] Previewing graph '{key}' for role '{role_name}'")
            # In preview mode we always output as console
            output_graph(data, "console", role_name, key)
        return

    # Non-preview: write meta/tree.json for this role
    if shadow_folder:
        tree_file = os.path.join(shadow_folder, role_name, "meta", "tree.json")
    else:
        tree_file = os.path.join(role_path, "meta", "tree.json")

    os.makedirs(os.path.dirname(tree_file), exist_ok=True)
    with open(tree_file, "w", encoding="utf-8") as f:
        json.dump(graphs, f, indent=2)
    print(f"Wrote {tree_file}")
def main():
script_dir = os.path.dirname(os.path.abspath(__file__))
default_roles_dir = os.path.abspath(os.path.join(script_dir, "..", "..", "roles"))
@@ -22,24 +79,67 @@ def main():
parser = argparse.ArgumentParser(
description="Generate all graphs for each role and write meta/tree.json"
)
parser.add_argument("-d", "--role_dir", default=default_roles_dir,
help=f"Path to roles directory (default: {default_roles_dir})")
parser.add_argument("-D", "--depth", type=int, default=0,
help="Max recursion depth (>0) or <=0 to stop on cycle")
parser.add_argument("-o", "--output", choices=["yaml", "json", "console"],
default="json", help="Output format")
parser.add_argument("-p", "--preview", action="store_true",
help="Preview graphs to console instead of writing files")
parser.add_argument("-s", "--shadow-folder", type=str, default=None,
help="If set, writes tree.json to this shadow folder instead of the role's actual meta/ folder")
parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument(
"-d",
"--role_dir",
default=default_roles_dir,
help=f"Path to roles directory (default: {default_roles_dir})",
)
parser.add_argument(
"-D",
"--depth",
type=int,
default=0,
help="Max recursion depth (>0) or <=0 to stop on cycle",
)
parser.add_argument(
"-o",
"--output",
choices=["yaml", "json", "console"],
default="json",
help="Output format for preview mode",
)
parser.add_argument(
"-p",
"--preview",
action="store_true",
help="Preview graphs to console instead of writing files",
)
parser.add_argument(
"-s",
"--shadow-folder",
type=str,
default=None,
help="If set, writes tree.json to this shadow folder instead of the role's actual meta/ folder",
)
parser.add_argument(
"-v",
"--verbose",
action="store_true",
help="Enable verbose logging",
)
# Toggles
parser.add_argument("--no-include-role", action="store_true", help="Do not scan include_role")
parser.add_argument("--no-import-role", action="store_true", help="Do not scan import_role")
parser.add_argument("--no-dependencies", action="store_true", help="Do not read meta/main.yml dependencies")
parser.add_argument("--no-run-after", action="store_true",
help="Do not read galaxy_info.run_after from meta/main.yml")
# Toggles (kept for CLI compatibility, currently only meaningful for future extensions)
parser.add_argument(
"--no-include-role",
action="store_true",
help="Reserved: do not include include_role in custom dependency bucket",
)
parser.add_argument(
"--no-import-role",
action="store_true",
help="Reserved: do not include import_role in custom dependency bucket",
)
parser.add_argument(
"--no-dependencies",
action="store_true",
help="Reserved: do not include meta dependencies in custom dependency bucket",
)
parser.add_argument(
"--no-run-after",
action="store_true",
help="Reserved: do not include run_after in custom dependency bucket",
)
args = parser.parse_args()
@@ -50,54 +150,53 @@ def main():
print(f"Preview mode: {args.preview}")
print(f"Shadow folder: {args.shadow_folder}")
resolver = RoleDependencyResolver(args.role_dir)
roles = [role_name for role_name, _ in find_roles(args.role_dir)]
for role_name, role_path in find_roles(args.role_dir):
if args.verbose:
print(f"Processing role: {role_name}")
# For preview, run sequentially to avoid completely interleaved output.
if args.preview:
for role_name in roles:
process_role(
role_name=role_name,
roles_dir=args.role_dir,
depth=args.depth,
shadow_folder=args.shadow_folder,
output=args.output,
preview=True,
verbose=args.verbose,
no_include_role=args.no_include_role,
no_import_role=args.no_import_role,
no_dependencies=args.no_dependencies,
no_run_after=args.no_run_after,
)
return
graphs: Dict[str, Any] = build_mappings(
start_role=role_name,
roles_dir=args.role_dir,
max_depth=args.depth
)
# Non-preview: roles are processed in parallel
with ProcessPoolExecutor() as executor:
futures = {
executor.submit(
process_role,
role_name,
args.role_dir,
args.depth,
args.shadow_folder,
args.output,
False, # preview=False in parallel mode
args.verbose,
args.no_include_role,
args.no_import_role,
args.no_dependencies,
args.no_run_after,
): role_name
for role_name in roles
}
# Direct deps (depth=1) getrennt erfasst für buckets
inc_roles, imp_roles = resolver._scan_tasks(role_path)
meta_deps = resolver._extract_meta_dependencies(role_path)
run_after = set()
if not args.no_run_after:
run_after = resolver._extract_meta_run_after(role_path)
if any([not args.no_include_role and inc_roles,
not args.no_import_role and imp_roles,
not args.no_dependencies and meta_deps,
not args.no_run_after and run_after]):
deps_root = graphs.setdefault("dependencies", {})
if not args.no_include_role and inc_roles:
deps_root["include_role"] = sorted(inc_roles)
if not args.no_import_role and imp_roles:
deps_root["import_role"] = sorted(imp_roles)
if not args.no_dependencies and meta_deps:
deps_root["dependencies"] = sorted(meta_deps)
if not args.no_run_after and run_after:
deps_root["run_after"] = sorted(run_after)
graphs["dependencies"] = deps_root
if args.preview:
for key, data in graphs.items():
if args.verbose:
print(f"Previewing graph '{key}' for role '{role_name}'")
output_graph(data, "console", role_name, key)
else:
if args.shadow_folder:
tree_file = os.path.join(args.shadow_folder, role_name, "meta", "tree.json")
else:
tree_file = os.path.join(role_path, "meta", "tree.json")
os.makedirs(os.path.dirname(tree_file), exist_ok=True)
with open(tree_file, "w", encoding="utf-8") as f:
json.dump(graphs, f, indent=2)
print(f"Wrote {tree_file}")
for future in as_completed(futures):
role_name = futures[future]
try:
future.result()
except Exception as exc:
# Do not crash the whole run; report the failing role instead.
print(f"[ERROR] Role '{role_name}' failed: {exc}")
if __name__ == "__main__":