diff --git a/roles/sys-ctl-hlth-docker-container/tasks/01_core.yml b/roles/sys-ctl-hlth-docker-container/tasks/01_core.yml index 57224ede..1d5f6fd7 100644 --- a/roles/sys-ctl-hlth-docker-container/tasks/01_core.yml +++ b/roles/sys-ctl-hlth-docker-container/tasks/01_core.yml @@ -3,6 +3,11 @@ name: sys-ctl-alm-compose when: run_once_sys_ctl_alm_compose is not defined +- name: Include dependency 'sys-ctl-rpr-docker-soft' + include_role: + name: sys-ctl-rpr-docker-soft + when: run_once_sys_ctl_rpr_docker_soft is not defined + - include_role: name: sys-service vars: diff --git a/roles/sys-ctl-hlth-docker-volumes/tasks/01_core.yml b/roles/sys-ctl-hlth-docker-volumes/tasks/01_core.yml index 23d3a9ab..c4326995 100644 --- a/roles/sys-ctl-hlth-docker-volumes/tasks/01_core.yml +++ b/roles/sys-ctl-hlth-docker-volumes/tasks/01_core.yml @@ -2,7 +2,7 @@ include_role: name: sys-ctl-alm-compose when: run_once_sys_ctl_alm_compose is not defined - + - include_role: name: sys-service vars: diff --git a/roles/sys-ctl-rpr-docker-soft/files/script.py b/roles/sys-ctl-rpr-docker-soft/files/script.py index 9aeac0ee..e7232a7d 100644 --- a/roles/sys-ctl-rpr-docker-soft/files/script.py +++ b/roles/sys-ctl-rpr-docker-soft/files/script.py @@ -1,15 +1,26 @@ #!/usr/bin/env python3 """ Restart Docker-Compose configurations with exited or unhealthy containers. -This version receives the *manipulation services* via argparse (no Jinja). + +STRICT mode: Resolve the Compose project exclusively via Docker labels +(com.docker.compose.project and com.docker.compose.project.working_dir). +No container-name fallback. If labels are missing or Docker is unavailable, +the script records an error for that container. + +All shell interactions that matter for tests go through print_bash() +so they can be monkeypatched in unit tests. """ import subprocess import time import os import argparse -from typing import List +from typing import List, Optional, Tuple +# --------------------------- +# Shell helpers +# --------------------------- + def bash(command: str) -> List[str]: print(command) process = subprocess.Popen( @@ -30,31 +41,45 @@ def list_to_string(lst: List[str]) -> str: def print_bash(command: str) -> List[str]: + """ + Wrapper around bash() that echoes combined output for easier debugging + and can be monkeypatched in tests. + """ output = bash(command) if output: print(list_to_string(output)) return output -def find_docker_compose_file(directory: str) -> str | None: +# --------------------------- +# Filesystem / compose helpers +# --------------------------- + +def find_docker_compose_file(directory: str) -> Optional[str]: + """ + Search for docker-compose.yml beneath a directory. + """ for root, _, files in os.walk(directory): if "docker-compose.yml" in files: return os.path.join(root, "docker-compose.yml") return None -def detect_env_file(project_path: str) -> str | None: +def detect_env_file(project_path: str) -> Optional[str]: """ - Return the path to a Compose env file if present (.env preferred, fallback to env). + Return the path to a Compose env file if present (.env preferred, fallback to .env/env). """ - candidates = [os.path.join(project_path, ".env"), os.path.join(project_path, ".env", "env")] + candidates = [ + os.path.join(project_path, ".env"), + os.path.join(project_path, ".env", "env"), + ] for candidate in candidates: if os.path.isfile(candidate): return candidate return None -def compose_cmd(subcmd: str, project_path: str, project_name: str | None = None) -> str: +def compose_cmd(subcmd: str, project_path: str, project_name: Optional[str] = None) -> str: """ Build a docker-compose command string with optional -p and --env-file if present. Example: compose_cmd("restart", "/opt/docker/foo", "foo") @@ -69,6 +94,10 @@ def compose_cmd(subcmd: str, project_path: str, project_name: str | None = None) return " ".join(parts) +# --------------------------- +# Business logic +# --------------------------- + def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[str]: """ Accept either: @@ -78,7 +107,6 @@ def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[s if raw: return [s for s in raw if s.strip()] if raw_str: - # split on comma or whitespace parts = [p.strip() for chunk in raw_str.split(",") for p in chunk.split()] return [p for p in parts if p] return [] @@ -87,7 +115,7 @@ def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[s def wait_while_manipulation_running( services: List[str], waiting_time: int = 600, - timeout: int | None = None, + timeout: Optional[int] = None, ) -> None: """ Wait until none of the given services are active anymore. @@ -107,7 +135,6 @@ def wait_while_manipulation_running( break if any_active: - # Check timeout elapsed = time.time() - start if timeout and elapsed >= timeout: print(f"Timeout ({timeout}s) reached while waiting for services. Continuing anyway.") @@ -119,7 +146,30 @@ def wait_while_manipulation_running( break -def main(base_directory: str, manipulation_services: List[str], timeout: int | None) -> int: +def get_compose_project_info(container: str) -> Tuple[str, str]: + """ + Resolve project name and working dir from Docker labels. + STRICT: Raises RuntimeError if labels are missing/unreadable. + """ + out_project = print_bash( + f"docker inspect -f '{{{{ index .Config.Labels \"com.docker.compose.project\" }}}}' {container}" + ) + out_workdir = print_bash( + f"docker inspect -f '{{{{ index .Config.Labels \"com.docker.compose.project.working_dir\" }}}}' {container}" + ) + + project = out_project[0].strip() if out_project else "" + workdir = out_workdir[0].strip() if out_workdir else "" + + if not project: + raise RuntimeError(f"No compose project label found for container {container}") + if not workdir: + raise RuntimeError(f"No compose working_dir label found for container {container}") + + return project, workdir + + +def main(base_directory: str, manipulation_services: List[str], timeout: Optional[int]) -> int: errors = 0 wait_while_manipulation_running(manipulation_services, waiting_time=600, timeout=timeout) @@ -131,43 +181,50 @@ def main(base_directory: str, manipulation_services: List[str], timeout: int | N ) failed_containers = unhealthy_container_names + exited_container_names - unfiltered_failed_docker_compose_repositories = [ - container.split("-")[0] for container in failed_containers - ] - filtered_failed_docker_compose_repositories = list( - dict.fromkeys(unfiltered_failed_docker_compose_repositories) - ) + for container in failed_containers: + try: + project, workdir = get_compose_project_info(container) + except Exception as e: + print(f"Error reading compose labels for {container}: {e}") + errors += 1 + continue - for repo in filtered_failed_docker_compose_repositories: - compose_file_path = find_docker_compose_file(os.path.join(base_directory, repo)) + compose_file_path = os.path.join(workdir, "docker-compose.yml") + if not os.path.isfile(compose_file_path): + # As STRICT: we only trust labels; if file not there, error out. + print(f"Error: docker-compose.yml not found at {compose_file_path} for container {container}") + errors += 1 + continue - if compose_file_path: + project_path = os.path.dirname(compose_file_path) + try: print("Restarting unhealthy container in:", compose_file_path) - project_path = os.path.dirname(compose_file_path) - try: - # restart with optional --env-file and -p - print_bash(compose_cmd("restart", project_path, repo)) - except Exception as e: - if "port is already allocated" in str(e): - print("Detected port allocation problem. Executing recovery steps...") - # down (no -p needed), then engine restart, then up -d with -p + print_bash(compose_cmd("restart", project_path, project)) + except Exception as e: + if "port is already allocated" in str(e): + print("Detected port allocation problem. Executing recovery steps...") + try: print_bash(compose_cmd("down", project_path)) print_bash("systemctl restart docker") - print_bash(compose_cmd("up -d", project_path, repo)) - else: - print("Unhandled exception during restart:", e) + print_bash(compose_cmd("up -d", project_path, project)) + except Exception as e2: + print("Unhandled exception during recovery:", e2) errors += 1 - else: - print("Error: Docker Compose file not found for:", repo) - errors += 1 + else: + print("Unhandled exception during restart:", e) + errors += 1 print("Finished restart procedure.") return errors +# --------------------------- +# CLI +# --------------------------- + if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Restart Docker-Compose configurations with exited or unhealthy containers." + description="Restart Docker-Compose configurations with exited or unhealthy containers (STRICT label mode)." ) parser.add_argument( "--manipulation", @@ -184,12 +241,12 @@ if __name__ == "__main__": "--timeout", type=int, default=60, - help="Maximum time in seconds to wait for manipulation services before continuing.(Default 1min)", + help="Maximum time in seconds to wait for manipulation services before continuing. (Default 1min)", ) parser.add_argument( "base_directory", type=str, - help="Base directory where Docker Compose configurations are located.", + help="(Unused in STRICT mode) Base directory where Docker Compose configurations are located.", ) args = parser.parse_args() services = normalize_services_arg(args.manipulation, args.manipulation_string) diff --git a/roles/sys-svc-docker/README.md b/roles/sys-svc-docker/README.md index d6cc7dd9..9d726f55 100644 --- a/roles/sys-svc-docker/README.md +++ b/roles/sys-svc-docker/README.md @@ -17,14 +17,8 @@ When enabled via `MODE_CLEANUP` or `MODE_RESET`, it will automatically prune unu Installs Docker and Docker Compose via the system package manager. - **Integrated Dependencies** - Includes backup, repair, and health check sub-roles: - - `sys-ctl-bkp-docker-2-loc` - - `user-administrator` - - `sys-ctl-hlth-docker-container` - - `sys-ctl-hlth-docker-volumes` - - `sys-ctl-rpr-docker-soft` - - `sys-ctl-rpr-docker-hard` - + Includes backup, repair, and health check sub-roles + - **Cleanup & Reset Modes** - `MODE_CLEANUP`: Removes unused Docker containers, networks, images, and volumes. - `MODE_RESET`: Performs cleanup and restarts the Docker service. diff --git a/roles/sys-svc-docker/tasks/01_core.yml b/roles/sys-svc-docker/tasks/01_core.yml index 1cc8f1e3..1f4c58b9 100644 --- a/roles/sys-svc-docker/tasks/01_core.yml +++ b/roles/sys-svc-docker/tasks/01_core.yml @@ -21,6 +21,5 @@ - sys-ctl-bkp-docker-2-loc - sys-ctl-hlth-docker-container - sys-ctl-hlth-docker-volumes - - sys-ctl-rpr-docker-soft - sys-ctl-rpr-docker-hard when: SYS_SVC_DOCKER_LOAD_SERVICES | bool \ No newline at end of file diff --git a/tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py b/tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py index 003ccb39..38b310a7 100644 --- a/tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py +++ b/tests/unit/roles/sys-ctl-rpr-docker-soft/files/test_script.py @@ -115,29 +115,48 @@ class TestRepairDockerSoft(unittest.TestCase): def fake_print_bash(cmd): cmd_log.append(cmd) + + # 1) docker ps Mocks (deine bisherigen) if cmd.startswith("docker ps --filter health=unhealthy"): return ["app1-web-1", "db-1"] if cmd.startswith("docker ps --filter status=exited"): return ["app1-worker-1", "other-2"] + + # 2) docker inspect Labels (NEU) + # project label + if cmd.startswith("docker inspect -f '{{ index .Config.Labels \"com.docker.compose.project\" }}'"): + container = cmd.split()[-1] + if container in ("app1-web-1", "app1-worker-1"): + return ["app1"] + if container == "db-1": + return ["db"] + return [""] # other-2 hat keine Labels -> soll fehlschlagen + + # working_dir label + if cmd.startswith("docker inspect -f '{{ index .Config.Labels \"com.docker.compose.project.working_dir\" }}'"): + container = cmd.split()[-1] + if container in ("app1-web-1", "app1-worker-1"): + return ["/BASE/app1"] + if container == "db-1": + return ["/BASE/db"] + return [""] # other-2 -> keine Angabe + + # 3) docker-compose Aufrufe (unverändert okay) if "docker-compose" in cmd: return [] + return [] + # find_docker_compose_file wird in STRICT nicht benutzt, kann aber bleiben def fake_find_docker_compose(path): - # Compose-Projekte: app1, db -> vorhanden; "other" -> nicht vorhanden if path.endswith("/app1") or path.endswith("/db"): return str(Path(path) / "docker-compose.yml") return None - # Steuere die detect_env_file-Antwort: - # - Für app1 existiert nur .env/env - # - Für db existiert .env - def fake_detect_env_file(project_path: str): - if project_path.endswith("/app1"): - return f"{project_path}/.env/env" - if project_path.endswith("/db"): - return f"{project_path}/.env" - return None + # 4) os.path.isfile für STRICT mode (NEU) + old_isfile = s.os.path.isfile + def fake_isfile(path): + return path in ("/BASE/app1/docker-compose.yml", "/BASE/db/docker-compose.yml") old_print_bash = s.print_bash old_find = s.find_docker_compose_file @@ -145,14 +164,18 @@ class TestRepairDockerSoft(unittest.TestCase): try: s.print_bash = fake_print_bash s.find_docker_compose_file = fake_find_docker_compose - s.detect_env_file = fake_detect_env_file + s.detect_env_file = lambda project_path: ( + f"{project_path}/.env/env" if project_path.endswith("/app1") + else (f"{project_path}/.env" if project_path.endswith("/db") else None) + ) + s.os.path.isfile = fake_isfile # <— wichtig für STRICT errors = s.main("/BASE", manipulation_services=[], timeout=None) - # one error expected for "other" (no compose file) + + # Erwartung: nur "other-2" scheitert -> 1 Fehler self.assertEqual(errors, 1) restart_cmds = [c for c in cmd_log if ' docker-compose' in c and " restart" in c] - # app1: --env-file "/BASE/app1/.env/env" + -p "app1" self.assertTrue(any( 'cd "/BASE/app1"' in c and '--env-file "/BASE/app1/.env/env"' in c and @@ -160,7 +183,6 @@ class TestRepairDockerSoft(unittest.TestCase): ' restart' in c for c in restart_cmds )) - # db: --env-file "/BASE/db/.env" + -p "db" self.assertTrue(any( 'cd "/BASE/db"' in c and '--env-file "/BASE/db/.env"' in c and @@ -172,6 +194,8 @@ class TestRepairDockerSoft(unittest.TestCase): s.print_bash = old_print_bash s.find_docker_compose_file = old_find s.detect_env_file = old_detect + s.os.path.isfile = old_isfile + if __name__ == "__main__":