sys-ctl-rpr-docker-soft: switch to STRICT label mode and adapt tests

- script.py now resolves docker-compose project and working_dir strictly from container labels
- removed container-name fallback logic
- adjusted sys-ctl-hlth-docker-container to include sys-ctl-rpr-docker-soft
- cleaned up sys-svc-docker dependencies
- updated unit tests to mock docker inspect and os.path.isfile for STRICT mode

Conversation: https://chatgpt.com/share/68b80927-b800-800f-a909-0fe8d110fd0e
This commit is contained in:
2025-09-03 11:24:14 +02:00
parent 0ada12e3ca
commit 5e550ce3a3
6 changed files with 140 additions and 61 deletions

View File

@@ -3,6 +3,11 @@
name: sys-ctl-alm-compose
when: run_once_sys_ctl_alm_compose is not defined
- name: Include dependency 'sys-ctl-rpr-docker-soft'
include_role:
name: sys-ctl-rpr-docker-soft
when: run_once_sys_ctl_rpr_docker_soft is not defined
- include_role:
name: sys-service
vars:

View File

@@ -1,15 +1,26 @@
#!/usr/bin/env python3
"""
Restart Docker-Compose configurations with exited or unhealthy containers.
This version receives the *manipulation services* via argparse (no Jinja).
STRICT mode: Resolve the Compose project exclusively via Docker labels
(com.docker.compose.project and com.docker.compose.project.working_dir).
No container-name fallback. If labels are missing or Docker is unavailable,
the script records an error for that container.
All shell interactions that matter for tests go through print_bash()
so they can be monkeypatched in unit tests.
"""
import subprocess
import time
import os
import argparse
from typing import List
from typing import List, Optional, Tuple
# ---------------------------
# Shell helpers
# ---------------------------
def bash(command: str) -> List[str]:
print(command)
process = subprocess.Popen(
@@ -30,31 +41,45 @@ def list_to_string(lst: List[str]) -> str:
def print_bash(command: str) -> List[str]:
    """
    Run *command* via bash() and echo whatever it returned.

    The returned lines are printed as one combined string (only when there
    is any output) to make debugging easier. Unit tests monkeypatch this
    function to intercept shell interactions.

    Returns the unmodified result of bash(command).
    """
    lines = bash(command)
    if not lines:
        # Nothing to echo; hand the (empty) result straight back.
        return lines
    print(list_to_string(lines))
    return lines
def find_docker_compose_file(directory: str) -> str | None:
# ---------------------------
# Filesystem / compose helpers
# ---------------------------
def find_docker_compose_file(directory: str) -> Optional[str]:
    """
    Locate the first docker-compose.yml at or below *directory*.

    The tree is traversed with os.walk(); the path of the first match in
    traversal order is returned, or None when no such file exists.
    """
    hits = (
        os.path.join(current, "docker-compose.yml")
        for current, _dirs, names in os.walk(directory)
        if "docker-compose.yml" in names
    )
    # The generator is lazy, so the walk stops at the first hit.
    return next(hits, None)
def detect_env_file(project_path: str) -> str | None:
def detect_env_file(project_path: str) -> Optional[str]:
    """
    Return the path to a Compose env file if present.

    Lookup order:
      1. <project_path>/.env      (preferred)
      2. <project_path>/.env/env  (fallback)

    Returns None when neither path is a regular file.
    """
    preferred = os.path.join(project_path, ".env")
    fallback = os.path.join(project_path, ".env", "env")
    if os.path.isfile(preferred):
        return preferred
    if os.path.isfile(fallback):
        return fallback
    return None
def compose_cmd(subcmd: str, project_path: str, project_name: str | None = None) -> str:
def compose_cmd(subcmd: str, project_path: str, project_name: Optional[str] = None) -> str:
"""
Build a docker-compose command string with optional -p and --env-file if present.
Example: compose_cmd("restart", "/opt/docker/foo", "foo")
@@ -69,6 +94,10 @@ def compose_cmd(subcmd: str, project_path: str, project_name: str | None = None)
return " ".join(parts)
# ---------------------------
# Business logic
# ---------------------------
def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[str]:
"""
Accept either:
@@ -78,7 +107,6 @@ def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[s
if raw:
return [s for s in raw if s.strip()]
if raw_str:
# split on comma or whitespace
parts = [p.strip() for chunk in raw_str.split(",") for p in chunk.split()]
return [p for p in parts if p]
return []
@@ -87,7 +115,7 @@ def normalize_services_arg(raw: List[str] | None, raw_str: str | None) -> List[s
def wait_while_manipulation_running(
services: List[str],
waiting_time: int = 600,
timeout: int | None = None,
timeout: Optional[int] = None,
) -> None:
"""
Wait until none of the given services are active anymore.
@@ -107,7 +135,6 @@ def wait_while_manipulation_running(
break
if any_active:
# Check timeout
elapsed = time.time() - start
if timeout and elapsed >= timeout:
print(f"Timeout ({timeout}s) reached while waiting for services. Continuing anyway.")
@@ -119,7 +146,30 @@ def wait_while_manipulation_running(
break
def main(base_directory: str, manipulation_services: List[str], timeout: int | None) -> int:
def get_compose_project_info(container: str) -> Tuple[str, str]:
    """
    Look up the Compose project name and working directory of *container*.

    Both values are read from the container's Docker labels
    (com.docker.compose.project and
    com.docker.compose.project.working_dir) via `docker inspect`, issued
    through print_bash() so tests can intercept the calls.

    STRICT: there is no fallback. Only the first output line of each
    inspect call is considered.

    Returns:
        (project, workdir) as stripped strings.

    Raises:
        RuntimeError: if either label is empty or produced no output.
    """
    # Both inspect calls are issued up front, before any validation.
    project_lines = print_bash(
        f"docker inspect -f '{{{{ index .Config.Labels \"com.docker.compose.project\" }}}}' {container}"
    )
    workdir_lines = print_bash(
        f"docker inspect -f '{{{{ index .Config.Labels \"com.docker.compose.project.working_dir\" }}}}' {container}"
    )

    project = ""
    if project_lines:
        project = project_lines[0].strip()

    workdir = ""
    if workdir_lines:
        workdir = workdir_lines[0].strip()

    if not project:
        raise RuntimeError(f"No compose project label found for container {container}")
    if not workdir:
        raise RuntimeError(f"No compose working_dir label found for container {container}")

    return project, workdir
def main(base_directory: str, manipulation_services: List[str], timeout: Optional[int]) -> int:
errors = 0
wait_while_manipulation_running(manipulation_services, waiting_time=600, timeout=timeout)
@@ -131,43 +181,50 @@ def main(base_directory: str, manipulation_services: List[str], timeout: int | N
)
failed_containers = unhealthy_container_names + exited_container_names
unfiltered_failed_docker_compose_repositories = [
container.split("-")[0] for container in failed_containers
]
filtered_failed_docker_compose_repositories = list(
dict.fromkeys(unfiltered_failed_docker_compose_repositories)
)
for container in failed_containers:
try:
project, workdir = get_compose_project_info(container)
except Exception as e:
print(f"Error reading compose labels for {container}: {e}")
errors += 1
continue
for repo in filtered_failed_docker_compose_repositories:
compose_file_path = find_docker_compose_file(os.path.join(base_directory, repo))
compose_file_path = os.path.join(workdir, "docker-compose.yml")
if not os.path.isfile(compose_file_path):
# As STRICT: we only trust labels; if file not there, error out.
print(f"Error: docker-compose.yml not found at {compose_file_path} for container {container}")
errors += 1
continue
if compose_file_path:
project_path = os.path.dirname(compose_file_path)
try:
print("Restarting unhealthy container in:", compose_file_path)
project_path = os.path.dirname(compose_file_path)
try:
# restart with optional --env-file and -p
print_bash(compose_cmd("restart", project_path, repo))
except Exception as e:
if "port is already allocated" in str(e):
print("Detected port allocation problem. Executing recovery steps...")
# down (no -p needed), then engine restart, then up -d with -p
print_bash(compose_cmd("restart", project_path, project))
except Exception as e:
if "port is already allocated" in str(e):
print("Detected port allocation problem. Executing recovery steps...")
try:
print_bash(compose_cmd("down", project_path))
print_bash("systemctl restart docker")
print_bash(compose_cmd("up -d", project_path, repo))
else:
print("Unhandled exception during restart:", e)
print_bash(compose_cmd("up -d", project_path, project))
except Exception as e2:
print("Unhandled exception during recovery:", e2)
errors += 1
else:
print("Error: Docker Compose file not found for:", repo)
errors += 1
else:
print("Unhandled exception during restart:", e)
errors += 1
print("Finished restart procedure.")
return errors
# ---------------------------
# CLI
# ---------------------------
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Restart Docker-Compose configurations with exited or unhealthy containers."
description="Restart Docker-Compose configurations with exited or unhealthy containers (STRICT label mode)."
)
parser.add_argument(
"--manipulation",
@@ -184,12 +241,12 @@ if __name__ == "__main__":
"--timeout",
type=int,
default=60,
help="Maximum time in seconds to wait for manipulation services before continuing.(Default 1min)",
help="Maximum time in seconds to wait for manipulation services before continuing. (Default 1min)",
)
parser.add_argument(
"base_directory",
type=str,
help="Base directory where Docker Compose configurations are located.",
help="(Unused in STRICT mode) Base directory where Docker Compose configurations are located.",
)
args = parser.parse_args()
services = normalize_services_arg(args.manipulation, args.manipulation_string)

View File

@@ -17,13 +17,7 @@ When enabled via `MODE_CLEANUP` or `MODE_RESET`, it will automatically prune unu
Installs Docker and Docker Compose via the system package manager.
- **Integrated Dependencies**
Includes backup, repair, and health check sub-roles:
- `sys-ctl-bkp-docker-2-loc`
- `user-administrator`
- `sys-ctl-hlth-docker-container`
- `sys-ctl-hlth-docker-volumes`
- `sys-ctl-rpr-docker-soft`
- `sys-ctl-rpr-docker-hard`
Includes backup, repair, and health check sub-roles.
- **Cleanup & Reset Modes**
- `MODE_CLEANUP`: Removes unused Docker containers, networks, images, and volumes.

View File

@@ -21,6 +21,5 @@
- sys-ctl-bkp-docker-2-loc
- sys-ctl-hlth-docker-container
- sys-ctl-hlth-docker-volumes
- sys-ctl-rpr-docker-soft
- sys-ctl-rpr-docker-hard
when: SYS_SVC_DOCKER_LOAD_SERVICES | bool

View File

@@ -115,29 +115,48 @@ class TestRepairDockerSoft(unittest.TestCase):
def fake_print_bash(cmd):
    # Stand-in for print_bash(): records every command and returns canned output.
    cmd_log.append(cmd)

    # 1) docker ps mocks (the pre-existing ones)
    ps_results = (
        ("docker ps --filter health=unhealthy", ["app1-web-1", "db-1"]),
        ("docker ps --filter status=exited", ["app1-worker-1", "other-2"]),
    )
    for prefix, names in ps_results:
        if cmd.startswith(prefix):
            return names

    # 2) docker inspect label lookups (new): project label, then working_dir label
    app1_containers = ("app1-web-1", "app1-worker-1")
    label_results = (
        (
            "docker inspect -f '{{ index .Config.Labels \"com.docker.compose.project\" }}'",
            "app1",
            "db",
        ),
        (
            "docker inspect -f '{{ index .Config.Labels \"com.docker.compose.project.working_dir\" }}'",
            "/BASE/app1",
            "/BASE/db",
        ),
    )
    for prefix, app1_value, db_value in label_results:
        if not cmd.startswith(prefix):
            continue
        container = cmd.split()[-1]
        if container in app1_containers:
            return [app1_value]
        if container == "db-1":
            return [db_value]
        # other-2 has no labels -> the lookup is supposed to fail
        return [""]

    # 3) docker-compose invocations (unchanged) and everything else: no output
    return []
# find_docker_compose_file is not used in STRICT mode, but the mock may stay
def fake_find_docker_compose(path):
    # Compose projects "app1" and "db" exist; anything else (e.g. "other") does not.
    if not (path.endswith("/app1") or path.endswith("/db")):
        return None
    compose_file = Path(path) / "docker-compose.yml"
    return str(compose_file)
# Control what detect_env_file returns:
# - for app1 only .env/env exists
# - for db .env exists
def fake_detect_env_file(project_path: str):
    # Canned env-file lookup: app1 only has .env/env, db has .env, others have none.
    is_app1 = project_path.endswith("/app1")
    is_db = project_path.endswith("/db")
    if not (is_app1 or is_db):
        return None
    if is_app1:
        return f"{project_path}/.env/env"
    return f"{project_path}/.env"
# 4) os.path.isfile for STRICT mode (new)
old_isfile = s.os.path.isfile
def fake_isfile(path):
    # Only the compose files of the two labelled projects "exist" in this test.
    existing_files = (
        "/BASE/app1/docker-compose.yml",
        "/BASE/db/docker-compose.yml",
    )
    return path in existing_files
old_print_bash = s.print_bash
old_find = s.find_docker_compose_file
@@ -145,14 +164,18 @@ class TestRepairDockerSoft(unittest.TestCase):
try:
s.print_bash = fake_print_bash
s.find_docker_compose_file = fake_find_docker_compose
s.detect_env_file = fake_detect_env_file
s.detect_env_file = lambda project_path: (
f"{project_path}/.env/env" if project_path.endswith("/app1")
else (f"{project_path}/.env" if project_path.endswith("/db") else None)
)
s.os.path.isfile = fake_isfile # <— wichtig für STRICT
errors = s.main("/BASE", manipulation_services=[], timeout=None)
# one error expected for "other" (no compose file)
# Expectation: only "other-2" fails -> 1 error
self.assertEqual(errors, 1)
restart_cmds = [c for c in cmd_log if ' docker-compose' in c and " restart" in c]
# app1: --env-file "/BASE/app1/.env/env" + -p "app1"
self.assertTrue(any(
'cd "/BASE/app1"' in c and
'--env-file "/BASE/app1/.env/env"' in c and
@@ -160,7 +183,6 @@ class TestRepairDockerSoft(unittest.TestCase):
' restart' in c
for c in restart_cmds
))
# db: --env-file "/BASE/db/.env" + -p "db"
self.assertTrue(any(
'cd "/BASE/db"' in c and
'--env-file "/BASE/db/.env"' in c and
@@ -172,6 +194,8 @@ class TestRepairDockerSoft(unittest.TestCase):
s.print_bash = old_print_bash
s.find_docker_compose_file = old_find
s.detect_env_file = old_detect
s.os.path.isfile = old_isfile
if __name__ == "__main__":