diff --git a/docs/vision/README.md b/docs/vision/README.md index 0eae3513..922141bd 100644 --- a/docs/vision/README.md +++ b/docs/vision/README.md @@ -23,11 +23,11 @@ Moreover, our vision goes beyond just IT infrastructure; it extends to the broad Ultimately, our vision is to redefine the way IT infrastructure is deployed and managed, offering a solution that is swift, secure, and scalable, capable of meeting the needs of businesses, individuals, NGOs, and large enterprises. CyMaIS will empower all stakeholders by providing a foundation for a decentralized, transparent, and resilient digital future—setting a new benchmark for security, reliability, and sovereignty in the digital age. ## Key Points -1. Empowerment people and institutions -2. Data sovereignty -3. Control over infrastructure -4. Automated infrastructure setup -5. Open Source -6. Decentralized Services -7. Scalabel -8. Global resilience and security \ No newline at end of file +- Empower people and institutions +- Data sovereignty +- Control over infrastructure +- Automated infrastructure setup +- Open Source +- Decentralized Services +- Scalable +- Global resilience and security \ No newline at end of file diff --git a/roles/heal-docker/README.md b/roles/heal-docker/README.md index 4b88f314..5dcfa84d 100644 --- a/roles/heal-docker/README.md +++ b/roles/heal-docker/README.md @@ -1,2 +1,28 @@ -# heal-docker -docker-compose restart for containers which are unhealty or excited \ No newline at end of file +# Docker Healer 🩺 + +## Description + +This Ansible role automatically restarts Docker Compose configurations with exited or unhealthy containers on Arch Linux systems. It ensures the stability of containerized workloads by recovering from common error conditions like port binding issues. + +## Overview + +Tailored for Arch Linux, this role monitors containers for failure states and initiates a controlled restart of affected Compose configurations. 
If port conflicts prevent recovery, the role stops the affected stack, restarts Docker, and recreates the container environment. + +## Purpose + +The purpose of this role is to provide automated healing for Docker Compose environments, minimizing manual recovery effort and reducing downtime. + +## Features + +- **Container Health Monitoring:** Detects unhealthy or exited containers. +- **Automated Recovery:** Restarts failed containers and resolves port binding issues. +- **Run-once Setup Logic:** Ensures idempotent execution by controlling task flow with internal flags. +- **System Role Integration:** Seamlessly integrates with CyMaIS system maintenance logic. + +## Credits 📝 + +Developed and maintained by **Kevin Veen-Birkenbach**. +Learn more at [www.veen.world](https://www.veen.world) + +Part of the [CyMaIS Project](https://github.com/kevinveenbirkenbach/cymais) +License: [CyMaIS NonCommercial License (CNCL)](https://s.veen.world/cncl) diff --git a/roles/heal-docker/files/heal-docker.py b/roles/heal-docker/files/heal-docker.py index 4d70e49b..e6b52979 100644 --- a/roles/heal-docker/files/heal-docker.py +++ b/roles/heal-docker/files/heal-docker.py @@ -12,10 +12,11 @@ def bash(command): process = subprocess.Popen([command], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) out, err = process.communicate() stdout = out.splitlines() + stderr = err.decode("utf-8").strip() # decode stderr output = [line.decode("utf-8") for line in stdout] - if process.wait() > bool(0): + if process.returncode > 0: print(command, out, err) - raise Exception("Exitcode is greater than 0") + raise Exception(stderr) # pass the actual error text return output def list_to_string(lst): @@ -60,10 +61,22 @@ def main(base_directory): if compose_file_path: print("Restarting unhealthy container in:", compose_file_path) - print_bash(f'cd {os.path.dirname(compose_file_path)} && docker-compose -p "{repo}" restart') + project_path = os.path.dirname(compose_file_path) + try: + 
print_bash(f'cd {project_path} && docker-compose -p "{repo}" restart') + except Exception as e: + if "port is already allocated" in str(e): + print("Detected port allocation problem. Executing recovery steps...") + print_bash(f'cd {project_path} && docker-compose down') + print_bash('systemctl restart docker') + print_bash(f'cd {project_path} && docker-compose -p "{repo}" up -d') + else: + print("Unhandled exception during restart:", e) + errors += 1 else: print("Error: Docker Compose file not found for:", repo) errors += 1 + print("Finished restart procedure.") exit(errors) diff --git a/roles/heal-docker/handlers/main.yml b/roles/heal-docker/handlers/main.yml index e3ebda0b..9b7ba080 100644 --- a/roles/heal-docker/handlers/main.yml +++ b/roles/heal-docker/handlers/main.yml @@ -1,4 +1,5 @@ -- name: "reload heal-docker.cymais.service" +- name: restart heal-docker.cymais.service systemd: name: heal-docker.cymais.service + state: restarted daemon_reload: yes diff --git a/roles/heal-docker/meta/main.yml b/roles/heal-docker/meta/main.yml index 6f898c6d..a354a197 100644 --- a/roles/heal-docker/meta/main.yml +++ b/roles/heal-docker/meta/main.yml @@ -1,2 +1,26 @@ +--- +galaxy_info: + author: "Kevin Veen-Birkenbach" + description: "Automated recovery for unhealthy or exited Docker Compose containers." 
+ license: "CyMaIS NonCommercial License (CNCL)" + license_url: "https://s.veen.world/cncl" + company: | + Kevin Veen-Birkenbach + Consulting & Coaching Solutions + https://www.veen.world + min_ansible_version: "2.9" + platforms: + - name: Archlinux + versions: + - rolling + galaxy_tags: + - docker + - docker-compose + - systemd + - automation + - archlinux + repository: https://s.veen.world/cymais + issue_tracker_url: https://s.veen.world/cymaisissues + documentation: https://s.veen.world/cymais dependencies: - - system-maintenance-lock + - system-maintenance-lock \ No newline at end of file diff --git a/roles/heal-docker/tasks/main.yml b/roles/heal-docker/tasks/main.yml index 8a6cebae..e9522d9b 100644 --- a/roles/heal-docker/tasks/main.yml +++ b/roles/heal-docker/tasks/main.yml @@ -9,13 +9,14 @@ copy: src: heal-docker.py dest: "{{heal_docker}}heal-docker.py" + notify: restart heal-docker.cymais.service when: run_once_heal_docker is not defined - name: create heal-docker.cymais.service template: src: heal-docker.service.j2 dest: /etc/systemd/system/heal-docker.cymais.service - notify: reload heal-docker.cymais.service + notify: restart heal-docker.cymais.service when: run_once_heal_docker is not defined - name: set service_name to the name of the current role