SatelliteQE · JacobCallahan · Nov 19, 2024 · ogajduse · Dec 5, 2024 · ogajduse
diff --git a/broker/config_manager.py b/broker/config_manager.py
@@ -99,7 +99,7 @@ def _get_migrations(self, force_version=None):
         migrations = []
         for _, name, _ in pkgutil.iter_modules(config_migrations.__path__):
             module = importlib.import_module(f"broker.config_migrations.{name}")
-            if hasattr(module, "run_migrations"):
+            if hasattr(module, "run_migrations") and "example" not in module.__name__:
                 if force_version and force_version == file_name_to_ver(name):
                     migrations.append(module)
                     break

diff --git a/broker/config_migrations/example_migration.py b/broker/config_migrations/example_migration.py
@@ -0,0 +1,25 @@
+"""Config migrations for versions older than 0.6.1 to 0.6.1.
+
+Copy this file to a new file in the same directory and modify it to create a new migration.
+The new file must be named `vX_Y_Z.py` where X_Y_Z is the version you are migrating to.
+
+e.g. cp example_migration.py v0_6_1.py
+"""
+
+from logzero import logger
+
+TO_VERSION = "0.6.1"
+
+
+def example_migration(config_dict):
+    """Migrations should modify the config_dict in place and return it."""
+    config_dict["example_key"] = "example_value"
+    return config_dict
+
+
+def run_migrations(config_dict):
+    """Run all migrations."""
+    logger.info(f"Running config migrations for {TO_VERSION}.")
+    config_dict = example_migration(config_dict)
+    config_dict["_version"] = TO_VERSION
+    return config_dict
diff --git a/broker/config_migrations/v0_6_3.py b/broker/config_migrations/v0_6_3.py
@@ -0,0 +1,22 @@
+"""Config migrations for versions older than 0.6.3 to 0.6.3."""
+
+from logzero import logger
+
+TO_VERSION = "0.6.3"
+
+
+def add_dangling_behavior(config_dict):
+    """Add the dangling_behavior config to AnsibleTower."""
+    if "AnsibleTower" in config_dict:
+        if "dangling_behavior" not in config_dict["AnsibleTower"]:
+            logger.debug("Adding dangling_behavior to AnsibleTower.")
+            config_dict["AnsibleTower"]["dangling_behavior"] = "checkin"
+    return config_dict
+
+
+def run_migrations(config_dict):
+    """Run all migrations."""
+    logger.info(f"Running config migrations for {TO_VERSION}.")
+    config_dict = add_dangling_behavior(config_dict)
+    config_dict["_version"] = TO_VERSION
+    return config_dict
diff --git a/broker/providers/ansible_tower.py b/broker/providers/ansible_tower.py
@@ -8,9 +8,10 @@
 import click
 from dynaconf import Validator
 from logzero import logger
+from rich.prompt import Prompt
 
 from broker import exceptions
-from broker.helpers import eval_filter, find_origin, yaml
+from broker.helpers import eval_filter, find_origin, update_inventory, yaml
 from broker.settings import settings
 
 try:
@@ -29,7 +30,7 @@
 
 
 def convert_pseudonamespaces(attr_dict):
-    """Recursively convert PsuedoNamespace objects into dictionaries."""
+    """Recursively convert PseudoNamespace objects into dictionaries."""
     out_dict = {}
     for key, value in attr_dict.items():
         if isinstance(value, awxkit.utils.PseudoNamespace):
@@ -127,6 +128,7 @@ class AnsibleTower(Provider):
             | Validator("ANSIBLETOWER.token", must_exist=True)
         ),
         Validator("ANSIBLETOWER.inventory", default=None),
+        Validator("ANSIBLETOWER.dangling_behavior", default="checkin"),
-        Validator("ANSIBLETOWER.dangling_behavior", default="checkin"),
+        Validator("ANSIBLETOWER.dangling_behavior", default="checkin", is_in=["prompt", "checkin", "store"]),
-        Validator("ANSIBLETOWER.dangling_behavior", default="checkin"),
+        Validator("ANSIBLETOWER.dangling_behavior", default="checkin", is_in=["prompt", "checkin", "store"]),
     ]
 
     _checkout_options = [
@@ -170,11 +172,12 @@ def __init__(self, **kwargs):
         self.uname = settings.ANSIBLETOWER.get("username")
         self.pword = settings.ANSIBLETOWER.get("password")
         self.token = settings.ANSIBLETOWER.get("token")
+        self.dangling_behavior = settings.ANSIBLETOWER.get("dangling_behavior")
         self._inventory = kwargs.get("tower_inventory") or settings.ANSIBLETOWER.inventory
         # Init the class itself
         config = kwargs.get("config")
         root = kwargs.get("root")
-        self._v2, self.username = get_awxkit_and_uname(
+        self._v2, self.uname = get_awxkit_and_uname(
             config=config,
             root=root,
             url=self.url,
@@ -380,6 +383,54 @@ def _get_failure_messages(self, workflow):
         else:
             return failure_messages
 
+    def _try_get_dangling_hosts(self, failed_workflow):
+        """Get one or more hosts that may have been left behind by a failed workflow."""
+        hosts = []
+        for node in failed_workflow.get_related("workflow_nodes").results:
+            if not (job_fields := node.summary_fields.get("job", {})) or job_fields.get(
+                "failed"
+            ):  # skip jobs with no summary fields and failed jobs
+                continue
+            if jobs := self._v2.jobs.get(id=job_fields["id"]).results:
+                if vm_name := jobs[0].artifacts.get("vm_name"):
+                    hosts.append(vm_name)
+        return list(set(hosts))
+
+    def handle_dangling_hosts(self, job):
+        """Attempt to check in dangling hosts associated with the given job."""
+        dangling_hosts = self._try_get_dangling_hosts(job)
+        if not dangling_hosts:
+            logger.debug("No dangling hosts found for the failed job.")
+            return
+        dangling_behavior = self.dangling_behavior
+        for dangling_host in dangling_hosts:
+            logger.info(f"Found dangling host: {dangling_host}")
+            if dangling_behavior == "prompt":
+                choice = Prompt.ask(
+                    "What would you like to do with this host? [c/s/cA/sA]\n",
+                    "Checkin (c), Store (s), Checkin All (cA), Store All (sA)",
+                    choices=["c", "s", "cA", "sA"],
+                )
+                if choice == "cA":
+                    dangling_behavior = "checkin"
+                elif choice == "sA":
+                    dangling_behavior = "store"
+            else:
+                choice = None
+            # handle checkins
+            if choice == "c" or dangling_behavior == "checkin":
+                try:
+                    self.release(dangling_host)
+                    logger.debug(f"Successfully checked in dangling host: {dangling_host}")
+                except exceptions.BrokerError:
+                    logger.warning(f"Failed to check in dangling host: {dangling_host}")
+            elif choice == "s" or dangling_behavior == "store":
+                logger.debug(f"Storing dangling host: {dangling_host}")
+                host = self._v2.hosts.get(name=dangling_host).results[0]
+                host = self._compile_host_info(host)
+                host["failed"] = True
-                host["failed"] = True
+                host["deploy_failed"] = True
-                host["failed"] = True
+                host["deploy_failed"] = True
+                update_inventory(add=host)
+
     def _compile_host_info(self, host):
         try:
             host_facts = host.related.ansible_facts.get()
@@ -607,20 +658,23 @@ def execute(self, **kwargs):  # noqa: PLR0912,PLR0915 - Possible TODO refactor
         logger.info(f"Waiting for job: \nAPI: {job_api_url}\nUI: {job_ui_url}")
         job.wait_until_completed(timeout=settings.ANSIBLETOWER.workflow_timeout)
         if job.status != "successful":
+            failure_message = self._get_failure_messages(job)
             message_data = {
                 f"{subject.capitalize()} Status": job.status,
-                "Reason(s)": self._get_failure_messages(job),
+                "Reason(s)": failure_message,
                 "URL": job_ui_url,
             }
             helpers.emit(message_data)
+            if "was automatically checked-in" not in failure_message:
+                self.handle_dangling_hosts(job)
             raise JobExecutionError(message_data=message_data["Reason(s)"])
         if strategy := kwargs.pop("artifacts", None):
             return self._merge_artifacts(job, strategy=strategy)
         return job
 
     def get_inventory(self, user=None):
         """Compile a list of hosts based on any inventory a user's name is mentioned."""
-        user = user or self.username
+        user = user or self.uname
         invs = [
             inv
             for inv in self._v2.inventory.get(page_size=200).results