Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add initial attempts to automatically clean up dangling hosts #337

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion broker/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def _get_migrations(self, force_version=None):
migrations = []
for _, name, _ in pkgutil.iter_modules(config_migrations.__path__):
module = importlib.import_module(f"broker.config_migrations.{name}")
if hasattr(module, "run_migrations"):
if hasattr(module, "run_migrations") and "example" not in module.__name__:
if force_version and force_version == file_name_to_ver(name):
migrations.append(module)
break
Expand Down
25 changes: 25 additions & 0 deletions broker/config_migrations/example_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Config migrations for versions older than 0.6.1 to 0.6.1.

Copy this file to a new file in the same directory and modify it to create a new migration.
The new file must be named `vX_Y_Z.py` where X_Y_Z is the version you are migrating to.

e.g. cp example_migration.py v0_6_1.py
"""

from logzero import logger

TO_VERSION = "0.6.1"


def example_migration(config_dict):
"""Migrations should modify the config_dict in place and return it."""
config_dict["example_key"] = "example_value"
return config_dict


def run_migrations(config_dict):
"""Run all migrations."""
logger.info(f"Running config migrations for {TO_VERSION}.")
config_dict = example_migration(config_dict)
config_dict["_version"] = TO_VERSION
return config_dict
22 changes: 22 additions & 0 deletions broker/config_migrations/v0_6_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Config migrations for versions older than 0.6.3 to 0.6.3."""

from logzero import logger

TO_VERSION = "0.6.3"


def add_dangling_behavior(config_dict):
"""Add the dangling_behavior config to AnsibleTower."""
if "AnsibleTower" in config_dict:
if "dangling_behavior" not in config_dict["AnsibleTower"]:
logger.debug("Adding dangling_behavior to AnsibleTower.")
config_dict["AnsibleTower"]["dangling_behavior"] = "checkin"
return config_dict


def run_migrations(config_dict):
"""Run all migrations."""
logger.info(f"Running config migrations for {TO_VERSION}.")
config_dict = add_dangling_behavior(config_dict)
config_dict["_version"] = TO_VERSION
return config_dict
64 changes: 59 additions & 5 deletions broker/providers/ansible_tower.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import click
from dynaconf import Validator
from logzero import logger
from rich.prompt import Prompt

from broker import exceptions
from broker.helpers import eval_filter, find_origin, yaml
from broker.helpers import eval_filter, find_origin, update_inventory, yaml
from broker.settings import settings

try:
Expand All @@ -29,7 +30,7 @@


def convert_pseudonamespaces(attr_dict):
"""Recursively convert PsuedoNamespace objects into dictionaries."""
"""Recursively convert PseudoNamespace objects into dictionaries."""
out_dict = {}
for key, value in attr_dict.items():
if isinstance(value, awxkit.utils.PseudoNamespace):
Expand Down Expand Up @@ -127,6 +128,7 @@ class AnsibleTower(Provider):
| Validator("ANSIBLETOWER.token", must_exist=True)
),
Validator("ANSIBLETOWER.inventory", default=None),
Validator("ANSIBLETOWER.dangling_behavior", default="checkin"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Validator("ANSIBLETOWER.dangling_behavior", default="checkin"),
Validator("ANSIBLETOWER.dangling_behavior", default="checkin", is_in=["prompt", "checkin", "store"]),

]

_checkout_options = [
Expand Down Expand Up @@ -170,11 +172,12 @@ def __init__(self, **kwargs):
self.uname = settings.ANSIBLETOWER.get("username")
self.pword = settings.ANSIBLETOWER.get("password")
self.token = settings.ANSIBLETOWER.get("token")
self.dangling_behavior = settings.ANSIBLETOWER.get("dangling_behavior")
self._inventory = kwargs.get("tower_inventory") or settings.ANSIBLETOWER.inventory
# Init the class itself
config = kwargs.get("config")
root = kwargs.get("root")
self._v2, self.username = get_awxkit_and_uname(
self._v2, self.uname = get_awxkit_and_uname(
config=config,
root=root,
url=self.url,
Expand Down Expand Up @@ -380,6 +383,54 @@ def _get_failure_messages(self, workflow):
else:
return failure_messages

def _try_get_dangling_hosts(self, failed_workflow):
"""Get one or more hosts that may have been left behind by a failed workflow."""
hosts = []
for node in failed_workflow.get_related("workflow_nodes").results:
if not (job_fields := node.summary_fields.get("job", {})) or job_fields.get(
"failed"
): # skip jobs with no summary fields and failed jobs
continue
if jobs := self._v2.jobs.get(id=job_fields["id"]).results:
if vm_name := jobs[0].artifacts.get("vm_name"):
hosts.append(vm_name)
return list(set(hosts))

def handle_dangling_hosts(self, job):
"""Attempt to check in dangling hosts associated with the given job."""
dangling_hosts = self._try_get_dangling_hosts(job)
if not dangling_hosts:
logger.debug("No dangling hosts found for the failed job.")
return
dangling_behavior = self.dangling_behavior
for dangling_host in dangling_hosts:
logger.info(f"Found dangling host: {dangling_host}")
if dangling_behavior == "prompt":
choice = Prompt.ask(
"What would you like to do with this host? [c/s/cA/sA]\n",
"Checkin (c), Store (s), Checkin All (cA), Store All (sA)",
choices=["c", "s", "cA", "sA"],
)
if choice == "cA":
dangling_behavior = "checkin"
elif choice == "sA":
dangling_behavior = "store"
else:
choice = None
# handle checkins
if choice == "c" or dangling_behavior == "checkin":
try:
self.release(dangling_host)
logger.debug(f"Successfully checked in dangling host: {dangling_host}")
except exceptions.BrokerError:
logger.warning(f"Failed to check in dangling host: {dangling_host}")
elif choice == "s" or dangling_behavior == "store":
logger.debug(f"Storing dangling host: {dangling_host}")
host = self._v2.hosts.get(name=dangling_host).results[0]
host = self._compile_host_info(host)
host["failed"] = True
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
host["failed"] = True
host["deploy_failed"] = True

I prefer to be a bit more explicit about what failed.

update_inventory(add=host)

def _compile_host_info(self, host):
try:
host_facts = host.related.ansible_facts.get()
Expand Down Expand Up @@ -607,20 +658,23 @@ def execute(self, **kwargs): # noqa: PLR0912,PLR0915 - Possible TODO refactor
logger.info(f"Waiting for job: \nAPI: {job_api_url}\nUI: {job_ui_url}")
job.wait_until_completed(timeout=settings.ANSIBLETOWER.workflow_timeout)
if job.status != "successful":
failure_message = self._get_failure_messages(job)
message_data = {
f"{subject.capitalize()} Status": job.status,
"Reason(s)": self._get_failure_messages(job),
"Reason(s)": failure_message,
"URL": job_ui_url,
}
helpers.emit(message_data)
if "was automatically checked-in" not in failure_message:
self.handle_dangling_hosts(job)
raise JobExecutionError(message_data=message_data["Reason(s)"])
if strategy := kwargs.pop("artifacts", None):
return self._merge_artifacts(job, strategy=strategy)
return job

def get_inventory(self, user=None):
"""Compile a list of hosts based on any inventory a user's name is mentioned."""
user = user or self.username
user = user or self.uname
invs = [
inv
for inv in self._v2.inventory.get(page_size=200).results
Expand Down
Loading