Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AAP-36604 (analytics) Thousands of zombie/orphaned Slow/Stuck DB queries in controller querying active host count #15715

Merged
merged 4 commits into from
Dec 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions awx/main/tasks/host_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly
from awx.main.tasks.helpers import is_run_threshold_reached
from awx.conf.license import get_license
from awx.main.utils.pglock import advisory_lock


logger = logging.getLogger('awx.main.tasks.host_metrics')

Expand Down Expand Up @@ -90,7 +92,10 @@ def hard_cleanup(threshold=None):


class HostMetricSummaryMonthlyTask:
LOCK_KEY = 'host_metric_summary_monthly'
LOCK_SESSION_TIMEOUT = 300000 # 5 minutes.
"""
Task runs every four hours, longer lock timeout avoids premature termination due to high db load or other latency.
This task computes last [threshold] months of HostMetricSummaryMonthly table
[threshold] is setting CLEANUP_HOST_METRICS_HARD_THRESHOLD
Each record in the table represents changes in HostMetric table in one month
Expand All @@ -115,29 +120,37 @@ def __init__(self):
self.records_to_update = []

def execute(self):
self._load_existing_summaries()
self._load_hosts_added()
self._load_hosts_deleted()

# Get first month after last hard delete
month = self._get_first_month()
license_consumed = self._get_license_consumed_before(month)
with advisory_lock(
HostMetricSummaryMonthlyTask.LOCK_KEY, lock_session_timeout_milliseconds=HostMetricSummaryMonthlyTask.LOCK_SESSION_TIMEOUT, wait=False
) as acquired:
if not acquired:
logger.info("Another instance of host_metric_summary_monthly is already running. Exiting.")
return

self._load_existing_summaries()
self._load_hosts_added()
self._load_hosts_deleted()

# Get first month after last hard delete
month = self._get_first_month()
license_consumed = self._get_license_consumed_before(month)

# Fill record for each month
while month <= datetime.date.today().replace(day=1):
summary = self._find_or_create_summary(month)
# Update summary and update license_consumed by hosts added/removed this month
self._update_summary(summary, month, license_consumed)
license_consumed = summary.license_consumed
# Fill record for each month
while month <= datetime.date.today().replace(day=1):
summary = self._find_or_create_summary(month)
# Update summary and update license_consumed by hosts added/removed this month
self._update_summary(summary, month, license_consumed)
license_consumed = summary.license_consumed

month = month + relativedelta(months=1)
month = month + relativedelta(months=1)

# Create/Update stats
HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000)
HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000)
# Create/Update stats
HostMetricSummaryMonthly.objects.bulk_create(self.records_to_create, batch_size=1000)
HostMetricSummaryMonthly.objects.bulk_update(self.records_to_update, ['license_consumed', 'hosts_added', 'hosts_deleted'], batch_size=1000)

# Set timestamp of last run
settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now()
# Set timestamp of last run
settings.HOST_METRIC_SUMMARY_TASK_LAST_TS = now()

def _get_license_consumed_before(self, month):
license_consumed = 0
Expand Down
Loading