diff --git a/awx/main/management/commands/cleanup_host_metrics.py b/awx/main/management/commands/cleanup_host_metrics.py index a250af98528c..c390c074ad61 100644 --- a/awx/main/management/commands/cleanup_host_metrics.py +++ b/awx/main/management/commands/cleanup_host_metrics.py @@ -1,22 +1,22 @@ -from awx.main.models import HostMetric from django.core.management.base import BaseCommand from django.conf import settings +from awx.main.tasks.host_metrics import HostMetricTask class Command(BaseCommand): """ - Run soft-deleting of HostMetrics + This command provides a cleanup task for the HostMetric model. + There are two modes, which run in the following order: + - soft cleanup + - - Perform soft-deletion of all host metrics last automated 12 months ago or before. + This is the same as issuing a DELETE request to /api/v2/host_metrics/N/ for all host metrics that match the criteria. + - - Updates the columns deleted, deleted_counter and last_deleted. + - hard cleanup + - - Permanently erase from the database all host metrics that were soft-deleted 36 months ago or before. + This operation happens after the soft deletion has finished. NOTE(review): HostMetricTask.cleanup() calls hard_cleanup before soft_cleanup - confirm the intended order.
""" - help = 'Run soft-deleting of HostMetrics' - - def add_arguments(self, parser): - parser.add_argument('--months-ago', type=int, dest='months-ago', action='store', help='Threshold in months for soft-deleting') + help = 'Run soft and hard-deletion of HostMetrics' def handle(self, *args, **options): - months_ago = options.get('months-ago') or None - - if not months_ago: - months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12) - - HostMetric.cleanup_task(months_ago) + HostMetricTask().cleanup(soft_threshold=settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, hard_threshold=settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD) diff --git a/awx/main/models/inventory.py b/awx/main/models/inventory.py index 0310d3b1a2c0..7cf7f0710e7f 100644 --- a/awx/main/models/inventory.py +++ b/awx/main/models/inventory.py @@ -10,7 +10,6 @@ import os.path from urllib.parse import urljoin -import dateutil.relativedelta import yaml # Django @@ -890,23 +889,6 @@ def soft_restore(self): self.deleted = False self.save(update_fields=['deleted']) - @classmethod - def cleanup_task(cls, months_ago): - try: - months_ago = int(months_ago) - if months_ago <= 0: - raise ValueError() - - last_automation_before = now() - dateutil.relativedelta.relativedelta(months=months_ago) - - logger.info(f'cleanup_host_metrics: soft-deleting records last automated before {last_automation_before}') - HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update( - deleted=True, deleted_counter=models.F('deleted_counter') + 1, last_deleted=now() - ) - settings.CLEANUP_HOST_METRICS_LAST_TS = now() - except (TypeError, ValueError): - logger.error(f"cleanup_host_metrics: months_ago({months_ago}) has to be a positive integer value") - class HostMetricSummaryMonthly(models.Model): """ diff --git a/awx/main/tasks/helpers.py b/awx/main/tasks/helpers.py new file mode 100644 index 000000000000..ae91e941437f --- /dev/null +++ b/awx/main/tasks/helpers.py @@ -0,0 +1,10 @@ +from 
django.utils.timezone import now +from rest_framework.fields import DateTimeField + + +def is_run_threshold_reached(setting, threshold_seconds): + last_time = DateTimeField().to_internal_value(setting) if setting else None + if not last_time: + return True + else: + return (now() - last_time).total_seconds() > threshold_seconds diff --git a/awx/main/tasks/host_metrics.py b/awx/main/tasks/host_metrics.py index abf658ef8398..750cd199c515 100644 --- a/awx/main/tasks/host_metrics.py +++ b/awx/main/tasks/host_metrics.py @@ -3,33 +3,90 @@ import logging from django.conf import settings -from django.db.models import Count +from django.db.models import Count, F from django.db.models.functions import TruncMonth from django.utils.timezone import now -from rest_framework.fields import DateTimeField from awx.main.dispatch import get_task_queuename from awx.main.dispatch.publish import task from awx.main.models.inventory import HostMetric, HostMetricSummaryMonthly +from awx.main.tasks.helpers import is_run_threshold_reached from awx.conf.license import get_license -logger = logging.getLogger('awx.main.tasks.host_metric_summary_monthly') +logger = logging.getLogger('awx.main.tasks.host_metrics') + + +@task(queue=get_task_queuename) +def cleanup_host_metrics(): + if is_run_threshold_reached(getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', None), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400): + logger.info(f"Executing cleanup_host_metrics, last ran at {getattr(settings, 'CLEANUP_HOST_METRICS_LAST_TS', '---')}") + HostMetricTask().cleanup( + soft_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12), + hard_threshold=getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36), + ) + logger.info("Finished cleanup_host_metrics") @task(queue=get_task_queuename) def host_metric_summary_monthly(): """Run cleanup host metrics summary monthly task each week""" - if _is_run_threshold_reached( - getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', 
None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400 - ): + if is_run_threshold_reached(getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', None), getattr(settings, 'HOST_METRIC_SUMMARY_TASK_INTERVAL', 7) * 86400): logger.info(f"Executing host_metric_summary_monthly, last ran at {getattr(settings, 'HOST_METRIC_SUMMARY_TASK_LAST_TS', '---')}") HostMetricSummaryMonthlyTask().execute() logger.info("Finished host_metric_summary_monthly") -def _is_run_threshold_reached(setting, threshold_seconds): - last_time = DateTimeField().to_internal_value(setting) if setting else DateTimeField().to_internal_value('1970-01-01') +class HostMetricTask: + """ + This class provides a cleanup task for the HostMetric model. + There are two modes: + - soft cleanup (updates the columns deleted, deleted_counter and last_deleted) + - hard cleanup (deletes from the db) + """ + + def cleanup(self, soft_threshold=None, hard_threshold=None): + """ + Main entrypoint, runs either soft cleanup, hard cleanup or both (hard cleanup runs first) + + :param soft_threshold: (int) months since last automation; None skips the soft cleanup + :param hard_threshold: (int) months since soft deletion; None skips the hard cleanup + """ + if hard_threshold is not None: + self.hard_cleanup(hard_threshold) + if soft_threshold is not None: + self.soft_cleanup(soft_threshold) + + settings.CLEANUP_HOST_METRICS_LAST_TS = now() + + @staticmethod + def soft_cleanup(threshold=None): + if threshold is None: + threshold = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12) + + try: + threshold = int(threshold) + except (ValueError, TypeError) as e: + raise type(e)("soft_threshold has to be convertible to number") from e + + last_automation_before = now() - relativedelta(months=threshold) + rows = HostMetric.active_objects.filter(last_automation__lt=last_automation_before).update( + deleted=True, deleted_counter=F('deleted_counter') + 1, last_deleted=now() + ) + logger.info(f'cleanup_host_metrics: soft-deleted records last automated before {last_automation_before}, affected rows: {rows}') + + @staticmethod + def hard_cleanup(threshold=None): + 
if threshold is None: + threshold = getattr(settings, 'CLEANUP_HOST_METRICS_HARD_THRESHOLD', 36) + + try: + threshold = int(threshold) + except (ValueError, TypeError) as e: + raise type(e)("hard_threshold has to be convertible to number") from e - return (now() - last_time).total_seconds() > threshold_seconds + last_deleted_before = now() - relativedelta(months=threshold) + queryset = HostMetric.objects.filter(deleted=True, last_deleted__lt=last_deleted_before) + rows = queryset.delete() + logger.info(f'cleanup_host_metrics: hard-deleted records which were soft deleted before {last_deleted_before}, affected rows: {rows[0]}') class HostMetricSummaryMonthlyTask: diff --git a/awx/main/tasks/system.py b/awx/main/tasks/system.py index 9e834f273e5e..d15cde6e7f75 100644 --- a/awx/main/tasks/system.py +++ b/awx/main/tasks/system.py @@ -48,7 +48,6 @@ Inventory, SmartInventoryMembership, Job, - HostMetric, convert_jsonfields, ) from awx.main.constants import ACTIVE_STATES @@ -64,6 +63,7 @@ from awx.main.utils.reload import stop_local_services from awx.main.utils.pglock import advisory_lock +from awx.main.tasks.helpers import is_run_threshold_reached from awx.main.tasks.receptor import get_receptor_ctl, worker_info, worker_cleanup, administrative_workunit_reaper, write_receptor_config from awx.main.consumers import emit_channel_notification from awx.main import analytics @@ -368,9 +368,7 @@ def send_notifications(notification_list, job_id=None): @task(queue=get_task_queuename) def gather_analytics(): - from awx.conf.models import Setting - - if is_run_threshold_reached(Setting.objects.filter(key='AUTOMATION_ANALYTICS_LAST_GATHER').first(), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL): + if is_run_threshold_reached(getattr(settings, 'AUTOMATION_ANALYTICS_LAST_GATHER', None), settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL): analytics.gather() @@ -427,29 +425,6 @@ def cleanup_images_and_files(): _cleanup_images_and_files() -@task(queue=get_task_queuename) -def 
cleanup_host_metrics(): - """Run cleanup host metrics ~each month""" - # TODO: move whole method to host_metrics in follow-up PR - from awx.conf.models import Setting - - if is_run_threshold_reached( - Setting.objects.filter(key='CLEANUP_HOST_METRICS_LAST_TS').first(), getattr(settings, 'CLEANUP_HOST_METRICS_INTERVAL', 30) * 86400 - ): - months_ago = getattr(settings, 'CLEANUP_HOST_METRICS_SOFT_THRESHOLD', 12) - logger.info("Executing cleanup_host_metrics") - HostMetric.cleanup_task(months_ago) - logger.info("Finished cleanup_host_metrics") - - -def is_run_threshold_reached(setting, threshold_seconds): - from rest_framework.fields import DateTimeField - - last_time = DateTimeField().to_internal_value(setting.value) if setting and setting.value else DateTimeField().to_internal_value('1970-01-01') - - return (now() - last_time).total_seconds() > threshold_seconds - - @task(queue=get_task_queuename) def cluster_node_health_check(node): """ diff --git a/awx/main/tests/functional/commands/test_cleanup_host_metrics.py b/awx/main/tests/functional/commands/test_cleanup_host_metrics.py new file mode 100644 index 000000000000..ac6d0bde3243 --- /dev/null +++ b/awx/main/tests/functional/commands/test_cleanup_host_metrics.py @@ -0,0 +1,78 @@ +import pytest + +from awx.main.tasks.host_metrics import HostMetricTask +from awx.main.models.inventory import HostMetric +from awx.main.tests.factories.fixtures import mk_host_metric +from dateutil.relativedelta import relativedelta +from django.conf import settings +from django.utils import timezone + + +@pytest.mark.django_db +def test_no_host_metrics(): + """No-crash test""" + assert HostMetric.objects.count() == 0 + HostMetricTask().cleanup(soft_threshold=0, hard_threshold=0) + HostMetricTask().cleanup(soft_threshold=24, hard_threshold=42) + assert HostMetric.objects.count() == 0 + + +@pytest.mark.django_db +def test_delete_exception(): + """Crash test""" + with pytest.raises(ValueError): + HostMetricTask().soft_cleanup("") + with 
pytest.raises(TypeError): + HostMetricTask().hard_cleanup(set()) + + +@pytest.mark.django_db +@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_SOFT_THRESHOLD, 20]) +def test_soft_delete(threshold): + """Metrics with last_automation < threshold are updated to deleted=True""" + mk_host_metric('host_1', first_automation=ago(months=1), last_automation=ago(months=1), deleted=False) + mk_host_metric('host_2', first_automation=ago(months=1), last_automation=ago(months=1), deleted=True) + mk_host_metric('host_3', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=-1), deleted=False) + mk_host_metric('host_4', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=-1), deleted=True) + mk_host_metric('host_5', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=1), deleted=False) + mk_host_metric('host_6', first_automation=ago(months=1), last_automation=ago(months=threshold, hours=1), deleted=True) + mk_host_metric('host_7', first_automation=ago(months=1), last_automation=ago(months=42), deleted=False) + mk_host_metric('host_8', first_automation=ago(months=1), last_automation=ago(months=42), deleted=True) + + assert HostMetric.objects.count() == 8 + assert HostMetric.active_objects.count() == 4 + + for i in range(2): + HostMetricTask().cleanup(soft_threshold=threshold) + assert HostMetric.objects.count() == 8 + + hostnames = set(HostMetric.objects.filter(deleted=False).order_by('hostname').values_list('hostname', flat=True)) + assert hostnames == {'host_1', 'host_3'} + + +@pytest.mark.django_db +@pytest.mark.parametrize('threshold', [settings.CLEANUP_HOST_METRICS_HARD_THRESHOLD, 20]) +def test_hard_delete(threshold): + """Metrics with last_deleted < threshold and deleted=True are deleted from the db""" + mk_host_metric('host_1', first_automation=ago(months=1), last_deleted=ago(months=1), deleted=False) + mk_host_metric('host_2', first_automation=ago(months=1), 
last_deleted=ago(months=1), deleted=True) + mk_host_metric('host_3', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=-1), deleted=False) + mk_host_metric('host_4', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=-1), deleted=True) + mk_host_metric('host_5', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=1), deleted=False) + mk_host_metric('host_6', first_automation=ago(months=1), last_deleted=ago(months=threshold, hours=1), deleted=True) + mk_host_metric('host_7', first_automation=ago(months=1), last_deleted=ago(months=42), deleted=False) + mk_host_metric('host_8', first_automation=ago(months=1), last_deleted=ago(months=42), deleted=True) + + assert HostMetric.objects.count() == 8 + assert HostMetric.active_objects.count() == 4 + + for i in range(2): + HostMetricTask().cleanup(hard_threshold=threshold) + assert HostMetric.objects.count() == 6 + + hostnames = set(HostMetric.objects.order_by('hostname').values_list('hostname', flat=True)) + assert hostnames == {'host_1', 'host_2', 'host_3', 'host_4', 'host_5', 'host_7'} + + +def ago(months=0, hours=0): + return timezone.now() - relativedelta(months=months, hours=hours) diff --git a/awx/settings/defaults.py b/awx/settings/defaults.py index 59ea79f1128e..19a83cdbccf6 100644 --- a/awx/settings/defaults.py +++ b/awx/settings/defaults.py @@ -470,7 +470,7 @@ 'receptor_reaper': {'task': 'awx.main.tasks.system.awx_receptor_workunit_reaper', 'schedule': timedelta(seconds=60)}, 'send_subsystem_metrics': {'task': 'awx.main.analytics.analytics_tasks.send_subsystem_metrics', 'schedule': timedelta(seconds=20)}, 'cleanup_images': {'task': 'awx.main.tasks.system.cleanup_images_and_files', 'schedule': timedelta(hours=3)}, - 'cleanup_host_metrics': {'task': 'awx.main.tasks.system.cleanup_host_metrics', 'schedule': timedelta(hours=3, minutes=30)}, + 'cleanup_host_metrics': {'task': 'awx.main.tasks.host_metrics.cleanup_host_metrics', 'schedule': 
timedelta(hours=3, minutes=30)}, 'host_metric_summary_monthly': {'task': 'awx.main.tasks.host_metrics.host_metric_summary_monthly', 'schedule': timedelta(hours=4)}, } @@ -1049,7 +1049,7 @@ # - 'unique_managed_hosts': Compliant = automated - deleted hosts (using /api/v2/host_metrics/) SUBSCRIPTION_USAGE_MODEL = '' -# Host metrics cleanup - last time of the cleanup run (soft-deleting records) +# Host metrics cleanup - last time of the task/command run CLEANUP_HOST_METRICS_LAST_TS = None # Host metrics cleanup - minimal interval between two cleanups in days CLEANUP_HOST_METRICS_INTERVAL = 30 # days