From d2e1cfa46e428312e40550fbe4ed8202b2348ad3 Mon Sep 17 00:00:00 2001 From: Seth Foster Date: Thu, 24 Oct 2024 14:59:45 -0400 Subject: [PATCH] Make rrule fast forwarding stable By stable, we mean future occurrences of the rrule should be the same before and after the fast forward operation. The problem before was that we were fast forwarding to 7 days ago. For some rrules, this does not retain the old occurrences. Thus, jobs would launch at unexpected times. This change makes sure we fast forward in increments of the rrule INTERVAL (converted to seconds), thus the new dtstart should be in the occurrence list of the old rrule. Signed-off-by: Seth Foster --- awx/main/models/schedules.py | 61 ++++++++++-- .../unit/utils/test_schedule_fast_forward.py | 92 +++++++++++++++++++ 2 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 awx/main/tests/unit/utils/test_schedule_fast_forward.py diff --git a/awx/main/models/schedules.py b/awx/main/models/schedules.py index 3993fdf183fa..1c7b63a1459e 100644 --- a/awx/main/models/schedules.py +++ b/awx/main/models/schedules.py @@ -35,6 +35,45 @@ UTC_TIMEZONES = {x: tzutc() for x in dateutil.parser.parserinfo().UTCZONE} +SECONDS_IN_WEEK = 7 * 24 * 60 * 60 + + +def fast_forward_date(rrule): + ''' + Utility to fast forward an rrule, maintaining consistency in the resulting + occurrences + Fast forwards the rrule to 7 days ago + Returns a datetime object + ''' + if not rrule._freq in (dateutil.rrule.HOURLY, dateutil.rrule.MINUTELY): + raise RuntimeError("Cannot fast forward rrule, frequency must be HOURLY or MINUTELY") + + interval = rrule._interval if rrule._interval else 1 + if rrule._freq == dateutil.rrule.HOURLY: + interval *= 60 * 60 + elif rrule._freq == dateutil.rrule.MINUTELY: + interval *= 60 + + if type(interval) == float and not interval.is_integer(): + raise RuntimeError("Cannot fast forward rule, interval is a fraction of a second") + + seconds_since_dtstart = (now() - rrule._dtstart).total_seconds() + + # fast forward to 7 days ago + fast_forward_seconds = seconds_since_dtstart - SECONDS_IN_WEEK + + # make sure at least one chunk of interval can fit inside the time period we are trying to fast forward + if interval > fast_forward_seconds: + raise RuntimeError(f"Cannot fast forward rrule {rrule}, interval is greater than the fast forward amount of {fast_forward_seconds} seconds") + + # it is important to fast forward by a number that is divisible by + # interval. For example, if interval is 7 hours, we fast forward by 7, 14, 21, etc. hours. + # Otherwise, the occurrences after the fast forward might not match the ones before. + # x // y is integer division, lopping off any remainder, so that we get the outcome we want. + new_start = rrule._dtstart + datetime.timedelta(seconds=(fast_forward_seconds // interval) * interval) + return new_start + + class ScheduleFilterMethods(object): def enabled(self, enabled=True): return self.filter(enabled=enabled) @@ -207,23 +246,33 @@ def rrulestr(cls, rrule, fast_forward=True, **kwargs): raise ValueError('A valid TZID must be provided (e.g., America/New_York)') # Fast forward is a way for us to limit the number of events in the rruleset - # If we are fastforwading and we don't have a count limited rule that is minutely or hourley + # If we are fast forwarding and we don't have a count limited rule that is minutely or hourly # We will modify the start date of the rule to last week to prevent a large number of entries if fast_forward: try: # All rules in a ruleset will have the same dtstart value - # so lets compare the first event to now to see if its > 7 days old + # so lets compare the first event to now to see if its > 30 days old + # we choose > 30 days because if rrule has FREQ=HOURLY and INTERVAL=23, it will take + # at least 23 days before we can fast forward reliably and retain stable occurrences. + # since we are fast forwarding to 7 days ago, we check fo rrrules older than (23+7) days first_event = x[0] - if (now() - first_event).days > 7: + if (now() - first_event).days > 30: for rule in x._rrule: # If any rule has a minutely or hourly rule without a count... if rule._freq in [dateutil.rrule.MINUTELY, dateutil.rrule.HOURLY] and not rule._count: # hourly/minutely rrules with far-past DTSTART values # are *really* slow to precompute # start *from* one week ago to speed things up drastically - new_start = (now() - datetime.timedelta(days=7)).strftime('%Y%m%d') - # Now we want to repalce the DTSTART:T with the new date (which includes the T) - new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start), rrule) + try: + new_start = fast_forward_date(rule) + except RuntimeError as e: + logger.warning(e) + # fallback to setting dtstart to 7 days ago, but this has the consequence of + # occurrences not matching the old occurrences. + new_start = now() - datetime.timedelta(days=7) + new_start_fmt = new_start.strftime('%Y%m%d') + # Now we want to replace the DTSTART:T with the new date (which includes the T) + new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start_fmt), rrule) return Schedule.rrulestr(new_rrule, fast_forward=False) except IndexError: pass diff --git a/awx/main/tests/unit/utils/test_schedule_fast_forward.py b/awx/main/tests/unit/utils/test_schedule_fast_forward.py new file mode 100644 index 000000000000..ade9d7228050 --- /dev/null +++ b/awx/main/tests/unit/utils/test_schedule_fast_forward.py @@ -0,0 +1,92 @@ +import pytest +import datetime + +from django.utils.timezone import now + +from awx.main.models.schedules import fast_forward_date +from dateutil.rrule import rrule, HOURLY, MINUTELY, MONTHLY + + +def test_fast_forward_date_all_hours(): + ''' + Assert that the resulting fast forwarded date is included in the original rrule + occurrence list + ''' + for interval in range(1, 24): + dtstart = now() - datetime.timedelta(days=30) + rule = rrule(freq=HOURLY, interval=interval, dtstart=dtstart) + new_datetime = fast_forward_date(rule) + + # get occurrences of the rrule + gen = rule.xafter(new_datetime - datetime.timedelta(days=1), count=100) + found_matching_date = False + for occurrence in gen: + if occurrence == new_datetime: + found_matching_date = True + break + + assert found_matching_date + + +@pytest.mark.parametrize( + 'freq, interval', + [ + (MINUTELY, 15), + (MINUTELY, 120), + (MINUTELY, 60 * 24 * 3), + (HOURLY, 7), + (HOURLY, 24 * 3), + ], +) +def test_fast_forward_date(freq, interval): + ''' + Assert that the resulting fast forwarded date is included in the original rrule + occurrence list + ''' + dtstart = now() - datetime.timedelta(days=30) + rule = rrule(freq=freq, interval=interval, dtstart=dtstart) + new_datetime = fast_forward_date(rule) + + # get occurrences of the rrule + gen = rule.xafter(new_datetime - datetime.timedelta(days=1), count=100) + found_matching_date = False + for occurrence in gen: + if occurrence == new_datetime: + found_matching_date = True + break + + assert found_matching_date + + +@pytest.mark.parametrize( + 'freq, interval, error', + [ + (MINUTELY, 15.5555, "interval is a fraction of a second"), + (MONTHLY, 1, "frequency must be HOURLY or MINUTELY"), + (HOURLY, 24 * 30, "interval is greater than the fast forward amount"), + ], +) +def test_error_fast_forward_date(freq, interval, error): + dtstart = now() - datetime.timedelta(days=30) + rule = rrule(freq=freq, interval=interval, dtstart=dtstart) + if error: + with pytest.raises(Exception) as e_info: + fast_forward_date(rule) + + assert error in e_info.value.args[0] + + +def test_very_old_date(): + dtstart = now() - datetime.timedelta(days=365 * 10) + rule = rrule(freq=HOURLY, interval=11, dtstart=dtstart) + new_datetime = fast_forward_date(rule) + + # get occurrences of the rrule + gen = rule.xafter(new_datetime - datetime.timedelta(days=1), count=100) + found_matching_date = False + for occurrence in gen: + if occurrence == new_datetime: + found_matching_date = True + break + + assert found_matching_date