From 96253337bf2f3afad11778bdb84d584927523217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sat, 5 Oct 2024 21:55:08 -0700 Subject: [PATCH] feat: use `Retry-After` header into consideration for rate limited feeds --- internal/model/feed.go | 21 +++--- internal/model/feed_test.go | 70 +++++++++++++++---- internal/reader/fetcher/response_handler.go | 22 ++++++ .../reader/fetcher/response_handler_test.go | 35 ++++++++++ internal/reader/handler/handler.go | 23 ++++-- 5 files changed, 143 insertions(+), 28 deletions(-) diff --git a/internal/model/feed.go b/internal/model/feed.go index 141abd41387..9f1de1eb50d 100644 --- a/internal/model/feed.go +++ b/internal/model/feed.go @@ -112,12 +112,13 @@ func (f *Feed) CheckedNow() { } // ScheduleNextCheck set "next_check_at" of a feed based on the scheduler selected from the configuration. -func (f *Feed) ScheduleNextCheck(weeklyCount int, newTTL int) { - f.TTL = newTTL +func (f *Feed) ScheduleNextCheck(weeklyCount int, refreshDelayInMinutes int) { + f.TTL = refreshDelayInMinutes + // Default to the global config Polling Frequency. - var intervalMinutes int - switch config.Opts.PollingScheduler() { - case SchedulerEntryFrequency: + intervalMinutes := config.Opts.SchedulerRoundRobinMinInterval() + + if config.Opts.PollingScheduler() == SchedulerEntryFrequency { if weeklyCount <= 0 { intervalMinutes = config.Opts.SchedulerEntryFrequencyMaxInterval() } else { @@ -125,13 +126,13 @@ func (f *Feed) ScheduleNextCheck(weeklyCount int, newTTL int) { intervalMinutes = int(math.Min(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMaxInterval()))) intervalMinutes = int(math.Max(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMinInterval()))) } - default: - intervalMinutes = config.Opts.SchedulerRoundRobinMinInterval() } - // If the feed has a TTL defined, we use it to make sure we don't check it too often. - if newTTL > intervalMinutes && newTTL > 0 { - intervalMinutes = newTTL + + // If the feed has a TTL or a Retry-After defined, we use it to make sure we don't check it too often. + if refreshDelayInMinutes > 0 && refreshDelayInMinutes > intervalMinutes { + intervalMinutes = refreshDelayInMinutes } + f.NextCheckAt = time.Now().Add(time.Minute * time.Duration(intervalMinutes)) } diff --git a/internal/model/feed_test.go b/internal/model/feed_test.go index df5c6885f9d..673b828e91e 100644 --- a/internal/model/feed_test.go +++ b/internal/model/feed_test.go @@ -14,7 +14,7 @@ import ( const ( largeWeeklyCount = 10080 - noNewTTL = 0 + noRefreshDelay = 0 ) func TestFeedCategorySetter(t *testing.T) { @@ -76,7 +76,7 @@ func checkTargetInterval(t *testing.T, feed *Feed, targetInterval int, timeBefor } } -func TestFeedScheduleNextCheckDefault(t *testing.T) { +func TestFeedScheduleNextCheckRoundRobinDefault(t *testing.T) { os.Clearenv() var err error @@ -88,15 +88,60 @@ func TestFeedScheduleNextCheckDefault(t *testing.T) { timeBefore := time.Now() feed := &Feed{} - weeklyCount := 10 - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(0, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) } targetInterval := config.Opts.SchedulerRoundRobinMinInterval() - checkTargetInterval(t, feed, targetInterval, timeBefore, "default SchedulerRoundRobinMinInterval") + checkTargetInterval(t, feed, targetInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinDefault") +} + +func TestFeedScheduleNextCheckRoundRobinWithRefreshDelayAboveMinInterval(t *testing.T) { + os.Clearenv() + + var err error + parser := config.NewParser() + config.Opts, err = parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + timeBefore := time.Now() + feed := &Feed{} + + feed.ScheduleNextCheck(0, config.Opts.SchedulerRoundRobinMinInterval()+30) + + if feed.NextCheckAt.IsZero() { + t.Error(`The next_check_at must be set`) + } + + expectedInterval := config.Opts.SchedulerRoundRobinMinInterval() + 30 + checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinWithRefreshDelayAboveMinInterval") +} + +func TestFeedScheduleNextCheckRoundRobinWithRefreshDelayBelowMinInterval(t *testing.T) { + os.Clearenv() + + var err error + parser := config.NewParser() + config.Opts, err = parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + timeBefore := time.Now() + feed := &Feed{} + + feed.ScheduleNextCheck(0, config.Opts.SchedulerRoundRobinMinInterval()-30) + + if feed.NextCheckAt.IsZero() { + t.Error(`The next_check_at must be set`) + } + + expectedInterval := config.Opts.SchedulerRoundRobinMinInterval() + checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinWithRefreshDelayBelowMinInterval") } func TestFeedScheduleNextCheckRoundRobinMinInterval(t *testing.T) { @@ -114,15 +159,14 @@ func TestFeedScheduleNextCheckRoundRobinMinInterval(t *testing.T) { timeBefore := time.Now() feed := &Feed{} - weeklyCount := 100 - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(0, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) } - targetInterval := minInterval - checkTargetInterval(t, feed, targetInterval, timeBefore, "round robin min interval") + expectedInterval := minInterval + checkTargetInterval(t, feed, expectedInterval, timeBefore, "TestFeedScheduleNextCheckRoundRobinMinInterval") } func TestFeedScheduleNextCheckEntryFrequencyMaxInterval(t *testing.T) { @@ -144,7 +188,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMaxInterval(t *testing.T) { feed := &Feed{} // Use a very small weekly count to trigger the max interval weeklyCount := 1 - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(weeklyCount, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) @@ -173,7 +217,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMaxIntervalZeroWeeklyCount(t *testin feed := &Feed{} // Use a very small weekly count to trigger the max interval weeklyCount := 0 - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(weeklyCount, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) @@ -202,7 +246,7 @@ func TestFeedScheduleNextCheckEntryFrequencyMinInterval(t *testing.T) { feed := &Feed{} // Use a very large weekly count to trigger the min interval weeklyCount := largeWeeklyCount - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(weeklyCount, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) @@ -228,7 +272,7 @@ func TestFeedScheduleNextCheckEntryFrequencyFactor(t *testing.T) { timeBefore := time.Now() feed := &Feed{} weeklyCount := 7 - feed.ScheduleNextCheck(weeklyCount, noNewTTL) + feed.ScheduleNextCheck(weeklyCount, noRefreshDelay) if feed.NextCheckAt.IsZero() { t.Error(`The next_check_at must be set`) diff --git a/internal/reader/fetcher/response_handler.go b/internal/reader/fetcher/response_handler.go index 540fdac0e88..2f38b8e4b23 100644 --- a/internal/reader/fetcher/response_handler.go +++ b/internal/reader/fetcher/response_handler.go @@ -13,7 +13,9 @@ import ( "net/http" "net/url" "os" + "strconv" "strings" + "time" "miniflux.app/v2/internal/locale" ) @@ -51,6 +53,26 @@ func (r *ResponseHandler) ETag() string { return r.httpResponse.Header.Get("ETag") } +func (r *ResponseHandler) ParseRetryDelay() int { + retryAfterHeaderValue := r.httpResponse.Header.Get("Retry-After") + if retryAfterHeaderValue != "" { + // First, try to parse as an integer (number of seconds) + if seconds, err := strconv.Atoi(retryAfterHeaderValue); err == nil { + return seconds + } + + // If not an integer, try to parse as an HTTP-date + if t, err := time.Parse(time.RFC1123, retryAfterHeaderValue); err == nil { + return int(time.Until(t).Seconds()) + } + } + return 0 +} + +func (r *ResponseHandler) IsRateLimited() bool { + return r.httpResponse.StatusCode == http.StatusTooManyRequests +} + func (r *ResponseHandler) IsModified(lastEtagValue, lastModifiedValue string) bool { if r.httpResponse.StatusCode == http.StatusNotModified { return false diff --git a/internal/reader/fetcher/response_handler_test.go b/internal/reader/fetcher/response_handler_test.go index cf2ff28fa74..da1f7856e2a 100644 --- a/internal/reader/fetcher/response_handler_test.go +++ b/internal/reader/fetcher/response_handler_test.go @@ -6,6 +6,7 @@ package fetcher // import "miniflux.app/v2/internal/reader/fetcher" import ( "net/http" "testing" + "time" ) func TestIsModified(t *testing.T) { @@ -67,3 +68,37 @@ func TestIsModified(t *testing.T) { }) } } + +func TestRetryDelay(t *testing.T) { + var testCases = map[string]struct { + RetryAfterHeader string + ExpectedDelay int + }{ + "Empty header": { + RetryAfterHeader: "", + ExpectedDelay: 0, + }, + "Integer value": { + RetryAfterHeader: "42", + ExpectedDelay: 42, + }, + "HTTP-date": { + RetryAfterHeader: time.Now().Add(42 * time.Second).Format(time.RFC1123), + ExpectedDelay: 41, + }, + } + for name, tc := range testCases { + t.Run(name, func(tt *testing.T) { + header := http.Header{} + header.Add("Retry-After", tc.RetryAfterHeader) + rh := ResponseHandler{ + httpResponse: &http.Response{ + Header: header, + }, + } + if tc.ExpectedDelay != rh.ParseRetryDelay() { + tt.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedDelay, rh.ParseRetryDelay(), name) + } + }) + } +} diff --git a/internal/reader/handler/handler.go b/internal/reader/handler/handler.go index aa9d9704538..2e992b67711 100644 --- a/internal/reader/handler/handler.go +++ b/internal/reader/handler/handler.go @@ -210,7 +210,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool } weeklyEntryCount := 0 - newTTL := 0 + refreshDelayInMinutes := 0 if config.Opts.PollingScheduler() == model.SchedulerEntryFrequency { var weeklyCountErr error weeklyEntryCount, weeklyCountErr = store.WeeklyFeedEntryCount(userID, feedID) @@ -220,7 +220,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool } originalFeed.CheckedNow() - originalFeed.ScheduleNextCheck(weeklyEntryCount, newTTL) + originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes) requestBuilder := fetcher.NewRequestBuilder() requestBuilder.WithUsernameAndPassword(originalFeed.Username, originalFeed.Password) @@ -241,6 +241,19 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(originalFeed.FeedURL)) defer responseHandler.Close() + if responseHandler.IsRateLimited() { + retryDelayInSeconds := responseHandler.ParseRetryDelay() + refreshDelayInMinutes = retryDelayInSeconds / 60 + originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes) + + slog.Warn("Feed is rate limited", + slog.String("feed_url", originalFeed.FeedURL), + slog.Int("retry_delay_in_seconds", retryDelayInSeconds), + slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes), + slog.Time("new_next_check_at", originalFeed.NextCheckAt), + ) + } + if localizedError := responseHandler.LocalizedError(); localizedError != nil { slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error())) originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) @@ -283,15 +296,15 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool } // If the feed has a TTL defined, we use it to make sure we don't check it too often. - newTTL = updatedFeed.TTL + refreshDelayInMinutes = updatedFeed.TTL // Set the next check at with updated arguments. - originalFeed.ScheduleNextCheck(weeklyEntryCount, newTTL) + originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes) slog.Debug("Updated next check date", slog.Int64("user_id", userID), slog.Int64("feed_id", feedID), - slog.Int("ttl", newTTL), + slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes), slog.Time("new_next_check_at", originalFeed.NextCheckAt), )