diff --git a/app/services/check_notify_email_service.rb b/app/services/check_notify_email_service.rb new file mode 100644 index 000000000..82511acc8 --- /dev/null +++ b/app/services/check_notify_email_service.rb @@ -0,0 +1,18 @@ +class CheckNotifyEmailService + def initialize(status) + @status = status + @client = Notifications::Client.new(Rails.application.credentials.notify_api_key) + end + + def present?(reference) + results = @client.get_notifications( + template_type: "email", + status: @status, + reference:, + ) + + results.collection.count.positive? + rescue Notifications::Client::RequestError, Net::OpenTimeout, Net::ReadTimeout, SocketError => e + Rails.logger.debug("Unable to contact Notify to determine status of email reference #{reference}: #{e}") + end +end diff --git a/app/workers/alert_check_worker.rb b/app/workers/polling_alert_check_worker.rb similarity index 53% rename from app/workers/alert_check_worker.rb rename to app/workers/polling_alert_check_worker.rb index 5bcf1b05f..890f52435 100644 --- a/app/workers/alert_check_worker.rb +++ b/app/workers/polling_alert_check_worker.rb @@ -1,11 +1,11 @@ -class AlertCheckWorker < ApplicationWorker +class PollingAlertCheckWorker < ApplicationWorker include SearchAlertList def perform(document_type) content_items = get_alert_content_items(document_type:) delivered = 0 content_items.each do |ci| - if any_emails_delivered_for?(ci[:content_id], ci[:valid_from]) + if any_emails_delivered_for?(ci[:content_id], ci[:valid_from], document_type) delivered += 1 else Rails.logger.warn("Couldn't find any delivered emails for #{document_type.titleize} records with content id #{ci[:content_id]} (at #{ci[:url]})") @@ -18,7 +18,14 @@ def perform(document_type) Rails.cache.write("delivered_#{document_type}s", delivered, expires_in: 15.minutes) end - def any_emails_delivered_for?(content_id, valid_from) - Email.where("notify_status = 'delivered' AND content_id = ? AND created_at > ?", content_id, valid_from).exists? + def any_emails_delivered_for?(content_id, valid_from, document_type) + return true if Email.where("notify_status = 'delivered' AND content_id = ? AND created_at > ?", content_id, valid_from).exists? + + Rails.logger.info("First pass couldn't find any delivered emails for #{document_type.titleize} records with content id #{content_id}, reverting to polling") + + service = CheckNotifyEmailService.new("delivered") + Email.sent.where("content_id = ? AND created_at > ? AND notify_status IS NULL", content_id, valid_from).order(:created_at).first(100).any? do |email| + service.present?(email.id) + end end end diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 3f3f6e890..80a0b431e 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -41,9 +41,9 @@ class: RecoverLostJobsWorker check_medical_safety_alerts: every: '15m' - class: AlertCheckWorker + class: PollingAlertCheckWorker args: ["medical_safety_alert"] check_travel_advice_alerts: every: '15m' - class: AlertCheckWorker + class: PollingAlertCheckWorker args: ["travel_advice"] diff --git a/docs/alert_check_scheduled_jobs.md b/docs/alert_check_scheduled_jobs.md index cca74e1e3..8af557a38 100644 --- a/docs/alert_check_scheduled_jobs.md +++ b/docs/alert_check_scheduled_jobs.md @@ -8,6 +8,10 @@ The jobs run every 15 minutes, and checks for alerts that meet the following cri - created at least one hour ago (very new alerts are excluded) - created no more than 2 days ago (older alerts are excluded) +## Fallback to active polling + +As a backup in case there are no emails with a notify_status of "delivered" (this can happen if the callback url from notify gets mass-blocked), the system will explicitly poll Notify for the status of emails related to the alert. It will poll up to a hundred different emails, and only if none of them have been delivered will the alert state be set. + ## Multiple changes to a content item It is possible that a given travel advice item or medical alert might be updated more than once in two days, but there's no way to know about that through the Search API (we can only find out the last update to a given page). In this case we can only check that emails were delivered after the last public update. The 1 hour lag before an alert becomes checkable plus the 15 minute update frequency means that if a change happens between 60 and 75 minutes after the previous change the system can only confirms that emails were sent out for the most recent update, and if further proof of the previous update is needed a developer will have to check that manually. If the change happens later than 75 minutes, both changes will have been checked by separate runs of the job. @@ -41,12 +45,11 @@ kubectl config use-context govuk-integration kubectl -n apps deploy/email-alert-api -- rails c (Rails console) -> emails = Email.where(content_id: notify_status: "delivered") -> emails.each { |eml| eml.notify_status = nil; eml.save } -> AlertCheckWorker.new.perform() +> Email.where(content_id: ).delete_all +> PollingAlertCheckWorker.new.perform() ``` -This will clear the delivered status for the email, and run the alert check worker. Prometheus should collect the metrics after a minute, and set off the alert. +The alert check worker will find no emails with notify status "delivered", and nothing to actively poll (no emails with notify status nil), and will set the alert metric. Prometheus should collect the metrics after a minute, and set off the alert. ## Support Tasks diff --git a/spec/support/notify_request_helpers.rb b/spec/support/notify_request_helpers.rb index 8eca3801e..a062ce1b3 100644 --- a/spec/support/notify_request_helpers.rb +++ b/spec/support/notify_request_helpers.rb @@ -4,6 +4,31 @@ def stub_notify stub_request(:post, /notifications\.service\.gov\.uk/).to_return(body:) end + def stub_notify_email_delivery_status_delivered(reference) + stub_request(:get, "https://api.notifications.service.gov.uk/v2/notifications?reference=#{reference}&status=delivered&template_type=email").to_return( + body: { + notifications: [ + { + "id": "740e5834-3a29-46b4-9a6f-16142fde533a", + "reference": reference, + "type": "email", + "status": "delivered", + }, + ], + "links": {}, + }.to_json, + ) + end + + def stub_notify_email_delivery_status_delivered_empty(reference) + stub_request(:get, "https://api.notifications.service.gov.uk/v2/notifications?reference=#{reference}&status=delivered&template_type=email").to_return( + body: { + notifications: [], + "links": {}, + }.to_json, + ) + end + def expect_an_email_was_sent(subject: /.*/, address: "test@test.com") request_data = nil diff --git a/spec/support/search_alert_list_helpers.rb b/spec/support/search_alert_list_helpers.rb index 7f006880e..45e840979 100644 --- a/spec/support/search_alert_list_helpers.rb +++ b/spec/support/search_alert_list_helpers.rb @@ -26,7 +26,7 @@ def medical_safety_alert_search_body(content_id:, public_timestamp:) } end - def stub_medical_safety_alert_feed(content_id:, age:) + def stub_medical_safety_alert_query(content_id:, age:) stub_request(:get, "http://search-api.dev.gov.uk/search.json?count=50&fields=content_id,link,public_timestamp&filter_format=medical_safety_alert&order=-public_timestamp") .to_return(status: 200, body: medical_safety_alert_search_body(content_id:, public_timestamp: (Time.zone.now - age).strftime("%Y-%m-%dT%H:%M:%SZ")).to_json, headers: {}) end diff --git a/spec/workers/alert_check_worker_spec.rb b/spec/workers/alert_check_worker_spec.rb deleted file mode 100644 index 088f40c9e..000000000 --- a/spec/workers/alert_check_worker_spec.rb +++ /dev/null @@ -1,71 +0,0 @@ -RSpec.describe AlertCheckWorker do - include SearchAlertListHelpers - - describe "#perform", caching: true do - def perform - described_class.new.perform("medical_safety_alert") - end - - context "there are no alerts older than an hour" do - before { stub_medical_safety_alert_feed(content_id: SecureRandom.uuid, age: 30.minutes) } - - it "should put 0/0 in the cache" do - expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) - expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) - perform - end - end - - context "there are no alerts younger than 2 days" do - before { stub_medical_safety_alert_feed(content_id: SecureRandom.uuid, age: 3.days) } - - it "should put 0/0 in the cache" do - expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) - expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) - perform - end - end - - context "there is a valid alert with delivered emails" do - before do - content_id = SecureRandom.uuid - stub_medical_safety_alert_feed(content_id:, age: 2.hours) - create(:email, content_id:, notify_status: "delivered") - end - - it "should put 1/1 in the cache" do - expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) - expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 1, expires_in: 15.minutes) - perform - end - end - - context "there is a valid alerts with undelivered emails" do - before do - content_id = SecureRandom.uuid - stub_medical_safety_alert_feed(content_id:, age: 2.hours) - create(:email, content_id:, notify_status: nil) - end - - it "should put 1/0 in the cache" do - expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) - expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) - perform - end - end - - context "there is a valid alert with delivered emails that are too old to be valid" do - before do - content_id = SecureRandom.uuid - stub_medical_safety_alert_feed(content_id:, age: 2.hours) - create(:email, content_id:, notify_status: "delivered", created_at: Time.zone.now - 3.hours) - end - - it "should put 1/0 in the cache" do - expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) - expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) - perform - end - end - end -end diff --git a/spec/workers/polling_alert_check_worker_spec.rb b/spec/workers/polling_alert_check_worker_spec.rb new file mode 100644 index 000000000..8853bddb6 --- /dev/null +++ b/spec/workers/polling_alert_check_worker_spec.rb @@ -0,0 +1,98 @@ +RSpec.describe PollingAlertCheckWorker do + include SearchAlertListHelpers + include NotifyRequestHelpers + + describe "#perform", caching: true do + def perform + described_class.new.perform("medical_safety_alert") + end + + context "there are no email alerts older than an hour" do + before { stub_medical_safety_alert_query(content_id: SecureRandom.uuid, age: 30.minutes) } + + it "should put 0/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there are no email alerts younger than 2 days" do + before { stub_medical_safety_alert_query(content_id: SecureRandom.uuid, age: 3.days) } + + it "should put 0/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there is a valid email alert with delivered emails" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_query(content_id:, age: 2.hours) + create(:email, content_id:, notify_status: "delivered", status: "sent") + end + + it "should put 1/1 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 1, expires_in: 15.minutes) + perform + end + end + + context "there is a valid email alert with undelivered emails" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_query(content_id:, age: 2.hours) + email = create(:email, content_id:, notify_status: nil, status: "sent") + stub_notify_email_delivery_status_delivered_empty(email.id) + end + + it "should put 1/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there is a valid email alert with undelivered emails but polling confirms they were actually delivered" do + let(:content_id) { SecureRandom.uuid } + + before do + stub_medical_safety_alert_query(content_id:, age: 2.hours) + email1 = create(:email, content_id:, notify_status: nil, status: "sent") + email2 = create(:email, content_id:, notify_status: nil, status: "sent") + stub_notify_email_delivery_status_delivered_empty(email1.id) + stub_notify_email_delivery_status_delivered(email2.id) + end + + it "should put 1/1 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 1, expires_in: 15.minutes) + perform + end + + it "doesn't poll Notify once it's found a delivered email" do + email3 = create(:email, content_id:, notify_status: nil, status: "sent") + request = stub_notify_email_delivery_status_delivered(email3.id) + perform + expect(request).not_to have_been_made + end + end + + context "there is a valid email alert but the delivered emails are too old to have been caused by it" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_query(content_id:, age: 2.hours) + create(:email, content_id:, notify_status: "delivered", created_at: Time.zone.now - 3.hours, status: "sent") + end + + it "should put 1/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + end +end