From ddd5608a372635bdc6daaa0a99aed7c8eb1309b2 Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Tue, 14 Nov 2023 10:18:52 +0000 Subject: [PATCH 1/5] Documentation for Alert Check Scheduled Jobs --- docs/alert_check_scheduled_jobs.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 docs/alert_check_scheduled_jobs.md diff --git a/docs/alert_check_scheduled_jobs.md b/docs/alert_check_scheduled_jobs.md new file mode 100644 index 000000000..9374633c1 --- /dev/null +++ b/docs/alert_check_scheduled_jobs.md @@ -0,0 +1,17 @@ +# Alert Check Scheduled Jobs + +Two sets of alerts (Medical Safety Alerts and Travel Advice Alerts) are critical, so it's important to check that they have gone out correctly. To do this we have a pair of scheduled sidekiq jobs. These jobs use the public search API to find a list of relevant Content IDs, then iterate that list checking for the existence of at least one Email record which has that Content ID and a notify_status of "delivered", and which was created after the public update time . Via the prometheus collector, we return metrics of the number of alerts and the number of alerts that have at least one delivery record. Ideally those number should be the same (0 & 0 is okay, 2 & 2 is okay, but 2 & 1 is an alert state). + +The jobs run every 15 minutes, and checks for alerts that meet the following criteria from the Search API results: +- document type is `medical_safety_alert` or `travel_advice` +- is in the 50 most recently updated documents of that type +- created at least one hour ago (very new alerts are excluded) +- created no more than 2 days ago (older alerts are excluded) + +## Multiple changes to a content item + +It is possible that a given travel advice item or medical alert might be updated more than once in two days, but there's no way to know about that through the Search API (we can only find out the last update to a given page). In this case we can only check that emails were delivered after the last public update. The 1 hour lag before an alert becomes checkable plus the 15 minute update frequency means that if a change happens between 60 and 75 minutes after the previous change the system can only confirms that emails were sent out for the most recent update, and if further proof of the previous update is needed a developer will have to check that manually. If the change happens later than 75 minutes, both changes will have been checked by separate runs of the job. + +## Dependencies + +This system depends on the Search API structure, so radical changes to the search API's results format may cause problems. From ccfb718afa0da855324fabb4a852d44b6431ceb9 Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Wed, 15 Nov 2023 10:00:14 +0000 Subject: [PATCH 2/5] Add prometheus collector - current/delivered medical safety alerts - current/delivered travel advice alerts --- config/initializers/prometheus.rb | 4 +++- lib/collectors/global_prometheus_collector.rb | 23 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 lib/collectors/global_prometheus_collector.rb diff --git a/config/initializers/prometheus.rb b/config/initializers/prometheus.rb index 4b842d841..7c2d42743 100644 --- a/config/initializers/prometheus.rb +++ b/config/initializers/prometheus.rb @@ -1,2 +1,4 @@ require "govuk_app_config/govuk_prometheus_exporter" -GovukPrometheusExporter.configure +require "collectors/global_prometheus_collector" + +GovukPrometheusExporter.configure(collectors: [Collectors::GlobalPrometheusCollector]) diff --git a/lib/collectors/global_prometheus_collector.rb b/lib/collectors/global_prometheus_collector.rb new file mode 100644 index 000000000..cb5aabceb --- /dev/null +++ b/lib/collectors/global_prometheus_collector.rb @@ -0,0 +1,23 @@ +require "prometheus_exporter" +require "prometheus_exporter/server" + +module Collectors + class GlobalPrometheusCollector < PrometheusExporter::Server::TypeCollector + def type + "email_alert_api_global" + end + + def metrics + current_medical_safety_alerts = PrometheusExporter::Metric::Gauge.new("email_alert_api_current_medical_safety_alerts", "Number of medical safety alerts email-alert-api is checking") + current_medical_safety_alerts.observe(Rails.cache.fetch("current_medical_safety_alerts") { 0 }) + delivered_medical_safety_alerts = PrometheusExporter::Metric::Gauge.new("email_alert_api_delivered_medical_safety_alerts", "Number of current medical safety alerts marked as delivered") + delivered_medical_safety_alerts.observe(Rails.cache.fetch("delivered_medical_safety_alerts") { 0 }) + current_travel_advice_alerts = PrometheusExporter::Metric::Gauge.new("email_alert_api_current_travel_advice_alerts", "Number of travel advice alerts email-alert-api is checking") + current_travel_advice_alerts.observe(Rails.cache.fetch("current_travel_advice_alerts") { 0 }) + delivered_travel_advice_alerts = PrometheusExporter::Metric::Gauge.new("email_alert_api_delivered_travel_advice_alerts", "Number of current travel advice alerts marked as delivered") + delivered_travel_advice_alerts.observe(Rails.cache.fetch("delivered_travel_advice_alerts") { 0 }) + + [current_medical_safety_alerts, delivered_medical_safety_alerts, current_travel_advice_alerts, delivered_travel_advice_alerts] + end + end +end From 0a86e2cd1e5fea883bfa0f73a069339d2e562cda Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Thu, 16 Nov 2023 10:19:22 +0000 Subject: [PATCH 3/5] Add SearchAlertlist module and test helpers --- lib/search_alert_list.rb | 18 +++++++++++++ spec/support/search_alert_list_helpers.rb | 33 +++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 lib/search_alert_list.rb create mode 100644 spec/support/search_alert_list_helpers.rb diff --git a/lib/search_alert_list.rb b/lib/search_alert_list.rb new file mode 100644 index 000000000..7229fb09b --- /dev/null +++ b/lib/search_alert_list.rb @@ -0,0 +1,18 @@ +module SearchAlertList + MIN_AGE = 1.hour + MAX_AGE = 2.days + MAX_RESULTS = 50 + + def get_alert_content_items(document_type:) + results = GdsApi.search.search(count: MAX_RESULTS, fields: "content_id,link,public_timestamp", filter_format: document_type, order: "-public_timestamp") + + output = results["results"].map do |result| + publish_time = Time.zone.parse(result["public_timestamp"]) + if publish_time.between?(Time.zone.now - MAX_AGE, Time.zone.now - MIN_AGE) + { content_id: result["content_id"], valid_from: publish_time, url: result["link"] } + end + end + + output.compact + end +end diff --git a/spec/support/search_alert_list_helpers.rb b/spec/support/search_alert_list_helpers.rb new file mode 100644 index 000000000..7f006880e --- /dev/null +++ b/spec/support/search_alert_list_helpers.rb @@ -0,0 +1,33 @@ +module SearchAlertListHelpers + def medical_safety_alert_search_body(content_id:, public_timestamp:) + { + results: [ + { + content_id:, + link: "/drug-device-alerts/test", + public_timestamp:, + index: "govuk", + es_score: nil, + model_score: nil, + original_rank: nil, + combined_score: nil, + _id: "/drug-device-alerts/test", + elasticsearch_type: "medical_safety_alert", + document_type: "medical_safety_alert", + }, + ], + total: 1, + start: 0, + aggregates: {}, + suggested_queries: [], + suggested_autocomplete: [], + es_cluster: "A", + reranked: false, + } + end + + def stub_medical_safety_alert_feed(content_id:, age:) + stub_request(:get, "http://search-api.dev.gov.uk/search.json?count=50&fields=content_id,link,public_timestamp&filter_format=medical_safety_alert&order=-public_timestamp") + .to_return(status: 200, body: medical_safety_alert_search_body(content_id:, public_timestamp: (Time.zone.now - age).strftime("%Y-%m-%dT%H:%M:%SZ")).to_json, headers: {}) + end +end From deeff397b27c08ea8262567d1c4570760c1ed63a Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Thu, 9 Nov 2023 16:39:04 +0000 Subject: [PATCH 4/5] Add AlertCheckWorker and tests --- app/workers/alert_check_worker.rb | 24 +++++++++ spec/workers/alert_check_worker_spec.rb | 71 +++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 app/workers/alert_check_worker.rb create mode 100644 spec/workers/alert_check_worker_spec.rb diff --git a/app/workers/alert_check_worker.rb b/app/workers/alert_check_worker.rb new file mode 100644 index 000000000..8990994cc --- /dev/null +++ b/app/workers/alert_check_worker.rb @@ -0,0 +1,24 @@ +class AlertCheckWorker < ApplicationWorker + include SearchAlertList + + def perform(document_type) + content_items = get_alert_content_items(document_type:) + delivered = 0 + content_items.each do |ci| + if any_emails_delivered_for?(ci[:content_id], ci[:valid_from]) + delivered += 1 + else + Rails.logger.warn("Couldn't find any delivered emails for #{document_type.titleize}s with content id #{ci[:content_id]} (at #{ci[:url]})") + end + end + + Rails.logger.info("Checking #{document_type.titleize}s: #{delivered} out of #{content_items.count} alerts have been delivered to at least one recipient") + + Rails.cache.write("current_#{document_type}s", content_items.count, expires_in: 15.minutes) + Rails.cache.write("delivered_#{document_type}s", delivered, expires_in: 15.minutes) + end + + def any_emails_delivered_for?(content_id, valid_from) + Email.where("notify_status = 'delivered' AND content_id = ? AND created_at > ?", content_id, valid_from).exists? + end +end diff --git a/spec/workers/alert_check_worker_spec.rb b/spec/workers/alert_check_worker_spec.rb new file mode 100644 index 000000000..088f40c9e --- /dev/null +++ b/spec/workers/alert_check_worker_spec.rb @@ -0,0 +1,71 @@ +RSpec.describe AlertCheckWorker do + include SearchAlertListHelpers + + describe "#perform", caching: true do + def perform + described_class.new.perform("medical_safety_alert") + end + + context "there are no alerts older than an hour" do + before { stub_medical_safety_alert_feed(content_id: SecureRandom.uuid, age: 30.minutes) } + + it "should put 0/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there are no alerts younger than 2 days" do + before { stub_medical_safety_alert_feed(content_id: SecureRandom.uuid, age: 3.days) } + + it "should put 0/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 0, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there is a valid alert with delivered emails" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_feed(content_id:, age: 2.hours) + create(:email, content_id:, notify_status: "delivered") + end + + it "should put 1/1 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 1, expires_in: 15.minutes) + perform + end + end + + context "there is a valid alerts with undelivered emails" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_feed(content_id:, age: 2.hours) + create(:email, content_id:, notify_status: nil) + end + + it "should put 1/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + + context "there is a valid alert with delivered emails that are too old to be valid" do + before do + content_id = SecureRandom.uuid + stub_medical_safety_alert_feed(content_id:, age: 2.hours) + create(:email, content_id:, notify_status: "delivered", created_at: Time.zone.now - 3.hours) + end + + it "should put 1/0 in the cache" do + expect(Rails.cache).to receive(:write).with("current_medical_safety_alerts", 1, expires_in: 15.minutes) + expect(Rails.cache).to receive(:write).with("delivered_medical_safety_alerts", 0, expires_in: 15.minutes) + perform + end + end + end +end From fe6335867f288dfdc30b507a826be3ce117fdb59 Mon Sep 17 00:00:00 2001 From: Keith Lawrence Date: Thu, 9 Nov 2023 15:04:50 +0000 Subject: [PATCH 5/5] Add scheduled jobs --- config/sidekiq.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/config/sidekiq.yml b/config/sidekiq.yml index 17bfa1397..4422ea388 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -39,3 +39,11 @@ recover_lost_jobs: every: '30m' class: RecoverLostJobsWorker + check_medical_safety_alerts: + every: '15m' + class: AlertCheckWorker + args: ["medical_safety_alert"] + check_travel_advice_alerts: + every: '15m' + class: AlertCheckWorker + args: ["travel_advice_alert"]