Skip to content

Commit

Permalink
Merge pull request #49 from MITLibraries/tco17-historical-snapshots-a…
Browse files Browse the repository at this point in the history
…ggregations

Tco17 historical snapshots aggregations
  • Loading branch information
JPrevost authored Jul 10, 2024
2 parents a31cd9b + 044b1a0 commit 69fe1f1
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 5 deletions.
72 changes: 72 additions & 0 deletions app/models/metrics/algorithms.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: metrics_algorithms
#
# id :integer not null, primary key
# month :date
# doi :integer
# issn :integer
# isbn :integer
# pmid :integer
# unmatched :integer
# created_at :datetime not null
# updated_at :datetime not null
#
module Metrics
  # Algorithms stores aggregated counts of how many SearchEvents matched each known detection algorithm.
  class Algorithms < ApplicationRecord
    self.table_name = 'metrics_algorithms'

    # Counts matches for the selected SearchEvents and stores the totals as a new record.
    #
    # @note This is expected to be run once per month per type of aggregation (once with no month supplied, once
    #   with a month supplied), ideally at the beginning of the following month so the statistics are as accurate
    #   as possible. Running it later still works, but matching uses the current version of every algorithm, which
    #   may differ from the algorithm in place during the month the SearchEvent occurred.
    # @note Nothing currently prevents this from running more than once per month per type of aggregation.
    # @param month [DateTime, nil] A DateTime within the month to aggregate. When omitted (nil), all SearchEvents
    #   are counted and the record is stored with a nil `month`; this is how total statistics are created.
    # @example
    #   # Generate metrics for all SearchEvents
    #   Metrics::Algorithms.new.generate
    #
    #   # Generate metrics for all SearchEvents last month
    #   Metrics::Algorithms.new.generate(1.month.ago)
    # @return [Metrics::Algorithms] The created Metrics::Algorithms object.
    def generate(month = nil)
      scope = month.present? ? SearchEvent.single_month(month) : SearchEvent.all
      tallies = count_matches(scope.includes(:term))

      Metrics::Algorithms.create(
        month:,
        doi: tallies[:doi],
        issn: tallies[:issn],
        isbn: tallies[:isbn],
        pmid: tallies[:pmid],
        unmatched: tallies[:unmatched]
      )
    end

    # Tallies, per algorithm, how many of the supplied events produced a match.
    #
    # @note Only StandardIdentifiers are matched at present. This method needs to grow as new algorithms (and
    #   therefore new match types) are added.
    # @param events [Enumerable<SearchEvent>] The SearchEvents to inspect; each one's `term.phrase` is checked.
    # @return [Hash] Counts keyed by algorithm name. The hash defaults to 0, so an algorithm that never matched
    #   still reads as 0.
    def count_matches(events)
      algorithm_keys = %i[unmatched pmid isbn issn doi]

      events.each_with_object(Hash.new(0)) do |event, tallies|
        found = StandardIdentifiers.new(event.term.phrase).identifiers

        # An event with no identifiers at all is counted once as unmatched.
        tallies[:unmatched] += 1 if found.blank?

        algorithm_keys.each do |key|
          tallies[key] += 1 if found[key].present?
        end
      end
    end
  end
end
8 changes: 8 additions & 0 deletions app/models/search_event.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,16 @@
# created_at :datetime not null
# updated_at :datetime not null
#

# SearchEvent represents an instance of a logged search Term
class SearchEvent < ApplicationRecord
  belongs_to :term

  validates :source, presence: true

  # :single_month narrows results to the SearchEvents created during one calendar month.
  #
  # @param month [DateTime] Any moment within the month of interest.
  # @return [ActiveRecord::Relation] SearchEvents whose `created_at` falls between the beginning and the end of
  #   the supplied `month`.
  scope :single_month, ->(month) { where(created_at: month.all_month) }
end
2 changes: 1 addition & 1 deletion app/models/standard_identifiers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def term_patterns
def strip_invalid_issns
# Nothing to do when no ISSN candidate was captured.
return unless @identifiers[:issn]

# NOTE(review): the next two statements look like the before/after pair of a diff hunk (assign nil vs. delete
# the key). Confirm that only the `delete(:issn)` form exists in the real file.
@identifiers[:issn] = nil unless validate_issn(@identifiers[:issn])
@identifiers.delete(:issn) unless validate_issn(@identifiers[:issn])
end

# validate_issn is only called when the regex for an ISSN has indicated an ISSN
Expand Down
10 changes: 7 additions & 3 deletions config/environments/development.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
require "active_support/core_ext/integer/time"
require 'active_support/core_ext/integer/time'

Rails.application.configure do
# Settings specified here will take precedence over those in config/application.rb.
Expand All @@ -19,13 +19,13 @@

# Enable/disable caching. By default caching is disabled.
# Run rails dev:cache to toggle caching.
if Rails.root.join("tmp/caching-dev.txt").exist?
if Rails.root.join('tmp/caching-dev.txt').exist?
config.action_controller.perform_caching = true
config.action_controller.enable_fragment_cache_logging = true

config.cache_store = :memory_store
config.public_file_server.headers = {
"Cache-Control" => "public, max-age=#{2.days.to_i}"
'Cache-Control' => "public, max-age=#{2.days.to_i}"
}
else
config.action_controller.perform_caching = false
Expand Down Expand Up @@ -73,4 +73,8 @@

# Raise error when a before_action's only/except options reference missing actions
config.action_controller.raise_on_missing_callback_actions = true

# Local logging overrides
config.logger = Logger.new(STDOUT)
config.log_level = :debug
end
13 changes: 13 additions & 0 deletions db/migrate/20240621132136_create_metrics_algorithms.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Creates the metrics_algorithms table: one date column, one integer counter per match type, and timestamps.
class CreateMetricsAlgorithms < ActiveRecord::Migration[7.1]
  def change
    create_table :metrics_algorithms do |table|
      table.date :month
      # One integer column per counter, declared in the same order as before.
      table.integer :doi, :issn, :isbn, :pmid, :unmatched
      table.timestamps
    end
  end
end
13 changes: 12 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions test/fixtures/search_events.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,19 @@ timdex_cool:
bento_hi:
term: hi
source: bento
current_month_pmid:
term: pmid_38908367
source: test
old_month_pmid:
term: pmid_38908367
source: test
created_at: <%= 1.year.ago %>
current_month_issn:
term: issn_1075_8623
source: test
current_month_doi:
term: doi
source: test
current_month_isbn:
term: isbn_9781319145446
source: test
12 changes: 12 additions & 0 deletions test/fixtures/terms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,15 @@ cool:

hi:
phrase: hello world

pmid_38908367:
phrase: 'TERT activation targets DNA methylation and multiple aging hallmarks. Shim HS, et al. Cell. 2024. PMID: 38908367'

issn_1075_8623:
phrase: 1075-8623

doi:
phrase: '10.1016/j.physio.2010.12.004'

isbn_9781319145446:
phrase: 'Sadava, D. E., D. M. Hillis, et al. Life: The Science of Biology. 11th ed. W. H. Freeman, 2016. ISBN: 9781319145446'
147 changes: 147 additions & 0 deletions test/models/metrics/algorithms_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: metrics_algorithms
#
# id :integer not null, primary key
# month :date
# doi :integer
# issn :integer
# isbn :integer
# pmid :integer
# unmatched :integer
# created_at :datetime not null
# updated_at :datetime not null
#
require 'test_helper'

# Exercises Metrics::Algorithms#generate for both monthly and total aggregations.
class Algorithms < ActiveSupport::TestCase
  # Monthlies
  test 'dois counts are included in monthly aggregation' do
    aggregate = Metrics::Algorithms.new.generate(DateTime.now)
    assert_equal 1, aggregate.doi
  end

  test 'issns counts are included in monthly aggregation' do
    aggregate = Metrics::Algorithms.new.generate(DateTime.now)
    assert_equal 1, aggregate.issn
  end

  test 'isbns counts are included in monthly aggregation' do
    aggregate = Metrics::Algorithms.new.generate(DateTime.now)
    assert_equal 1, aggregate.isbn
  end

  test 'pmids counts are included in monthly aggregation' do
    aggregate = Metrics::Algorithms.new.generate(DateTime.now)
    assert_equal 1, aggregate.pmid
  end

  test 'unmatched counts are included are included in monthly aggregation' do
    aggregate = Metrics::Algorithms.new.generate(DateTime.now)
    assert_equal 2, aggregate.unmatched
  end

  test 'creating lots of searchevents leads to correct data for monthly' do
    expected = seed_random_search_events

    aggregate = Metrics::Algorithms.new.generate(DateTime.now)

    assert_aggregate_counts(expected, aggregate)
  end

  # Total
  test 'dois counts are included in total aggregation' do
    aggregate = Metrics::Algorithms.new.generate
    assert_equal 1, aggregate.doi
  end

  test 'issns counts are included in total aggregation' do
    aggregate = Metrics::Algorithms.new.generate
    assert_equal 1, aggregate.issn
  end

  test 'isbns counts are included in total aggregation' do
    aggregate = Metrics::Algorithms.new.generate
    assert_equal 1, aggregate.isbn
  end

  test 'pmids counts are included in total aggregation' do
    aggregate = Metrics::Algorithms.new.generate
    assert_equal 2, aggregate.pmid
  end

  test 'unmatched counts are included are included in total aggregation' do
    aggregate = Metrics::Algorithms.new.generate
    assert_equal 2, aggregate.unmatched
  end

  test 'creating lots of searchevents leads to correct data for total' do
    expected = seed_random_search_events

    aggregate = Metrics::Algorithms.new.generate

    assert_aggregate_counts(expected, aggregate)
  end

  private

  # Replaces every SearchEvent with a random number (1 to 99) of events per match type.
  #
  # Existing SearchEvents are dropped first to make the math easier and to minimize fragility over time as more
  # fixtures are created.
  #
  # @return [Hash{Symbol => Integer}] The number of events created, keyed by aggregate column name.
  def seed_random_search_events
    SearchEvent.delete_all

    # Maps each aggregate column to the Term fixture expected to be counted under that column.
    term_fixture_for = {
      doi: :doi,
      issn: :issn_1075_8623,
      isbn: :isbn_9781319145446,
      pmid: :pmid_38908367,
      unmatched: :hi
    }

    term_fixture_for.transform_values do |fixture_name|
      count = rand(1...100)
      count.times { SearchEvent.create(term: terms(fixture_name), source: 'test') }
      count
    end
  end

  # Asserts that every expected count equals the matching column on the aggregate.
  #
  # @param expected [Hash{Symbol => Integer}] Expected counts keyed by column name.
  # @param aggregate [Metrics::Algorithms] The record returned by `generate`.
  def assert_aggregate_counts(expected, aggregate)
    expected.each do |column, count|
      assert_equal count, aggregate.public_send(column), "unexpected #{column} count"
    end
  end
end
10 changes: 10 additions & 0 deletions test/models/search_event_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,14 @@ class SearchEventTest < ActiveSupport::TestCase
s.source = nil
refute(s.valid?)
end

test 'monthly scope returns requested month of SearchEvents' do
  event = search_events(:current_month_pmid)

  # Confirm the fixture exists at all before checking that the scope keeps it.
  assert_includes SearchEvent.all, event
  assert_includes SearchEvent.single_month(Time.now), event
end

test 'monthly scope does not return SearchEvents outside the requested month' do
  event = search_events(:old_month_pmid)

  # Confirm the fixture exists at all, so its absence from the scope is meaningful.
  assert_includes SearchEvent.all, event
  refute_includes SearchEvent.single_month(Time.now), event
end
end
8 changes: 8 additions & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ class TestCase
# Run tests in parallel with specified workers
parallelize(workers: :number_of_processors)

# Give each parallel worker its own SimpleCov command name by appending the worker identifier.
parallelize_setup do |worker|
  SimpleCov.command_name "#{SimpleCov.command_name}-#{worker}"
end

# Calls SimpleCov.result as each worker shuts down.
# NOTE(review): presumably this makes each worker record its coverage so results can be merged; confirm
# against the SimpleCov documentation.
parallelize_teardown do |_worker|
  SimpleCov.result
end

# Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order.
fixtures :all

Expand Down

0 comments on commit 69fe1f1

Please sign in to comment.