Skip to content

Commit

Permalink
Merge pull request #115 from MITLibraries/tco-90
Browse files Browse the repository at this point in the history
Patch bug around DETECTOR_VERSION managing new record creation
  • Loading branch information
matt-bernhardt authored Oct 4, 2024
2 parents 69b5702 + 2ff7db7 commit dc936a9
Show file tree
Hide file tree
Showing 12 changed files with 97 additions and 75 deletions.
12 changes: 0 additions & 12 deletions app/models/categorization.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@ class Categorization < ApplicationRecord
belongs_to :term
belongs_to :category

# We use the before_create hook to prevent needing to override the initialize method, which Rails frowns upon.
before_create :set_defaults

# These scopes allow for easy filtering of Categorization records by a single parameter.
scope :current, -> { where(detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')) }

private

# This looks up the current Detector Version from the environment, storing the value as part of the record which is
# about to be saved. This prevents the rest of the application from having to worry about this value, while also
# providing a mechanism to prevent duplicate records from being created.
def set_defaults
self.detector_version = ENV.fetch('DETECTOR_VERSION', 'unset')
end
end
12 changes: 0 additions & 12 deletions app/models/detection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ class Detection < ApplicationRecord
belongs_to :term
belongs_to :detector

# We use the before_create hook to prevent needing to override the initialize method, which Rails frowns upon.
before_create :set_defaults

# These scopes allow for easy filtering of Detection records by a single parameter.
scope :current, -> { where(detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')) }
scope :for_detector, ->(detector) { where(detector_id: detector.id) }
Expand All @@ -44,13 +41,4 @@ class Detection < ApplicationRecord
def scores
detector.detector_categories.map { |dc| { dc.category_id => dc.confidence } }
end

private

# This looks up the current Detector Version from the environment, storing the value as part of the record which is
# about to be saved. This prevents the rest of the application from having to worry about this value, while also
# providing a mechanism to prevent duplicate records from being created.
def set_defaults
self.detector_version = ENV.fetch('DETECTOR_VERSION', 'unset')
end
end
3 changes: 2 additions & 1 deletion app/models/detector/journal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def self.record(term)

Detection.find_or_create_by(
term:,
detector: Detector.where(name: 'Journal').first
detector: Detector.where(name: 'Journal').first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)

nil
Expand Down
3 changes: 2 additions & 1 deletion app/models/detector/standard_identifiers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def self.record(term)
si.identifiers.each_key do |k|
Detection.find_or_create_by(
term:,
detector: Detector.where(name: k.to_s.upcase).first
detector: Detector.where(name: k.to_s.upcase).first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)
end

Expand Down
3 changes: 2 additions & 1 deletion app/models/detector/suggested_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def self.record(term)

Detection.find_or_create_by(
term:,
detector: Detector.where(name: 'SuggestedResource').first
detector: Detector.where(name: 'SuggestedResource').first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)

nil
Expand Down
3 changes: 2 additions & 1 deletion app/models/term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def calculate_categorizations
Categorization.find_or_create_by(
term: self,
category: Category.where(id: cat).first,
confidence: calculate_confidence(vals)
confidence: calculate_confidence(vals),
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)
end
end
Expand Down
31 changes: 7 additions & 24 deletions test/models/categorization_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class CategorizationTest < ActiveSupport::TestCase
sample = {
term: terms('hi'),
category: categories('transactional'),
confidence: 0.5
confidence: 0.5,
detector_version: '1'
}

Categorization.create!(sample)
Expand All @@ -47,7 +48,8 @@ class CategorizationTest < ActiveSupport::TestCase
new_sample = {
term: sample.term,
category: sample.category,
confidence: sample.confidence
confidence: sample.confidence,
detector_version: sample.detector_version
}

# A purely duplicate record fails to save...
Expand All @@ -56,29 +58,10 @@ class CategorizationTest < ActiveSupport::TestCase
end

# ...but when we update the DETECTOR_VERSION env, now the same record does save.
new_version = 'updated'

assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

ClimateControl.modify DETECTOR_VERSION: new_version do
Categorization.create!(new_sample)

assert_equal(initial_count + 1, Categorization.count)
end
end

test 'categorizations are assigned the current DETECTOR_VERSION value from env' do
new_categorization = {
term: terms('hi'),
category: categories('transactional'),
confidence: 0.96
}

Categorization.create!(new_categorization)

confirmation = Categorization.last
new_sample[:detector_version] = '2'
Categorization.create!(new_sample)

assert_equal(confirmation.detector_version, ENV.fetch('DETECTOR_VERSION'))
assert_equal(initial_count + 1, Categorization.count)
end

test 'categorization current scope filters on current env value' do
Expand Down
30 changes: 7 additions & 23 deletions test/models/detection_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class DetectionTest < ActiveSupport::TestCase

sample = {
term: terms('hi'),
detector: detectors('doi')
detector: detectors('doi'),
detector_version: '1'
}

Detection.create!(sample)
Expand All @@ -44,7 +45,8 @@ class DetectionTest < ActiveSupport::TestCase

new_sample = {
term: sample.term,
detector: sample.detector
detector: sample.detector,
detector_version: '1'
}

# A purely duplicate record fails to save...
Expand All @@ -53,28 +55,10 @@ class DetectionTest < ActiveSupport::TestCase
end

# ...but when we update the DETECTOR_VERSION env, now the same record does save.
new_version = 'updated'

assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

ClimateControl.modify DETECTOR_VERSION: new_version do
Detection.create!(new_sample)

assert_equal(initial_count + 1, Detection.count)
end
end

test 'detections are assigned the current DETECTOR_VERSION value from env' do
new_detection = {
term: terms('hi'),
detector: detectors('pmid')
}

Detection.create!(new_detection)

confirmation = Detection.last
new_sample[:detector_version] = '2'
Detection.create!(new_sample)

assert_equal(confirmation.detector_version, ENV.fetch('DETECTOR_VERSION'))
assert_equal(initial_count + 1, Detection.count)
end

test 'detector current scope filters on current env value' do
Expand Down
19 changes: 19 additions & 0 deletions test/models/detector/journal_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,24 @@ class JournalTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::Journal.record(terms('journal_nature_medicine'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::Journal.record(terms('journal_nature_medicine'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::Journal.record(terms('journal_nature_medicine'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
19 changes: 19 additions & 0 deletions test/models/detector/standard_identifiers_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,24 @@ class StandardIdentifiersTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
19 changes: 19 additions & 0 deletions test/models/detector/suggested_resource_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,24 @@ class SuggestedResourceTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
18 changes: 18 additions & 0 deletions test/models/term_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,22 @@ class TermTest < ActiveSupport::TestCase

assert_equal(after_count, repeat_count)
end

test 'running calculate_categorizations when DETECTOR_VERSION changes results in new records' do
t = terms('journal_nature_medicine')

t.calculate_categorizations

categorization_count = Categorization.count

t.calculate_categorizations

assert_equal categorization_count, Categorization.count

ClimateControl.modify DETECTOR_VERSION: 'updated' do
t.calculate_categorizations

assert_equal categorization_count + 1, Categorization.count
end
end
end

0 comments on commit dc936a9

Please sign in to comment.