Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patch bug around DETECTOR_VERSION managing new record creation #115

Merged
merged 2 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions app/models/categorization.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,6 @@ class Categorization < ApplicationRecord
belongs_to :term
belongs_to :category

# We use the before_create hook to prevent needing to override the initialize method, which Rails frowns upon.
before_create :set_defaults

# These scopes allow for easy filtering of Categorization records by a single parameter.
scope :current, -> { where(detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')) }

private

# This looks up the current Detector Version from the environment, storing the value as part of the record which is
# about to be saved. This prevents the rest of the application from having to worry about this value, while also
# providing a mechanism to prevent duplicate records from being created.
def set_defaults
self.detector_version = ENV.fetch('DETECTOR_VERSION', 'unset')
end
end
12 changes: 0 additions & 12 deletions app/models/detection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ class Detection < ApplicationRecord
belongs_to :term
belongs_to :detector

# We use the before_create hook to prevent needing to override the initialize method, which Rails frowns upon.
before_create :set_defaults

# These scopes allow for easy filtering of Detection records by a single parameter.
scope :current, -> { where(detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')) }
scope :for_detector, ->(detector) { where(detector_id: detector.id) }
Expand All @@ -44,13 +41,4 @@ class Detection < ApplicationRecord
def scores
detector.detector_categories.map { |dc| { dc.category_id => dc.confidence } }
end

private

# This looks up the current Detector Version from the environment, storing the value as part of the record which is
# about to be saved. This prevents the rest of the application from having to worry about this value, while also
# providing a mechanism to prevent duplicate records from being created.
def set_defaults
self.detector_version = ENV.fetch('DETECTOR_VERSION', 'unset')
end
end
3 changes: 2 additions & 1 deletion app/models/detector/journal.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def self.record(term)

Detection.find_or_create_by(
term:,
detector: Detector.where(name: 'Journal').first
detector: Detector.where(name: 'Journal').first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)

nil
Expand Down
3 changes: 2 additions & 1 deletion app/models/detector/standard_identifiers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def self.record(term)
si.identifiers.each_key do |k|
Detection.find_or_create_by(
term:,
detector: Detector.where(name: k.to_s.upcase).first
detector: Detector.where(name: k.to_s.upcase).first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)
end

Expand Down
3 changes: 2 additions & 1 deletion app/models/detector/suggested_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ def self.record(term)

Detection.find_or_create_by(
term:,
detector: Detector.where(name: 'SuggestedResource').first
detector: Detector.where(name: 'SuggestedResource').first,
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)

nil
Expand Down
3 changes: 2 additions & 1 deletion app/models/term.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ def calculate_categorizations
Categorization.find_or_create_by(
term: self,
category: Category.where(id: cat).first,
confidence: calculate_confidence(vals)
confidence: calculate_confidence(vals),
detector_version: ENV.fetch('DETECTOR_VERSION', 'unset')
)
end
end
Expand Down
31 changes: 7 additions & 24 deletions test/models/categorization_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class CategorizationTest < ActiveSupport::TestCase
sample = {
term: terms('hi'),
category: categories('transactional'),
confidence: 0.5
confidence: 0.5,
detector_version: '1'
}

Categorization.create!(sample)
Expand All @@ -47,7 +48,8 @@ class CategorizationTest < ActiveSupport::TestCase
new_sample = {
term: sample.term,
category: sample.category,
confidence: sample.confidence
confidence: sample.confidence,
detector_version: sample.detector_version
}

# A purely duplicate record fails to save...
Expand All @@ -56,29 +58,10 @@ class CategorizationTest < ActiveSupport::TestCase
end

# ...but when we update the DETECTOR_VERSION env, now the same record does save.
new_version = 'updated'

assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

ClimateControl.modify DETECTOR_VERSION: new_version do
Categorization.create!(new_sample)

assert_equal(initial_count + 1, Categorization.count)
end
end

test 'categorizations are assigned the current DETECTOR_VERSION value from env' do
new_categorization = {
term: terms('hi'),
category: categories('transactional'),
confidence: 0.96
}

Categorization.create!(new_categorization)

confirmation = Categorization.last
new_sample[:detector_version] = '2'
Categorization.create!(new_sample)

assert_equal(confirmation.detector_version, ENV.fetch('DETECTOR_VERSION'))
assert_equal(initial_count + 1, Categorization.count)
end

test 'categorization current scope filters on current env value' do
Expand Down
30 changes: 7 additions & 23 deletions test/models/detection_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ class DetectionTest < ActiveSupport::TestCase

sample = {
term: terms('hi'),
detector: detectors('doi')
detector: detectors('doi'),
detector_version: '1'
}

Detection.create!(sample)
Expand All @@ -44,7 +45,8 @@ class DetectionTest < ActiveSupport::TestCase

new_sample = {
term: sample.term,
detector: sample.detector
detector: sample.detector,
detector_version: '1'
}

# A purely duplicate record fails to save...
Expand All @@ -53,28 +55,10 @@ class DetectionTest < ActiveSupport::TestCase
end

# ...but when we update the DETECTOR_VERSION env, now the same record does save.
new_version = 'updated'

assert_not_equal(ENV.fetch('DETECTOR_VERSION'), new_version)

ClimateControl.modify DETECTOR_VERSION: new_version do
Detection.create!(new_sample)

assert_equal(initial_count + 1, Detection.count)
end
end

test 'detections are assigned the current DETECTOR_VERSION value from env' do
new_detection = {
term: terms('hi'),
detector: detectors('pmid')
}

Detection.create!(new_detection)

confirmation = Detection.last
new_sample[:detector_version] = '2'
Detection.create!(new_sample)

assert_equal(confirmation.detector_version, ENV.fetch('DETECTOR_VERSION'))
assert_equal(initial_count + 1, Detection.count)
end

test 'detector current scope filters on current env value' do
Expand Down
19 changes: 19 additions & 0 deletions test/models/detector/journal_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,24 @@ class JournalTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::Journal.record(terms('journal_nature_medicine'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::Journal.record(terms('journal_nature_medicine'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::Journal.record(terms('journal_nature_medicine'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
19 changes: 19 additions & 0 deletions test/models/detector/standard_identifiers_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,24 @@ class StandardIdentifiersTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::StandardIdentifiers.record(terms('isbn_9781319145446'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
19 changes: 19 additions & 0 deletions test/models/detector/suggested_resource_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,24 @@ class SuggestedResourceTest < ActiveSupport::TestCase

assert_equal(detection_count, Detection.count)
end

test 'record respects changes to the DETECTOR_VERSION value' do
# Create a relevant detection
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

detection_count = Detection.count

# Calling the record method again doesn't do anything, but does not error.
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

assert_equal(detection_count, Detection.count)

# Calling the record method after DETECTOR_VERSION is incremented results in a new Detection
ClimateControl.modify DETECTOR_VERSION: 'updated' do
Detector::SuggestedResource.record(terms('suggested_resource_jstor'))

assert_equal detection_count + 1, Detection.count
end
end
end
end
18 changes: 18 additions & 0 deletions test/models/term_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,22 @@ class TermTest < ActiveSupport::TestCase

assert_equal(after_count, repeat_count)
end

test 'running calculate_categorizations when DETECTOR_VERSION changes results in new records' do
t = terms('journal_nature_medicine')

t.calculate_categorizations

categorization_count = Categorization.count

t.calculate_categorizations

assert_equal categorization_count, Categorization.count

ClimateControl.modify DETECTOR_VERSION: 'updated' do
t.calculate_categorizations

assert_equal categorization_count + 1, Categorization.count
end
end
end