From 450ffb99ef47c0b68f6ad99c77a9ec336dcd9696 Mon Sep 17 00:00:00 2001 From: Alex Kiessling <32677188+ajkiessl@users.noreply.github.com> Date: Mon, 31 Oct 2022 12:59:59 -0400 Subject: [PATCH] Add view components for rendering google scholar meta tags for scraping. (#104) * Add view components for rendering google scholar meta tags for scraping. * Adds tests for google scholar meta tags * Add google_scholar_metadata_component_spec.rb * Moved let statement to top level in google_scholar_metadata test --- Gemfile.lock | 3 + .../custom_document_component.html.erb | 3 + ...google_scholar_metadata_component.html.erb | 7 +++ .../google_scholar_metadata_component.rb | 24 ++++++++ .../google_scholar_metadata_component_spec.rb | 61 +++++++++++++++++++ spec/integration/home_page_spec.rb | 7 +++ spec/rails_helper.rb | 3 + 7 files changed, 108 insertions(+) create mode 100644 app/components/google_scholar_metadata_component.html.erb create mode 100644 app/components/google_scholar_metadata_component.rb create mode 100644 spec/component/google_scholar_metadata_component_spec.rb diff --git a/Gemfile.lock b/Gemfile.lock index 334c302..71c7b49 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -236,6 +236,8 @@ GEM rubocop-rspec (~> 2) scss_lint (~> 0.55) nio4r (2.5.8) + nokogiri (1.13.7-arm64-darwin) + racc (~> 1.4) nokogiri (1.13.7-x86_64-darwin) racc (~> 1.4) nokogiri (1.13.7-x86_64-linux) @@ -435,6 +437,7 @@ GEM zeitwerk (2.5.4) PLATFORMS + arm64-darwin-21 x86_64-darwin-20 x86_64-darwin-21 x86_64-linux diff --git a/app/components/custom_document_component.html.erb b/app/components/custom_document_component.html.erb index 8b8790e..4cea85a 100644 --- a/app/components/custom_document_component.html.erb +++ b/app/components/custom_document_component.html.erb @@ -8,6 +8,9 @@ itemtype: @document.itemtype, class: classes.flatten.join(' ') do %> <%= header %> + <% if show? %> + <%= render GoogleScholarMetadataComponent.new(document: @document) %> + <% end %> <% if body.present? %> <%= body %> <% else %> diff --git a/app/components/google_scholar_metadata_component.html.erb b/app/components/google_scholar_metadata_component.html.erb new file mode 100644 index 0000000..925f769 --- /dev/null +++ b/app/components/google_scholar_metadata_component.html.erb @@ -0,0 +1,7 @@ + + + + +<%- if citation_pdf_url %> + +<%- end %> diff --git a/app/components/google_scholar_metadata_component.rb b/app/components/google_scholar_metadata_component.rb new file mode 100644 index 0000000..56aca07 --- /dev/null +++ b/app/components/google_scholar_metadata_component.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class GoogleScholarMetadataComponent < ViewComponent::Base + attr_reader :document + + def initialize(document) + super + @document = document.fetch(:document) + end + + def citation_publication_date + document[:released_metadata_at_dtsi]&.to_date&.year + end + + def citation_pdf_url + return nil unless document.access_level.current_access_level == 'open_access' + + document.final_submissions.each do |key, value| + return final_submission_file_url(key) if File.extname(value) == '.pdf' + end + + nil + end +end diff --git a/spec/component/google_scholar_metadata_component_spec.rb b/spec/component/google_scholar_metadata_component_spec.rb new file mode 100644 index 0000000..4c286d4 --- /dev/null +++ b/spec/component/google_scholar_metadata_component_spec.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe GoogleScholarMetadataComponent, type: :component do + include Rails.application.routes.url_helpers + + subject(:component) { described_class.new(document: doc) } + + let(:doc) { SolrDocument.new(fake_solr_doc) } + let(:html) { render_inline(component) } + + before do + Blacklight.default_index.connection.add(doc) + Blacklight.default_index.connection.commit + end + + context 'with a final submission file pdf' do + let(:fake_solr_doc) { FakeSolrDocument.new(access_level: 'open_access').doc } + + it 'renders all the meta tags' do + expect(html.search('meta[@name="citation_title"]') + .first['content']).to eq(doc[:title_tesi]) + expect(html.search('meta[@name="citation_author"]') + .first['content']).to eq(doc[:author_name_tesi]) + expect(html.search('meta[@name="citation_publication_date"]') + .first['content']).to eq(doc[:released_metadata_at_dtsi].to_date.year.to_s) + expect(html.search('meta[@name="citation_pdf_url"]').first['content']).to eq( + final_submission_file_url(doc.final_submissions.first.first, host: 'http://test.host') + ) + end + end + + context 'when the final submission file is not a pdf' do + let(:fake_solr_doc) { FakeSolrDocument.new(access_level: 'open_access', file_names: ['word_doc.docx']).doc } + + it 'renders all the meta tags' do + expect(html.search('meta[@name="citation_title"]') + .first['content']).to eq(doc[:title_tesi]) + expect(html.search('meta[@name="citation_author"]') + .first['content']).to eq(doc[:author_name_tesi]) + expect(html.search('meta[@name="citation_publication_date"]') + .first['content']).to eq(doc[:released_metadata_at_dtsi].to_date.year.to_s) + expect(html.search('meta[@name="citation_pdf_url"]')).to be_empty + end + end + + context 'when the final submission is not open_access' do + let(:fake_solr_doc) { FakeSolrDocument.new(access_level: 'restricted_to_institution').doc } + + it 'renders all the meta tags' do + expect(html.search('meta[@name="citation_title"]') + .first['content']).to eq(doc[:title_tesi]) + expect(html.search('meta[@name="citation_author"]') + .first['content']).to eq(doc[:author_name_tesi]) + expect(html.search('meta[@name="citation_publication_date"]') + .first['content']).to eq(doc[:released_metadata_at_dtsi].to_date.year.to_s) + expect(html.search('meta[@name="citation_pdf_url"]')).to be_empty + end + end +end diff --git a/spec/integration/home_page_spec.rb b/spec/integration/home_page_spec.rb index f64127b..83eb065 100644 --- a/spec/integration/home_page_spec.rb +++ b/spec/integration/home_page_spec.rb @@ -19,6 +19,13 @@ it 'shows Explore button to search' do expect(page).to have_button 'Explore' end + + it 'does not display Google Scholar meta tags' do + expect(page).not_to have_css 'meta[@name="citation_title"]' + expect(page).not_to have_css 'meta[@name="citation_author"]' + expect(page).not_to have_css 'meta[@name="citation_publication_date"]' + expect(page).not_to have_css 'meta[@name="citation_pdf_url"]' + end end context 'when a user is logged in' do diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index 3663542..6964402 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -10,6 +10,7 @@ require 'capybara/rails' # Add additional requires below this line. Rails is not loaded until this point! require 'devise' +require 'view_component/test_helpers' # Requires supporting ruby files with custom matchers and macros, etc, in # spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are @@ -64,6 +65,8 @@ config.include Devise::Test::ControllerHelpers, type: :controller config.include Devise::Test::IntegrationHelpers, type: :feature + config.include ViewComponent::TestHelpers, type: :component + # Filter lines from Rails gems in backtraces. config.filter_rails_from_backtrace! # arbitrary gems may also be filtered via: