Skip to content

Commit

Permalink
Updated SPARQL transformations
Browse files (browse the repository at this point in the history)
  • Loading branch information
dev-aravind committed Nov 8, 2024
1 parent 33772ee commit c5e64cb
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 19 deletions.
9 changes: 1 addition & 8 deletions src/lib/headless_browser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require 'linkeddata'

module HeadlessBrowser
- def self.fetch_json_ld_objects(entity_urls, base_url, headers)
+ def self.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths)
puts "Loading browser..."
browser = Ferrum::Browser.new(browser_path: "/usr/bin/google-chrome-stable", headless: true, pending_connection_errors: false, process_timeout: 60, xvfb: true, browser_options: { 'no-sandbox': nil })
browser.headers.set(headers)
Expand All @@ -30,13 +30,6 @@ def self.fetch_json_ld_objects(entity_urls, base_url, headers)
puts "Error processing #{entity_url} in headless mode: #{e.message}"
end
end
- sparql_paths = [
- "./sparql/replace_blank_nodes.sparql",
- "./sparql/fix_entity_type_capital.sparql",
- "./sparql/fix_date_timezone.sparql",
- "./sparql/fix_address_country_name.sparql",
- "./sparql/remove_objects.sparql"
- ]

SparqlProcessor.perform_sparql_transformations(graph, sparql_paths, base_url)
graph
Expand Down
10 changes: 1 addition & 9 deletions src/lib/rdf_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require 'sparql'
require_relative 'sparql_processor'
module RDFProcessor
- def self.process_rdf(entity_urls, base_url, headers)
+ def self.process_rdf(entity_urls, base_url, headers, sparql_paths)
graph = RDF::Graph.new
add_url_sparql_file = File.read('./sparql/add_derived_from.sparql')

Expand All @@ -20,14 +20,6 @@ def self.process_rdf(entity_urls, base_url, headers)
end
end

- sparql_paths = [
- "./sparql/replace_blank_nodes.sparql",
- "./sparql/fix_entity_type_capital.sparql",
- "./sparql/fix_date_timezone.sparql",
- "./sparql/fix_address_country_name.sparql",
- "./sparql/remove_objects.sparql"
- ]

SparqlProcessor.perform_sparql_transformations(graph, sparql_paths, base_url)
end

Expand Down
12 changes: 10 additions & 2 deletions src/main.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,21 @@
entity_urls = EntityFetcher.fetch_entity_urls(page_url, entity_identifier, is_paginated, fetch_urls_headlessly, headers)
base_url = page_url.split('/')[0..2].join('/')

+ sparql_paths = [
+ "./sparql/remove_objects.sparql",
+ "./sparql/replace_blank_nodes.sparql",
+ "./sparql/fix_entity_type_capital.sparql",
+ "./sparql/fix_date_timezone.sparql",
+ "./sparql/fix_address_country_name.sparql"
+ ]

if headless == 'true'
- graph = HeadlessBrowser.fetch_json_ld_objects(entity_urls, base_url, headers)
+ graph = HeadlessBrowser.fetch_json_ld_objects(entity_urls, base_url, headers, sparql_paths)
File.open(file_name, 'w') do |file|
file.puts(graph.dump(:jsonld))
end
else
- graph = RDFProcessor.process_rdf(entity_urls, base_url, headers)
+ graph = RDFProcessor.process_rdf(entity_urls, base_url, headers, sparql_paths)
File.open(file_name, 'w') do |file|
file.puts(graph.dump(:jsonld))
end
Expand Down

0 comments on commit c5e64cb

Please sign in to comment.