-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1f522ed
commit f1bff7b
Showing
3 changed files
with
111 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,9 @@ jobs: | |
entity-identifier: "article.show a" | ||
token: "${{ secrets.GITHUB_TOKEN }}" | ||
|
||
add-concepts: | ||
add-concepts-and-perform-sparql-transformations: | ||
runs-on: ubuntu-latest | ||
needs: fetch-data | ||
steps: | ||
|
||
- name: Checkout code | ||
|
@@ -30,26 +31,31 @@ jobs: | |
with: | ||
bundler-cache: true | ||
|
||
- name: Run ruby code | ||
- name: Add concepts | ||
run: | | ||
bundle exec ruby main.rb | ||
bundle exec ruby src/add_concepts.rb | ||
- name: Perform SPARQL transformations | ||
run: | | ||
bundle exec ruby src/sparql_transformations.rb | ||
- name: Commit and push changes | ||
run: | | ||
git config --local user.email "[email protected]" | ||
git config --local user.name "GitHub Actions" | ||
git pull | ||
git add "output/grandtheatrequebec-events-with-concept.jsonld" | ||
git add "output/grandtheatrequebec-events-with-concept-and-eventseries.jsonld" | ||
git commit -m "Add data generated by the script" | ||
git push | ||
import-to-artsdata: | ||
runs-on: ubuntu-latest | ||
needs: add-concepts-and-perform-sparql-transformations | ||
steps: | ||
- name: Import data using artsdata pipeline action | ||
uses: culturecreates/artsdata-pipeline-action@v2 | ||
with: | ||
artifact: "derived-grandtheatrequebec-ca" | ||
publisher: "${{ secrets.PUBLISHER_URI_GREGORY }}" | ||
downloadUrl: "https://raw.githubusercontent.com/culturecreates/artsdata-planet-gtq/refs/heads/main/output/grandtheatrequebec-events-with-concept.jsonld" | ||
downloadUrl: "https://raw.githubusercontent.com/culturecreates/artsdata-planet-gtq/refs/heads/main/output/grandtheatrequebec-events-with-concept-and-eventseries.jsonld" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
require 'linkeddata' | ||
require 'nokogiri' | ||
require 'open-uri' | ||
|
||
# Initialize the RDF vocabularies | ||
SCHEMA = RDF::Vocab::SCHEMA | ||
PROV = RDF::Vocab::PROV | ||
SKOS = RDF::Vocab::SKOS | ||
|
||
def get_event_concept_from_web_page(event_page_url) | ||
main_page_html_text = URI.open(event_page_url).read | ||
main_doc = Nokogiri::HTML(main_page_html_text) | ||
event_concept = main_doc.css('div.show-category').first.text.strip | ||
puts "Event concept: #{event_concept}" | ||
event_concept | ||
end | ||
|
||
def fetch_concept_uri_from_concept_graph(event_concept, concept_graph) | ||
object = RDF::Literal.new(event_concept, language: :fr) | ||
concept = concept_graph.query([nil, SKOS.prefLabel, object]).first&.subject | ||
if concept | ||
puts "Concept URI: #{concept}" | ||
concept | ||
else | ||
nil | ||
end | ||
end | ||
|
||
def insert_concept_uri_to_event_graph(event, concept_uri, events_graph) | ||
events_graph.insert([event.subject, SCHEMA.additionalType, concept_uri]) | ||
puts "Event concept added to graph\n\n" | ||
end | ||
|
||
# Load the events graph and the concept graph | ||
events_graph = RDF::Graph.load("output/grandtheatrequebec-events.jsonld") | ||
concept_graph = RDF::Graph.load("gtq-event-type-mapping.ttl") | ||
|
||
events = events_graph.query([nil, RDF.type, SCHEMA.Event]) + | ||
events_graph.query([nil, RDF.type, SCHEMA.EventSeries]) | ||
|
||
puts "Total events found: #{events.count}" | ||
|
||
# For each event, extract the event concept from the event page | ||
events.each do |event| | ||
retry_count = 0 | ||
max_retries = 3 | ||
begin | ||
# Extract the URL of the event page | ||
page_url = events_graph.query([event.subject, PROV.wasDerivedFrom, nil]).first.object | ||
puts "Processing #{page_url}" | ||
|
||
# Extract the event concept from the event page | ||
event_concept = get_event_concept_from_web_page(page_url) | ||
|
||
if event_concept | ||
concept_uri = fetch_concept_uri_from_concept_graph(event_concept, concept_graph) | ||
if concept_uri | ||
insert_concept_uri_to_event_graph(event, concept_uri, events_graph) | ||
else | ||
puts "Concept URI not found in the concept graph" | ||
end | ||
else | ||
puts "No event concept found" | ||
end | ||
rescue StandardError => e | ||
puts "An error occurred while processing #{page_url}: #{e.message}" | ||
retry_count += 1 | ||
if retry_count < max_retries | ||
# Retry after 1 second | ||
puts "Retrying..." | ||
sleep 1 | ||
retry | ||
else | ||
puts "Max retries reached. Skipping..." | ||
end | ||
end | ||
end | ||
|
||
# Save the updated events graph | ||
File.open("output/grandtheatrequebec-events-with-concept.jsonld", 'w') do |file| | ||
file.puts(events_graph.dump(:jsonld)) | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
require 'linkeddata' | ||
|
||
events_graph = RDF::Graph.load("output/grandtheatrequebec-events-with-concept.jsonld") | ||
|
||
sparql_paths = [ | ||
"./sparql/create_eventseries.sparql", | ||
"./sparql/copy_subevent_data_to_eventseries.sparql" | ||
] | ||
|
||
sparql_paths.each do |sparql_path| | ||
puts "Executing #{sparql_path}" | ||
file = File.read(sparql_path) | ||
events_graph.query(SPARQL.parse(file, update: true)) | ||
end | ||
|
||
File.open("output/grandtheatrequebec-events-with-concept-and-eventseries.jsonld", 'w') do |file| | ||
file.puts(events_graph.dump(:jsonld)) | ||
end |