Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix curie issue oio branch tests #723

Merged
merged 10 commits into from
Sep 5, 2024
Merged
2 changes: 2 additions & 0 deletions compare_testcase_output_mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Recursively compares the generated testcase output with the expected output
# and stores the brief list of differing files in a log file. The comparison
# treats files as text and ignores blank lines, all whitespace, trailing
# carriage returns and .gitkeep placeholder files.
actual_dir="testcases_output/testcases"
expected_dir="testcases_expected_output/"
report_file="testcases_compare_result.log"

diff -rqaBw --strip-trailing-cr --exclude=.gitkeep "$actual_dir" "$expected_dir" > "$report_file"
33 changes: 33 additions & 0 deletions dataload/configs/idocovid19.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"ontologies": [
{
"id": "idocovid19",
"preferredPrefix": "IDO-COVID-19",
"title": "The COVID-19 Infectious Disease Ontology",
"uri": "https://raw.githubusercontent.com/infectious-disease-ontology-extensions/ido-covid-19/master/ontology/ido%20covid-19",
"description": "The COVID-19 Infectious Disease Ontology (IDO-COVID-19) is an extension of the Infectious Disease Ontology (IDO) and the Virus Infectious Disease Ontology (VIDO). IDO-COVID-19 follows OBO Foundry guidelines, employs the Basic Formal Ontology as its starting point, and covers epidemiology, classification, pathogenesis, and treatment of terms used to represent infection by the SARS-CoV-2 virus strain, and the associated COVID-19 disease.",
"homepage": "https://github.com/infectious-disease-ontology-extensions/ido-covid-19",
"mailing_list": "[email protected]",
"definition_property": [
"http://purl.obolibrary.org/obo/IAO_0000115"
],
"synonym_property": [
"http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"
],
"hierarchical_property": [
"http://purl.obolibrary.org/obo/BFO_0000050"
],
"base_uri": [
"http://purl.obolibrary.org/obo/IDO-COVID-19"
],
"oboSlims": false,
"reasoner": "OWL2",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/9b5245af626bd7687831c19c2c8076e8/raw/2c75495f31df0a379062bf12d3fab323eedbb7a9/idocovid19.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
53 changes: 40 additions & 13 deletions dataload/linker/src/main/java/LinkerPass1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.SetMultimap;
import com.google.common.io.CountingInputStream;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.*;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;

Expand Down Expand Up @@ -168,15 +163,31 @@ public static LinkerPass1Result run(String inputJsonFilename) throws IOException
for(var entry : result.iriToDefinitions.entrySet()) {

EntityDefinitionSet definitions = entry.getValue();

// definingOntologyIris -> definingOntologyIds
for(String ontologyIri : definitions.definingOntologyIris) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
if (result.ontologyIriToOntologyIds.containsKey(ontologyIri)) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
}
}
}

for(EntityDefinition def : definitions.definitions) {
if(def.curie != null && entry.getValue().definingOntologyIds.iterator().hasNext()) {
JsonObject curieObject = def.curie.getAsJsonObject();
if(curieObject.has("value")) {
String curieValue = curieObject.get("value").getAsString();
if(!curieValue.contains(":")) {
var definingOntologyId = entry.getValue().definingOntologyIds.iterator().next();
EntityDefinition definingEntity = entry.getValue().ontologyIdToDefinitions.get(definingOntologyId);
if (definingEntity != null && definingEntity.curie != null) {
curieValue = definingEntity.curie.getAsJsonObject().get("value").getAsString();
curieObject.addProperty("value", curieValue);
result.iriToDefinitions.put(entry.getKey(), definitions);
}
}
}
}
if(definitions.definingOntologyIds.contains(def.ontologyId)) {
def.isDefiningOntology = true;
}
Expand Down Expand Up @@ -235,14 +246,30 @@ public static void parseEntity(JsonReader jsonReader, String entityType, String
curie = jsonParser.parse(jsonReader);
} else if(key.equals("type")) {
types = gson.fromJson(jsonReader, Set.class);
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#definedBy")) {
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#isDefinedBy")) {
JsonElement jsonDefinedBy = jsonParser.parse(jsonReader);
if(jsonDefinedBy.isJsonArray()) {
JsonArray arr = jsonDefinedBy.getAsJsonArray();
for(JsonElement el : arr) {
definedBy.add( el.getAsString() );
for(JsonElement isDefinedBy : arr) {
if (isDefinedBy.isJsonObject()) {
JsonObject obj = isDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
} else
definedBy.add( isDefinedBy.getAsString() );
}
} else {
} else if (jsonDefinedBy.isJsonObject()) {
JsonObject obj = jsonDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
}
else {
definedBy.add(jsonDefinedBy.getAsString());
}
} else {
Expand Down
88 changes: 87 additions & 1 deletion dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
Expand Down Expand Up @@ -151,6 +153,10 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}
Expand Down Expand Up @@ -320,7 +326,7 @@ private static void writeLinkedEntitiesFromGatheredStrings(JsonWriter jsonWriter
private static void writeIriMapping(JsonWriter jsonWriter, EntityDefinitionSet definitions, String ontologyId) throws IOException {

if(definitions.definingDefinitions.size() > 0) {

// There are ontologies which canonically define this term

jsonWriter.name("definedBy");
Expand Down Expand Up @@ -436,4 +442,84 @@ private static class CurieMapResult {
public String source;
}

/**
 * Reads a {@code shortForm} JSON object from {@code jsonReader} and writes a rewritten
 * copy to {@code jsonWriter}.
 *
 * <p>The {@code value} field is replaced by the result of
 * {@code getProcessedCurieValue(pass1Result, entityIri)} with every {@code ":"} turned
 * into {@code "_"}. The {@code type} array is copied through unchanged. Output order is
 * always {@code type} then {@code value}; a field that was absent from the input is not
 * emitted. Any other field in the input object is skipped and not written.
 *
 * @param jsonReader  reader positioned at the start of the short form object
 * @param jsonWriter  writer positioned where the short form object value is expected
 * @param pass1Result pass 1 result used to look up the replacement value
 * @param entityIri   IRI of the entity currently being written
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processShortFormObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String shortFormValue = null;

    jsonReader.beginObject();
    while (jsonReader.hasNext()) {
        String shortFormFieldName = jsonReader.nextName();
        if (shortFormFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.hasNext()) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (shortFormFieldName.equals("value")) {
            // The stored value must be consumed to advance the reader; it is
            // replaced by the value derived from the pass 1 curie.
            jsonReader.nextString();
            shortFormValue = getProcessedCurieValue(pass1Result, entityIri).replace(":", "_");
        } else {
            // Consume the value of an unrecognised field; otherwise the next
            // nextName() call would fail on the unread value token.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified short form object
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (shortFormValue != null) {
        jsonWriter.name("value").value(shortFormValue);
    }
    jsonWriter.endObject();
}

/**
 * Reads a {@code curie} JSON object from {@code jsonReader} and writes a rewritten copy
 * to {@code jsonWriter}.
 *
 * <p>The {@code value} field is replaced by the result of
 * {@code getProcessedCurieValue(pass1Result, entityIri)}. The {@code type} array is
 * copied through unchanged. Output order is always {@code type} then {@code value}; a
 * field that was absent from the input is not emitted. Any other field in the input
 * object is skipped and not written.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer positioned where the curie object value is expected
 * @param pass1Result pass 1 result used to look up the replacement value
 * @param entityIri   IRI of the entity currently being written
 * @throws IOException if reading or writing the JSON stream fails
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String curieValue = null;

    jsonReader.beginObject();
    while (jsonReader.hasNext()) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.hasNext()) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            // The stored value must be consumed to advance the reader; it is
            // replaced by the curie recorded in the pass 1 result.
            jsonReader.nextString();
            curieValue = getProcessedCurieValue(pass1Result, entityIri);
        } else {
            // Consume the value of an unrecognised field; otherwise the next
            // nextName() call would fail on the unread value token.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified curie object
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (curieValue != null) {
        jsonWriter.name("value").value(curieValue);
    }
    jsonWriter.endObject();
}

/**
 * Returns the curie {@code value} of the first definition that pass 1 recorded for
 * {@code entityIri}.
 *
 * <p>Returns an empty string when no usable value is available: the IRI is
 * {@code null} or has no entry in {@code pass1Result.iriToDefinitions}, the entry has
 * no definitions, the first definition has no curie or a curie that is not a JSON
 * object, or that object has no non-null {@code value}.
 *
 * @param pass1Result pass 1 result holding the IRI to definitions mapping
 * @param entityIri   IRI of the entity to look up; may be {@code null}
 * @return the curie value, or {@code ""} if none can be determined
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    if (entityIri == null) {
        return "";
    }
    var def = pass1Result.iriToDefinitions.get(entityIri);
    if (def == null) {
        return "";
    }
    var definitionIterator = def.definitions.iterator();
    if (!definitionIterator.hasNext()) {
        return "";
    }
    // A definition's curie may be null, so it cannot be dereferenced blindly.
    JsonElement curie = definitionIterator.next().curie;
    if (curie == null || !curie.isJsonObject()) {
        return "";
    }
    JsonObject defCurieObject = curie.getAsJsonObject();
    if (defCurieObject.has("value") && !defCurieObject.get("value").isJsonNull()) {
        return defCurieObject.get("value").getAsString();
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public static void annotateShortForms(OntologyGraph graph) {
if(c.uri == null)
continue;

if (preferredPrefix == null || preferredPrefix.isEmpty()) {
preferredPrefix = graph.config.get("id").toString().toUpperCase();
}

String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri);
String curie = shortForm.replaceFirst("_", ":");
Expand Down
69 changes: 69 additions & 0 deletions dev-testing/teststack-mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash

# Argument handling and output directory preparation.
# Usage: teststack-mac.sh <rel_json_config_url> <rel_output_dir>
# Both arguments are required: the first is a JSON config file or a directory
# of configs, the second is the directory that receives the generated output.

if [ $# -lt 2 ]; then
    echo "Usage: $0 <rel_json_config_url> <rel_output_dir>"
    echo "If <rel_json_config_url> is a file it will read and load this single configuration."
    echo "If <rel_json_config_url> as a directory, it will read and load all json configuration in the directory and
subdirectories."
    exit 1
fi

config_url=$1
out_dir=$2

# Create or clean output directory
if [ -d "$out_dir" ]; then
    echo "$out_dir already exists and will now be cleaned."
    # ${out_dir:?} aborts instead of expanding to "/*" should the variable ever be empty.
    rm -Rf "${out_dir:?}"/*
else
    echo "$out_dir does not exist and will now be created."
    mkdir -p "$out_dir"
fi

# Processes a configuration path and writes its output below an output directory.
#   $1  config path: a single .json config file, or a directory of configs
#   $2  output directory under which a sub-directory named after $1 is created
# For a directory, every *.json file and every sub-directory in it is processed
# recursively. For a file, the data files are created and then loaded into Solr
# through the scripts under $OLS4_HOME.
function process_config {
    echo "process_config param1=$1"
    echo "process_config param2=$2"

    local config_url=$1
    local out_dir=$2

    if [ -d "$config_url" ]; then
        echo "$config_url is a directory. Processing config files in $config_url"
        local dir_name
        dir_name=$(basename "$config_url")
        echo "basename for config_url=$dir_name"
        local out_dir_basename="$out_dir/$dir_name"
        mkdir -p "$out_dir_basename"

        local filename
        for filename in "$config_url"/*.json; do
            # An unmatched glob stays a literal pattern; skip it.
            [ -e "$filename" ] || continue
            echo "filename=$filename"
            process_config "$filename" "$out_dir_basename"
        done

        local dir
        for dir in "$config_url"/*/; do
            # An unmatched glob stays a literal pattern; skip it.
            [ -d "$dir" ] || continue
            process_config "$dir" "$out_dir_basename"
        done
    elif [ -f "$config_url" ]; then
        echo "$config_url is a file. Processing single config file."
        local config_name
        config_name=$(basename "$config_url" .json)

        local relative_out_dir="$out_dir/$config_name"
        mkdir -p "$relative_out_dir"

        local absolute_out_dir
        absolute_out_dir=$(realpath -q "$relative_out_dir")
        echo "absolute_out_dir=$absolute_out_dir"

        "$OLS4_HOME/dataload/create_datafiles.sh" "$config_url" "$absolute_out_dir" --noDates

        "$OLS4_HOME/dev-testing/load_test_into_solr.sh" "$absolute_out_dir"
    else
        echo "$config_url does not exist."
    fi
}

# Reset Neo4j and Solr, then start Solr before the configurations are processed.
# Paths and arguments are quoted so that values containing spaces or glob
# characters are passed through intact.
"$OLS4_HOME/dev-testing/clean-neo4j.sh"
"$OLS4_HOME/dev-testing/clean-solr.sh"
"$OLS4_HOME/dev-testing/start-solr.sh"

process_config "$config_url" "$out_dir"

# Load the generated output into Neo4j and start it afterwards.
"$OLS4_HOME/dev-testing/load_test_into_neo4j.sh" "$out_dir"
"$OLS4_HOME/dev-testing/start-neo4j.sh"
5 changes: 5 additions & 0 deletions testcases/annotation-properties/gitIssue502.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
"id": "gitIssue502",
"preferredPrefix": "gitIssue502",
"ontology_purl": "./testcases/annotation-properties/gitIssue502.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -715,3 +715,32 @@
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2000/01/rdf-schema#label","label":"label"}
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2004/02/skos/core#closeMatch","label":"closeMatch"}
{"ontologyId":"gitissue502","id":"gitissue502+property+http://www.w3.org/2004/02/skos/core#exactMatch","label":"exactMatch"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#DbXref","label":"database_cross_reference"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Definition","label":"definition"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#ObsoleteClass","label":"obsolete_class"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Subset","label":"subset"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#Synonym","label":"synonym"}
{"ontologyId":"oio","id":"oio+class+http://www.geneontology.org/formats/oboInOwl#SynonymType","label":"synonym_type"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#ObsoleteProperty","label":"obsolete_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#SubsetProperty","label":"subset_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#SynonymTypeProperty","label":"synonym_type_property"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#consider","label":"consider"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasAlternativeId","label":"has_alternative_id"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym","label":"has_broad_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDate","label":"has_date"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDbXref","label":"has_dbxref"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDefaultNamespace","label":"has_default_namespace"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasDefinition","label":"has_definition"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasExactSynonym","label":"has_exact_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym","label":"has_narrow_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasOBONamespace","label":"has_obo_namespace"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym","label":"has_related_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSubset","label":"has_subset"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSynonym","label":"has_synonym"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasSynonymType","label":"has_synonym_type"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasURI","label":"has_URI"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#hasVersion","label":"has_version"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#inSubset","label":"in_subset"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#isCyclic","label":"is_cyclic"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#replacedBy","label":"replaced_by"}
{"ontologyId":"oio","id":"oio+property+http://www.geneontology.org/formats/oboInOwl#savedBy","label":"saved_by"}
Loading
Loading