diff --git a/README.md b/README.md index ff5b4b8..86914b7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ -# RF2 to OWL +# SNOMED CT RF2 and OWL Utilities -This project uses the OWLApi to create an OWL ontology, in RDF/XML format, based on the content of standard SNOMED CT RF2 files. +This project uses the OWLApi to perform different transformations of SNOMED CT representations. + +* Create an OWL Ontology file based on the RF2 release of SNOMED CT (Snapshot) +* Create an RF2 OWL Reference Set from an OWL Ontology file +* Create an OWL Ontology file from an RF2 OWL Reference Set file ## Build instructions @@ -16,18 +20,56 @@ Binaries are also available in the releases section of the [github repository](h Run the executable from the command line, for example: -`java -jar owl-test-x.x.x-SNAPSHOT-jar-with-dependencies.jar /folder/sct2_Concept_Snapshot_INT_20150131.txt /folder/sct2_StatedRelationship_Snapshot_INT_20150131.txt /folder/sct2_Description_Snapshot_INT_20150131.txt /folder/der2_cRefset_LanguageSnapshot-en_INT_20150131 /folder/outputFile.owl http://snomed.org/ TRUE` - -Parameters (in this order): - * Path to RF2 Concepts Snapshot file - * Path to RF2 Stated Relationships Snapshot file - * Path to RF2 Stated Descriptons Snapshot file - * Path to RF2 Stated Language Refset Snapshot file - * Path to output file (it will be created or replaced by this process) - * IRI for the resulting ontology - * Boolean value to enable transformation of to concrete domains (Data Properties) ("TRUE"/"FALSE") - -## About the conversion +`java -jar rf2-to-owl-x.x.x-SNAPSHOT-jar-with-dependencies.jar -help` + +Arguments help: +``` +usage: rf2-to-owl + -cd Convert concepts to concrete domains + -cf Concepts file + -df Descriptions file + -help Prints help + -iri IRI for Owl Generation + -lf language refset file + -mode conversion mode, expected values: rf2-to-owl, + owl-to-refset, refset-to-owl + -of Owl file + -orf Owl Refset file + -output Output file + -rf Relationships 
file + -rf2Folder RF2 Folder + -syntax OWL Syntax, expected values: owlxml, functional, + manchester + -tf Text definition file +``` + +## RF2 Snapshot to OWL file conversion + +Arguments required for this mode: + +``` +java -jar rf2-to-owl-x.x.x-SNAPSHOT-jar-with-dependencies.jar -mode rf2-to-owl -rf2Folder /x/y/z/snapshot -output ontology.owl +``` + +* *-mode*: sets the mode to "rf2-to-owl" +* *-rf2Folder*: sets the folder of a valid SNOMED CT Snapshot release, the +process will automatically find the necessary files inside that folder +* *-output*: the output file where the ontology will be generated + +Optional arguments: + +* -syntax: by default the process generates the ontology in OWL-XML syntax, but +this can be changed to Manchester or Functional syntax by passing this argument, +acceptable values are: owlxml, functional, manchester +* -cd: if this argument is present, the process will convert selected attributes that +are represented using Object Properties in RF2 to Data Properties (concrete domains), +parsing the values from the Fully specified names of the target concepts. The details +of this conversion are described later in this document. + +If the RF2 files are not part of a standard release package, paths for the individual files +can be passed using the arguments described in the arguments help. 
+ +### Output The process generates the following OWL artifacts in the ontology: @@ -41,7 +83,7 @@ Parameters (in this order): * *Has dose form* * A sub-property chain is created between *Direct substance* and *Has active ingredient* -## Concrete domains conversion +### Concrete domains conversion Since the July 2017 release of the international edition, some relationships that specify numerical values are represented using concepts for the example: @@ -58,4 +100,28 @@ Parameters (in this order): * 732944001 | Has presentation strength numerator value (attribute) | - DataType: float * 732947008 | Has presentation strength denominator unit (attribute) | - DataType: float + + ## OWL Ontology file to RF2 OWL Refset file conversion + + Arguments required for this mode: + + ``` + java -jar rf2-to-owl-x.x.x-SNAPSHOT-jar-with-dependencies.jar -mode owl-to-refset -of ontology.owl -output owlRefset.txt + ``` + + * *-mode*: sets the mode to "owl-to-refset" + * *-of*: path to the ontology file + * *-output*: the output file where the refset will be generated + + ## RF2 OWL Refset file to OWL Ontology file conversion + + Arguments required for this mode: + + ``` + java -jar rf2-to-owl-x.x.x-SNAPSHOT-jar-with-dependencies.jar -mode refset-to-owl -orf owlRefset.txt -output ontology.owl + ``` + + * *-mode*: sets the mode to "refset-to-owl" + * *-orf*: path to the OWL Refset file + * *-output*: the output file where the ontology will be generated diff --git a/pom.xml b/pom.xml index 8a86783..951f3b8 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 com.termmed rf2-to-owl - 0.0.3-SNAPSHOT + 0.0.4-SNAPSHOT net.sourceforge.owlapi @@ -20,7 +20,11 @@ slf4j-log4j12 1.7.5 - + + commons-cli + commons-cli + 1.4 + commons-configuration commons-configuration diff --git a/src/com/termmed/owl/DirectRunner.java b/src/com/termmed/owl/DirectRunner.java index 8b9badc..10eb583 100644 --- a/src/com/termmed/owl/DirectRunner.java +++ b/src/com/termmed/owl/DirectRunner.java @@ -37,14 +37,11 @@ 
public static void main(String[] args) throws Exception { String outputFile = "/Users/alo/Downloads/conceptsOwlComplete-cd-alo.xml"; String descriptionFile = "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20170731/Snapshot/Terminology/sct2_Description_Snapshot-en_INT_20170731.txt"; String languageFile = "/Users/alo/Downloads/SnomedCT_RF2Release_INT_20170731/Snapshot/Refset/Language/der2_cRefset_LanguageSnapshot-en_INT_20170731.txt"; -// String textDefinitionFile = "/Users/ar/Downloads/SnomedCT_RF2Release_INT_201601731/Snapshot/Terminology/xsct2_TextDefinition_Snapshot-en_INT_20160131.txt"; String textDefinitionFile = null; String iri = "http://snomed.info/id/"; RF2Parser parser = new RF2Parser(conceptFile, relationshipFile, - descriptionFile,textDefinitionFile,languageFile, outputFile,iri, true, true); -// RF2Parser parser = new RF2Parser(conceptFile, relationshipFile, -// null,null,null, outputFile,iri); + descriptionFile,textDefinitionFile,languageFile, outputFile,iri, true, "owlxml"); parser.parse(); System.out.println("Done! 
The process has generated a new OWL Ontology file: " + outputFile); } diff --git a/src/com/termmed/owl/RF2Parser.java b/src/com/termmed/owl/RF2Parser.java index 92428c2..f1ff0bf 100644 --- a/src/com/termmed/owl/RF2Parser.java +++ b/src/com/termmed/owl/RF2Parser.java @@ -33,25 +33,13 @@ import java.util.Map; import java.util.Set; +import org.semanticweb.owlapi.apibinding.OWLFunctionalSyntaxFactory; import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.formats.ManchesterSyntaxDocumentFormat; import org.semanticweb.owlapi.formats.OWLXMLDocumentFormat; -import org.semanticweb.owlapi.model.IRI; -import org.semanticweb.owlapi.model.OWLAnnotation; -import org.semanticweb.owlapi.model.OWLAnnotationAssertionAxiom; -import org.semanticweb.owlapi.model.OWLAnnotationProperty; -import org.semanticweb.owlapi.model.OWLClass; -import org.semanticweb.owlapi.model.OWLClassExpression; -import org.semanticweb.owlapi.model.OWLDataFactory; -import org.semanticweb.owlapi.model.OWLDataProperty; -import org.semanticweb.owlapi.model.OWLDeclarationAxiom; -import org.semanticweb.owlapi.model.OWLObjectIntersectionOf; -import org.semanticweb.owlapi.model.OWLObjectProperty; -import org.semanticweb.owlapi.model.OWLOntology; -import org.semanticweb.owlapi.model.OWLOntologyCreationException; -import org.semanticweb.owlapi.model.OWLOntologyManager; -import org.semanticweb.owlapi.model.OWLOntologyStorageException; -import org.semanticweb.owlapi.model.OWLSubObjectPropertyOfAxiom; -import org.semanticweb.owlapi.model.OWLSubPropertyChainOfAxiom; +import org.semanticweb.owlapi.formats.OWLXMLDocumentFormatFactory; +import org.semanticweb.owlapi.functional.renderer.OWLFunctionalSyntaxRenderer; +import org.semanticweb.owlapi.model.*; import org.semanticweb.owlapi.util.DefaultPrefixManager; import org.semanticweb.owlapi.vocab.OWL2Datatype; import org.semanticweb.owlapi.vocab.OWLRDFVocabulary; @@ -161,7 +149,7 @@ public class RF2Parser { private Boolean useConcreteDomains; - 
private Boolean generateOwlRefset; + private String owlSyntax; /** * Instantiates a new r f2 parser. @@ -174,11 +162,11 @@ public class RF2Parser { * @param outputFile the output file * @param iri the iri * @param useConcreteDomains whether to use concrete domains - * @param generateOwlRefset whether to generate Owl Refset + * @param owlSyntax OWL Syntax */ public RF2Parser(String conceptFile, String relationshipFile, String descriptionFile, String textDefinitionFile,String languageFile, - String outputFile, String iri, Boolean useConcreteDomains, boolean generateOwlRefset) { + String outputFile, String iri, Boolean useConcreteDomains, String owlSyntax) { super(); this.conceptFile = conceptFile; this.relationshipFile = relationshipFile; @@ -186,7 +174,7 @@ public RF2Parser(String conceptFile, String relationshipFile, String description this.textDefinitionFile=textDefinitionFile; this.languageFile=languageFile; this.useConcreteDomains=useConcreteDomains; - this.generateOwlRefset = generateOwlRefset; + this.owlSyntax = owlSyntax; this.outputFile = outputFile; this.prefix=iri; @@ -204,7 +192,7 @@ public RF2Parser(String conceptFile, String relationshipFile, String description } public RF2Parser(String inputFolder, - String outputFile, String iri, Boolean useConcreteDomains, boolean generateOwlRefset) { + String outputFile, String iri, Boolean useConcreteDomains, String owlSyntax) { super(); try { this.conceptFile = FileHelper.getFile( new File(inputFolder), "rf2-concepts",null,null,null); @@ -213,7 +201,7 @@ public RF2Parser(String inputFolder, this.relationshipFile = FileHelper.getFile( new File(inputFolder), "rf2-relationships",null,"stated",null); this.textDefinitionFile=FileHelper.getFile( new File(inputFolder), "rf2-textDefinition",null,null,null); this.useConcreteDomains=useConcreteDomains; - this.generateOwlRefset = generateOwlRefset; + this.owlSyntax = owlSyntax; this.outputFile = outputFile; this.prefix=iri; @@ -309,10 +297,6 @@ public void parse()throws 
OWLOntologyCreationException, } } - File f = new File(outputFile); - IRI documentIRI = IRI.create(f); - - // Set axioms = ont.getAxioms(); // HashSet axClasses = new HashSet(); // for (OWLAxiom axiom:axioms){ @@ -338,15 +322,34 @@ public void parse()throws OWLOntologyCreationException, // manager.saveOntology(ont, manSyntDocFormat,documentIRI); // Manchester sintax process********************************* - - manager.saveOntology(ont, new OWLXMLDocumentFormat(), documentIRI); - if (this.generateOwlRefset) { - RF2OwlRefsetRenderer refsetRenderer = new RF2OwlRefsetRenderer(); - PrintWriter refsetWriter = new PrintWriter("owlRefset.txt", "UTF-8"); - refsetRenderer.render(ont, refsetWriter); - refsetWriter.close(); + File f = new File(outputFile); + IRI documentIRI = IRI.create(f); + switch (owlSyntax) { + case "manchester": + OWLDocumentFormat format = manager.getOntologyFormat(ont); + ManchesterSyntaxDocumentFormat manSyntDocFormat=new ManchesterSyntaxDocumentFormat(); + format.isPrefixOWLDocumentFormat(); + if(format.isPrefixOWLDocumentFormat()) { + manSyntDocFormat.copyPrefixesFrom(format.asPrefixOWLDocumentFormat()); + } + manager.setOntologyFormat(ont, manSyntDocFormat); + manager.saveOntology(ont, manSyntDocFormat,documentIRI); + break; + case "owlxml": + manager.saveOntology(ont, new OWLXMLDocumentFormat(), documentIRI); + break; + case "functional": + PrintWriter writer2 = new PrintWriter(outputFile, "UTF-8"); + OWLFunctionalSyntaxRenderer fr = new OWLFunctionalSyntaxRenderer(); + fr.render(ont,writer2); + writer2.close(); + break; + default: + throw new IllegalArgumentException("Syntax not supported: " + owlSyntax); } manager.removeOntology(ont); + System.out.println(""); + System.out.println("OWL Ontology saved in " + f.getName()); System.gc(); } @@ -460,7 +463,8 @@ private void loadDescriptionsFile(String description, String textDefinition, Str OWLDatatypeImpl dtt=new OWLDatatypeImpl(OWL2Datatype.RDF_PLAIN_LITERAL.getIRI()); OWLAnnotationProperty propA ; if 
( spl[6].equals(TEXT_DEFINITION_TYPE)){ - propA = factory.getOWLAnnotationProperty("sctf:",pm); + //propA = factory.getOWLAnnotationProperty("sctf:",pm); + propA = factory.getOWLAnnotationProperty(OWLRDFVocabulary.RDFS_COMMENT.getIRI()); }else{ continue; } diff --git a/src/com/termmed/owl/RefsetToOWLRenderer.java b/src/com/termmed/owl/RefsetToOWLRenderer.java new file mode 100644 index 0000000..ee4a9e6 --- /dev/null +++ b/src/com/termmed/owl/RefsetToOWLRenderer.java @@ -0,0 +1,59 @@ +/* + * + * * Copyright (C) 2014 termMed IT + * * www.termmed.com + * * + * * Licensed under the Apache License, Version 2.0 (the "License"); + * * you may not use this file except in compliance with the License. + * * You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ + +package com.termmed.owl; + +import java.io.*; + +/** + * Created by alo on 4/18/17. 
+ */ +public class RefsetToOWLRenderer { + + public RefsetToOWLRenderer() { + } + + public void render(File owlRefsetFile, Writer writer) throws IOException { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(owlRefsetFile), "UTF8")); + br.readLine(); + String line; + String[] spl; + System.out.println("Converting refset from " + owlRefsetFile.getName()); + int count = 0; + boolean headerWritten = false; + while ((line=br.readLine())!=null) { + count++; + if (count ==1 || count % 10000 == 0) { + System.out.print("."); + } + spl = line.split("\t", -1); + if (spl.length == 7) { + String axiom = spl[6]; + if (!headerWritten && !axiom.startsWith("Prefix")) { + writer.write("\n"); + writer.write("Ontology(\n"); + writer.write("\n"); + headerWritten = true; + } + writer.write(axiom + "\n"); + } + } + writer.write(")"); + } +} diff --git a/src/com/termmed/owl/Runner.java b/src/com/termmed/owl/Runner.java index 1b51415..8e369c3 100644 --- a/src/com/termmed/owl/Runner.java +++ b/src/com/termmed/owl/Runner.java @@ -19,30 +19,123 @@ package com.termmed.owl; +import org.apache.commons.cli.*; +import org.apache.commons.lang.IncompleteArgumentException; +import org.semanticweb.owlapi.apibinding.OWLManager; +import org.semanticweb.owlapi.model.OWLOntology; +import org.semanticweb.owlapi.model.OWLOntologyManager; + +import java.io.File; +import java.io.PrintWriter; + /** * Created by alo on 4/6/16. 
*/ public class Runner { public static void main(String[] args) throws Exception { - if (args.length != 7) - throw new IllegalArgumentException("Arguments error, required:\n - " + - "conceptsFile\n - relationshipsFile\n - descriptionFile\n - " + - "textDefinitionFile\n - languageFile\n - outputFile\n -" + - "iri\n - useConcreteDomains"); + CommandLineParser parser = new DefaultParser(); + Options options = new Options(); + options.addOption("mode", true, "conversion mode, " + + "expected values: rf2-to-owl, owl-to-refset, refset-to-owl"); + options.addOption("rf2Folder", true, "RF2 Folder"); + options.addOption("cf", true, "Concepts file"); + options.addOption("df", true, "Descriptions file"); + options.addOption("rf", true, "Relationships file"); + options.addOption("lf", true, "language refset file"); + options.addOption("tf", true, "Text definition file"); + options.addOption("of", true, "Owl file"); + options.addOption("orf", true, "Owl Refset file"); + options.addOption("iri", true, "IRI for Owl Generation"); + options.addOption("syntax", true, "OWL Syntax, expected values: owlxml, functional, manchester"); + options.addOption("output", true, "Output file"); + options.addOption("cd", false, "Convert concepts to concrete domains"); + options.addOption("help", false, "Prints help"); - String conceptFile = args[0]; - String relationshipFile = args[1]; - String descriptionFile = args[2]; - //String textDefinitionFile = args[3]; - String languageFile = args[4]; - String outputFile = args[5]; - String iri = args[6]; - Boolean useConcreteDomains = (args[7].equals("TRUE")); + CommandLine cmd = parser.parse( options, args); + if (cmd.hasOption("help")) { + // automatically generate the help statement + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp( "rf2-to-owl", options ); + } else { + if (!cmd.hasOption("mode") || !cmd.hasOption("output")) { + throw new MissingArgumentException("-output and -mode arguments are mandatory," + + " expected values 
for mode: rf2-to-owl, owl-to-refset, refset-to-owl"); + } else { + String mode = cmd.getOptionValue("mode"); + String output = cmd.getOptionValue("output"); + boolean cd = cmd.hasOption("cd"); + String syntax = "owlxml"; + if (cmd.hasOption("syntax")) { + syntax = cmd.getOptionValue("syntax"); + }; + String iri = "http://snomed.info/id/"; + if (cmd.hasOption("iri")) { + iri = cmd.getOptionValue("iri"); + } + switch (mode) { + case "rf2-to-owl": + if (cmd.hasOption("rf2Folder") ) { - RF2Parser parser = new RF2Parser(conceptFile, relationshipFile, - descriptionFile,null,languageFile, outputFile,iri, useConcreteDomains, false); - //RF2Parser parser = new RF2Parser(conceptFile, relationshipFile, outputFile, iri); - //parser.parse(); - System.out.println("Done! The process has generated a new OWL Ontology file: " + outputFile); + String rf2Folder = cmd.getOptionValue("rf2Folder"); + RF2Parser rf2parser = new RF2Parser(rf2Folder, output,iri, cd, syntax); + rf2parser.parse(); + } else if (cmd.hasOption("cf") && cmd.hasOption("df") && + cmd.hasOption("rf") && cmd.hasOption("lf")) { + String cf = cmd.getOptionValue("cf"); + String df = cmd.getOptionValue("df"); + String lf = cmd.getOptionValue("lf"); + String rf = cmd.getOptionValue("rf"); + String tf = cmd.getOptionValue("tf"); + RF2Parser rf2parser = new RF2Parser(cf, rf, + df,tf,lf, output,iri, cd, syntax); + rf2parser.parse(); + } else { + throw new MissingArgumentException("Missing arguments, either the rf2Folder or " + + "the set of RF2 files arguments are required (cf,df,rf,lf)"); + } + break; + case "owl-to-refset": + if (cmd.hasOption("of") ) { + String of = cmd.getOptionValue("of"); + OWLFunctionalSyntaxRefsetRenderer fr = new OWLFunctionalSyntaxRefsetRenderer(); + long startTime = System.currentTimeMillis(); + OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); + File ontologyFile = new File(of); + System.out.println("Loading ontology from: " + ontologyFile.getName()); + OWLOntology ontology = 
manager.loadOntologyFromOntologyDocument(ontologyFile); + System.out.println("Ontology loaded in: " + (System.currentTimeMillis() - startTime) + " ms."); + startTime = System.currentTimeMillis(); + PrintWriter writer2 = new PrintWriter(output, "UTF-8"); + fr.render(ontology,writer2); + writer2.close(); + System.out.println(""); + System.out.println("OWL Refset created in: " + (System.currentTimeMillis() - startTime) + + " ms. (" + output + ")"); + } else { + throw new MissingArgumentException("Missing arguments, -of with path to Owl file is required"); + } + break; + case "refset-to-owl": + if (cmd.hasOption("orf") ) { + long startTime = System.currentTimeMillis(); + String orf = cmd.getOptionValue("orf"); + File owlRefsetFile = new File(orf); + RefsetToOWLRenderer rfor = new RefsetToOWLRenderer(); + PrintWriter writer2 = new PrintWriter(output, "UTF-8"); + rfor.render(owlRefsetFile, writer2); + writer2.close(); + System.out.println(""); + System.out.println("OWL Ontology recreated in: " + (System.currentTimeMillis() - startTime) + + " ms. 
(" + output + ")"); + } else { + throw new MissingArgumentException("Missing arguments, -orf with path to Owl Refset file is required"); + } + break; + default: + throw new IncompleteArgumentException("-mode argument error," + + " expected values: rf2-to-owl, owl-to-refset, refset-to-owl"); + } + } + } } } diff --git a/src/com/termmed/owl/TestRenderer.java b/src/com/termmed/owl/TestRenderer.java index 9482532..22fa1b7 100644 --- a/src/com/termmed/owl/TestRenderer.java +++ b/src/com/termmed/owl/TestRenderer.java @@ -41,8 +41,8 @@ public class TestRenderer { public static void main(String[] args) throws Exception { startTime = System.currentTimeMillis(); OWLOntologyManager manager = OWLManager.createOWLOntologyManager(); -// File testOntology = new File("/Users/alo/Downloads/termspace-owl (39).xml"); - File testOntology = new File("/Users/alo/Downloads/conceptsOwlComplete-cd-alo.xml"); + File testOntology = new File("/Users/alo/Downloads/termspace-owl (39).xml"); +// File testOntology = new File("/Users/alo/Downloads/conceptsOwlComplete-cd-alo.xml"); System.out.println("testOntology: " + testOntology.getName()); OWLOntology ontology = manager.loadOntologyFromOntologyDocument(testOntology); System.out.println("Terminology loaded in: " + (System.currentTimeMillis() - startTime) + " ms.");