diff --git a/doc/release-notes/10790-fix pid handling in exporters and citations.md b/doc/release-notes/10790-fix pid handling in exporters and citations.md new file mode 100644 index 00000000000..da430ed9671 --- /dev/null +++ b/doc/release-notes/10790-fix pid handling in exporters and citations.md @@ -0,0 +1,16 @@ +### Improvements to PID formatting in exports and citations + +Multiple small issues with the formatting of PIDs in the +DDI exporters, and EndNote and BibTeX citation formats have +been addressed. These should improve the ability to import +Dataverse citations into reference managers and fix potential +issues harvesting datasets using PermaLinks. + +Backward Incompatibility + +Changes to PID formatting occur in the DDI/DDI HTML export formats +and the EndNote and BibTeX citation formats. These changes correct +errors and improve conformance with best practices but could break +parsing of these formats. + +For more information, see #10790. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 32d4c78bd8f..05a17992acf 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -581,6 +581,7 @@ Note: - If you configure ``base-url``, it should include a "/" after the hostname like this: ``https://demo.dataverse.org/``. - When using multiple PermaLink providers, you should avoid ambiguous authority/separator/shoulder combinations that would result in the same overall prefix. +- Configuring PermaLink providers differing only by their separator values is not supported. - In general, PermaLink authority/shoulder values should be alphanumeric. For other cases, admins may need to consider the potential impact of special characters in S3 storage identifiers, resolver URLs, exports, etc. .. 
_dataverse.pid.*.handlenet: diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 02fb59751fb..8d46e956655 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -38,6 +38,10 @@ import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.StringUtils; +import static edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider.DOI_PROTOCOL; +import static edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider.HDL_PROTOCOL; +import static edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider.PERMA_PROTOCOL; + /** * * @author gdurand, qqmyers @@ -293,11 +297,13 @@ public void writeAsBibtexCitation(OutputStream os) throws IOException { out.write("version = {"); out.write(version); out.write("},\r\n"); - out.write("doi = {"); - out.write(persistentId.getAuthority()); - out.write("/"); - out.write(persistentId.getIdentifier()); - out.write("},\r\n"); + if("doi".equals(persistentId.getProtocol())) { + out.write("doi = {"); + out.write(persistentId.getAuthority()); + out.write("/"); + out.write(persistentId.getIdentifier()); + out.write("},\r\n"); + } out.write("url = {"); out.write(persistentId.asURL()); out.write("}\r\n"); @@ -595,11 +601,21 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException { } xmlw.writeStartElement("urls"); - xmlw.writeStartElement("related-urls"); - xmlw.writeStartElement("url"); - xmlw.writeCharacters(getPersistentId().asURL()); - xmlw.writeEndElement(); // url - xmlw.writeEndElement(); // related-urls + if (persistentId != null) { + if (PERMA_PROTOCOL.equals(persistentId.getProtocol()) || HDL_PROTOCOL.equals(persistentId.getProtocol())) { + xmlw.writeStartElement("web-urls"); + xmlw.writeStartElement("url"); + xmlw.writeCharacters(getPersistentId().asURL()); + xmlw.writeEndElement(); // url + xmlw.writeEndElement(); // 
web-urls + } else if (DOI_PROTOCOL.equals(persistentId.getProtocol())) { + xmlw.writeStartElement("related-urls"); + xmlw.writeStartElement("url"); + xmlw.writeCharacters(getPersistentId().asURL()); + xmlw.writeEndElement(); // url + xmlw.writeEndElement(); // related-urls + } + } xmlw.writeEndElement(); // urls // a DataFile citation also includes the filename and (for Tabular @@ -617,10 +633,9 @@ private void createEndNoteXML(XMLStreamWriter xmlw) throws XMLStreamException { xmlw.writeEndElement(); // custom2 } } - if (persistentId != null) { + if (persistentId != null && "doi".equals(persistentId.getProtocol())) { xmlw.writeStartElement("electronic-resource-num"); - String electResourceNum = persistentId.getProtocol() + "/" + persistentId.getAuthority() + "/" - + persistentId.getIdentifier(); + String electResourceNum = persistentId.asRawIdentifier(); xmlw.writeCharacters(electResourceNum); xmlw.writeEndElement(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 5dab43fbdbd..7bb93ea6dde 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -143,12 +143,14 @@ public String visit(DataFile df) { @Column(insertable = false, updatable = false) private String dtype; /* - * Add DOI related fields + * Add PID related fields */ private String protocol; private String authority; + private String separator; + @Temporal(value = TemporalType.TIMESTAMP) private Date globalIdCreateTime; @@ -323,6 +325,16 @@ public void setAuthority(String authority) { globalId=null; } + public String getSeparator() { + return separator; + } + + public void setSeparator(String separator) { + this.separator = separator; + //Remove cached value + globalId=null; + } + public Date getGlobalIdCreateTime() { return globalIdCreateTime; } @@ -353,11 +365,13 @@ public void setGlobalId( GlobalId pid ) { if ( pid == null ) { setProtocol(null); 
setAuthority(null); + setSeparator(null); setIdentifier(null); } else { //These reset globalId=null setProtocol(pid.getProtocol()); setAuthority(pid.getAuthority()); + setSeparator(pid.getSeparator()); setIdentifier(pid.getIdentifier()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 1c8783c5bd5..058a6269b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -63,6 +63,10 @@ public String getAuthority() { return authority; } + public String getSeparator() { + return separator; + } + public String getIdentifier() { return identifier; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java index 3fc31730ba2..ec8adfb4eef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DatasetDTO.java @@ -12,6 +12,7 @@ public class DatasetDTO implements java.io.Serializable { private String identifier; private String protocol; private String authority; + private String separator; private String globalIdCreateTime; private String publisher; private String publicationDate; @@ -51,6 +52,14 @@ public void setAuthority(String authority) { this.authority = authority; } + public String getSeparator() { + return separator; + } + + public void setSeparator(String separator) { + this.separator = separator; + } + public String getGlobalIdCreateTime() { return globalIdCreateTime; } @@ -94,7 +103,7 @@ public void setPublicationDate(String publicationDate) { @Override public String toString() { - return "DatasetDTO{" + "id=" + id + ", identifier=" + identifier + ", protocol=" + protocol + ", authority=" + authority + ", globalIdCreateTime=" + globalIdCreateTime + ", datasetVersion=" + datasetVersion + ", dataFiles=" + dataFiles + '}'; + return "DatasetDTO{" + "id=" + id + ", 
identifier=" + identifier + ", protocol=" + protocol + ", authority=" + authority + ", separator=" + separator + ", globalIdCreateTime=" + globalIdCreateTime + ", datasetVersion=" + datasetVersion + ", dataFiles=" + dataFiles + '}'; } public void setMetadataLanguage(String metadataLanguage) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 8fab6a6704d..a1f480af197 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; @@ -85,6 +86,10 @@ public class DdiExportUtil { public static final String NOTE_SUBJECT_CONTENTTYPE = "Content/MIME Type"; public static final String CITATION_BLOCK_NAME = "citation"; + //Some tests don't send real PIDs that can be parsed + //Use constant empty PID in these cases + private static final String EMPTY_PID = "null:nullnullnull"; + public static String datasetDtoAsJson2ddi(String datasetDtoAsJson) { Gson gson = new Gson(); DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson, DatasetDTO.class); @@ -169,11 +174,14 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) String persistentAuthority = datasetDto.getAuthority(); String persistentId = datasetDto.getIdentifier(); - String pid = persistentProtocol + ":" + persistentAuthority + "/" + persistentId; - String pidUri = pid; - //Some tests don't send real PIDs - don't try to get their URL form - if(!pidUri.equals("null:null/null")) { - pidUri= 
PidUtil.parseAsGlobalID(persistentProtocol, persistentAuthority, persistentId).asURL(); + GlobalId pid = PidUtil.parseAsGlobalID(persistentProtocol, persistentAuthority, persistentId); + String pidUri, pidString; + if(pid != null) { + pidUri = pid.asURL(); + pidString = pid.asString(); + } else { + pidUri = EMPTY_PID; + pidString = EMPTY_PID; } // The "persistentAgency" tag is used for the "agency" attribute of the // ddi section; back in the DVN3 days we used "handle" and "DOI" @@ -203,7 +211,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); - xmlw.writeCharacters(pid); + xmlw.writeCharacters(pidString); xmlw.writeEndElement(); // IDNo writeOtherIdElement(xmlw, version); xmlw.writeEndElement(); // titlStmt @@ -364,14 +372,21 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase String persistentAuthority = datasetDto.getAuthority(); String persistentId = datasetDto.getIdentifier(); - + GlobalId pid = PidUtil.parseAsGlobalID(persistentProtocol, persistentAuthority, persistentId); + String pidString; + if(pid != null) { + pidString = pid.asString(); + } else { + pidString = EMPTY_PID; + } + xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); - xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); + xmlw.writeCharacters(pidString); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt xmlw.writeStartElement("distStmt"); @@ -396,10 +411,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, 
DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - xmlw.writeAttribute("source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -670,7 +685,7 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO xmlw.writeStartElement("dataColl"); XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); @@ -691,7 +706,7 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO } } /* and so does : */ - XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, 
DatasetFieldConstant.originOfSources, lang); @@ -704,7 +719,7 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl @@ -715,7 +730,7 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -867,7 +882,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -928,8 +943,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not 
consider not checking if datasetContactName is empty so we can write out datasetContactEmail. if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -1154,7 +1169,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { xmlw.writeAttribute("xml:lang", lang); } @@ -1189,7 +1204,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1221,7 +1236,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); - XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1253,7 +1268,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); - XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } 
@@ -1366,8 +1381,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - XmlWriterUtil.writeAttribute(xmlw,"type",notesType); - XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } @@ -1441,9 +1456,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA xmlw.writeStartElement("otherMat"); xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } xmlw.writeAttribute("level", "datafile"); @@ -1514,7 +1529,7 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String } return null; } - + private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java index 250eae7e5fc..acb0b7e7518 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -204,6 +204,16 @@ public DvObject generatePid(DvObject dvObject) { + ") doesn't match that of the provider, id: " + getId()); } } + if (dvObject.getSeparator() == null) { + dvObject.setSeparator(getSeparator()); + } else { + if 
(!dvObject.getSeparator().equals(getSeparator())) { + logger.warning("The separator of the DvObject (" + dvObject.getSeparator() + + ") does not match the configured separator (" + getSeparator() + ")"); + throw new IllegalArgumentException("The separator of the DvObject (" + dvObject.getSeparator() + + ") doesn't match that of the provider, id: " + getId()); + } + } if (dvObject.isInstanceofDataset()) { dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index b88dfaef4b5..e3ec5e4809b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -402,6 +402,7 @@ public static JsonObjectBuilder json(Dataset ds, Boolean returnOwners) { .add("persistentUrl", ds.getPersistentURL()) .add("protocol", ds.getProtocol()) .add("authority", ds.getAuthority()) + .add("separator", ds.getSeparator()) .add("publisher", BrandingUtil.getInstallationBrandName()) .add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD()) .add("storageIdentifier", ds.getStorageIdentifier()); diff --git a/src/main/resources/db/migration/V6.5.0.4.sql b/src/main/resources/db/migration/V6.5.0.4.sql new file mode 100644 index 00000000000..9c3b24712e1 --- /dev/null +++ b/src/main/resources/db/migration/V6.5.0.4.sql @@ -0,0 +1,3 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS separator character varying(255) DEFAULT ''; + +UPDATE dvobject SET separator='/' WHERE protocol = 'doi' OR protocol = 'hdl'; \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java b/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java index 23a7efedca7..25831992dbd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java @@ 
-266,7 +266,7 @@ public void testToEndNoteString_withTitleAndAuthor() throws ParseException { "V1" + "LibraScholar" + "https://doi.org/10.5072/FK2/LK0D1H" + - "doi/10.5072/FK2/LK0D1H" + + "10.5072/FK2/LK0D1H" + "" + "" + ""; @@ -295,7 +295,7 @@ public void testToEndNoteString_withoutTitleAndAuthor() throws ParseException { "V1" + "LibraScholar" + "https://doi.org/10.5072/FK2/LK0D1H" + - "doi/10.5072/FK2/LK0D1H" + + "10.5072/FK2/LK0D1H" + "" + "" + ""; diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java index f594de4757d..360e9dfbafe 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtilTest.java @@ -1,6 +1,15 @@ package edu.harvard.iq.dataverse.export.ddi; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteProviderFactory; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; @@ -11,12 +20,17 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.logging.Logger; import edu.harvard.iq.dataverse.util.xml.html.HtmlPrinter; import org.jsoup.Jsoup; import org.jsoup.helper.W3CDom; 
+import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -33,6 +47,30 @@ import static org.junit.jupiter.api.Assertions.*; @ExtendWith(MockitoExtension.class) +@LocalJvmSettings +//Perma 1 +@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "perma 1", varArgs = "perma1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = PermaLinkPidProvider.TYPE, varArgs = "perma1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "PERM", varArgs = "perma1") +@JvmSetting(key = JvmSettings.PERMALINK_BASE_URL, value = "https://example.org/citation?persistentId=perma:", varArgs = "perma1") +//Perma 2 +@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "perma 2", varArgs = "perma2") +@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = PermaLinkPidProvider.TYPE, varArgs = "perma2") +@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "PERM2", varArgs = "perma2") +@JvmSetting(key = JvmSettings.PERMALINK_SEPARATOR, value = "-", varArgs = "perma2") +@JvmSetting(key = JvmSettings.PERMALINK_BASE_URL, value = "https://example.org/citation?persistentId=perma:", varArgs = "perma2") +// Datacite 1 +@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "dataCite 1", varArgs = "dc1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = DataCiteDOIProvider.TYPE, varArgs = "dc1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5072", varArgs = "dc1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK2", varArgs = "dc1") +@JvmSetting(key = JvmSettings.DATACITE_MDS_API_URL, value = "https://mds.test.datacite.org/", varArgs = "dc1") +@JvmSetting(key = JvmSettings.DATACITE_REST_API_URL, value = "https://api.test.datacite.org", varArgs ="dc1") +@JvmSetting(key = JvmSettings.DATACITE_USERNAME, value = "test", varArgs ="dc1") +@JvmSetting(key = JvmSettings.DATACITE_PASSWORD, value = "changeme", varArgs 
="dc1") + +//List to instantiate +@JvmSetting(key = JvmSettings.PID_PROVIDERS, value = "perma1, perma2, dc1") public class DdiExportUtilTest { private static final Logger logger = Logger.getLogger(DdiExportUtilTest.class.getCanonicalName()); @@ -45,6 +83,25 @@ void setup() { Mockito.lenient().when(settingsSvc.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false)).thenReturn(false); DdiExportUtil.injectSettingsService(settingsSvc); } + + @BeforeAll + public static void setUpClass() throws Exception { + Map pidProviderFactoryMap = new HashMap<>(); + pidProviderFactoryMap.put(PermaLinkPidProvider.TYPE, new PermaLinkProviderFactory()); + pidProviderFactoryMap.put(DataCiteDOIProvider.TYPE, new DataCiteProviderFactory()); + + PidUtil.clearPidProviders(); + + //Read list of providers to add + List providers = Arrays.asList(JvmSettings.PID_PROVIDERS.lookup().split(",\\s")); + //Iterate through the list of providers and add them using the PidProviderFactory of the appropriate type + for (String providerId : providers) { + System.out.println("Loading provider: " + providerId); + String type = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + PidProviderFactory factory = pidProviderFactoryMap.get(type); + PidUtil.addToProviderList(factory.createPidProvider(providerId)); + } + } @Test @@ -64,6 +121,42 @@ public void testJson2DdiNoFiles() throws Exception { XmlAssert.assertThat(result).and(datasetAsDdi).ignoreWhitespace().areSimilar(); } + + @Test + public void testJson2DdiPermaLink() throws Exception { + // given + Path datasetVersionJson = Path.of("src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.json"); + String datasetVersionAsJson = Files.readString(datasetVersionJson, StandardCharsets.UTF_8); + Path ddiFile = Path.of("src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.xml"); + String datasetAsDdi = XmlPrinter.prettyPrintXml(Files.readString(ddiFile, StandardCharsets.UTF_8)); + logger.fine(datasetAsDdi); + 
+ // when + String result = DdiExportUtil.datasetDtoAsJson2ddi(datasetVersionAsJson); + logger.fine(result); + + // then + XmlAssert.assertThat(result).and(datasetAsDdi).ignoreWhitespace().areSimilar(); + } + + + @Test + public void testJson2DdiPermaLinkWithSeparator() throws Exception { + // given + Path datasetVersionJson = Path.of("src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.json"); + String datasetVersionAsJson = Files.readString(datasetVersionJson, StandardCharsets.UTF_8); + Path ddiFile = Path.of("src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.xml"); + String datasetAsDdi = XmlPrinter.prettyPrintXml(Files.readString(ddiFile, StandardCharsets.UTF_8)); + logger.fine(datasetAsDdi); + + // when + String result = DdiExportUtil.datasetDtoAsJson2ddi(datasetVersionAsJson); + logger.fine(result); + + // then + XmlAssert.assertThat(result).and(datasetAsDdi).ignoreWhitespace().areSimilar(); + } + @Test public void testJson2DdiNoFilesTermsOfUse() throws Exception { // given diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index 9cf04bd0e05..b0dace0fb86 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -4,6 +4,7 @@ "persistentUrl": "https://doi.org/10.5072/FK2/WKUKGV", "protocol": "doi", "authority": "10.5072/FK2", + "separator": "/", "publisher": "Root", "publicationDate": "2020-02-19", "datasetVersion": { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json index 2d4ca078962..49565d925ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json +++ 
b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json @@ -4,6 +4,7 @@ "persistentUrl": "https://doi.org/10.5072/FK2/PCA2E3", "protocol": "doi", "authority": "10.5072/FK2", + "separator": "/", "metadataLanguage": "en", "datasetVersion": { "id": 2, diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.json new file mode 100644 index 00000000000..9b51dc2ff91 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.json @@ -0,0 +1,92 @@ +{ + "id": 10, + "identifier": "123456789", + "persistentUrl": "https://example.org/citation?persistentId=PERM2-123456789", + "protocol": "perma", + "authority": "PERM2", + "separator": "-", + "datasetVersion": { + "id": 1, + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "productionDate": "Production Date", + "lastUpdateTime": "2015-09-29T17:47:35Z", + "releaseTime": "2015-09-29T17:47:35Z", + "createTime": "2015-09-24T16:47:50Z", + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce Goose" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce, Sabrina" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "spruce@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": 
"What the Spruce Goose was really made of." + } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce, Sabrina" + } + ] + } + }, + "files": [], + "citation": "Spruce, Sabrina, 2015, \"Spruce Goose\", https://example.org/citation?persistentId=PERM2-123456789, Root Dataverse, V1" + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.xml new file mode 100644 index 00000000000..2a7d4d09846 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma-w-separator.xml @@ -0,0 +1,50 @@ + + + + + + Spruce Goose + perma:PERM2-123456789 + + + + 1 + + Spruce, Sabrina, 2015, "Spruce Goose", https://example.org/citation?persistentId=PERM2-123456789, Root Dataverse, V1 + + + + + + Spruce Goose + perma:PERM2-123456789 + + + Spruce, Sabrina + + + + Spruce, Sabrina + + + + + + Other + + What the Spruce Goose was really made of. 
+ + + + + + + + + + + + + + + diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.json new file mode 100644 index 00000000000..eb8fc6d1d88 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.json @@ -0,0 +1,92 @@ +{ + "id": 10, + "identifier": "123456789", + "persistentUrl": "https://example.org/citation?persistentId=PERM123456789", + "protocol": "perma", + "authority": "PERM", + "separator": "", + "datasetVersion": { + "id": 1, + "versionNumber": 1, + "versionMinorNumber": 0, + "versionState": "RELEASED", + "productionDate": "Production Date", + "lastUpdateTime": "2015-09-29T17:47:35Z", + "releaseTime": "2015-09-29T17:47:35Z", + "createTime": "2015-09-24T16:47:50Z", + "metadataBlocks": { + "citation": { + "displayName": "Citation Metadata", + "fields": [ + { + "typeName": "title", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce Goose" + }, + { + "typeName": "author", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "authorName": { + "typeName": "authorName", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce, Sabrina" + } + } + ] + }, + { + "typeName": "datasetContact", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "datasetContactEmail": { + "typeName": "datasetContactEmail", + "multiple": false, + "typeClass": "primitive", + "value": "spruce@mailinator.com" + } + } + ] + }, + { + "typeName": "dsDescription", + "multiple": true, + "typeClass": "compound", + "value": [ + { + "dsDescriptionValue": { + "typeName": "dsDescriptionValue", + "multiple": false, + "typeClass": "primitive", + "value": "What the Spruce Goose was really made of." 
+ } + } + ] + }, + { + "typeName": "subject", + "multiple": true, + "typeClass": "controlledVocabulary", + "value": [ + "Other" + ] + }, + { + "typeName": "depositor", + "multiple": false, + "typeClass": "primitive", + "value": "Spruce, Sabrina" + } + ] + } + }, + "files": [], + "citation": "Spruce, Sabrina, 2015, \"Spruce Goose\", https://example.org/citation?persistentId=PERM123456789, Root Dataverse, V1" + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.xml b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.xml new file mode 100644 index 00000000000..341cb7435bd --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-perma.xml @@ -0,0 +1,50 @@ + + + + + + Spruce Goose + perma:PERM123456789 + + + + 1 + + Spruce, Sabrina, 2015, "Spruce Goose", https://example.org/citation?persistentId=PERM123456789, Root Dataverse, V1 + + + + + + Spruce Goose + perma:PERM123456789 + + + Spruce, Sabrina + + + + Spruce, Sabrina + + + + + + Other + + What the Spruce Goose was really made of. 
+ + + + + + + + + + + + + + + diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java index 89a8f8826ec..2058de1d6c2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java @@ -48,6 +48,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; @ExtendWith(MockitoExtension.class) @@ -64,7 +65,7 @@ @JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "perma 2", varArgs = "perma2") @JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = PermaLinkPidProvider.TYPE, varArgs = "perma2") @JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "DANSLINK", varArgs = "perma2") -@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "QE", varArgs = "perma2") +@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "QQ", varArgs = "perma2") @JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "perma:LINKIT/FK2ABCDEF", varArgs ="perma2") @JvmSetting(key = JvmSettings.PERMALINK_SEPARATOR, value = "/", varArgs = "perma2") @JvmSetting(key = JvmSettings.PERMALINK_BASE_URL, value = "https://example.org/123/citation?persistentId=perma:", varArgs = "perma2") @@ -133,6 +134,8 @@ public class PidUtilTest { @Mock private SettingsServiceBean settingsServiceBean; + + static PidProviderFactoryBean pidService; @BeforeAll //FWIW @JvmSetting doesn't appear to work with @BeforeAll @@ -228,7 +231,7 @@ public void testPermaLinkParsing() throws IOException { assertEquals("perma1", pid3.getProviderId()); //Repeat the basics with a permalink associated with perma2 - String pid4String = "perma:DANSLINK/QE-5A-XN55"; + String pid4String = "perma:DANSLINK/QQ-5A-XN55"; GlobalId pid5 = PidUtil.parseAsGlobalID(pid4String); assertEquals("perma2", pid5.getProviderId()); 
assertEquals(pid4String, pid5.asString()); @@ -236,6 +239,20 @@ public void testPermaLinkParsing() throws IOException { } + @Test + public void testPermaLinkGenerationiWithSeparator() throws IOException { + Dataset ds = new Dataset(); + pidService = Mockito.mock(PidProviderFactoryBean.class); + Mockito.when(pidService.isGlobalIdLocallyUnique(any(GlobalId.class))).thenReturn(true); + PidProvider p = PidUtil.getPidProvider("perma1"); + p.setPidProviderServiceBean(pidService); + p.generatePid(ds); + System.out.println("DS sep " + ds.getSeparator()); + System.out.println("Generated perma identifier" + ds.getGlobalId().asString()); + System.out.println("Provider prefix for perma identifier" + p.getAuthority() + p.getSeparator() + p.getShoulder()); + assertTrue(ds.getGlobalId().asRawIdentifier().startsWith(p.getAuthority() + p.getSeparator() + p.getShoulder())); + } + @Test public void testDOIParsing() throws IOException {