Skip to content

Commit

Permalink
Merge pull request #208 from zazuko/ckan-frequencies
Browse files Browse the repository at this point in the history
CKAN: Improve the way frequencies are handled
  • Loading branch information
ludovicm67 authored Nov 21, 2023
2 parents cba5096 + 8bc9dfc commit 3d25af2
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 15 deletions.
5 changes: 5 additions & 0 deletions .changeset/honest-clouds-clean.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": minor
---

Improve the way frequencies are handled.
5 changes: 1 addition & 4 deletions packages/ckan/src/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ function datasetsQuery (organizationId) {
?dataset ?p ?o .
?o ?nestedP ?nestedO .
?copyright ${ns.schema.identifier} ?copyrightIdentifier .
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicityExactMatch .
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
}
WHERE {
GRAPH ?graph {
Expand All @@ -35,9 +35,6 @@ function datasetsQuery (organizationId) {
OPTIONAL {
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
GRAPH ?frequencyGraph {
?accrualPeriodicity ${ns.skos.exactMatch} ?accrualPeriodicityExactMatch .
}
}
}
}
Expand Down
84 changes: 75 additions & 9 deletions packages/ckan/src/xml.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
// @ts-check
import rdf from '@zazuko/env'
import prefixes, { shrink } from '@zazuko/prefixes'
import { create as createXml } from 'xmlbuilder2'
import * as ns from './namespace.js'

function toXML (dataset) {
/**
* Generate a CKAN-compatible XML representation of the dataset.
*
* @param {any[]} dataset Dataset to convert.
* @returns {string} XML representation of the dataset.
*/
const toXML = (dataset) => {
const pointer = rdf.clownface({ dataset: rdf.dataset(dataset) })
const datasetsPointer = pointer.node(ns.dcat.Dataset).in(ns.rdf.type)

Expand Down Expand Up @@ -42,6 +49,7 @@ function toXML (dataset) {
: `${identifiers.value}@${creatorSlug}`

// Ignore keywords without a language specified because CKAN rejects them
// @ts-ignore
const keywords = dataset.out(ns.dcat.keyword).filter(({ term: { language } }) => !!language)

const copyright = dataset.out(ns.dcterms.rights).out(ns.schema.identifier)
Expand Down Expand Up @@ -81,6 +89,14 @@ function toXML (dataset) {
// provide the legacy ones to CKAN.
const legacyFreqPrefix = 'http://purl.org/cld/freq/'
const accrualPeriodicity = dataset.out(ns.dcterms.accrualPeriodicity)
.map((t) => {
if (!t.term || !t.term.value) {
return t
}
// If the frequency is not a EU frequency, it is returned unchanged.
t.term.value = convertEUFrequencyToLegacy(t.term.value)
return t
})
.filter(({ term }) => term.value.startsWith(legacyFreqPrefix))

return {
Expand Down Expand Up @@ -112,7 +128,7 @@ function toXML (dataset) {
}).doc().end({ prettyPrint: true }).concat('\n')
}

function serializeTerm (pointer) {
const serializeTerm = (pointer) => {
return pointer.map((value) => {
if (isLiteral(value)) {
return serializeLiteral(value)
Expand All @@ -126,19 +142,19 @@ function serializeTerm (pointer) {
})
}

function isLiteral (pointer) {
/**
 * Check whether the term held by a pointer is a literal.
 *
 * @param {any} pointer Object exposing a `term` with a `termType` property.
 * @returns {boolean} `true` when `pointer.term.termType` is exactly `'Literal'`.
 */
const isLiteral = (pointer) => {
  const { termType } = pointer.term
  return termType === 'Literal'
}

function isNamedNode (pointer) {
/**
 * Check whether the term held by a pointer is a named node.
 *
 * @param {any} pointer Object exposing a `term` with a `termType` property.
 * @returns {boolean} `true` when `pointer.term.termType` is exactly `'NamedNode'`.
 */
const isNamedNode = (pointer) => {
  const { termType } = pointer.term
  return termType === 'NamedNode'
}

function isBlankNode (pointer) {
/**
 * Check whether the term held by a pointer is a blank node.
 *
 * @param {any} pointer Object exposing a `term` with a `termType` property.
 * @returns {boolean} `true` when `pointer.term.termType` is exactly `'BlankNode'`.
 */
const isBlankNode = (pointer) => {
  const { termType } = pointer.term
  return termType === 'BlankNode'
}

function serializeLiteral ({ term }) {
const serializeLiteral = ({ term }) => {
const attrs = {}

if (term.language) {
Expand All @@ -155,13 +171,13 @@ function serializeLiteral ({ term }) {
}
}

function serializeNamedNode ({ value }) {
/**
 * Build the XML-builder object for a named node: an element that carries
 * only an `rdf:resource` attribute set to the node's `value`.
 *
 * @param {any} node Object exposing the IRI as `value`.
 * @returns {{ '@': { 'rdf:resource': any } }} Attribute-only element description.
 */
const serializeNamedNode = (node) => {
  const { value } = node
  const attributes = { 'rdf:resource': value }
  return { '@': attributes }
}

function serializeBlankNode (pointer) {
const serializeBlankNode = (pointer) => {
const type = pointer.out(ns.rdf.type).value

if (!type) return {}
Expand All @@ -178,7 +194,7 @@ function serializeBlankNode (pointer) {
}
}

function distributionFormatFromEncoding (encodingPointer) {
const distributionFormatFromEncoding = (encodingPointer) => {
const encoding = encodingPointer.values[0] || ''

/* eslint-disable indent */
Expand All @@ -196,4 +212,54 @@ function distributionFormatFromEncoding (encodingPointer) {
/* eslint-enable indent */
}

/**
 * Translate an EU frequency IRI
 * (`http://publications.europa.eu/resource/authority/frequency/…`) into the
 * matching legacy IRI (`http://purl.org/cld/freq/…`).
 *
 * Only the EU codes listed in the table below are translated. Any other
 * value (a non-EU IRI, or an EU frequency without a listed equivalent) is
 * returned exactly as it was given.
 *
 * @param {string} frequency Frequency IRI to translate.
 * @returns {string} Legacy frequency IRI, or the input when no mapping applies.
 */
const convertEUFrequencyToLegacy = (frequency) => {
  const euBase = 'http://publications.europa.eu/resource/authority/frequency'
  const legacyBase = 'http://purl.org/cld/freq'

  // [EU code, legacy local name]
  const codePairs = [
    ['ANNUAL', 'annual'],
    ['ANNUAL_2', 'semiannual'],
    ['ANNUAL_3', 'threeTimesAYear'],
    ['BIENNIAL', 'biennial'],
    ['BIMONTHLY', 'bimonthly'],
    ['BIWEEKLY', 'biweekly'],
    ['CONT', 'continuous'],
    ['DAILY', 'daily'],
    ['IRREG', 'irregular'],
    ['MONTHLY', 'monthly'],
    ['MONTHLY_2', 'semimonthly'],
    ['MONTHLY_3', 'threeTimesAMonth'],
    ['QUARTERLY', 'quarterly'],
    ['TRIENNIAL', 'triennial'],
    ['WEEKLY', 'weekly'],
    ['WEEKLY_2', 'semiweekly'],
    ['WEEKLY_3', 'threeTimesAWeek'],
  ]

  // Full IRIs are compared with strict equality, so matching is
  // case-sensitive and never coerces non-string input.
  const hit = codePairs.find(([euCode]) => `${euBase}/${euCode}` === frequency)
  if (!hit) {
    return frequency
  }
  return `${legacyBase}/${hit[1]}`
}

export { toXML }
20 changes: 19 additions & 1 deletion packages/ckan/test/support/basic-result.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,26 @@
<dcterms:description xml:lang="fr" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 1 - Description</dcterms:description>
<dcterms:description xml:lang="it" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 1 - Description</dcterms:description>
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2023-10-31</dcterms:issued>
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-10-31T:15:15.000Z</dcterms:modified>
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-10-31T15:15:15Z</dcterms:modified>
<dcterms:creator rdf:resource="http://example.com/my-org"/>
<dcterms:accrualPeriodicity rdf:resource="http://purl.org/cld/freq/irregular"/>
</dcat:Dataset>
</dcat:dataset>
<dcat:dataset>
<dcat:Dataset rdf:about="http://example.com/dataset2">
<dcterms:identifier>dataset2@my-org</dcterms:identifier>
<dcterms:title xml:lang="de" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Titel</dcterms:title>
<dcterms:title xml:lang="en" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Title</dcterms:title>
<dcterms:title xml:lang="fr" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Titre</dcterms:title>
<dcterms:title xml:lang="it" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Titolo</dcterms:title>
<dcterms:description xml:lang="de" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Description</dcterms:description>
<dcterms:description xml:lang="en" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Description</dcterms:description>
<dcterms:description xml:lang="fr" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Description</dcterms:description>
<dcterms:description xml:lang="it" rdf:datatype="http://www.w3.org/1999/02/22-rdf-syntax-ns#langString">Dataset 2 - Description</dcterms:description>
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2023-11-21</dcterms:issued>
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-11-21T11:12:13Z</dcterms:modified>
<dcterms:creator rdf:resource="http://example.com/my-org"/>
<dcterms:accrualPeriodicity rdf:resource="http://purl.org/cld/freq/semiannual"/>
</dcat:Dataset>
</dcat:dataset>
</dcat:Catalog>
Expand Down
34 changes: 33 additions & 1 deletion packages/ckan/test/support/data.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
rdf:type <http://www.w3.org/ns/dcat#Dataset> ;
rdf:type <http://rdfs.org/ns/void#Dataset> ;
dcterms:issued "2023-10-31"^^xsd:date ;
dcterms:modified "2023-10-31T:15:15.000Z"^^xsd:dateTime ;
dcterms:modified "2023-10-31T15:15:15Z"^^xsd:dateTime ;
dcterms:identifier "dataset1" ;
dcterms:accrualPeriodicity <http://purl.org/cld/freq/irregular> ;
dcterms:creator <my-org> ;
schema:workExample <https://ld.admin.ch/application/opendataswiss> ;
schema:creativeWorkStatus <https://ld.admin.ch/vocabulary/CreativeWorkStatus/Published> ;
Expand All @@ -34,3 +35,34 @@
schema:description "Dataset 1 - Description"@de ;
schema:version "1"^^xsd:integer ;
.

<dataset2>
rdf:type schema:Dataset ;
rdf:type <https://cube.link/Cube> ;
rdf:type <http://www.w3.org/ns/dcat#Dataset> ;
rdf:type <http://rdfs.org/ns/void#Dataset> ;
dcterms:issued "2023-11-21"^^xsd:date ;
dcterms:modified "2023-11-21T11:12:13Z"^^xsd:dateTime ;
dcterms:identifier "dataset2" ;
dcterms:accrualPeriodicity <http://publications.europa.eu/resource/authority/frequency/ANNUAL_2> ;
dcterms:creator <my-org> ;
schema:workExample <https://ld.admin.ch/application/opendataswiss> ;
schema:creativeWorkStatus <https://ld.admin.ch/vocabulary/CreativeWorkStatus/Published> ;
dcterms:title "Dataset 2 - Title"@en ;
dcterms:title "Dataset 2 - Titre"@fr ;
dcterms:title "Dataset 2 - Titolo"@it ;
dcterms:title "Dataset 2 - Titel"@de ;
schema:name "Dataset 2 - Title"@en ;
schema:name "Dataset 2 - Titre"@fr ;
schema:name "Dataset 2 - Titolo"@it ;
schema:name "Dataset 2 - Titel"@de ;
dcterms:description "Dataset 2 - Description"@en ;
dcterms:description "Dataset 2 - Description"@fr ;
dcterms:description "Dataset 2 - Description"@it ;
dcterms:description "Dataset 2 - Description"@de ;
schema:description "Dataset 2 - Description"@en ;
schema:description "Dataset 2 - Description"@fr ;
schema:description "Dataset 2 - Description"@it ;
schema:description "Dataset 2 - Description"@de ;
schema:version "1"^^xsd:integer ;
.

0 comments on commit 3d25af2

Please sign in to comment.