Skip to content

Commit

Permalink
Merge pull request #265 from zazuko/ckan-properties-upgrade
Browse files Browse the repository at this point in the history
Update some CKAN properties
  • Loading branch information
tpluscode authored Feb 27, 2024
2 parents 8645c9e + 54a554d commit 3cf0f61
Show file tree
Hide file tree
Showing 10 changed files with 245 additions and 40 deletions.
5 changes: 5 additions & 0 deletions .changeset/fluffy-pillows-dance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": minor
---

Export `dcterms:relation` from source cube
5 changes: 5 additions & 0 deletions .changeset/kind-apples-shout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": patch
---

Added `foaf:page` - dataset documentation
5 changes: 5 additions & 0 deletions .changeset/pink-mails-jog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": major
---

Output [europa.eu themes](https://publications.europa.eu/resource/authority/data-theme) explicitly mapped using `schema:sameAs`. Themes without a mapping are not included in the output.
5 changes: 5 additions & 0 deletions .changeset/seven-horses-brake.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": major
---

Changed the output of `dcat:Distribution/dcterms:format`: it now links to the [europa.eu controlled vocabulary](https://publications.europa.eu/resource/authority/file-type)
4 changes: 2 additions & 2 deletions packages/ckan/src/namespace.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// @ts-check
import _rdf from '@zazuko/env'

const { dcat, dcterms, rdf, schema, skos, vcard, xsd } = _rdf.ns
const { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs } = _rdf.ns

export { dcat, dcterms, rdf, schema, skos, vcard, xsd }
export { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs }
14 changes: 14 additions & 0 deletions packages/ckan/src/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const datasetsQuery = (organizationId) => {
?o ?nestedP ?nestedO .
?copyright ${ns.schema.identifier} ?copyrightIdentifier .
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
?publisher ${ns.schema.name} ?publisherName .
?dataset ${ns.dcat.theme} ?euTheme .
}
WHERE {
GRAPH ?graph {
Expand Down Expand Up @@ -42,6 +44,18 @@ const datasetsQuery = (organizationId) => {
OPTIONAL {
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
}
OPTIONAL {
?dataset ${ns.dcterms.publisher} ?publisher .
?publisher ${ns.schema.name} ?publisherName .
}
OPTIONAL {
?dataset ${ns.dcat.theme} ?theme .
?theme ${ns.schema.sameAs} ?euTheme .
}
FILTER (?p != ${ns.dcat.theme})
}
}
`
Expand Down
77 changes: 61 additions & 16 deletions packages/ckan/src/xml.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const toXML = (dataset) => {
dcat: prefixes.dcat,
dcterms: prefixes.dcterms,
vcard: prefixes.vcard,
foaf: prefixes.foaf,
},
}, {
'rdf:RDF': {
Expand Down Expand Up @@ -69,26 +70,45 @@ const toXML = (dataset) => {
.filter(workExample => workExample.out(ns.schema.encodingFormat).terms.length > 0)
.map(workExample => ({
'dcat:Distribution': {
'@': { 'rdf:about': workExample.out(ns.schema.url).value },
'dcterms:issued': serializeTerm(dataset.out(ns.dcterms.issued)),
'dcat:mediaType': serializeTerm(workExample.out(ns.schema.encodingFormat)),
'dcat:accessURL': serializeTerm(workExample.out(ns.schema.url)),
'dcterms:title': serializeTerm(workExample.out(ns.schema.name)),
'dcterms:rights': serializeTerm(copyright),
'dcterms:format': { '#': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)) },
'dcterms:license': serializeTerm(copyright),
'dcterms:format': {
'@': {
'rdf:resource': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)),
},
},
},
}))

const publishers = dataset.out(ns.dcterms.publisher)
.map(publisher => ({
'rdf:Description': {
'rdfs:label': publisher.value,
},
}))
.map(publisher => {
const attr = {}
/** @type {string | string[]} */
let name = publisher.value

if (isNamedNode(publisher)) {
attr['rdf:about'] = publisher.value
if (publisher.out(ns.schema.name).values.length > 0) {
name = publisher.out(ns.schema.name).values
}
}

return {
'foaf:Organization': {
'@': attr,
'foaf:name': name,
},
}
})

// Datasets contain a mix of legacy (DC) frequencies and new (EU) frequencies.
// The query returns both kinds, but we only provide EU frequencies to CKAN,
// converting legacy ones where possible.
const legacyFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/'
const euFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/'
const accrualPeriodicity = dataset.out(ns.dcterms.accrualPeriodicity)
.map((t) => {
if (!t.term || !t.term.value) {
Expand All @@ -98,7 +118,7 @@ const toXML = (dataset) => {
t.term.value = convertLegacyFrequency(t.term.value)
return t
})
.filter(({ term }) => term.value.startsWith(legacyFreqPrefix))
.filter(({ term }) => term.value.startsWith(euFreqPrefix))

return {
'dcat:Dataset': {
Expand All @@ -116,14 +136,18 @@ const toXML = (dataset) => {
),
'dcat:theme': serializeTerm(dataset.out(ns.dcat.theme)),
'dcterms:language': serializeTerm(dataset.out(ns.dcterms.language)),
'dcterms:relation': legalBasis,
'dcterms:relation': [
legalBasis,
serializeTerm(dataset.out(ns.dcterms.relation), { properties: [ns.rdfs.label] }),
],
'dcat:keyword': serializeTerm(keywords),
'dcat:landingPage': serializeTerm(dataset.out(ns.dcat.landingPage)),
'dcterms:spatial': serializeTerm(dataset.out(ns.dcterms.spatial)),
'dcterms:coverage': serializeTerm(dataset.out(ns.dcterms.coverage)),
'dcterms:temporal': serializeTerm(dataset.out(ns.dcterms.temporal)),
'dcterms:accrualPeriodicity': serializeTerm(accrualPeriodicity),
'dcat:distribution': distributions,
'foaf:page': serializeTerm(dataset.out(ns.foaf.page)),
},
}
}).filter(Boolean),
Expand All @@ -132,9 +156,15 @@ const toXML = (dataset) => {
}).doc().end({ prettyPrint: true }).concat('\n')
}

const serializeTerm = (pointer) => {
/**
* Serialize a term.
* @param {import('clownface').MultiPointer | Array<import('clownface').GraphPointer>} pointer Pointer to serialize.
* @param {object} [options]
* @param {import('@rdfjs/types').NamedNode[]} [options.properties]
*/
const serializeTerm = (pointer, { properties = [] } = {}) => {
return pointer.map((value) => {
return serializeLiteral(value) || serializeNamedNode(value) || serializeBlankNode(value) || {}
return serializeLiteral(value) || serializeNamedNode(value, properties) || serializeBlankNode(value) || {}
})
}

Expand Down Expand Up @@ -168,11 +198,26 @@ const serializeLiteral = (pointer) => {
* Serialize a named node.
*
* @param {import('clownface').MultiPointer} pointer Pointer to serialize.
* @param {import('@rdfjs/types').NamedNode[]} [properties]
* @return {Record<string, unknown>} Serialized named node.
*/
const serializeNamedNode = (pointer) => {
const serializeNamedNode = (pointer, properties = []) => {
if (!isNamedNode(pointer)) return null

const propertyMap = properties.reduce((acc, property) => ({
...acc,
[shrink(property.value)]: serializeTerm(pointer.out(property)),
}), {})

if (Object.keys(propertyMap).length > 0) {
return {
'rdf:Description': {
'@': { 'rdf:about': pointer.value },
...propertyMap,
},
}
}

return {
'@': { 'rdf:resource': pointer.value },
}
Expand Down Expand Up @@ -217,13 +262,13 @@ const distributionFormatFromEncoding = (encodingPointer) => {

switch (encoding) {
case 'text/html': {
return 'HTML'
return 'http://publications.europa.eu/resource/authority/file-type/HTML'
}
case 'application/sparql-query': {
return 'SERVICE'
return 'http://publications.europa.eu/resource/authority/file-type/SPARQLQ'
}
default: {
return 'UNKNOWN'
return `https://www.iana.org/assignments/media-types/${encoding}`
}
}
}
Expand Down
101 changes: 87 additions & 14 deletions packages/ckan/test/ckan.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ const createTrifidInstance = async () => {

describe('@zazuko/trifid-plugin-ckan', () => {
let trifidListener
const parser = new xml.Parser({
explicitArray: false,
})

beforeEach(async () => {
const trifidInstance = await createTrifidInstance()
Expand Down Expand Up @@ -71,15 +74,40 @@ describe('@zazuko/trifid-plugin-ckan', () => {
strictEqual(body, expectedResult)
})

it('should get a basic result for a known organization', async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org`
describe('example organization', () => {
let res
let xmlText
let xmlBody

const res = await fetch(ckanUrl)
const body = await res.text()
const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8')
beforeEach(async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org`
res = await fetch(ckanUrl)
xmlText = await res.text()
xmlBody = await parser.parseStringPromise(xmlText)
})

strictEqual(res.status, 200)
strictEqual(body, expectedResult)
it('should get a basic result for a known organization', async () => {
const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8')

strictEqual(res.status, 200)
strictEqual(xmlText, expectedResult)
})

it('should take publisher at face value', async () => {
const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher')

const expected = await parser.parseStringPromise(`
<foaf:Organization>
<foaf:name>http://example.com/my-org</foaf:name>
</foaf:Organization>`)
expect(publisher).to.containSubset(expected)
})

it('should get landing page resource', () => {
const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage')

expect(landingPage.$['rdf:resource']).to.eq('https://example.com/')
})
})

it('should convert legacy frequency to EU frequency if possible', async () => {
Expand Down Expand Up @@ -129,16 +157,16 @@ describe('@zazuko/trifid-plugin-ckan', () => {
})

describe('BLW tests', () => {
const parser = new xml.Parser({
explicitArray: false,
})
let xmlBody

it('should get a correct contactPoint', async () => {
beforeEach(async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-landwirtschaft-blw`

const res = await fetch(ckanUrl)
const body = await parser.parseStringPromise(await res.text())
const contactPoint = xpath.evalFirst(body, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint')
xmlBody = await parser.parseStringPromise(await res.text())
})

it('should get a correct contactPoint', async () => {
const contactPoint = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint')

const expected = await parser.parseStringPromise(`
<vcard:Organization>
Expand All @@ -147,5 +175,50 @@ describe('@zazuko/trifid-plugin-ckan', () => {
</vcard:Organization>`)
expect(contactPoint).to.containSubset(expected)
})

it('should get structured publisher', async () => {
const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher')

const expected = await parser.parseStringPromise(`
<foaf:Organization rdf:about="https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-landwirtschaft-blw">
<foaf:name>Bundesamt für Landwirtschaft</foaf:name>
</foaf:Organization>`)
expect(publisher).to.containSubset(expected)
})

it('should get landing page resource', () => {
const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage')

expect(landingPage.$['rdf:resource']).to.eq('https://agrarmarktdaten.admin.ch')
})

it('should use mapped themes', () => {
const themes = xpath.find(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:theme')
.map(theme => theme.$['rdf:resource'])

expect(themes).to.contain.all.members([
'http://publications.europa.eu/resource/authority/data-theme/AGRI',
'http://publications.europa.eu/resource/authority/data-theme/GOVE',
'http://publications.europa.eu/resource/authority/data-theme/ECON',
])
expect(themes).to.have.length(3)
})

// Checks that the dataset's dcterms:temporal is serialized as a
// dcterms:PeriodOfTime carrying xsd:date-typed start and end dates.
it('should get temporal PeriodOfTime', async () => {
const temporal = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:temporal')

const expected = await parser.parseStringPromise(`
<dcterms:PeriodOfTime>
<schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</schema:startDate>
<schema:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-12-31</schema:endDate>
</dcterms:PeriodOfTime>`)
expect(temporal).to.containSubset(expected)
})

it('should build correct distribution format', async () => {
const format = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:distribution/dcat:Distribution/dcterms:format')

expect(format.$['rdf:resource']).to.eq('http://publications.europa.eu/resource/authority/file-type/SPARQLQ')
})
})
})
20 changes: 20 additions & 0 deletions packages/ckan/test/support/basic-result.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,28 @@
<dcterms:description xml:lang="it">Dataset 1 - Description</dcterms:description>
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2023-10-31</dcterms:issued>
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-10-31T15:15:15Z</dcterms:modified>
<dcterms:publisher>
<foaf:Organization rdf:about="http://example.com/my-org">
<foaf:name>http://example.com/my-org</foaf:name>
</foaf:Organization>
</dcterms:publisher>
<dcterms:creator rdf:resource="http://example.com/my-org"/>
<dcterms:relation>
<rdf:Description rdf:about="https://www.fedlex.admin.ch/eli/cc/1998/3033_3033_3033/de#art_27">
<rdfs:label>legal_basis</rdfs:label>
</rdf:Description>
</dcterms:relation>
<dcterms:relation>
<rdf:Description rdf:about="http://www.bafu.admin.ch/laerm/index.html?lang=de">
<rdfs:label>Webseite des BAFU</rdfs:label>
</rdf:Description>
</dcterms:relation>
<dcterms:relation>
<rdf:Description rdf:about="http://www.bafu.admin.ch/laerm/index.html"/>
</dcterms:relation>
<dcat:landingPage rdf:resource="https://example.com/"/>
<dcterms:accrualPeriodicity rdf:resource="http://publications.europa.eu/resource/authority/frequency/IRREG"/>
<foaf:page rdf:resource="https://example.com/docs"/>
</dcat:Dataset>
</dcat:dataset>
<dcat:dataset>
Expand Down
Loading

0 comments on commit 3cf0f61

Please sign in to comment.