Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update some CKAN properties #265

Merged
merged 14 commits into from
Feb 27, 2024
5 changes: 5 additions & 0 deletions .changeset/fluffy-pillows-dance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": minor
---

Export `dcterms:relation` from source cube
5 changes: 5 additions & 0 deletions .changeset/kind-apples-shout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": patch
---

Add `foaf:page` (dataset documentation) to the output
5 changes: 5 additions & 0 deletions .changeset/pink-mails-jog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": major
---

Output [europa.eu themes](https://publications.europa.eu/resource/authority/data-theme) explicitly mapped using `schema:sameAs`. Themes without a mapping are not included in the output.
5 changes: 5 additions & 0 deletions .changeset/seven-horses-brake.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-plugin-ckan": major
---

Use a new output for `dcat:Distribution/dcterms:format`, linking to the [europa.eu controlled vocabulary](https://publications.europa.eu/resource/authority/file-type)
4 changes: 2 additions & 2 deletions packages/ckan/src/namespace.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// @ts-check
import _rdf from '@zazuko/env'

const { dcat, dcterms, rdf, schema, skos, vcard, xsd } = _rdf.ns
const { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs } = _rdf.ns

export { dcat, dcterms, rdf, schema, skos, vcard, xsd }
export { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs }
14 changes: 14 additions & 0 deletions packages/ckan/src/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const datasetsQuery = (organizationId) => {
?o ?nestedP ?nestedO .
?copyright ${ns.schema.identifier} ?copyrightIdentifier .
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
?publisher ${ns.schema.name} ?publisherName .
?dataset ${ns.dcat.theme} ?euTheme .
}
WHERE {
GRAPH ?graph {
Expand Down Expand Up @@ -42,6 +44,18 @@ const datasetsQuery = (organizationId) => {
OPTIONAL {
?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity .
}

OPTIONAL {
?dataset ${ns.dcterms.publisher} ?publisher .
?publisher ${ns.schema.name} ?publisherName .
}

OPTIONAL {
?dataset ${ns.dcat.theme} ?theme .
?theme ${ns.schema.sameAs} ?euTheme .
}

FILTER (?p != ${ns.dcat.theme})
}
}
`
Expand Down
77 changes: 61 additions & 16 deletions packages/ckan/src/xml.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ const toXML = (dataset) => {
dcat: prefixes.dcat,
dcterms: prefixes.dcterms,
vcard: prefixes.vcard,
foaf: prefixes.foaf,
},
}, {
'rdf:RDF': {
Expand Down Expand Up @@ -69,26 +70,45 @@ const toXML = (dataset) => {
.filter(workExample => workExample.out(ns.schema.encodingFormat).terms.length > 0)
.map(workExample => ({
'dcat:Distribution': {
'@': { 'rdf:about': workExample.out(ns.schema.url).value },
'dcterms:issued': serializeTerm(dataset.out(ns.dcterms.issued)),
'dcat:mediaType': serializeTerm(workExample.out(ns.schema.encodingFormat)),
'dcat:accessURL': serializeTerm(workExample.out(ns.schema.url)),
'dcterms:title': serializeTerm(workExample.out(ns.schema.name)),
'dcterms:rights': serializeTerm(copyright),
'dcterms:format': { '#': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)) },
'dcterms:license': serializeTerm(copyright),
'dcterms:format': {
'@': {
'rdf:resource': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)),
},
},
},
}))

const publishers = dataset.out(ns.dcterms.publisher)
.map(publisher => ({
'rdf:Description': {
'rdfs:label': publisher.value,
},
}))
.map(publisher => {
const attr = {}
/** @type {string | string[]} */
let name = publisher.value

if (isNamedNode(publisher)) {
attr['rdf:about'] = publisher.value
if (publisher.out(ns.schema.name).values.length > 0) {
name = publisher.out(ns.schema.name).values
}
}

return {
'foaf:Organization': {
'@': attr,
'foaf:name': name,
},
}
})

// Datasets contain a mix of legacy (DC) frequencies and new (EU) frequencies.
// The query makes sure we get both legacy and new ones, but we only
// provide the new ones to CKAN, converting legacy ones where needed.
const legacyFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/'
const euFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/'
const accrualPeriodicity = dataset.out(ns.dcterms.accrualPeriodicity)
.map((t) => {
if (!t.term || !t.term.value) {
Expand All @@ -98,7 +118,7 @@ const toXML = (dataset) => {
t.term.value = convertLegacyFrequency(t.term.value)
return t
})
.filter(({ term }) => term.value.startsWith(legacyFreqPrefix))
.filter(({ term }) => term.value.startsWith(euFreqPrefix))

return {
'dcat:Dataset': {
Expand All @@ -116,14 +136,18 @@ const toXML = (dataset) => {
),
'dcat:theme': serializeTerm(dataset.out(ns.dcat.theme)),
'dcterms:language': serializeTerm(dataset.out(ns.dcterms.language)),
'dcterms:relation': legalBasis,
'dcterms:relation': [
legalBasis,
serializeTerm(dataset.out(ns.dcterms.relation), { properties: [ns.rdfs.label] }),
],
'dcat:keyword': serializeTerm(keywords),
'dcat:landingPage': serializeTerm(dataset.out(ns.dcat.landingPage)),
'dcterms:spatial': serializeTerm(dataset.out(ns.dcterms.spatial)),
'dcterms:coverage': serializeTerm(dataset.out(ns.dcterms.coverage)),
'dcterms:temporal': serializeTerm(dataset.out(ns.dcterms.temporal)),
'dcterms:accrualPeriodicity': serializeTerm(accrualPeriodicity),
'dcat:distribution': distributions,
'foaf:page': serializeTerm(dataset.out(ns.foaf.page)),
},
}
}).filter(Boolean),
Expand All @@ -132,9 +156,15 @@ const toXML = (dataset) => {
}).doc().end({ prettyPrint: true }).concat('\n')
}

const serializeTerm = (pointer) => {
/**
* Serialize a term.
* @param {import('clownface').MultiPointer | Array<import('clownface').GraphPointer>} pointer Pointer to serialize.
* @param {object} [options]
* @param {import('@rdfjs/types').NamedNode[]} [options.properties]
*/
const serializeTerm = (pointer, { properties = [] } = {}) => {
return pointer.map((value) => {
return serializeLiteral(value) || serializeNamedNode(value) || serializeBlankNode(value) || {}
return serializeLiteral(value) || serializeNamedNode(value, properties) || serializeBlankNode(value) || {}
})
}

Expand Down Expand Up @@ -168,11 +198,26 @@ const serializeLiteral = (pointer) => {
* Serialize a named node.
*
* @param {import('clownface').MultiPointer} pointer Pointer to serialize.
* @param {import('@rdfjs/types').NamedNode[]} [properties]
* @return {Record<string, unknown>} Serialized named node.
*/
const serializeNamedNode = (pointer) => {
const serializeNamedNode = (pointer, properties = []) => {
if (!isNamedNode(pointer)) return null

const propertyMap = properties.reduce((acc, property) => ({
...acc,
[shrink(property.value)]: serializeTerm(pointer.out(property)),
}), {})

if (Object.keys(propertyMap).length > 0) {
return {
'rdf:Description': {
'@': { 'rdf:about': pointer.value },
...propertyMap,
},
}
}

return {
'@': { 'rdf:resource': pointer.value },
}
Expand Down Expand Up @@ -217,13 +262,13 @@ const distributionFormatFromEncoding = (encodingPointer) => {

switch (encoding) {
case 'text/html': {
return 'HTML'
return 'http://publications.europa.eu/resource/authority/file-type/HTML'
}
case 'application/sparql-query': {
return 'SERVICE'
return 'http://publications.europa.eu/resource/authority/file-type/SPARQLQ'
}
default: {
return 'UNKNOWN'
return `https://www.iana.org/assignments/media-types/${encoding}`
}
}
}
Expand Down
101 changes: 87 additions & 14 deletions packages/ckan/test/ckan.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ const createTrifidInstance = async () => {

describe('@zazuko/trifid-plugin-ckan', () => {
let trifidListener
const parser = new xml.Parser({
explicitArray: false,
})

beforeEach(async () => {
const trifidInstance = await createTrifidInstance()
Expand Down Expand Up @@ -71,15 +74,40 @@ describe('@zazuko/trifid-plugin-ckan', () => {
strictEqual(body, expectedResult)
})

it('should get a basic result for a known organization', async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org`
describe('example organization', () => {
let res
let xmlText
let xmlBody

const res = await fetch(ckanUrl)
const body = await res.text()
const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8')
beforeEach(async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org`
res = await fetch(ckanUrl)
xmlText = await res.text()
xmlBody = await parser.parseStringPromise(xmlText)
})

strictEqual(res.status, 200)
strictEqual(body, expectedResult)
it('should get a basic result for a known organization', async () => {
const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8')

strictEqual(res.status, 200)
strictEqual(xmlText, expectedResult)
})

it('should take publisher at face value', async () => {
const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher')

const expected = await parser.parseStringPromise(`
<foaf:Organization>
<foaf:name>http://example.com/my-org</foaf:name>
</foaf:Organization>`)
expect(publisher).to.containSubset(expected)
})

it('should get landing page resource', () => {
const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage')

expect(landingPage.$['rdf:resource']).to.eq('https://example.com/')
})
})

it('should convert legacy frequency to EU frequency if possible', async () => {
Expand Down Expand Up @@ -129,16 +157,16 @@ describe('@zazuko/trifid-plugin-ckan', () => {
})

describe('BLW tests', () => {
const parser = new xml.Parser({
explicitArray: false,
})
let xmlBody

it('should get a correct contactPoint', async () => {
beforeEach(async () => {
const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-landwirtschaft-blw`

const res = await fetch(ckanUrl)
const body = await parser.parseStringPromise(await res.text())
const contactPoint = xpath.evalFirst(body, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint')
xmlBody = await parser.parseStringPromise(await res.text())
})

it('should get a correct contactPoint', async () => {
const contactPoint = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint')

const expected = await parser.parseStringPromise(`
<vcard:Organization>
Expand All @@ -147,5 +175,50 @@ describe('@zazuko/trifid-plugin-ckan', () => {
</vcard:Organization>`)
expect(contactPoint).to.containSubset(expected)
})

it('should get structured publisher', async () => {
const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher')

const expected = await parser.parseStringPromise(`
<foaf:Organization rdf:about="https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-landwirtschaft-blw">
<foaf:name>Bundesamt für Landwirtschaft</foaf:name>
</foaf:Organization>`)
expect(publisher).to.containSubset(expected)
})

it('should get landing page resource', () => {
const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage')

expect(landingPage.$['rdf:resource']).to.eq('https://agrarmarktdaten.admin.ch')
})

it('should use mapped themes', () => {
const themes = xpath.find(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:theme')
.map(theme => theme.$['rdf:resource'])

expect(themes).to.contain.all.members([
'http://publications.europa.eu/resource/authority/data-theme/AGRI',
'http://publications.europa.eu/resource/authority/data-theme/GOVE',
'http://publications.europa.eu/resource/authority/data-theme/ECON',
])
expect(themes).to.have.length(3)
})

// Verifies that the first dcterms:temporal element of the dataset contains a
// dcterms:PeriodOfTime with xsd:date-typed schema:startDate / schema:endDate.
it('should get temporal PeriodOfTime', async () => {
  // Named for what it holds: the dcterms:temporal node (previously misnamed `themes`).
  const temporal = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:temporal')

  const expected = await parser.parseStringPromise(`
<dcterms:PeriodOfTime>
<schema:startDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-01-01</schema:startDate>
<schema:endDate rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2024-12-31</schema:endDate>
</dcterms:PeriodOfTime>`)
  // Subset match: extra children or attributes in the actual output are tolerated.
  expect(temporal).to.containSubset(expected)
})

it('should build correct distribution format', async () => {
const format = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:distribution/dcat:Distribution/dcterms:format')

expect(format.$['rdf:resource']).to.eq('http://publications.europa.eu/resource/authority/file-type/SPARQLQ')
})
})
})
20 changes: 20 additions & 0 deletions packages/ckan/test/support/basic-result.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,28 @@
<dcterms:description xml:lang="it">Dataset 1 - Description</dcterms:description>
<dcterms:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2023-10-31</dcterms:issued>
<dcterms:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2023-10-31T15:15:15Z</dcterms:modified>
<dcterms:publisher>
<foaf:Organization rdf:about="http://example.com/my-org">
<foaf:name>http://example.com/my-org</foaf:name>
</foaf:Organization>
</dcterms:publisher>
<dcterms:creator rdf:resource="http://example.com/my-org"/>
<dcterms:relation>
<rdf:Description rdf:about="https://www.fedlex.admin.ch/eli/cc/1998/3033_3033_3033/de#art_27">
<rdfs:label>legal_basis</rdfs:label>
</rdf:Description>
</dcterms:relation>
<dcterms:relation>
<rdf:Description rdf:about="http://www.bafu.admin.ch/laerm/index.html?lang=de">
<rdfs:label>Webseite des BAFU</rdfs:label>
</rdf:Description>
</dcterms:relation>
<dcterms:relation>
<rdf:Description rdf:about="http://www.bafu.admin.ch/laerm/index.html"/>
</dcterms:relation>
<dcat:landingPage rdf:resource="https://example.com/"/>
<dcterms:accrualPeriodicity rdf:resource="http://publications.europa.eu/resource/authority/frequency/IRREG"/>
<foaf:page rdf:resource="https://example.com/docs"/>
</dcat:Dataset>
</dcat:dataset>
<dcat:dataset>
Expand Down
Loading