diff --git a/.changeset/fluffy-pillows-dance.md b/.changeset/fluffy-pillows-dance.md new file mode 100644 index 00000000..ce43d856 --- /dev/null +++ b/.changeset/fluffy-pillows-dance.md @@ -0,0 +1,5 @@ +--- +"@zazuko/trifid-plugin-ckan": minor +--- + +Export `dcterms:relation` from source cube diff --git a/.changeset/kind-apples-shout.md b/.changeset/kind-apples-shout.md new file mode 100644 index 00000000..d20a606e --- /dev/null +++ b/.changeset/kind-apples-shout.md @@ -0,0 +1,5 @@ +--- +"@zazuko/trifid-plugin-ckan": patch +--- + +Added `foaf:page` - dataset documentation diff --git a/.changeset/pink-mails-jog.md b/.changeset/pink-mails-jog.md new file mode 100644 index 00000000..846df160 --- /dev/null +++ b/.changeset/pink-mails-jog.md @@ -0,0 +1,5 @@ +--- +"@zazuko/trifid-plugin-ckan": major +--- + +Output [europa.eu themes](https://publications.europa.eu/resource/authority/data-theme) explicitly mapped using `schema:sameAs`. Themes without a mapping are not included in the output. diff --git a/.changeset/seven-horses-brake.md b/.changeset/seven-horses-brake.md new file mode 100644 index 00000000..a2f23c69 --- /dev/null +++ b/.changeset/seven-horses-brake.md @@ -0,0 +1,5 @@ +--- +"@zazuko/trifid-plugin-ckan": major +--- + +Use new output for the `dcat:Distribution/dcterms:format`, linking to [europa.eu controlled vocabulary](https://publications.europa.eu/resource/authority/file-type) diff --git a/packages/ckan/src/namespace.js b/packages/ckan/src/namespace.js index 084bb348..d0381f3a 100644 --- a/packages/ckan/src/namespace.js +++ b/packages/ckan/src/namespace.js @@ -1,6 +1,6 @@ // @ts-check import _rdf from '@zazuko/env' -const { dcat, dcterms, rdf, schema, skos, vcard, xsd } = _rdf.ns +const { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs } = _rdf.ns -export { dcat, dcterms, rdf, schema, skos, vcard, xsd } +export { dcat, dcterms, rdf, schema, skos, vcard, xsd, foaf, rdfs } diff --git a/packages/ckan/src/query.js b/packages/ckan/src/query.js index ff862256..dbc7ca4f 100644 --- a/packages/ckan/src/query.js +++ b/packages/ckan/src/query.js @@ -15,6 +15,8 @@ const datasetsQuery = (organizationId) => { ?o ?nestedP ?nestedO . ?copyright ${ns.schema.identifier} ?copyrightIdentifier . ?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity . + ?publisher ${ns.schema.name} ?publisherName . + ?dataset ${ns.dcat.theme} ?euTheme . } WHERE { GRAPH ?graph { @@ -42,6 +44,18 @@ const datasetsQuery = (organizationId) => { OPTIONAL { ?dataset ${ns.dcterms.accrualPeriodicity} ?accrualPeriodicity . } + + OPTIONAL { + ?dataset ${ns.dcterms.publisher} ?publisher . + ?publisher ${ns.schema.name} ?publisherName . + } + + OPTIONAL { + ?dataset ${ns.dcat.theme} ?theme . + ?theme ${ns.schema.sameAs} ?euTheme . + } + + FILTER (?p != ${ns.dcat.theme}) } } ` diff --git a/packages/ckan/src/xml.js b/packages/ckan/src/xml.js index 90d93afa..5d541fb9 100644 --- a/packages/ckan/src/xml.js +++ b/packages/ckan/src/xml.js @@ -28,6 +28,7 @@ const toXML = (dataset) => { dcat: prefixes.dcat, dcterms: prefixes.dcterms, vcard: prefixes.vcard, + foaf: prefixes.foaf, }, }, { 'rdf:RDF': { @@ -69,26 +70,45 @@ const toXML = (dataset) => { .filter(workExample => workExample.out(ns.schema.encodingFormat).terms.length > 0) .map(workExample => ({ 'dcat:Distribution': { + '@': { 'rdf:about': workExample.out(ns.schema.url).value }, 'dcterms:issued': serializeTerm(dataset.out(ns.dcterms.issued)), 'dcat:mediaType': serializeTerm(workExample.out(ns.schema.encodingFormat)), 'dcat:accessURL': serializeTerm(workExample.out(ns.schema.url)), 'dcterms:title': serializeTerm(workExample.out(ns.schema.name)), - 'dcterms:rights': serializeTerm(copyright), - 'dcterms:format': { '#': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)) }, + 'dcterms:license': serializeTerm(copyright), + 'dcterms:format': { + '@': { + 'rdf:resource': distributionFormatFromEncoding(workExample.out(ns.schema.encodingFormat)), + }, + }, }, })) const publishers = dataset.out(ns.dcterms.publisher) - .map(publisher => ({ - 'rdf:Description': { - 'rdfs:label': publisher.value, - }, - })) + .map(publisher => { + const attr = {} + /** @type {string | string[]} */ + let name = publisher.value + + if (isNamedNode(publisher)) { + attr['rdf:about'] = publisher.value + if (publisher.out(ns.schema.name).values.length > 0) { + name = publisher.out(ns.schema.name).values + } + } + + return { + 'foaf:Organization': { + '@': attr, + 'foaf:name': name, + }, + } + }) // Datasets contain a mix of legacy (DC) frequencies and new (EU) frequencies. // The query makes sure we get both legacy and new ones, we only // provide the new ones to CKAN, by converting legacy ones if needed. - const legacyFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/' + const euFreqPrefix = 'http://publications.europa.eu/resource/authority/frequency/' const accrualPeriodicity = dataset.out(ns.dcterms.accrualPeriodicity) .map((t) => { if (!t.term || !t.term.value) { @@ -98,7 +118,7 @@ const toXML = (dataset) => { t.term.value = convertLegacyFrequency(t.term.value) return t }) - .filter(({ term }) => term.value.startsWith(legacyFreqPrefix)) + .filter(({ term }) => term.value.startsWith(euFreqPrefix)) return { 'dcat:Dataset': { @@ -116,7 +136,10 @@ const toXML = (dataset) => { ), 'dcat:theme': serializeTerm(dataset.out(ns.dcat.theme)), 'dcterms:language': serializeTerm(dataset.out(ns.dcterms.language)), - 'dcterms:relation': legalBasis, + 'dcterms:relation': [ + legalBasis, + serializeTerm(dataset.out(ns.dcterms.relation), { properties: [ns.rdfs.label] }), + ], 'dcat:keyword': serializeTerm(keywords), 'dcat:landingPage': serializeTerm(dataset.out(ns.dcat.landingPage)), 'dcterms:spatial': serializeTerm(dataset.out(ns.dcterms.spatial)), @@ -124,6 +147,7 @@ const toXML = (dataset) => { 'dcterms:temporal': serializeTerm(dataset.out(ns.dcterms.temporal)), 'dcterms:accrualPeriodicity': serializeTerm(accrualPeriodicity), 'dcat:distribution': distributions, + 'foaf:page': serializeTerm(dataset.out(ns.foaf.page)), }, } }).filter(Boolean), @@ -132,9 +156,15 @@ const toXML = (dataset) => { }).doc().end({ prettyPrint: true }).concat('\n') } -const serializeTerm = (pointer) => { +/** + * Serialize a term. + * @param {import('clownface').MultiPointer | Array} pointer Pointer to serialize. + * @param {object} [options] + * @param {import('@rdfjs/types').NamedNode[]} [options.properties] + */ +const serializeTerm = (pointer, { properties = [] } = {}) => { return pointer.map((value) => { - return serializeLiteral(value) || serializeNamedNode(value) || serializeBlankNode(value) || {} + return serializeLiteral(value) || serializeNamedNode(value, properties) || serializeBlankNode(value) || {} }) } @@ -168,11 +198,26 @@ const serializeLiteral = (pointer) => { * Serialize a named node. * * @param {import('clownface').MultiPointer} pointer Pointer to serialize. + * @param {import('@rdfjs/types').NamedNode[]} [properties] * @return {Record} Serialized named node. */ -const serializeNamedNode = (pointer) => { +const serializeNamedNode = (pointer, properties = []) => { if (!isNamedNode(pointer)) return null + const propertyMap = properties.reduce((acc, property) => ({ + ...acc, + [shrink(property.value)]: serializeTerm(pointer.out(property)), + }), {}) + + if (Object.keys(propertyMap).length > 0) { + return { + 'rdf:Description': { + '@': { 'rdf:about': pointer.value }, + ...propertyMap, + }, + } + } + return { '@': { 'rdf:resource': pointer.value }, } @@ -217,13 +262,13 @@ const distributionFormatFromEncoding = (encodingPointer) => { switch (encoding) { case 'text/html': { - return 'HTML' + return 'http://publications.europa.eu/resource/authority/file-type/HTML' } case 'application/sparql-query': { - return 'SERVICE' + return 'http://publications.europa.eu/resource/authority/file-type/SPARQLQ' } default: { - return 'UNKNOWN' + return `https://www.iana.org/assignments/media-types/${encoding}` } } } diff --git a/packages/ckan/test/ckan.test.js b/packages/ckan/test/ckan.test.js index 97de5aab..fe545fb5 100644 --- a/packages/ckan/test/ckan.test.js +++ b/packages/ckan/test/ckan.test.js @@ -43,6 +43,9 @@ const createTrifidInstance = async () => { describe('@zazuko/trifid-plugin-ckan', () => { let trifidListener + const parser = new xml.Parser({ + explicitArray: false, + }) beforeEach(async () => { const trifidInstance = await createTrifidInstance() @@ -71,15 +74,40 @@ describe('@zazuko/trifid-plugin-ckan', () => { strictEqual(body, expectedResult) }) - it('should get a basic result for a known organization', async () => { - const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org` + describe('example organization', () => { + let res + let xmlText + let xmlBody - const res = await fetch(ckanUrl) - const body = await res.text() - const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8') + beforeEach(async () => { + const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=http://example.com/my-org` + res = await fetch(ckanUrl) + xmlText = await res.text() + xmlBody = await parser.parseStringPromise(xmlText) + }) - strictEqual(res.status, 200) - strictEqual(body, expectedResult) + it('should get a basic result for a known organization', async () => { + const expectedResult = await readFile(new URL('./support/basic-result.xml', import.meta.url), 'utf8') + + strictEqual(res.status, 200) + strictEqual(xmlText, expectedResult) + }) + + it('should take publisher at face value', async () => { + const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher') + + const expected = await parser.parseStringPromise(` + + http://example.com/my-org + `) + expect(publisher).to.containSubset(expected) + }) + + it('should get landing page resource', () => { + const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage') + + expect(landingPage.$['rdf:resource']).to.eq('https://example.com/') + }) }) it('should convert legacy frequency to EU frequency if possible', async () => { @@ -129,16 +157,16 @@ describe('@zazuko/trifid-plugin-ckan', () => { }) describe('BLW tests', () => { - const parser = new xml.Parser({ - explicitArray: false, - }) + let xmlBody - it('should get a correct contactPoint', async () => { + beforeEach(async () => { const ckanUrl = `${getListenerURL(trifidListener)}/ckan?organization=https://register.ld.admin.ch/opendataswiss/org/bundesamt-fur-landwirtschaft-blw` - const res = await fetch(ckanUrl) - const body = await parser.parseStringPromise(await res.text()) - const contactPoint = xpath.evalFirst(body, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint') + xmlBody = await parser.parseStringPromise(await res.text()) + }) + + it('should get a correct contactPoint', async () => { + const contactPoint = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:contactPoint') const expected = await parser.parseStringPromise(` @@ -147,5 +175,50 @@ describe('@zazuko/trifid-plugin-ckan', () => { `) expect(contactPoint).to.containSubset(expected) }) + + it('should get structured publisher', async () => { + const publisher = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:publisher') + + const expected = await parser.parseStringPromise(` + + Bundesamt für Landwirtschaft + `) + expect(publisher).to.containSubset(expected) + }) + + it('should get landing page resource', () => { + const landingPage = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:landingPage') + + expect(landingPage.$['rdf:resource']).to.eq('https://agrarmarktdaten.admin.ch') + }) + + it('should use mapped themes', () => { + const themes = xpath.find(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:theme') + .map(theme => theme.$['rdf:resource']) + + expect(themes).to.contain.all.members([ + 'http://publications.europa.eu/resource/authority/data-theme/AGRI', + 'http://publications.europa.eu/resource/authority/data-theme/GOVE', + 'http://publications.europa.eu/resource/authority/data-theme/ECON', + ]) + expect(themes).to.have.length(3) + }) + + it('should get temporal PeriodOfTime', async () => { + const themes = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcterms:temporal') + + const expected = await parser.parseStringPromise(` + + 2024-01-01 + 2024-12-31 + `) + expect(themes).to.containSubset(expected) + }) + + it('should build correct distribution format', async () => { + const format = xpath.evalFirst(xmlBody, '//rdf:RDF/dcat:Catalog/dcat:dataset/dcat:Dataset/dcat:distribution/dcat:Distribution/dcterms:format') + + expect(format.$['rdf:resource']).to.eq('http://publications.europa.eu/resource/authority/file-type/SPARQLQ') + }) }) }) diff --git a/packages/ckan/test/support/basic-result.xml b/packages/ckan/test/support/basic-result.xml index d0f6c9f7..b84b8874 100644 --- a/packages/ckan/test/support/basic-result.xml +++ b/packages/ckan/test/support/basic-result.xml @@ -14,8 +14,28 @@ Dataset 1 - Description 2023-10-31 2023-10-31T15:15:15Z + + + http://example.com/my-org + + + + + legal_basis + + + + + Webseite des BAFU + + + + + + + diff --git a/packages/ckan/test/support/data.ttl b/packages/ckan/test/support/data.ttl index 59425f29..942fc6a7 100644 --- a/packages/ckan/test/support/data.ttl +++ b/packages/ckan/test/support/data.ttl @@ -1,3 +1,5 @@ +PREFIX rdfs: +PREFIX foaf: @base . @prefix schema: . @prefix dcterms: . @@ -40,7 +42,17 @@ schema:description "Dataset 1 - Description"@it ; schema:description "Dataset 1 - Description"@de ; schema:version "1"^^xsd:integer ; - . + dcterms:publisher ; + dcat:landingPage ; + foaf:page ; + dcterms:relation ; + dcterms:relation ; + dcterms:license ; +. + + + rdfs:label "Webseite des BAFU" ; +. rdf:type schema:Dataset ; @@ -71,7 +83,7 @@ schema:description "Dataset 2 - Description"@it ; schema:description "Dataset 2 - Description"@de ; schema:version "1"^^xsd:integer ; - . +. ######################################### # @@ -113,11 +125,7 @@ schema:publisher ; schema:contactPoint _:genid2de581ad199db849c2a2500f49babdf0072d0b695fb815fb9 ; schema:datePublished "2024-02-01"^^xsd:date ; - dcat:landingPage - , - , - , - ; + dcat:landingPage ; schema:workExample , , @@ -141,7 +149,12 @@ ; cube:observationConstraint ; - admin:euDataTheme theme:AGRI, theme:ECON . + admin:euDataTheme theme:AGRI, theme:ECON ; + dcterms:temporal [ + a dcterms:PeriodOfTime ; + schema:startDate "2024-01-01"^^xsd:date ; + schema:endDate "2024-12-31"^^xsd:date ; + ] . _:genid2de581ad199db849c2a2500f49babdf0072d0b695fb815fb9 a schema:ContactPoint, vcard:Organization ; dcterms:title "Bundesamt für Landwirtschaft, Fachbereich Marktanalysen" ; @@ -149,3 +162,23 @@ _:genid2de581ad199db849c2a2500f49babdf0072d0b695fb815fb9 a schema:ContactPoint, schema:email "marktanalysen@blw.admin.ch" ; vcard:fn "Bundesamt für Landwirtschaft, Fachbereich Marktanalysen" ; vcard:hasEmail . + + + rdf:type schema:Organization ; + schema:name "Bundesamt für Landwirtschaft"@de ; + # schema:name "Federal Office for Agriculture"@en ; + # schema:name "Office fédéral de l'agriculture"@fr ; + # schema:name "Ufficio federale dell'agricoltura"@it ; +. + +category:administration + schema:sameAs ; +. + +category:agriculture + schema:sameAs ; +. + +category:national-economy + schema:sameAs ; +.