diff --git a/.changeset/three-berries-perform.md b/.changeset/three-berries-perform.md
new file mode 100644
index 00000000..302bc53d
--- /dev/null
+++ b/.changeset/three-berries-perform.md
@@ -0,0 +1,5 @@
+---
+"@zazuko/trifid-entity-renderer": minor
+---
+
+Add support for multiple `datasetBaseUrl`
diff --git a/packages/entity-renderer/index.js b/packages/entity-renderer/index.js
index b8d2c9c7..6c836874 100644
--- a/packages/entity-renderer/index.js
+++ b/packages/entity-renderer/index.js
@@ -5,7 +5,10 @@ import { parsers } from '@rdfjs/formats-common'
 import rdf from '@zazuko/env'
 import { sparqlSerializeQuadStream, sparqlSupportedTypes, sparqlGetRewriteConfiguration } from 'trifid-core'
 
+import { defaultConfiguration } from './lib/config.js'
 import { getAcceptHeader } from './lib/headers.js'
+import { checkDatasetBaseUrl } from './lib/base.js'
+
 import { createEntityRenderer } from './renderer/entity.js'
 import { createMetadataProvider } from './renderer/metadata.js'
 
@@ -17,70 +20,6 @@ const replaceIriInQuery = (query, iri) => {
   return query.split('{{iri}}').join(iri)
 }
 
-const defaultConfiguration = {
-  resourceNoSlash: true,
-  resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
-  resourceGraphQuery: 'DESCRIBE <{{iri}}>',
-  containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
-  containerGraphQuery:
-    'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
-  redirectQuery: `
-    PREFIX http2011: <http://www.w3.org/2011/http#>
-    PREFIX http2006: <http://www.w3.org/2006/http#>
-    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-
-    SELECT ?req ?res ?location ?responseCode ?validFrom
-    WHERE {
-      GRAPH ?g {
-
-        # Handle 2011 version
-        {
-          ?req2011 rdf:type http2011:GetRequest.
-          ?req2011 http2011:requestURI <{{iri}}>.
-          ?req2011 http2011:response ?res2011.
-
-          ?res2011 rdf:type http2011:Response.
-          ?res2011 http2011:location ?location2011.
-          ?res2011 http2011:responseCode ?responseCode2011.
-
-          OPTIONAL {
-            ?res2011 <http://schema.org/validFrom> ?validFrom2011.
-          }
-        }
-
-        UNION
-
-        # Handle 2006 version
-        {
-          ?req2006 rdf:type http2006:GetRequest.
-          ?req2006 http2006:requestURI <{{iri}}>.
-          ?req2006 http2006:response ?res2006.
-
-          ?res2006 rdf:type http2006:Response.
-          ?res2006 http2006:location ?location2006.
-          ?res2006 http2006:responseCode ?responseCode2006.
-
-          OPTIONAL {
-            ?res2006 <http://schema.org/validFrom> ?validFrom2006.
-          }
-        }
-
-        # Combine results, using priority for 2011 version over 2006 version
-        BIND(COALESCE(?req2011, ?req2006) AS ?req)
-        BIND(COALESCE(?res2011, ?res2006) AS ?res)
-        BIND(COALESCE(?location2011, ?location2006) AS ?location)
-        BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
-        # Just get the response code as a string instead of the full IRI
-        BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
-      }
-    }
-    LIMIT 1
-  `,
-  followRedirects: false,
-  enableSchemaUrlRedirect: false, // Experimental
-  allowEndpointSwitch: false, // Experimental
-}
-
 const fixContentTypeHeader = (contentType) => {
   return contentType.split(';')[0].trim().toLocaleLowerCase()
 }
@@ -91,11 +30,24 @@ const factory = async (trifid) => {
   const entityRenderer = createEntityRenderer({ options: config, logger, query })
   const metadataProvider = createMetadataProvider({ options: config })
 
-  const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
+  const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl: datasetBaseUrlValue, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
   const allowEndpointSwitch = `${allowEndpointSwitchConfigValue}` === 'true'
   const entityTemplatePath = path || `${currentDir}/views/render.hbs`
-  const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, datasetBaseUrl)
-  const { rewrite: rewriteValue, replaceIri, iriOrigin } = rewriteConfig
+  const datasetBaseUrls = checkDatasetBaseUrl(logger, datasetBaseUrlValue)
+
+  /**
+   * Map of dataset base URLs with their rewrite configuration.
+   * @type {Map<string, { rewrite: boolean, replaceIri: (iri: string) => string, iriOrigin: (iri: string) => string, datasetBaseUrl: string }>}
+   */
+  const dbu = new Map()
+  datasetBaseUrls.forEach((value) => {
+    const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, value)
+    // Just to have all the fields explicitly defined
+    const { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl } = rewriteConfig
+    dbu.set(value, { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl })
+
+    logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'} for '${value}' dataset base URL`)
+  })
 
   const additionalRewritesConfig = config.additionalRewrites || []
   if (!Array.isArray(additionalRewritesConfig)) {
@@ -111,15 +63,6 @@ const factory = async (trifid) => {
     return value
   })
 
-  logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'}`)
-
-  if (rewriteValue) {
-    if (!datasetBaseUrl.endsWith('/')) {
-      logger.warn('The value for `datasetBaseUrl` should usually end with a `/`')
-    }
-    logger.debug(`Using '${datasetBaseUrl}' as dataset base URL`)
-  }
-
   // If `ignorePaths` is not provided or invalid, we configure some defaults values
   let ignoredPaths = ignorePaths
   if (!ignorePaths || !Array.isArray(ignorePaths)) {
@@ -172,9 +115,37 @@ const factory = async (trifid) => {
         iriUrl.search = ''
         iriUrl.searchParams.forEach((_value, key) => iriUrl.searchParams.delete(key))
         const iriUrlString = iriUrl.toString()
-        const iri = replaceIri(iriUrlString)
-        const isContainer = mergedConfig.resourceNoSlash && iri.endsWith('/')
-        logger.debug(`IRI value: ${iri}${rewriteValue ? ' (rewritten)' : ''} - is container: ${isContainer ? 'true' : 'false'}`)
+
+        let iri
+        let iriOrigin
+        let replaceIri
+        let rewriteValue
+        let datasetBaseUrl
+        let isContainer
+        for (const [_key, value] of dbu) {
+          if (iri !== undefined) {
+            break
+          }
+
+          const tmpIri = value.replaceIri(iriUrlString)
+          const tmpIsContainer = mergedConfig.resourceNoSlash && tmpIri.endsWith('/')
+          logger.debug(`IRI value: ${tmpIri}${value.rewrite ? ' (rewritten)' : ''} - is container: ${tmpIsContainer ? 'true' : 'false'}`)
+
+          // Check if the IRI exists in the dataset ; if so, use it for the rest of the process
+          const askQuery = tmpIsContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
+          const exists = await query(replaceIriInQuery(askQuery, tmpIri), { ask: true, headers: queryHeaders })
+          if (exists) {
+            iriOrigin = value.iriOrigin
+            replaceIri = value.replaceIri
+            rewriteValue = value.rewrite
+            datasetBaseUrl = value.datasetBaseUrl
+            isContainer = tmpIsContainer
+            iri = tmpIri
+            logger.debug(`IRI found: ${iri}`)
+            break
+          }
+        }
+
         const rewriteResponse = rewriteValue
           ? [
             ...additionalRewrites.map(({ find, replace }) => {
@@ -188,10 +159,8 @@ const factory = async (trifid) => {
           ]
           : []
 
-        // Check if the IRI exists in the dataset
-        const askQuery = isContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
-        const exists = await query(replaceIriInQuery(askQuery, iri), { ask: true, headers: queryHeaders })
-        if (!exists) {
+        // If the IRI is not found, we return a 404
+        if (!iri) {
           reply.callNotFound()
           return reply
         }
diff --git a/packages/entity-renderer/lib/base.js b/packages/entity-renderer/lib/base.js
new file mode 100644
index 00000000..1b337b72
--- /dev/null
+++ b/packages/entity-renderer/lib/base.js
@@ -0,0 +1,50 @@
+// @ts-check
+
+/**
+ * Check the dataset base URL.
+ * Some hints are provided if the dataset base URL is not correctly formatted.
+ * If a value is empty, an error is thrown.
+ *
+ * @param {{warn: Function }} logger - The logger instance
+ * @param {string} datasetBaseUrl - The dataset base URL
+ * @returns {true} Always true; an error is thrown when the value is not a non-empty string
+ */
+export const checkSingleDatasetBaseUrl = (logger, datasetBaseUrl) => {
+  if (typeof datasetBaseUrl !== 'string') {
+    throw new Error('The datasetBaseUrl must be a string')
+  }
+
+  if (!datasetBaseUrl) {
+    throw new Error("Value for 'datasetBaseUrl' is missing")
+  }
+
+  if (!datasetBaseUrl.endsWith('/')) {
+    logger.warn(`The value for 'datasetBaseUrl' should usually end with a '/' ; it is not the case for '${datasetBaseUrl}'`)
+  }
+
+  return true
+}
+
+/**
+ * Check the dataset base URL, and make sure it returns an array.
+ * Some hints are provided if the dataset base URL is not correctly formatted.
+ * If the dataset base URL is an array, each value is checked.
+ * If a value (or the array itself) is empty, then an error is thrown.
+ *
+ * @param {{warn: Function }} logger - The logger instance
+ * @param {string | string[]} datasetBaseUrl - The dataset base URL
+ * @returns {string[]} The dataset base URL as an array
+ */
+export const checkDatasetBaseUrl = (logger, datasetBaseUrl) => {
+  if (!datasetBaseUrl || (Array.isArray(datasetBaseUrl) && datasetBaseUrl.length === 0)) {
+    throw new Error('No datasetBaseUrl provided')
+  }
+
+  if (Array.isArray(datasetBaseUrl)) {
+    datasetBaseUrl.forEach((value) => checkSingleDatasetBaseUrl(logger, value))
+    return datasetBaseUrl
+  } else {
+    checkSingleDatasetBaseUrl(logger, datasetBaseUrl)
+    return [datasetBaseUrl]
+  }
+}
diff --git a/packages/entity-renderer/lib/config.js b/packages/entity-renderer/lib/config.js
new file mode 100644
index 00000000..aefb54d4
--- /dev/null
+++ b/packages/entity-renderer/lib/config.js
@@ -0,0 +1,63 @@
+export const defaultConfiguration = {
+  resourceNoSlash: true,
+  resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
+  resourceGraphQuery: 'DESCRIBE <{{iri}}>',
+  containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
+  containerGraphQuery:
+    'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
+  redirectQuery: `
+    PREFIX http2011: <http://www.w3.org/2011/http#>
+    PREFIX http2006: <http://www.w3.org/2006/http#>
+    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+    SELECT ?req ?res ?location ?responseCode ?validFrom
+    WHERE {
+      GRAPH ?g {
+
+        # Handle 2011 version
+        {
+          ?req2011 rdf:type http2011:GetRequest.
+          ?req2011 http2011:requestURI <{{iri}}>.
+          ?req2011 http2011:response ?res2011.
+
+          ?res2011 rdf:type http2011:Response.
+          ?res2011 http2011:location ?location2011.
+          ?res2011 http2011:responseCode ?responseCode2011.
+
+          OPTIONAL {
+            ?res2011 <http://schema.org/validFrom> ?validFrom2011.
+          }
+        }
+
+        UNION
+
+        # Handle 2006 version
+        {
+          ?req2006 rdf:type http2006:GetRequest.
+          ?req2006 http2006:requestURI <{{iri}}>.
+          ?req2006 http2006:response ?res2006.
+
+          ?res2006 rdf:type http2006:Response.
+          ?res2006 http2006:location ?location2006.
+          ?res2006 http2006:responseCode ?responseCode2006.
+
+          OPTIONAL {
+            ?res2006 <http://schema.org/validFrom> ?validFrom2006.
+          }
+        }
+
+        # Combine results, using priority for 2011 version over 2006 version
+        BIND(COALESCE(?req2011, ?req2006) AS ?req)
+        BIND(COALESCE(?res2011, ?res2006) AS ?res)
+        BIND(COALESCE(?location2011, ?location2006) AS ?location)
+        BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
+        # Just get the response code as a string instead of the full IRI
+        BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
+      }
+    }
+    LIMIT 1
+  `,
+  followRedirects: false,
+  enableSchemaUrlRedirect: false, // Experimental
+  allowEndpointSwitch: false, // Experimental
+}
diff --git a/packages/entity-renderer/test/base.test.js b/packages/entity-renderer/test/base.test.js
new file mode 100644
index 00000000..62b8c23c
--- /dev/null
+++ b/packages/entity-renderer/test/base.test.js
@@ -0,0 +1,95 @@
+// @ts-check
+
+import { strictEqual, deepStrictEqual, throws } from 'node:assert'
+import { afterEach, beforeEach, describe, it } from 'node:test'
+
+import { checkSingleDatasetBaseUrl, checkDatasetBaseUrl } from '../lib/base.js'
+
+describe('lib/base', () => {
+  describe('checkSingleDatasetBaseUrl', () => {
+    let logger
+    let loggerValues
+
+    beforeEach(() => {
+      loggerValues = []
+      logger = {
+        warn: (/** @type {string} */ msg) => { loggerValues.push(msg) },
+      }
+    })
+
+    afterEach(() => {
+      logger = undefined
+      loggerValues = undefined
+    })
+
+    it('should not throw on valid value', () => {
+      strictEqual(checkSingleDatasetBaseUrl(logger, 'http://example.com/'), true)
+    })
+
+    it('should warn on missing trailing slash', () => {
+      strictEqual(checkSingleDatasetBaseUrl(logger, 'http://example.com'), true)
+      strictEqual(loggerValues.length, 1)
+    })
+
+    it('should throw on empty value', () => {
+      throws(() => checkSingleDatasetBaseUrl(logger, ''))
+    })
+
+    it('should throw on non-string value', () => {
+      // @ts-expect-error
+      throws(() => checkSingleDatasetBaseUrl(logger, 42))
+      // @ts-expect-error
+      throws(() => checkSingleDatasetBaseUrl(logger, ['http://example.com/']))
+    })
+  })
+
+  describe('checkDatasetBaseUrl', () => {
+    let logger
+    let loggerValues
+
+    beforeEach(() => {
+      loggerValues = []
+      logger = {
+        warn: (/** @type {string} */ msg) => { loggerValues.push(msg) },
+      }
+    })
+
+    afterEach(() => {
+      logger = undefined
+      loggerValues = undefined
+    })
+
+    it('should not throw on valid value (string)', () => {
+      deepStrictEqual(checkDatasetBaseUrl(logger, 'http://example.com/'), ['http://example.com/'])
+    })
+
+    it('should not throw on valid value (array)', () => {
+      deepStrictEqual(checkDatasetBaseUrl(logger, ['http://example.com/']), ['http://example.com/'])
+    })
+
+    it('should warn on missing trailing slash', () => {
+      deepStrictEqual(checkDatasetBaseUrl(logger, 'http://example.com'), ['http://example.com'])
+      strictEqual(loggerValues.length, 1)
+    })
+
+    it('should throw on empty value', () => {
+      throws(() => checkDatasetBaseUrl(logger, ''))
+    })
+
+    it('should throw on array with an empty value', () => {
+      throws(() => checkDatasetBaseUrl(logger, ['']))
+    })
+
+    it('should throw on array that contains an empty value somewhere', () => {
+      throws(() => checkDatasetBaseUrl(logger, ['', 'http://example.com']))
+      throws(() => checkDatasetBaseUrl(logger, ['http://example.com', '']))
+    })
+
+    it('should throw on array that contains a value that is not a string', () => {
+      // @ts-expect-error
+      throws(() => checkDatasetBaseUrl(logger, [42, 'http://example.com']))
+      // @ts-expect-error
+      throws(() => checkDatasetBaseUrl(logger, ['http://example.com', 42]))
+    })
+  })
+})