Skip to content

Commit

Permalink
entity-renderer: add support for multiple datasetBaseUrl
Browse files Browse the repository at this point in the history
  • Loading branch information
ludovicm67 committed Dec 4, 2024
1 parent 0dacbee commit 0056079
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 83 deletions.
5 changes: 5 additions & 0 deletions .changeset/three-berries-perform.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-entity-renderer": minor
---

Add support for multiple `datasetBaseUrl` values
135 changes: 52 additions & 83 deletions packages/entity-renderer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ import { parsers } from '@rdfjs/formats-common'
import rdf from '@zazuko/env'
import { sparqlSerializeQuadStream, sparqlSupportedTypes, sparqlGetRewriteConfiguration } from 'trifid-core'

import { defaultConfiguration } from './lib/config.js'
import { getAcceptHeader } from './lib/headers.js'
import { checkDatasetBaseUrl } from './lib/base.js'

import { createEntityRenderer } from './renderer/entity.js'
import { createMetadataProvider } from './renderer/metadata.js'

Expand All @@ -17,70 +20,6 @@ const replaceIriInQuery = (query, iri) => {
return query.split('{{iri}}').join(iri)
}

/**
 * Default configuration values for the entity renderer.
 *
 * The SPARQL query templates below contain `{{iri}}` placeholders, which are
 * substituted with the IRI by `replaceIriInQuery` before a query is executed.
 */
const defaultConfiguration = {
// When truthy, an IRI ending with `/` is treated as a container
resourceNoSlash: true,
// ASK template: true when the IRI is the subject of at least one triple
resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
// DESCRIBE template for the IRI
resourceGraphQuery: 'DESCRIBE <{{iri}}>',
// ASK template: true when at least one typed subject has an IRI starting with the given IRI
// NOTE(review): the IRI is inserted into the regular expression as-is (no escaping visible here) — confirm this is intended
containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// CONSTRUCT template: type triples of every subject whose IRI starts with the given IRI
containerGraphQuery:
'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// SELECT template: looks up a GetRequest on the IRI together with its Response
// (location, response code, optional validFrom), supporting both the 2011 and
// the 2006 HTTP vocabularies; returns at most one row
redirectQuery: `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {
# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.
?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.
OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}
UNION
# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.
?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.
OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}
# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`,
// NOTE(review): presumably controls whether redirects found through `redirectQuery` are followed — confirm against the caller
followRedirects: false,
enableSchemaUrlRedirect: false, // Experimental
// The consumer enables this only when the value stringifies to 'true'
allowEndpointSwitch: false, // Experimental
}

/**
 * Normalize a `Content-Type` header value to its bare media type.
 *
 * Everything after the first `;` (parameters such as `charset`) is dropped,
 * surrounding whitespace is trimmed and the result is lowercased.
 *
 * @param {string} contentType - Raw header value, e.g. `Text/Turtle; charset=utf-8`.
 * @returns {string} The lowercased media type, e.g. `text/turtle`.
 */
const fixContentTypeHeader = (contentType) => {
  // Media types are ASCII tokens: use the locale-independent `toLowerCase`.
  // `toLocaleLowerCase` follows the host locale and would, for example, map
  // `I` to a dotless `ı` under a Turkish locale.
  return contentType.split(';')[0].trim().toLowerCase()
}
Expand All @@ -91,11 +30,24 @@ const factory = async (trifid) => {
const entityRenderer = createEntityRenderer({ options: config, logger, query })
const metadataProvider = createMetadataProvider({ options: config })

const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl: datasetBaseUrlValue, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const allowEndpointSwitch = `${allowEndpointSwitchConfigValue}` === 'true'
const entityTemplatePath = path || `${currentDir}/views/render.hbs`
const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, datasetBaseUrl)
const { rewrite: rewriteValue, replaceIri, iriOrigin } = rewriteConfig
const datasetBaseUrls = checkDatasetBaseUrl(logger, datasetBaseUrlValue)

/**
* Map of dataset base URLs with their rewrite configuration.
* @type {Map<string, { rewrite: boolean, replaceIri: (iri: string) => string, iriOrigin: (iri: string) => string, datasetBaseUrl: string }>}
*/
const dbu = new Map()
datasetBaseUrls.forEach((value) => {
const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, value)
// Just to have all the fields explicitly defined
const { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl } = rewriteConfig
dbu.set(value, { rewrite: rewriteValue, replaceIri, iriOrigin, datasetBaseUrl })

logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'} for '${value}' dataset base URL`)
})

const additionalRewritesConfig = config.additionalRewrites || []
if (!Array.isArray(additionalRewritesConfig)) {
Expand All @@ -111,15 +63,6 @@ const factory = async (trifid) => {
return value
})

logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'}`)

if (rewriteValue) {
if (!datasetBaseUrl.endsWith('/')) {
logger.warn('The value for `datasetBaseUrl` should usually end with a `/`')
}
logger.debug(`Using '${datasetBaseUrl}' as dataset base URL`)
}

// If `ignorePaths` is not provided or invalid, we configure some defaults values
let ignoredPaths = ignorePaths
if (!ignorePaths || !Array.isArray(ignorePaths)) {
Expand Down Expand Up @@ -172,9 +115,37 @@ const factory = async (trifid) => {
iriUrl.search = ''
iriUrl.searchParams.forEach((_value, key) => iriUrl.searchParams.delete(key))
const iriUrlString = iriUrl.toString()
const iri = replaceIri(iriUrlString)
const isContainer = mergedConfig.resourceNoSlash && iri.endsWith('/')
logger.debug(`IRI value: ${iri}${rewriteValue ? ' (rewritten)' : ''} - is container: ${isContainer ? 'true' : 'false'}`)

let iri
let iriOrigin
let replaceIri
let rewriteValue
let datasetBaseUrl
let isContainer
for (const [_key, value] of dbu) {
if (iri !== undefined) {
break
}

const tmpIri = value.replaceIri(iriUrlString)
const tmpIsContainer = mergedConfig.resourceNoSlash && tmpIri.endsWith('/')
logger.debug(`IRI value: ${tmpIri}${value.rewriteValue ? ' (rewritten)' : ''} - is container: ${tmpIsContainer ? 'true' : 'false'}`)

// Check if the IRI exists in the dataset ; if so, use it for the rest of the process
const askQuery = tmpIsContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
const exists = await query(replaceIriInQuery(askQuery, tmpIri), { ask: true, headers: queryHeaders })
if (exists) {
iriOrigin = value.iriOrigin
replaceIri = value.replaceIri
rewriteValue = value.rewrite
datasetBaseUrl = value.datasetBaseUrl
isContainer = tmpIsContainer
iri = tmpIri
logger.debug(`IRI found: ${iri}`)
break
}
}

const rewriteResponse = rewriteValue
? [
...additionalRewrites.map(({ find, replace }) => {
Expand All @@ -188,10 +159,8 @@ const factory = async (trifid) => {
]
: []

// Check if the IRI exists in the dataset
const askQuery = isContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
const exists = await query(replaceIriInQuery(askQuery, iri), { ask: true, headers: queryHeaders })
if (!exists) {
// If the IRI is not found, we return a 404
if (!iri) {
reply.callNotFound()
return reply
}
Expand Down
50 changes: 50 additions & 0 deletions packages/entity-renderer/lib/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// @ts-check

/**
* Check the dataset base URL.
* Some hints are provided if the dataset base URL is not correctly formatted.
* If a value is empty, an error is thrown.
*
* @param {{warn: Function }} logger - The logger instance
* @param {string} datasetBaseUrl - The dataset base URL
* @returns {true} The dataset base URL as an array
*/
export const checkSingleDatasetBaseUrl = (logger, datasetBaseUrl) => {
if (typeof datasetBaseUrl !== 'string') {
throw new Error('The datasetBaseUrl must be a string')
}

if (!datasetBaseUrl) {
throw new Error("Value for 'datasetBaseUrl' is missing")
}

if (!datasetBaseUrl.endsWith('/')) {
logger.warn(`The value for 'datasetBaseUrl' should usually end with a '/' ; it is not the case for '${datasetBaseUrl}'`)
}

return true
}

/**
* Check the dataset base URL, and make sure it returns an array.
* Some hints are provided if the dataset base URL is not correctly formatted.
* If the dataset base URL is an array, each value is checked.
* If a value is empty, then an error is thrown.
*
* @param {{warn: Function }} logger - The logger instance
* @param {string | string[]} datasetBaseUrl - The dataset base URL
* @returns {string[]} The dataset base URL as an array
*/
export const checkDatasetBaseUrl = (logger, datasetBaseUrl) => {
if (!datasetBaseUrl) {
throw new Error('No datasetBaseUrl provided')
}

if (Array.isArray(datasetBaseUrl)) {
datasetBaseUrl.forEach((value) => checkSingleDatasetBaseUrl(logger, value))
return datasetBaseUrl
} else {
checkSingleDatasetBaseUrl(logger, datasetBaseUrl)
return [datasetBaseUrl]
}
}
63 changes: 63 additions & 0 deletions packages/entity-renderer/lib/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
 * Default configuration values for the entity renderer.
 *
 * The SPARQL query templates below contain `{{iri}}` placeholders.
 * NOTE(review): presumably the consumer substitutes them with the IRI being
 * resolved before executing a query — confirm against the caller.
 */
export const defaultConfiguration = {
// NOTE(review): the name suggests resources are expected without a trailing slash,
// IRIs ending with `/` being handled as containers — confirm against the caller
resourceNoSlash: true,
// ASK template: true when the IRI is the subject of at least one triple
resourceExistsQuery: 'ASK { <{{iri}}> ?p ?o }',
// DESCRIBE template for the IRI
resourceGraphQuery: 'DESCRIBE <{{iri}}>',
// ASK template: true when at least one typed subject has an IRI starting with the given IRI
// NOTE(review): the IRI is inserted into the regular expression as-is (no escaping visible here) — confirm this is intended
containerExistsQuery: 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// CONSTRUCT template: type triples of every subject whose IRI starts with the given IRI
containerGraphQuery:
'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }',
// SELECT template: looks up a GetRequest on the IRI together with its Response
// (location, response code, optional validFrom), supporting both the 2011 and
// the 2006 HTTP vocabularies; returns at most one row
redirectQuery: `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {
# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.
?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.
OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}
UNION
# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.
?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.
OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}
# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`,
// NOTE(review): presumably controls whether redirects found through `redirectQuery` are followed — confirm against the caller
followRedirects: false,
enableSchemaUrlRedirect: false, // Experimental
allowEndpointSwitch: false, // Experimental
}
95 changes: 95 additions & 0 deletions packages/entity-renderer/test/base.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// @ts-check

import { strictEqual, deepStrictEqual, throws } from 'node:assert'
import { afterEach, beforeEach, describe, it } from 'node:test'

import { checkSingleDatasetBaseUrl, checkDatasetBaseUrl } from '../lib/base.js'

/**
 * Build a logger stub whose `warn` method appends every message to `sink`.
 *
 * @param {string[]} sink - Array that collects the warning messages
 * @returns {{ warn: (msg: string) => void }} The logger stub
 */
const recordingLogger = (sink) => ({
  warn: (/** @type {string} */ msg) => { sink.push(msg) },
})

// Values shared by the test cases below
const urlWithSlash = 'http://example.com/'
const urlWithoutSlash = 'http://example.com'

describe('lib/base', () => {
  // Validation of one single value
  describe('checkSingleDatasetBaseUrl', () => {
    let warnings
    let stubLogger

    // Each test starts with a fresh logger and an empty list of warnings
    beforeEach(() => {
      warnings = []
      stubLogger = recordingLogger(warnings)
    })

    afterEach(() => {
      stubLogger = undefined
      warnings = undefined
    })

    it('should not throw on valid value', () => {
      const result = checkSingleDatasetBaseUrl(stubLogger, urlWithSlash)
      strictEqual(result, true)
    })

    it('should warn on missing trailing slash', () => {
      const result = checkSingleDatasetBaseUrl(stubLogger, urlWithoutSlash)
      strictEqual(result, true)
      strictEqual(warnings.length, 1)
    })

    it('should throw on empty value', () => {
      throws(() => checkSingleDatasetBaseUrl(stubLogger, ''))
    })

    it('should throw on non-string value', () => {
      // @ts-expect-error
      throws(() => checkSingleDatasetBaseUrl(stubLogger, 42))
      // @ts-expect-error
      throws(() => checkSingleDatasetBaseUrl(stubLogger, [urlWithSlash]))
    })
  })

  // Validation of a value that can be either a string or an array of strings
  describe('checkDatasetBaseUrl', () => {
    let warnings
    let stubLogger

    // Each test starts with a fresh logger and an empty list of warnings
    beforeEach(() => {
      warnings = []
      stubLogger = recordingLogger(warnings)
    })

    afterEach(() => {
      stubLogger = undefined
      warnings = undefined
    })

    it('should not throw on valid value (string)', () => {
      const result = checkDatasetBaseUrl(stubLogger, urlWithSlash)
      deepStrictEqual(result, [urlWithSlash])
    })

    it('should not throw on valid value (array)', () => {
      const result = checkDatasetBaseUrl(stubLogger, [urlWithSlash])
      deepStrictEqual(result, [urlWithSlash])
    })

    it('should warn on missing trailing slash', () => {
      const result = checkDatasetBaseUrl(stubLogger, urlWithoutSlash)
      deepStrictEqual(result, [urlWithoutSlash])
      strictEqual(warnings.length, 1)
    })

    it('should throw on empty value', () => {
      throws(() => checkDatasetBaseUrl(stubLogger, ''))
    })

    it('should throw on array with an empty value', () => {
      throws(() => checkDatasetBaseUrl(stubLogger, ['']))
    })

    it('should throw on array that contains an empty value somewhere', () => {
      throws(() => checkDatasetBaseUrl(stubLogger, ['', urlWithoutSlash]))
      throws(() => checkDatasetBaseUrl(stubLogger, [urlWithoutSlash, '']))
    })

    it('should throw on array that contains a value that is not a string', () => {
      // @ts-expect-error
      throws(() => checkDatasetBaseUrl(stubLogger, [42, urlWithoutSlash]))
      // @ts-expect-error
      throws(() => checkDatasetBaseUrl(stubLogger, [urlWithoutSlash, 42]))
    })
  })
})

0 comments on commit 0056079

Please sign in to comment.