Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entity renderer: add support for multiple datasetBaseUrl #589

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/three-berries-perform.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@zazuko/trifid-entity-renderer": minor
---

Add support for multiple `datasetBaseUrl` values
165 changes: 53 additions & 112 deletions packages/entity-renderer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import { fileURLToPath } from 'node:url'
import { parsers } from '@rdfjs/formats-common'
import rdf from '@zazuko/env'
import { sparqlSerializeQuadStream, sparqlSupportedTypes, sparqlGetRewriteConfiguration } from 'trifid-core'
import mimeparse from 'mimeparse'

import { defaultConfiguration } from './lib/config.js'
import { getAcceptHeader } from './lib/headers.js'
import { checkDatasetBaseUrl } from './lib/base.js'

import { createEntityRenderer } from './renderer/entity.js'
import { createMetadataProvider } from './renderer/metadata.js'
Expand All @@ -13,102 +16,10 @@ const currentDir = dirname(fileURLToPath(import.meta.url))

const DEFAULT_ENDPOINT_NAME = 'default'

/**
 * Work out which media type should be served for a request.
 *
 * A recognised `format` query-string value wins. Otherwise the `Accept`
 * header is matched against the supported SPARQL types plus `text/html`.
 *
 * @param {{query: Record<string, any>, headers: Record<string, any>}} req - The request object
 * @returns {string} The selected media type, or the lower-cased `Accept` header when `bestMatch` yields a falsy value
 */
const getAcceptHeader = (req) => {
  const requestedFormat = req.query.format

  // Short `?format=` aliases and the media type each one stands for
  const formatToMediaType = {
    ttl: 'text/turtle',
    jsonld: 'application/ld+json',
    xml: 'application/rdf+xml',
    nt: 'application/n-triples',
    trig: 'application/trig',
    csv: 'text/csv',
    html: 'text/html',
  }

  // Own-property check so inherited keys such as `toString` are never treated as formats
  const isKnownFormat = Object.prototype.hasOwnProperty.call(formatToMediaType, requestedFormat)
  if (isKnownFormat) {
    return formatToMediaType[requestedFormat]
  }

  const rawAccept = req.headers.accept || ''
  const normalizedAccept = `${rawAccept}`.toLocaleLowerCase()
  const candidates = [...sparqlSupportedTypes, 'text/html']
  const bestCandidate = mimeparse.bestMatch(candidates, normalizedAccept)

  return bestCandidate || normalizedAccept
}

/**
 * Fill in a SPARQL query template by substituting every `{{iri}}` placeholder.
 *
 * The substitution is a plain split/join, so the inserted value is taken
 * literally (no `$`-style replacement patterns are interpreted).
 *
 * @param {string} query - The query template
 * @param {string} iri - The value to insert in place of each placeholder
 * @returns {string} The query with all placeholders replaced
 */
const replaceIriInQuery = (query, iri) => {
  const segments = query.split('{{iri}}')
  return segments.join(iri)
}

// SPARQL templates used by the default configuration.
// Each `{{iri}}` placeholder is filled in before the query is sent.
const resourceExistsTemplate = 'ASK { <{{iri}}> ?p ?o }'
const resourceGraphTemplate = 'DESCRIBE <{{iri}}>'
const containerExistsTemplate = 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }'
const containerGraphTemplate = 'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }'

// Looks up a redirect description for the IRI, supporting both the 2011 and
// the 2006 HTTP vocabularies (2011 values take priority when both are bound).
const redirectTemplate = `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {

# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.

?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.

OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}

UNION

# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.

?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.

OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}

# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`

/**
 * Default plugin options: the query templates above plus the feature
 * switches, all of which start disabled except `resourceNoSlash`.
 */
const defaultConfiguration = {
  resourceNoSlash: true,
  resourceExistsQuery: resourceExistsTemplate,
  resourceGraphQuery: resourceGraphTemplate,
  containerExistsQuery: containerExistsTemplate,
  containerGraphQuery: containerGraphTemplate,
  redirectQuery: redirectTemplate,
  followRedirects: false,
  enableSchemaUrlRedirect: false, // Experimental
  allowEndpointSwitch: false, // Experimental
}

const fixContentTypeHeader = (contentType) => {
return contentType.split(';')[0].trim().toLocaleLowerCase()
}
Expand All @@ -119,11 +30,24 @@ const factory = async (trifid) => {
const entityRenderer = createEntityRenderer({ options: config, logger, query })
const metadataProvider = createMetadataProvider({ options: config })

const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const { path, ignorePaths, rewrite: rewriteConfigValue, datasetBaseUrl: datasetBaseUrlValue, allowEndpointSwitch: allowEndpointSwitchConfigValue } = config
const allowEndpointSwitch = `${allowEndpointSwitchConfigValue}` === 'true'
const entityTemplatePath = path || `${currentDir}/views/render.hbs`
const rewriteConfig = sparqlGetRewriteConfiguration(rewriteConfigValue, datasetBaseUrl)
const { rewrite: rewriteValue, replaceIri, iriOrigin } = rewriteConfig
const datasetBaseUrls = checkDatasetBaseUrl(logger, datasetBaseUrlValue)

/**
 * Rewrite configuration for each configured dataset base URL, keyed by that base URL.
 * @type {Map<string, { rewrite: boolean, replaceIri: (iri: string) => string, iriOrigin: (iri: string) => string, datasetBaseUrl: string }>}
 */
const dbu = new Map()
datasetBaseUrls.forEach((baseUrl) => {
  // Pick the fields one by one so the stored entry has an explicit, fixed shape
  const {
    rewrite: isRewriteEnabled,
    replaceIri,
    iriOrigin,
    datasetBaseUrl,
  } = sparqlGetRewriteConfiguration(rewriteConfigValue, baseUrl)

  const entry = { rewrite: isRewriteEnabled, replaceIri, iriOrigin, datasetBaseUrl }
  dbu.set(baseUrl, entry)

  const rewriteState = isRewriteEnabled ? 'enabled' : 'disabled'
  logger.debug(`Rewriting is ${rewriteState} for '${baseUrl}' dataset base URL`)
})

const additionalRewritesConfig = config.additionalRewrites || []
if (!Array.isArray(additionalRewritesConfig)) {
Expand All @@ -139,15 +63,6 @@ const factory = async (trifid) => {
return value
})

logger.debug(`Rewriting is ${rewriteValue ? 'enabled' : 'disabled'}`)

if (rewriteValue) {
if (!datasetBaseUrl.endsWith('/')) {
logger.warn('The value for `datasetBaseUrl` should usually end with a `/`')
}
logger.debug(`Using '${datasetBaseUrl}' as dataset base URL`)
}

// If `ignorePaths` is not provided or invalid, we configure some defaults values
let ignoredPaths = ignorePaths
if (!ignorePaths || !Array.isArray(ignorePaths)) {
Expand Down Expand Up @@ -200,9 +115,37 @@ const factory = async (trifid) => {
iriUrl.search = ''
iriUrl.searchParams.forEach((_value, key) => iriUrl.searchParams.delete(key))
const iriUrlString = iriUrl.toString()
const iri = replaceIri(iriUrlString)
const isContainer = mergedConfig.resourceNoSlash && iri.endsWith('/')
logger.debug(`IRI value: ${iri}${rewriteValue ? ' (rewritten)' : ''} - is container: ${isContainer ? 'true' : 'false'}`)

// Resolution state for the current request. Every variable stays `undefined`
// unless one of the configured dataset base URLs yields an IRI that exists.
let iri
let iriOrigin
let replaceIri
let rewriteValue
let datasetBaseUrl
let isContainer

// Candidates are probed one at a time, in the insertion order of `dbu`,
// and the first one whose IRI exists in the dataset is kept.
for (const candidate of dbu.values()) {
  const candidateIri = candidate.replaceIri(iriUrlString)
  const candidateIsContainer = mergedConfig.resourceNoSlash && candidateIri.endsWith('/')
  // Entries of `dbu` expose the rewrite flag as `rewrite` (they have no `rewriteValue` field)
  logger.debug(`IRI value: ${candidateIri}${candidate.rewrite ? ' (rewritten)' : ''} - is container: ${candidateIsContainer ? 'true' : 'false'}`)

  // Check if the IRI exists in the dataset ; if so, use it for the rest of the process
  const askQuery = candidateIsContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
  const exists = await query(replaceIriInQuery(askQuery, candidateIri), { ask: true, headers: queryHeaders })
  if (!exists) {
    continue
  }

  iriOrigin = candidate.iriOrigin
  replaceIri = candidate.replaceIri
  rewriteValue = candidate.rewrite
  datasetBaseUrl = candidate.datasetBaseUrl
  isContainer = candidateIsContainer
  iri = candidateIri
  logger.debug(`IRI found: ${iri}`)
  break
}

const rewriteResponse = rewriteValue
? [
...additionalRewrites.map(({ find, replace }) => {
Expand All @@ -216,10 +159,8 @@ const factory = async (trifid) => {
]
: []

// Check if the IRI exists in the dataset
const askQuery = isContainer ? mergedConfig.containerExistsQuery : mergedConfig.resourceExistsQuery
const exists = await query(replaceIriInQuery(askQuery, iri), { ask: true, headers: queryHeaders })
if (!exists) {
// If the IRI is not found, we return a 404
if (!iri) {
reply.callNotFound()
return reply
}
Expand Down
50 changes: 50 additions & 0 deletions packages/entity-renderer/lib/base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// @ts-check

/**
* Check the dataset base URL.
* Some hints are provided if the dataset base URL is not correctly formatted.
* If a value is empty, an error is thrown.
*
* @param {{warn: Function }} logger - The logger instance
* @param {string} datasetBaseUrl - The dataset base URL
* @returns {true} The dataset base URL as an array
*/
export const checkSingleDatasetBaseUrl = (logger, datasetBaseUrl) => {
if (typeof datasetBaseUrl !== 'string') {
throw new Error('The datasetBaseUrl must be a string')
}

if (!datasetBaseUrl) {
throw new Error("Value for 'datasetBaseUrl' is missing")
}

if (!datasetBaseUrl.endsWith('/')) {
logger.warn(`The value for 'datasetBaseUrl' should usually end with a '/' ; it is not the case for '${datasetBaseUrl}'`)
}

return true
}

/**
 * Validate the configured dataset base URL(s) and normalise them to an array.
 *
 * A single string is validated and wrapped in a one-element array; an array
 * has each of its values validated and is returned as-is. Validation of each
 * value is delegated to `checkSingleDatasetBaseUrl`.
 *
 * @param {{warn: Function }} logger - The logger instance
 * @param {string | string[]} datasetBaseUrl - The dataset base URL, or a list of them
 * @returns {string[]} The dataset base URL(s) as an array
 * @throws {Error} If nothing is provided (falsy value or empty array), or if any value is invalid
 */
export const checkDatasetBaseUrl = (logger, datasetBaseUrl) => {
  if (!datasetBaseUrl) {
    throw new Error('No datasetBaseUrl provided')
  }

  if (!Array.isArray(datasetBaseUrl)) {
    checkSingleDatasetBaseUrl(logger, datasetBaseUrl)
    return [datasetBaseUrl]
  }

  // An empty list is truthy, so it slips past the guard above; reject it
  // explicitly rather than returning a list with nothing to resolve against.
  if (datasetBaseUrl.length === 0) {
    throw new Error('No datasetBaseUrl provided')
  }

  datasetBaseUrl.forEach((value) => checkSingleDatasetBaseUrl(logger, value))
  return datasetBaseUrl
}
63 changes: 63 additions & 0 deletions packages/entity-renderer/lib/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// SPARQL templates used by the default configuration; each contains
// one or more `{{iri}}` placeholders.
const RESOURCE_EXISTS_QUERY = 'ASK { <{{iri}}> ?p ?o }'
const RESOURCE_GRAPH_QUERY = 'DESCRIBE <{{iri}}>'
const CONTAINER_EXISTS_QUERY = 'ASK { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }'
const CONTAINER_GRAPH_QUERY = 'CONSTRUCT { ?s a ?o. } WHERE { ?s a ?o. FILTER REGEX(STR(?s), "^{{iri}}") }'

// Selects a redirect description for the IRI, covering both the 2011 and the
// 2006 HTTP vocabularies (2011 values take priority when both are bound).
const REDIRECT_QUERY = `
PREFIX http2011: <http://www.w3.org/2011/http#>
PREFIX http2006: <http://www.w3.org/2006/http#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

SELECT ?req ?res ?location ?responseCode ?validFrom
WHERE {
GRAPH ?g {

# Handle 2011 version
{
?req2011 rdf:type http2011:GetRequest.
?req2011 http2011:requestURI <{{iri}}>.
?req2011 http2011:response ?res2011.

?res2011 rdf:type http2011:Response.
?res2011 http2011:location ?location2011.
?res2011 http2011:responseCode ?responseCode2011.

OPTIONAL {
?res2011 <http://schema.org/validFrom> ?validFrom2011.
}
}

UNION

# Handle 2006 version
{
?req2006 rdf:type http2006:GetRequest.
?req2006 http2006:requestURI <{{iri}}>.
?req2006 http2006:response ?res2006.

?res2006 rdf:type http2006:Response.
?res2006 http2006:location ?location2006.
?res2006 http2006:responseCode ?responseCode2006.

OPTIONAL {
?res2006 <http://schema.org/validFrom> ?validFrom2006.
}
}

# Combine results, using priority for 2011 version over 2006 version
BIND(COALESCE(?req2011, ?req2006) AS ?req)
BIND(COALESCE(?res2011, ?res2006) AS ?res)
BIND(COALESCE(?location2011, ?location2006) AS ?location)
BIND(COALESCE(?validFrom2011, ?validFrom2006) AS ?validFrom)
# Just get the response code as a string instead of the full IRI
BIND(STRAFTER(STR(COALESCE(?responseCode2011, ?responseCode2006)), "#") AS ?responseCode)
}
}
LIMIT 1
`

/**
 * Default options for the entity renderer: the query templates above plus
 * the feature switches, all of which start disabled except `resourceNoSlash`.
 */
export const defaultConfiguration = {
  resourceNoSlash: true,
  resourceExistsQuery: RESOURCE_EXISTS_QUERY,
  resourceGraphQuery: RESOURCE_GRAPH_QUERY,
  containerExistsQuery: CONTAINER_EXISTS_QUERY,
  containerGraphQuery: CONTAINER_GRAPH_QUERY,
  redirectQuery: REDIRECT_QUERY,
  followRedirects: false,
  enableSchemaUrlRedirect: false, // Experimental
  allowEndpointSwitch: false, // Experimental
}
36 changes: 36 additions & 0 deletions packages/entity-renderer/lib/headers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import mimeparse from 'mimeparse'
import { sparqlSupportedTypes } from 'trifid-core'

/**
 * Work out which media type should be served for a request.
 *
 * A recognised `format` query-string value takes precedence. Otherwise the
 * `Accept` header (treated as `text/html` when absent or empty) is matched
 * against the supported SPARQL types plus `text/html`.
 *
 * Both `query` and `headers` may be missing from the request object.
 *
 * @param {{query?: Record<string, any>, headers?: Record<string, any>}} req - The request object
 * @returns {string} The selected media type, or the lower-cased `Accept` value when `bestMatch` yields a falsy value
 */
export const getAcceptHeader = (req) => {
  // `query` is optional in the declared type, so guard it the same way as `headers` below
  const queryStringValue = req.query?.format

  const supportedQueryStringValues = {
    ttl: 'text/turtle',
    jsonld: 'application/ld+json',
    xml: 'application/rdf+xml',
    nt: 'application/n-triples',
    trig: 'application/trig',
    csv: 'text/csv',
    html: 'text/html',
  }

  // Own-property check so inherited keys such as `toString` are never treated as formats
  if (
    Object.prototype.hasOwnProperty.call(supportedQueryStringValues, queryStringValue)
  ) {
    return supportedQueryStringValues[queryStringValue]
  }

  const acceptHeader = `${req.headers?.accept || 'text/html'}`.toLocaleLowerCase()
  const selectedHeader = mimeparse.bestMatch([
    ...sparqlSupportedTypes,
    'text/html',
  ], acceptHeader)

  return selectedHeader || acceptHeader
}
Loading
Loading