Skip to content

Commit

Permalink
handler-fetch: use oxigraph
Browse files Browse the repository at this point in the history
  • Loading branch information
ludovicm67 committed Jan 3, 2024
1 parent f925eb5 commit 2e81c93
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 1,493 deletions.
12 changes: 12 additions & 0 deletions .changeset/shiny-weeks-do.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
"trifid-handler-fetch": major
---

Completely change the way it works.

Instead of being a handler that is only called when dereferencing, it is now a Trifid middleware that exposes a new endpoint (`/query`, for example) that can be used to perform SPARQL queries against the dataset.

This means that it is now possible to perform SPARQL queries against a dataset that is loaded from a URL, which was not possible before.
This also means that it is possible to use all other Trifid plugins that were only working with a SPARQL endpoint and not with a dataset coming from a simple file.

Please take a look at how to use it in the documentation: https://github.com/zazuko/trifid/tree/main/packages/handler-fetch#readme
613 changes: 6 additions & 607 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 0 additions & 2 deletions packages/handler-fetch/.npmignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
.github/
node_modules/
.eslintrc
.gitignore
yarn.lock
*.tgz
55 changes: 33 additions & 22 deletions packages/handler-fetch/README.md
Original file line number Diff line number Diff line change
@@ -1,40 +1,51 @@
# trifid-handler-fetch

Handler for Trifid which reads the data from a URL.
[fetch-lite](https://github.com/rdfjs-base/fetch-lite) is used for `http://` and `https://` URLs.
[file-fetch](https://www.npmjs.com/package/file-fetch) is used for `file://` URLs.
This Trifid plugin exposes an endpoint where you can perform SPARQL queries against a dataset which is loaded from a URL.

The URL can be a local file or a remote resource.

At the start of the server, the dataset is loaded from the URL and stored in memory, using [Oxigraph](https://github.com/oxigraph/oxigraph).
The dataset is not updated automatically when the resource changes.

## Usage

Add the `trifid-handler-fetch` package to your dependencies:

npm install trifid-handler-fetch --save
```sh
npm install trifid-handler-fetch
```

Change the `handler` property in the config like in the example below and adapt the options.
Then update the Trifid configuration to something similar to the example below.

## Example

This example config uses [The Big Bang Theory dataset](https://www.npmjs.com/package/tbbt-ld/):

```
{
"baseConfig": "trifid:config.json",
"handler": {
"module": "trifid-handler-fetch",
"options": {
"url": "https://raw.githubusercontent.com/zazuko/tbbt-ld/master/dist/tbbt.nt",
"contentType": "application/n-triples",
"split": "true",
"cache": "true"
}
}
}
```yaml
middlewares:
# […]
handler-fetch:
module: "trifid-handler-fetch"
paths: /query
config:
url: https://raw.githubusercontent.com/zazuko/tbbt-ld/master/dist/tbbt.nt
contentType: application/n-triples
baseIri: http://example.com
graphName: http://example.com/graph
```
## Options
- `url`: URL to the resource which contains the dataset
- `contentType`: If set, parse the content with a parser for the given media type
- `resource`: If set, the dataset will be loaded into the given Named Graph
- `split`: If true, the dataset will be split into subgraphs for each Named Node
- `cache`: Reads the resource only once at the first request and caches the dataset for other request
- `contentType`: the format of the serialization. See below for the supported formats.
- `baseIri`: the base IRI to use to resolve the relative IRIs in the serialization.
- `graphName`: for triple serialization formats, the name of the named graph the triples should be loaded into.

Supported formats:

- [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
- [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
- [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
- [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
- [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
- [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
133 changes: 50 additions & 83 deletions packages/handler-fetch/index.js
Original file line number Diff line number Diff line change
@@ -1,91 +1,58 @@
import path from 'path'
import url from 'url'
import formats from '@rdfjs/formats-common/index.js'
import rdf from 'rdf-ext'
import rdfHandler from '@rdfjs/express-handler'

import SerializerJsonld from '@rdfjs/serializer-jsonld-ext'
import Fetcher from './lib/Fetcher.js'

// @TODO discuss what are the best serialization options.
const jsonLdSerializer = new SerializerJsonld({
encoding: 'string',
// compact: true,
// flatten: true
})

formats.serializers.set('application/json', jsonLdSerializer)
formats.serializers.set('application/ld+json', jsonLdSerializer)

/**
 * Extract the protocol from a string if it parses as an absolute URL.
 *
 * @param {string} candidate Value that may or may not be an absolute URL.
 * @returns {string|undefined} The protocol including its trailing colon
 *   (for example `https:`), or `undefined` when `candidate` cannot be parsed.
 */
const guessProtocol = (candidate) => {
  let parsed

  try {
    parsed = new url.URL(candidate)
  } catch {
    // Not an absolute URL (e.g. a relative file path): no protocol to report.
    return undefined
  }

  return parsed.protocol
}

export class FetchHandler {
constructor(options) {
this.dataset = rdf.dataset()
this.url = options.url
this.cache = options.cache
this.contentType = options.contentType
this.options = options.options || {}
this.resource = options.resource
this.split = options.split

// add file:// and resolve with cwd if no protocol was given
if (this.url && !guessProtocol(this.url)) {
this.url = 'file://' + path.resolve(this.url)
}

this.handle = this._handle.bind(this)

// legacy interface
this.get = this._get.bind(this)
}

_handle(req, res, next) {
rdfHandler
.attach(req, res, { formats })
.then(() => {
return Fetcher.load(this.dataset, this)
})
.then(async () => {
const dataset = this.dataset.match(
null,
null,
null,
rdf.namedNode(req.iri),
)

if (dataset.size === 0) {
next()
return null
}

await res.dataset(dataset)
})
.catch(next)
/* eslint-disable no-console */
import { readFile } from 'node:fs/promises'
import { resolve as pathResolve } from 'node:path'

import oxigraph from 'oxigraph'

import { performOxigraphQuery } from './lib/query.js'

/**
* Fetch file content from URL or path.
*
* @param {string} url URL or path to file to fetch.
* @returns {Promise<string>} File content.
*/
const getContent = async (url) => {
let content

if (url.startsWith('http://') || url.startsWith('https://')) {
const response = await fetch(url)
content = await response.text()
} else {
const resolvedPath = pathResolve(url)
content = await readFile(resolvedPath, 'utf8')
}

// legacy interface
_get(req, res, next, iri) {
req.iri = iri

this.handle(req, res, next)
}
return content
}

const factory = (trifid) => {
const { config } = trifid
export const storeMiddleware = async (trifid) => {
const { config, logger } = trifid
const { contentType, url, baseIri, graphName } = config

// read data from file or URL
const data = await getContent(url)
logger.debug(`Loaded ${data.length} bytes of data from ${url}`)

// create a store and load the data
const store = new oxigraph.Store()
store.load(data, contentType, baseIri, oxigraph.namedNode(graphName))
logger.debug('Loaded data into store')

return async (req, res, _next) => {
let query
if (req.method === 'GET') {
query = req.query.query
} else if (req.method === 'POST') {
query = req.body.query || req.body
}

const handler = new FetchHandler(config)
if (!query) {
return res.status(400).send('Missing query parameter')
}

return (req, res, next) => {
handler.handle(req, res, next)
const { response, contentType } = await performOxigraphQuery(store, query)
res.set('Content-Type', contentType)
return res.status(200).send(response)
}
}

export default factory
77 changes: 0 additions & 77 deletions packages/handler-fetch/lib/Fetcher.js

This file was deleted.

Loading

0 comments on commit 2e81c93

Please sign in to comment.