handler-fetch: use oxigraph

zazuko · Jan 3, 2024 · 2e81c93 · 2e81c93
1 parent f925eb5
commit 2e81c93
Show file tree

Hide file tree

Showing 14 changed files with 254 additions and 1,493 deletions.
diff --git a/.changeset/shiny-weeks-do.md b/.changeset/shiny-weeks-do.md
@@ -0,0 +1,12 @@
+---
+"trifid-handler-fetch": major
+---
+
+Completely change the way it works.
+
+Instead of being a handler that is only called when dereferencing, it is now a Trifid middleware that exposes a new endpoint (`/query`, for example) that can be used to perform SPARQL queries against the dataset.
+
+This means that it is now possible to perform SPARQL queries against a dataset that is loaded from a URL, which was not possible before.
+This also means that it is possible to use all other Trifid plugins that previously only worked with a SPARQL endpoint and not with a dataset coming from a simple file.
+
+Please take a look at how to use it in the documentation here: https://github.com/zazuko/trifid/tree/main/packages/handler-fetch#readme
diff --git a/package-lock.json b/package-lock.json
diff --git a/packages/handler-fetch/.npmignore b/packages/handler-fetch/.npmignore
@@ -1,6 +1,4 @@
-.github/
 node_modules/
-.eslintrc
 .gitignore
 yarn.lock
 *.tgz
diff --git a/packages/handler-fetch/README.md b/packages/handler-fetch/README.md
@@ -1,40 +1,51 @@
 # trifid-handler-fetch
 
-Handler for Trifid which reads the data from a URL.
-[fetch-lite](https://github.com/rdfjs-base/fetch-lite) is used for `http://` and `https://` URLs.
-[file-fetch](https://www.npmjs.com/package/file-fetch) is used for `file://` URLs.
+This Trifid plugin exposes an endpoint where you can perform SPARQL queries against a dataset which is loaded from a URL.
+
+The URL can be a local file or a remote resource.
+
+At the start of the server, the dataset is loaded from the URL and stored in memory, using [Oxigraph](https://github.com/oxigraph/oxigraph).
+The dataset is not updated automatically when the resource changes.
 
 ## Usage
 
 Add the `trifid-handler-fetch` package to your dependencies:
 
-    npm install trifid-handler-fetch --save
+```sh
+npm install trifid-handler-fetch
+```
 
-Change the `handler` property in the config like in the example below and adapt the options.
+Then update the Trifid configuration to something similar to the example below.
 
 ## Example
 
 This example config uses [The Big Bang Theory dataset](https://www.npmjs.com/package/tbbt-ld/):
 
-```
-{
-  "baseConfig": "trifid:config.json",
-  "handler": {
-    "module": "trifid-handler-fetch",
-      "options": {
-        "url": "https://raw.githubusercontent.com/zazuko/tbbt-ld/master/dist/tbbt.nt",
-        "contentType": "application/n-triples",
-        "split": "true",
-        "cache": "true"
-    }
-  }
-}
+```yaml
+middlewares:
+  # […]
+  handler-fetch:
+    module: "trifid-handler-fetch"
+    paths: /query
+    config:
+      url: https://raw.githubusercontent.com/zazuko/tbbt-ld/master/dist/tbbt.nt
+      contentType: application/n-triples
+      baseIri: http://example.com
+      graphName: http://example.com/graph
 ```
 
 ## Options
 
 - `url`: URL to the resource which contains the dataset
-- `contentType`: If set, parse the content with a parser for the given media type
-- `resource`: If set, the dataset will be loaded into the given Named Graph
-- `split`: If true, the dataset will be split into subgraphs for each Named Node
-- `cache`: Reads the resource only once at the first request and caches the dataset for other request
+- `contentType`: the format of the serialization. See below for the supported formats.
+- `baseIri`: the base IRI used to resolve relative IRIs in the serialization.
+- `graphName`: for triple serialization formats, the name of the named graph the triples should be loaded into.
+
+Supported formats:
+
+- [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
+- [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
+- [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
+- [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
+- [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
+- [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`
diff --git a/packages/handler-fetch/index.js b/packages/handler-fetch/index.js
@@ -1,91 +1,58 @@
-import path from 'path'
-import url from 'url'
-import formats from '@rdfjs/formats-common/index.js'
-import rdf from 'rdf-ext'
-import rdfHandler from '@rdfjs/express-handler'
-
-import SerializerJsonld from '@rdfjs/serializer-jsonld-ext'
-import Fetcher from './lib/Fetcher.js'
-
-// @TODO discuss what are the best serialization options.
-const jsonLdSerializer = new SerializerJsonld({
-  encoding: 'string',
-  // compact: true,
-  // flatten: true
-})
-
-formats.serializers.set('application/json', jsonLdSerializer)
-formats.serializers.set('application/ld+json', jsonLdSerializer)
-
-const guessProtocol = (candidate) => {
-  try {
-    return new url.URL(candidate).protocol
-  } catch (error) {
-    return undefined
-  }
-}
-
-export class FetchHandler {
-  constructor(options) {
-    this.dataset = rdf.dataset()
-    this.url = options.url
-    this.cache = options.cache
-    this.contentType = options.contentType
-    this.options = options.options || {}
-    this.resource = options.resource
-    this.split = options.split
-
-    // add file:// and resolve with cwd if no protocol was given
-    if (this.url && !guessProtocol(this.url)) {
-      this.url = 'file://' + path.resolve(this.url)
-    }
-
-    this.handle = this._handle.bind(this)
-
-    // legacy interface
-    this.get = this._get.bind(this)
-  }
-
-  _handle(req, res, next) {
-    rdfHandler
-      .attach(req, res, { formats })
-      .then(() => {
-        return Fetcher.load(this.dataset, this)
-      })
-      .then(async () => {
-        const dataset = this.dataset.match(
-          null,
-          null,
-          null,
-          rdf.namedNode(req.iri),
-        )
-
-        if (dataset.size === 0) {
-          next()
-          return null
-        }
-
-        await res.dataset(dataset)
-      })
-      .catch(next)
+/* eslint-disable no-console */
+import { readFile } from 'node:fs/promises'
+import { resolve as pathResolve } from 'node:path'
+
+import oxigraph from 'oxigraph'
+
+import { performOxigraphQuery } from './lib/query.js'
+
+/**
+ * Fetch file content from URL or path.
+ *
+ * @param {string} url URL or path to file to fetch.
+ * @returns {Promise<string>} File content.
+ */
+const getContent = async (url) => {
+  let content
+
+  if (url.startsWith('http://') || url.startsWith('https://')) {
+    const response = await fetch(url)
+    content = await response.text()
+  } else {
+    const resolvedPath = pathResolve(url)
+    content = await readFile(resolvedPath, 'utf8')
   }
 
-  // legacy interface
-  _get(req, res, next, iri) {
-    req.iri = iri
-
-    this.handle(req, res, next)
-  }
+  return content
 }
 
-const factory = (trifid) => {
-  const { config } = trifid
+export const storeMiddleware = async (trifid) => {
+  const { config, logger } = trifid
+  const { contentType, url, baseIri, graphName } = config
+
+  // read data from file or URL
+  const data = await getContent(url)
+  logger.debug(`Loaded ${data.length} bytes of data from ${url}`)
+
+  // create a store and load the data
+  const store = new oxigraph.Store()
+  store.load(data, contentType, baseIri, oxigraph.namedNode(graphName))
+  logger.debug('Loaded data into store')
+
+  return async (req, res, _next) => {
+    let query
+    if (req.method === 'GET') {
+      query = req.query.query
+    } else if (req.method === 'POST') {
+      query = req.body.query || req.body
+    }
 
-  const handler = new FetchHandler(config)
+    if (!query) {
+      return res.status(400).send('Missing query parameter')
+    }
 
-  return (req, res, next) => {
-    handler.handle(req, res, next)
+    const { response, contentType } = await performOxigraphQuery(store, query)
+    res.set('Content-Type', contentType)
+    return res.status(200).send(response)
   }
 }
-
-export default factory
diff --git a/packages/handler-fetch/lib/Fetcher.js b/packages/handler-fetch/lib/Fetcher.js