Skip to content

Commit

Permalink
feat: Add subject filter
Browse files Browse the repository at this point in the history
  • Loading branch information
ddeboer committed Dec 5, 2023
1 parent 647e2db commit 591ff92
Show file tree
Hide file tree
Showing 16 changed files with 1,693 additions and 2,785 deletions.
6 changes: 3 additions & 3 deletions jest.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ export default {
coverageReporters: ['json-summary', 'text'],
coverageThreshold: {
global: {
lines: 20.8,
statements: 20.8,
branches: 11.9,
lines: 21.91,
statements: 21.91,
branches: 12.76,
functions: 21.05,
},
},
Expand Down
4,385 changes: 1,619 additions & 2,766 deletions package-lock.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@comunica/query-sparql": "^2.10.0",
"@netwerk-digitaal-erfgoed/network-of-terms-catalog": "^8.0.0",
"@netwerk-digitaal-erfgoed/network-of-terms-query": "^4.0.0",
"@types/rdf-ext": "^2.2.5",
"asynciterator": "^3.8.1",
"axios": "^1.6.2",
"env-schema": "^5.2.1",
Expand All @@ -37,7 +38,7 @@
"n3": "^1.17.2",
"pino": "^8.16.2",
"rdf-data-factory": "^1.1.2",
"rdf-ext": "^2.4.0",
"rdf-dereference": "^2.2.0",
"rdf-js": "^4.0.2"
},
"devDependencies": {
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/class-partition.rq
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT (COUNT(?type) AS ?entities) ?type {
#subjectFilter#
?s a ?type .
}
GROUP BY ?type
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/class-properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT ?p ?t (COUNT(DISTINCT ?s) AS ?subjects) {
#subjectFilter#
?s a ?t ;
?p ?o .
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/entity-properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CONSTRUCT {
] .
} WHERE {
SELECT (COUNT(?p) AS ?entities) ?p {
#subjectFilter#
?s ?p ?o .
}
GROUP BY ?p
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-literals.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CONSTRUCT {
nde:distinctObjectsLiteral ?total .
} WHERE {
SELECT (COUNT(?o) as ?total) {
#subjectFilter#
?s ?p ?o .
FILTER(ISLITERAL(?o))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-uri-space.rq
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ CONSTRUCT {
void:triples ?count .
} WHERE {
SELECT DISTINCT ?prefix (COUNT(?prefix) AS ?count) {
#subjectFilter#
?s ?p ?o .
FILTER(ISIRI(?o))
BIND(REPLACE(STR(?o), "([^/]+$)", "") AS ?prefix)
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/object-uris.rq
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CONSTRUCT {
nde:distinctObjectsURI ?total .
} WHERE {
SELECT (COUNT(?o) as ?total) {
#subjectFilter#
?s ?p ?o .
FILTER(ISIRI(?o))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/properties.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:properties ?count .
} WHERE {
SELECT (COUNT(DISTINCT ?p) as ?count) {
#subjectFilter#
?s ?p ?o
}
}
1 change: 1 addition & 0 deletions queries/analysis/subjects.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:distinctSubjects ?count .
} WHERE {
SELECT (COUNT(DISTINCT ?s) as ?count) {
#subjectFilter#
?s ?p ?o .
FILTER(!ISBLANK(?s))
}
Expand Down
1 change: 1 addition & 0 deletions queries/analysis/triples.rq
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CONSTRUCT {
void:triples ?count .
} WHERE {
SELECT (COUNT(*) as ?count) {
#subjectFilter#
?s ?p ?o
}
}
14 changes: 14 additions & 0 deletions queries/selection/supplemental.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Supplemental per-dataset statements, keyed by dataset IRI.
#
# nde:subjectFilter holds a SPARQL graph pattern as a plain string literal. The analyzer substitutes it for the
# '#subjectFilter#' placeholder in the analysis queries, restricting ?s to the subjects of that one dataset.
# The selector appends the terminating '.' when it reads the value, so the patterns below must not end with one.
#
# NOTE(review): the patterns use the 'schema:' prefix, which is resolved inside the SPARQL query and not by this
# Turtle file -- presumably every analysis query declares 'PREFIX schema:'; confirm when adding a new query.
#
# The IRI inside each pattern must be identical to the subject IRI of the statement it belongs to.
@prefix nde: <https://data.netwerkdigitaalerfgoed.nl/def/> .

<http://data.bibliotheken.nl/id/dataset/albac> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/albac>" .
<http://data.bibliotheken.nl/id/dataset/brinkman> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/brinkman>" .
<http://data.bibliotheken.nl/id/dataset/corps> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/corps>" .
<http://data.bibliotheken.nl/id/dataset/dbnla> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/dbnla>" .
<http://data.bibliotheken.nl/id/dataset/dbnlt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/dbnlt>" .
<http://data.bibliotheken.nl/id/dataset/gtt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/gtt>" .
<http://data.bibliotheken.nl/id/dataset/kbcode> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/kbcode>" .
<http://data.bibliotheken.nl/id/dataset/nbt> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/nbt>" .
<http://data.bibliotheken.nl/id/dataset/persons> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/persons>" .
<http://data.bibliotheken.nl/id/dataset/rise-alba> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/rise-alba>" .
<http://data.bibliotheken.nl/id/dataset/rise-centsprenten> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/rise-centsprenten>" .
<http://data.bibliotheken.nl/id/dataset/stcn> nde:subjectFilter "?s schema:mainEntityOfPage/schema:isPartOf <http://data.bibliotheken.nl/id/dataset/stcn>" .
27 changes: 15 additions & 12 deletions src/analyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,21 @@ export class SparqlQueryAnalyzer implements Analyzer {
type?: string
): Promise<AsyncIterator<Quad> & ResultStream<Quad>> {
try {
return await new QueryEngine().queryQuads(this.query, {
initialBindings: this.bindingsFactory.fromRecord({
dataset: this.dataFactory.namedNode(dataset.iri),
}) as unknown as Bindings,
sources: [
{
type: 'sparql',
value: endpoint,
},
],
httpTimeout: 300_000, // Some SPARQL queries really take this long.
});
return await new QueryEngine().queryQuads(
this.query.replace('#subjectFilter#', dataset.subjectFilter ?? ''),
{
initialBindings: this.bindingsFactory.fromRecord({
dataset: this.dataFactory.namedNode(dataset.iri),
}) as unknown as Bindings,
sources: [
{
type: 'sparql',
value: endpoint,
},
],
httpTimeout: 300_000, // Some SPARQL queries really take this long.
}
);
} catch (e) {
if (type !== undefined) {
// Retry without explicit SPARQL type, which is needed for endpoints that offer a SPARQL Service Description.
Expand Down
6 changes: 5 additions & 1 deletion src/dataset.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
export class Dataset {
constructor(
public readonly iri: string,
public distributions: Distribution[]
public distributions: Distribution[],

// Defined on the dataset rather than on the distribution, because a distribution may not have a URI and therefore
// cannot be referenced from supplemental.ttl.
public subjectFilter?: string
) {}

public getSparqlDistribution(): Distribution | null {
Expand Down
28 changes: 26 additions & 2 deletions src/selector.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import {Dataset, Distribution} from './dataset.js';
import {QueryEngine} from '@comunica/query-sparql';
import {Quad} from 'n3';
import {DataFactory, Quad} from 'n3';
import {resolve} from 'node:path';
import rdfDereferencer from 'rdf-dereference';
import namedNode = DataFactory.namedNode;
import factory from 'rdf-ext';

export interface Selector {
select(): Promise<Set<Dataset>>;
Expand All @@ -15,6 +19,14 @@ export class SparqlQuerySelector implements Selector {
private readonly queryEngine: QueryEngine
) {}
async select(): Promise<Set<Dataset>> {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
const {data} = await rdfDereferencer.default.dereference(
resolve('queries/selection/supplemental.ttl'),
{localFiles: true}
);
const supplementalStore = await factory.dataset().import(data);

const quadStream = await this.queryEngine.queryQuads(this.config.query, {
sources: [
{
Expand All @@ -34,7 +46,19 @@ export class SparqlQuerySelector implements Selector {
quad.predicate.value &&
'http://www.w3.org/ns/dcat#Dataset' === quad.object.value
) {
dataset = new Dataset(quad.subject.value, []);
const subjectFilter = [
...supplementalStore.match(
quad.subject,
namedNode(
'https://data.netwerkdigitaalerfgoed.nl/def/subjectFilter'
)
),
][0]?.object.value;
dataset = new Dataset(
quad.subject.value,
[],
subjectFilter ? subjectFilter + '.' : undefined
);
datasets.add(dataset);
}

Expand Down

0 comments on commit 591ff92

Please sign in to comment.