-
Notifications
You must be signed in to change notification settings - Fork 0
/
calculateQueryStats.js
111 lines (103 loc) · 5.49 KB
/
calculateQueryStats.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import fs from 'fs';
import { QueryEngine } from '@comunica/query-sparql';
import queryEndpoint from './queryEndpoint.js';
import { BindingsFactory } from '@comunica/bindings-factory';
import { DataFactory } from 'rdf-data-factory';
import factory from '@rdfjs/data-model';
// RDF/JS term and bindings factories. Neither is referenced by the active
// code visible in this file.
const rdfFactory = new DataFactory();
const bindingsFactory = new BindingsFactory();

// One Comunica engine instance shared by every query this script issues.
const comunicaEngine = new QueryEngine();

// SPARQL query texts, loaded once at module load. The relative paths are
// resolved against the process working directory. Both files are read with an
// explicit UTF-8 encoding so that both constants are strings (previously
// `datasetsQuery` was a raw Buffer while `queryStatsQuery` was a string).
const datasetsQuery = fs.readFileSync('./queries/datasets.rq', 'utf8');
const queryStatsQuery = fs.readFileSync('./queries/queryStats.rq', 'utf8');
/**
 * Runs the query-statistics SPARQL query for a single dataset and stores the
 * result as CSV in `<outputDirPath><datasetId>.csv`.
 *
 * The `$execs` and `$queries` placeholders of the query template are replaced
 * textually with `<{dataset}/execs>` and `<{dataset}/queries>`.
 *
 * @param {string} dataset - Dataset IRI; interpolated as text into the query.
 * @param {string} datasetId - Base name (without extension) of the CSV file.
 * @param {object} options
 * @param {string} options.inputEndpointURL - SPARQL endpoint to query.
 * @param {string} options.outputDirPath - Prefix of the output file path. It
 *   is concatenated as-is, so it should end with a path separator.
 * @returns {Promise<void>} Resolves once the CSV file has been written
 *   completely; rejects if the query, the result stream or the file write
 *   fails.
 */
async function calculateStatsForDataset(dataset, datasetId, options) {
  const { inputEndpointURL, outputDirPath } = options;

  const query = queryStatsQuery
    .replaceAll('$execs', `<${dataset}/execs>`)
    .replaceAll('$queries', `<${dataset}/queries>`);

  const result = await comunicaEngine.query(query, {
    sources: [{
      type: 'sparql',
      value: inputEndpointURL
    }]
  });
  const { data } = await comunicaEngine.resultToString(result, 'text/csv');
  const outputFilePath = outputDirPath + datasetId + '.csv';

  // Wait for the file to be flushed before returning. A bare `pipe()` would
  // let this function resolve while the write is still in progress and would
  // leave stream errors unhandled.
  await new Promise((resolve, reject) => {
    const output = fs.createWriteStream(outputFilePath);
    output.on('finish', resolve);
    output.on('error', reject);
    data.on('error', (error) => {
      // `pipe()` does not propagate source errors, so close the file ourselves.
      output.destroy();
      reject(error);
    });
    data.pipe(output);
  });
}
/**
 * Computes query statistics for every dataset returned by the datasets query.
 *
 * Datasets are processed one after another. A dataset is skipped when
 * `options.excludeDatasets` is set and includes it. When
 * `options.datasetsGraphname` is falsy nothing is queried at all.
 *
 * @param {object} options - Also forwarded unchanged to
 *   `calculateStatsForDataset` for each processed dataset.
 * @param {string} options.inputEndpointURL - Endpoint passed to `queryEndpoint`.
 * @param {string} [options.datasetsGraphname] - Graph name passed (as a
 *   one-element array) to `queryEndpoint`.
 * @param {Array} [options.excludeDatasets] - Datasets to skip.
 * @returns {Promise<void>}
 */
async function calculateStats(options) {
  const endpointUrl = options.inputEndpointURL;
  const graphName = options.datasetsGraphname;

  // Without a datasets graph there is nothing to enumerate.
  if (!graphName) {
    return;
  }

  const rows = queryEndpoint(endpointUrl, [graphName], datasetsQuery);
  for await (const row of rows) {
    const { dataset, datasetId } = row;
    const isExcluded =
      options.excludeDatasets && options.excludeDatasets.includes(dataset);
    if (isExcluded) {
      continue;
    }
    console.log('Dataset: ' + dataset);
    await calculateStatsForDataset(dataset, datasetId, options);
  }
}
/**
 * Script entry point: computes the query statistics for the hard-coded
 * DBpedia dataset against the local endpoint and waits for the call to settle.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const dataset = 'http://lsq.aksw.org/datasets/dbpedia';
  const datasetId = 'dbpedia';
  const options = {
    inputEndpointURL: 'http://localhost:3030/lsq2/query',
    outputDirPath: './output/queries/stats/',
  };

  await calculateStatsForDataset(dataset, datasetId, options);

  // Disabled alternative: process all datasets listed in a datasets graph by
  // calling `calculateStats(options)` with these additional options.
  //
  // datasetsGraphname: 'http://lsq.aksw.org/datasets',
  // excludeDatasets: [
  //   'http://lsq.aksw.org/datasets/bench-affymetrix-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-biomedels-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-bioportal-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-ctd-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20151025-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20151124-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20151126-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20151213-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20151230-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160117-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160212-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160222-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160301-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160303-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160304-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160314-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia-20160411-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbpedia.3.5.1.log-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-dbsnp-lsq2', // missing
  //   'http://lsq.aksw.org/datasets/bench-drugbank-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-genage-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-gendr-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-gene-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-goa-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-hgnc-lsq2', // missing
  //   'http://lsq.aksw.org/datasets/bench-homologene-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-irefindex-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-kegg-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-linkedGeoData-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-linkedspl-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-mgi-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-ncbigene-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-omim-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-pharmgkb-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-sabiork-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-sgd-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-sidr-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-swdf-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-taxonomy-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval1-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval2-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval3-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval4-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval5-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval6-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wikidata-interval7-organic-lsq2',
  //   'http://lsq.aksw.org/datasets/bench-wormbase-lsq2'
  // ]
}
// Run the script. A failure is printed and reflected in the process exit code
// instead of being left as an unhandled promise rejection.
main()
  .then(() => {
    console.log('done!');
  })
  .catch((error) => {
    console.error(error);
    process.exitCode = 1;
  });