-
Notifications
You must be signed in to change notification settings - Fork 0
/
aggregateAndSpecialize.js
128 lines (117 loc) · 5.22 KB
/
aggregateAndSpecialize.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import {buildSpecializationTree, createGeneralizedQuery, toString, mergePreambles} from './queryHandling.js';
import {preambleToString} from './sparqlEncoding.js'
/**
 * Recursively decorates a query node, and every node below it, with
 * statistics computed over the node's own instances.
 *
 * @param {Object} query - Node of a specialization tree.
 * @param {Object[]} query.instances - Instances attached to the node; each one
 *   is read for `numOfExecutions`, `timeOfFirstExecution` and
 *   `timeOfLastExecution`.
 * @param {Object[]} query.specializations - Child nodes with the same shape.
 * @returns {Object} A new object holding every other property of the node,
 *   followed by `instances`, `numOfInstances`, `numOfExecutions`,
 *   `timeOfFirstExecution`, `timeOfLastExecution`, the aggregated
 *   `specializations` and `sumOfLogOfNumOfExecutions`. Both time bounds are
 *   `null` when the node has no instances.
 */
function aggregateInstances({instances, specializations, ...queryData}) {
  // `reduce` without an initial value throws a TypeError on an empty array,
  // so the time bounds are only folded when there is at least one instance.
  const hasInstances = instances.length > 0;
  return {
    ...queryData,
    instances,
    numOfInstances: instances.length,
    numOfExecutions: instances.reduce((sum, instance) => sum + instance.numOfExecutions, 0),
    timeOfFirstExecution: hasInstances
      ? instances
        .map(instance => instance.timeOfFirstExecution)
        .reduce((minTime, time) => minTime < time ? minTime : time)
      : null,
    timeOfLastExecution: hasInstances
      ? instances
        .map(instance => instance.timeOfLastExecution)
        .reduce((maxTime, time) => maxTime > time ? maxTime : time)
      : null,
    specializations: specializations.map(aggregateInstances),
    sumOfLogOfNumOfExecutions:
      instances.reduce((sum, instance) => sum + Math.log2(instance.numOfExecutions), 0),
  };
}
/**
 * Recursively replaces the structural representation of a query node with
 * its textual one.
 *
 * The `queryPieces` and `parameterByPosition` properties are dropped from the
 * result and replaced by `text`: the output of `preambleToString(preamble)`,
 * a newline, and the output of `toString` for the two dropped properties.
 *
 * @param {Object} query - Node carrying `queryPieces`, `parameterByPosition`,
 *   `preamble` and `specializations`; any other property is copied through.
 * @returns {Object} A new object with `text`, `preamble`, the remaining
 *   properties of the node and the converted `specializations`.
 */
function textualForm(query) {
  const {queryPieces, parameterByPosition, preamble, specializations, ...otherFields} = query;
  const preambleText = preambleToString(preamble);
  const bodyText = toString({queryPieces, parameterByPosition});
  const children = specializations.map(child => textualForm(child));
  return {
    text: preambleText + '\n' + bodyText,
    preamble,
    ...otherFields,
    specializations: children,
  };
}
/**
 * Sorts a forest of query nodes in place by descending `numOfExecutions`,
 * then does the same for the `specializations` array of every node.
 *
 * @param {Object[]} queryArray - Nodes to reorder; the array and all nested
 *   `specializations` arrays are mutated.
 * @returns {undefined}
 */
function sortByNumOfExecutions(queryArray) {
  const byExecutionsDescending = (left, right) => right.numOfExecutions - left.numOfExecutions;
  queryArray.sort(byExecutionsDescending);
  queryArray.forEach((query) => {
    sortByNumOfExecutions(query.specializations);
  });
}
/**
 * Prints a "<count / 1000> K" progress indicator on the current terminal line
 * whenever `count` is a multiple of 1000.
 *
 * @param {number} count - Number of items processed so far.
 */
function reportProgress(count) {
  if (count % 1000 === 0) {
    process.stdout.write((' ' + count / 1000).padStart(8, ' ') + ' K\r');
  }
}

/**
 * Groups the queries of a stream by their generalized form and builds a
 * specialization tree for every group that passes the configured thresholds.
 *
 * Phases, each timed on the console:
 *  1. every query is generalized with `createGeneralizedQuery`; queries whose
 *     generalized form has the same `toString` output are collected as
 *     instances of one entry, merging their preambles with `mergePreambles`;
 *  2. `buildSpecializationTree` is called for each entry that satisfies
 *     `minNumOfInstances` and `minNumOfExecutions`;
 *  3. optionally, instance statistics are aggregated on every tree node;
 *  4. every node is converted to its textual form;
 *  5. optionally, the forest is sorted by descending `numOfExecutions`.
 *
 * @param {AsyncIterable<Object>|Iterable<Object>} queryStream - Queries to
 *   process. Each item provides `text`; `numOfExecutions`,
 *   `timeOfFirstExecution` and `timeOfLastExecution` are read when present,
 *   and `id` is read for queries reported as non-clusterized. All properties
 *   other than `text` are copied onto the instance.
 * @param {Object} [options={}] - Also forwarded to `createGeneralizedQuery`
 *   and `buildSpecializationTree`.
 * @param {number} [options.minNumOfInstances] - Minimum number of instances an
 *   entry needs to get a tree; no minimum when falsy.
 * @param {number} [options.minNumOfExecutions] - Minimum sum of the instances'
 *   `numOfExecutions` an entry needs to get a tree; no minimum when falsy.
 * @param {boolean} [options.includeSimpleQueries] - When truthy, ids of
 *   rejected single-instance entries are collected.
 * @param {number} [options.minNumOfExecutionsForSimpleQueries] - Minimum
 *   `numOfExecutions` for a rejected single-instance entry to be collected.
 * @param {boolean} [options.countInstances] - When truthy, run the
 *   instance-aggregation phase.
 * @param {boolean} [options.sortResults] - When truthy, sort the forest.
 *   NOTE(review): within this file `numOfExecutions` is only put on tree nodes
 *   by the aggregation phase; confirm whether `buildSpecializationTree` also
 *   provides it before sorting without `countInstances`.
 * @returns {Promise<Object>} `queryForest`, `nonClusterizedQueryIds` (`null`
 *   unless `includeSimpleQueries` is truthy), `totalQueries`,
 *   `totalExecutions`, `timeOfFirstExecution` and `timeOfLastExecution`
 *   (both `null` when no query carried a truthy value).
 */
export default async function aggregateAndSpecialize(queryStream, options = {}) {
  const paramQueryMap = new Map();

  console.time('create generalization dictionary');
  let queryCounter = 0;
  let totalExecutions = 0;
  let timeOfFirstExecution = null;
  let timeOfLastExecution = null;
  for await (const {text: queryText, ...queryData} of queryStream) {
    reportProgress(queryCounter);
    const {generalizedQuery, constants, preamble} = createGeneralizedQuery(queryText, options);
    const queryStr = toString(generalizedQuery);
    const instance = {
      bindings: constants,
      numOfOriginalRdfTerms: constants.length,
      ...queryData,
    };
    // Entries are always objects, so `undefined` means "not seen yet".
    const queryObj = paramQueryMap.get(queryStr);
    if (queryObj !== undefined) {
      queryObj.instances.push(instance);
      queryObj.preamble = mergePreambles(queryObj.preamble, preamble);
    } else {
      paramQueryMap.set(queryStr, {
        ...generalizedQuery,
        instances: [instance],
        preamble,
      });
    }
    queryCounter++;
    // `||` (not `??`) on purpose: any falsy count, NaN included, adds nothing.
    totalExecutions += queryData.numOfExecutions || 0;
    const {
      timeOfFirstExecution: firstExecution,
      timeOfLastExecution: lastExecution,
    } = queryData;
    if (firstExecution && (!timeOfFirstExecution || firstExecution < timeOfFirstExecution)) {
      timeOfFirstExecution = firstExecution;
    }
    if (lastExecution && (!timeOfLastExecution || lastExecution > timeOfLastExecution)) {
      timeOfLastExecution = lastExecution;
    }
  }
  console.timeEnd('create generalization dictionary');
  console.log(queryCounter + ' queries managed');
  const totalQueries = queryCounter;

  console.time('build specialization forest');
  let queryForest = [];
  const nonClusterizedQueryIds = options.includeSimpleQueries ? [] : null;
  queryCounter = 0;
  for (const queryData of paramQueryMap.values()) {
    reportProgress(queryCounter);
    const {instances} = queryData;
    if ((!options.minNumOfInstances || instances.length >= options.minNumOfInstances)
        && (!options.minNumOfExecutions
          || instances.reduce((sum, instance) => sum + instance.numOfExecutions, 0)
            >= options.minNumOfExecutions)) {
      queryForest.push(buildSpecializationTree(queryData, options));
    } else if (options.includeSimpleQueries
        && instances.length === 1
        && (!options.minNumOfExecutionsForSimpleQueries
          || instances[0].numOfExecutions >= options.minNumOfExecutionsForSimpleQueries)) {
      nonClusterizedQueryIds.push(instances[0].id);
    }
    queryCounter++;
  }
  console.timeEnd('build specialization forest');
  // Report the trees actually built, not the number of entries visited:
  // entries rejected by the thresholds above do not produce a tree.
  console.log(queryForest.length + ' specialization trees built');

  if (options.countInstances) {
    console.time('aggregating instances');
    queryForest = queryForest.map(aggregateInstances);
    console.timeEnd('aggregating instances');
  }

  console.time('queries as text');
  queryForest = queryForest.map(textualForm);
  console.timeEnd('queries as text');

  if (options.sortResults) {
    console.time('sort queries');
    sortByNumOfExecutions(queryForest);
    console.timeEnd('sort queries');
  }

  return {
    queryForest, nonClusterizedQueryIds,
    totalQueries, totalExecutions,
    timeOfFirstExecution, timeOfLastExecution,
  };
}