-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathworker.js
199 lines (199 loc) · 8.76 KB
/
worker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
importScripts("data/toakue.js");
// Escape the characters that are significant in HTML element content so that
// user-typed text can be embedded in markup without being parsed as tags or entities.
// `&` is replaced first so the ampersands introduced by the other entities are not re-escaped.
let escapeHTML = s => s
  .replace(/&/g, "&amp;")
  .replace(/</g, "&lt;")
  .replace(/>/g, "&gt;");
// Template tag that builds an `{ err }` object: the literal pieces of the template are
// joined with the (HTML-escaped) first interpolated value wrapped in « <code>…</code> ».
let error = (words, err) => {
  const quoted = `« <code>${escapeHTML(err)}</code> »`;
  return { err: words.join(quoted) };
};
// Sort orders selectable through the `order:` operator. Each comparator receives two
// search results, which are [entry, score] pairs. `random` is only a flag: search()
// shuffles the results instead of sorting them when it is requested.
const orders = {
  // highest match score first
  default: ([, scoreA], [, scoreB]) => scoreB - scoreA,
  random: true,
  // position of the entry inside the global `dict` array
  alpha: ([entryA], [entryB]) => dict.indexOf(entryA) - dict.indexOf(entryB),
  // most recent `date` first
  newest: ([entryA], [entryB]) => new Date(entryB.date) - new Date(entryA.date),
  // highest entry `score` first
  score: ([entryA], [entryB]) => entryB.score - entryA.score,
};
// Shuffle the array in place (walking from the last slot down to the second, swapping
// each slot with a randomly chosen slot at or before it) and return the same array.
function shuffle(a) {
  let remaining = a.length;
  while (remaining > 1) {
    const pick = Math.floor(Math.random() * remaining);
    remaining -= 1;
    const held = a[remaining];
    a[remaining] = a[pick];
    a[pick] = held;
  }
  return a;
}
/**
 * Run a search query against the global `dict` array.
 *
 * The query is split on spaces into terms. A term may start with an operator
 * (`==`, one of `= ~ @ # / $ ! ^ -`, or a `name:` prefix); anything else is a plain term.
 * An entry is kept only if every non-`order` term matches it; its rank is the best
 * per-term score plus a bonus derived from `entry.user` and `entry.score`.
 *
 * @param {string} q - the raw query string
 * @returns {Array|{err: string}} an array of `[entry, score]` pairs (sorted, or shuffled
 *   for `order:random`), or an `{ err }` object describing why the query is invalid
 */
function search(q) {
  const rawTerms = q.split(" ");
  const operators = ["head", "body", "user", "score", "id", "scope", "arity", "not", "order"];
  const terms = rawTerms.map(term => {
    let [, operator, query] = term.match(/^(==|[=~@#/$!^-]|[a-z]*:)(.*)/) ?? [];
    if (!operator) return {op: "", orig: term, value: term.toLowerCase()};
    const colon = operator.endsWith(":");
    operator = operator.replace(/:$/, "");
    if (colon && !operators.includes(operator))
      return error`bu jıq mıjóaıchase ${operator}`;
    if (["/", "arity"].includes(operator) && !/^[0-9]?$/.test(query))
      return error`bu tıozıu mí ${query} (kïo tıao máo kóam kı)`;
    if (["^", "score"].includes(operator) && isNaN(query.replace(/^=/, "")))
      return error`bu zıu mí ${query.replace(/^=/, "")}`;
    if (["head", "=", "~"].includes(operator)) {
      // Validate exactly the pattern the scoring loop will execute. Normalizing can turn
      // a valid pattern into an invalid one (e.g. `\w` becomes `\ꝡ`), and an unvalidated
      // failure would surface later as `regex.test is not a function`.
      const regex = queryToRegex(normalize_query(query, false), operator != "~");
      if (regex.err) return regex;
    }
    if (operator == "order") {
      if (rawTerms.length == 1)
        return {err: "sua pó méuq joaıteoq"};
      // Own-property check: `orders[query]` would also accept inherited names such as
      // `constructor` or `__proto__`, which are not usable comparators.
      if (!Object.prototype.hasOwnProperty.call(orders, query))
        return error`bu chase suım mí ${query}`;
    }
    return {
      op: operator,
      orig: query,
      value: query.toLowerCase()
    };
  });
  if (terms.filter(t => t.op == "order").length > 1)
    return error`bu daı gaoshì pó mí ${"order"}`;
  let err = terms.find(t => t.err);
  if (err) return err;

  // Negated terms: run each as its own search and collect the ids to leave out.
  const exclusionResults = terms
    .filter(t => ["!", "-", "not"].includes(t.op))
    .map(t => search(t.orig));
  err = exclusionResults.find(e => e.err);
  if (err) return err;
  const excluded = new Set(exclusionResults.flat().map(e => e[0].id));

  // `order` only affects sorting, so it never takes part in scoring.
  const scoringTerms = terms.filter(t => t.op != "order");
  let res = [];
  for (const entry of dict) {
    if (excluded.has(entry.id)) continue;
    // Number of ▯ placeholders in each sentence of the body; all-zero lists are kept
    // as they are, otherwise the zero counts are dropped.
    let arities = entry.body.split(/[;.?!]/).map(b => b.split("▯").length - 1);
    if (!arities.every(x => x == 0)) {
      arities = arities.filter(x => x != 0);
    }
    let scores = scoringTerms.map(({op, orig, value}) => {
      // 6: id
      if (["#", "id"].includes(op) && entry.id == orig) return 6;
      // 5: head
      if (["=", "head", "~", ""].includes(op) && compareish(normalize_query(value), normalize(entry.head))) return 5.2;
      if (!op && compareish(normalizeToneless(value), normalizeToneless(entry.head))) return 5.1;
      // and regex matching
      if (["=", "head", "~"].includes(op)) {
        let regex = queryToRegex(normalize_query(orig, false), op != '~');
        if (regex.test(normalize(entry.head))) return 5;
      }
      // 3: body
      if (["body", ""].includes(op)) {
        // escape regex metacharacters so the term is matched literally
        const v = normalize_query(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const body = normalize(entry.body);
        if (RegExp(`▯ ?(is|are)?( an?)? ([^ /▯]+/)*${v}`, "iu").test(body)) return 3.2
        if (RegExp(`([^'’]\\b|(?!['’])\\W|^)${v}`, "iu").test(body)) return 3.1
        if (body.includes(normalize_query(value))) return 3;
      }
      // 1-2: no op
      if (!op) {
        if (entry.notes.some(n => normalize(n.content).includes(normalize_query(value)))) return 2;
        if (normalize(entry.head).startsWith(normalize_query(value))) return 1.1;
        if (normalizeToneless(entry.head).includes(normalizeToneless(value))) return 1;
      }
      // other
      if (
        ["@", "user"].includes(op) && entry.user.toLowerCase() == value.toLowerCase()
        || ["$", "scope"].includes(op) && entry.scope.toLowerCase() == value.toLowerCase()
        || ["/", "arity"].includes(op) && arities.includes(+value)
        || ["^", "score"].includes(op) && (entry.score >= value || entry.score == value.replace(/^=/, ""))
        || ["!", "-", "not"].includes(op)
      ) return 0.1;
      // falling through yields undefined, which marks this term as unmatched
    });
    if (scores.some(s => !s)) continue;
    let bonus = entry.user == "official" ? 0.3 :
      entry.user == "oldofficial" || /^(old)?(countries|examples)$/.test(entry.user) ? -0.3 : 0;
    bonus += entry.score / 20;
    res.push([entry, Math.max(...scores) + bonus]);
  }
  const order = terms.find(t => t.op == "order") || {value: "default"};
  if (order.value == "random") return shuffle(res);
  return res.sort(orders[order.value]);
}
// Building blocks (regex source strings) used to assemble the query/word patterns below.
// Combining tone marks: grave (U+0300), acute (U+0301), diaeresis (U+0308), circumflex (U+0302).
const tones = `\u0300\u0301\u0308\u0302`;
// Combining dot below (U+0323).
const underdot = `\u0323`;
// Vowel letters; "ı" is the dotless i (U+0131).
const vowels = `aeıou`;
// Any single character, optionally followed by one tone mark and then an underdot.
const char_match = `(?:.[${tones}]?${underdot}?)`;
// One vowel, optionally followed by one tone mark and then an underdot.
const vowel_match = `(?:[${vowels}][${tones}]?${underdot}?)`;
// A single consonant letter (including the apostrophe), or one of the digraphs nh/ch/sh.
const init_consonants = `(?:[mpbfntdczsrljꝡkg'h]|[ncs]h)`;
// A vowel (with its marks), an initial consonant, or "q".
const letter = `(?:${vowel_match}|${init_consonants}|q)`;
// The letters "m" and "q".
const finals = `[mq]`;
// The vowel pairs aı, eı, oı and ao (note: this is a capturing group).
const diphthongs = `([aeo]ı|ao)`;
// Either a position at the start of the string / after a space, or an initial consonant;
// then an optional vowel; then a diphthong or a vowel with an optional final.
// NOTE(review): presumably describes one syllable — confirm the intended meaning of "raku".
const raku = `((?<= |^)|${init_consonants})${vowel_match}?(${diphthongs}|${vowel_match}${finals}?)`;
// Query metacharacters and the regex source each one expands to.
// Vowel entries are added by the loop underneath.
let substitutions = {
  "*": ".*",
  "?": letter,
  "C": init_consonants,
  "V": vowel_match,
  "F": diphthongs,
  "Q": finals,
  "R": raku,
  "_": " ",
};
// If a tone is present in the query, it's required in the word; if not present any tone(s) are allowed.
// Underdots are dealt with separately, so query nạbie matches word nạ́bıe
for (const v of vowels) {
  const anyTone = `[${tones}]?`;
  // bare vowel: tone and underdot are both optional in the word
  substitutions[v] = `${v}${anyTone}${underdot}?`;
  // vowel + underdot: the underdot becomes mandatory, the tone stays optional
  substitutions[v + underdot] = `${v}${anyTone}${underdot}`;
  // vowel + tone: that exact tone is mandatory, the underdot stays optional
  for (const t of tones) {
    substitutions[v + t] = `${v}${t}${underdot}?`;
  }
}
// Matches a run of letters followed by a digit 1-4 (group 1: the letters, group 2: the digit).
// Used by normalize_query to turn a trailing digit into a tone mark.
const word_diacritic_regex = new RegExp(`(${letter}+)([1234])`, "iug");
// Digit -> combining mark it stands for (the same four marks listed in `tones`).
const diacritic_tones = {
'1': '\u0300',
'2': '\u0301',
'3': '\u0308',
'4': '\u0302',
}
// Matches one vowel together with any tone/underdot attached to it. Not global,
// so String.prototype.replace only touches the first vowel it finds.
const vowel_regex = new RegExp(`${vowel_match}`, "iu");
// Matches a `raku` unit (group 1) followed by a literal ".". The untagged template
// literal drops the backslash in `\.`, so the compiled class is simply `[.]`.
const underdot_regex = new RegExp(`(${raku})([\.])`, "iug");
// True when `c` is exactly one of the four combining tone marks or the combining underdot.
const isTone = c => {
  const toneOrUnderdot = /^[\u0300\u0301\u0308\u0302\u0323]$/;
  return toneOrUnderdot.test(c);
};
// Wrap `fn` with a cache so repeated calls with the same arguments reuse the first result.
// The cache key is the arguments joined with a NUL character, so arguments are
// distinguished only by their string form.
const memoize = fn => {
  const seen = new Map();
  return (...args) => {
    const key = args.join("\x00");
    if (!seen.has(key)) {
      seen.set(key, fn(...args));
    }
    return seen.get(key);
  };
};
// Normalize `w`, then drop every character isTone() accepts (tone marks and the underdot),
// giving a form that can be compared while ignoring those marks. Results are memoized.
const normalizeToneless = memoize(w => {
  const kept = [];
  for (const c of normalize(w)) {
    if (!isTone(c)) kept.push(c);
  }
  return kept.join("");
});
// Bring a word or query into the canonical form used for comparisons (memoized):
// optional lowercasing, NFD decomposition, i -> ı, v/w -> ꝡ, x and curly quotes -> '.
// for regex search purposes, we don't want to convert to lowercase since C/F/Q/R/V exist
const normalize = memoize((w, lowercase = true) =>
  (lowercase ? w.toLowerCase() : w)
    .normalize("NFD")
    .replace(/i/g, "ı")
    .replace(/[vw]/g, "ꝡ")
    .replace(/[x‘’]/g, "'")
    // NFD puts the underdot before a tone mark, but the search patterns expect the tone
    // first. Reorder every occurrence (not just the first), for all four tone marks.
    .replace(/\u0323([\u0300\u0301\u0308\u0302])/g, "$1\u0323"));
// Query-only normalization (memoized), kept apart from normalize() because it is
// somewhat expensive: after the usual normalization, a digit 1-4 following a run of
// letters becomes the matching tone mark on that run's first vowel, and a "." following
// a `raku` unit becomes an underdot on that unit's first vowel.
const normalize_query = memoize((w, lowercase = true) => {
  const applyTone = (_, word, number) =>
    word.replace(vowel_regex, v => v + diacritic_tones[number]);
  const applyUnderdot = (_, word) =>
    word.replace(vowel_regex, v => v + underdot);
  const base = normalize(w, lowercase);
  const toned = base.replace(word_diacritic_regex, applyTone);
  return toned.replace(underdot_regex, applyUnderdot);
});
// Equality that tolerates a prefix hyphen: the query matches the word itself,
// or the word with a single trailing "-" removed.
const compareish = (query, word) => {
  if (query == word) return true;
  const withoutHyphen = word.replace(/-$/, "");
  return query == withoutHyphen;
};
// Matches one `char_match` unit at a time (a character plus any tone mark / underdot
// attached to it); global so it can drive match() and replace() over a whole query.
const char_regex = new RegExp(`${char_match}`, "iug");
// Matches a bracketed group such as [abc]: "[", then as few `char_match` units as
// possible, then "]".
const char_brackets_regex = new RegExp(`\\[${char_match}*?\\]`, "iug");
// Compile a search pattern into a RegExp (memoized). Returns the RegExp, or an
// `{ err }` object when the resulting pattern is not a valid regular expression.
// With `anchored` the pattern must match the whole word, otherwise any part of it.
const queryToRegex = memoize((query, anchored = true) => {
  // due to [...] not being true character classes, we can't directly substitute them
  // and instead have to turn [abc] into (a|b|c)
  const expandBracket = bracket => {
    const members = bracket.slice(1, -1).match(char_regex);
    return `(${members?.join("|") ?? ''})`;
  };
  const source = query
    .replace(char_brackets_regex, expandBracket)
    .replace(char_regex, ch => substitutions[ch] ?? ch);
  // -? is added to the end to allow for prefix hyphens
  const pattern = anchored ? `^(${source})-?$` : `(${source})-?`;
  // Rather than attempting to deal with invalid regexes manually, just let javascript barf if something goes wrong
  try {
    return new RegExp(pattern, "ui");
  } catch (e) {
    return error`bu sekogeq mí ${query}`;
  }
});
// Worker entry point: run the query carried in the message's `q` field
// and post the search result (or error object) back to the sender.
onmessage = ({ data }) => {
  const results = search(data.q);
  postMessage(results);
};