-
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsimilarityUtils.js
86 lines (70 loc) · 3.04 KB
/
similarityUtils.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import { createEmbedding } from './embeddingUtils.js';
// -----------------------------------------------------
// -- Calculate cosine similarity between two vectors --
// -----------------------------------------------------
/**
 * Cosine of the angle between two numeric vectors.
 *
 * Iteration is driven by `vecA.length`: only indices 0..vecA.length-1 of
 * `vecB` are read, so any extra entries in `vecB` are ignored and missing
 * entries propagate as NaN.
 *
 * @param {ArrayLike<number>} vecA - First vector; its length bounds the loop.
 * @param {ArrayLike<number>} vecB - Second vector.
 * @returns {number} dot(vecA, vecB) / (|vecA| * |vecB|), or 0 when either
 *   magnitude is exactly 0 (including two empty vectors).
 */
export function cosineSimilarity(vecA, vecB) {
  let dot = 0;
  let squaredLengthA = 0;
  let squaredLengthB = 0;

  for (let idx = 0; idx < vecA.length; idx += 1) {
    const a = vecA[idx];
    const b = vecB[idx];
    dot += a * b;
    squaredLengthA += a ** 2;
    squaredLengthB += b ** 2;
  }

  const magnitudeA = Math.sqrt(squaredLengthA);
  const magnitudeB = Math.sqrt(squaredLengthB);

  // A zero-length vector has no direction; report 0 instead of dividing by zero.
  const hasZeroVector = magnitudeA === 0 || magnitudeB === 0;
  return hasZeroVector ? 0 : dot / (magnitudeA * magnitudeB);
}
// ---------------------------------------------------------------
// -- Function to compute advanced similarities with statistics --
// ---------------------------------------------------------------
/**
 * Embed every sentence and, for each sentence except the last, record the
 * highest cosine similarity between it and the sentences that follow it
 * within the lookahead window.
 *
 * The immediately following sentence is always compared; sentences at
 * offsets 2..numSimilaritySentencesLookahead are compared as well when they
 * exist. All embeddings are requested concurrently via `Promise.all`, so a
 * single rejected `createEmbedding` call rejects the whole computation.
 *
 * @param {string[]} sentences - Sentences in document order.
 * @param {object} [options]
 * @param {number} [options.numSimilaritySentencesLookahead=2] - Furthest
 *   offset (in sentences) to compare against.
 * @param {boolean} [options.logging=false] - Log each similarity score.
 * @returns {Promise<{similarities: number[], average: number, variance: number}>}
 *   One score per sentence except the last, plus the mean and population
 *   variance of those scores. With fewer than two sentences there are no
 *   scores and both statistics are 0.
 */
export async function computeAdvancedSimilarities(sentences, { numSimilaritySentencesLookahead = 2, logging = false } = {}) {
  if (logging) console.log('numSimilaritySentencesLookahead', numSimilaritySentencesLookahead);

  const embeddings = await Promise.all(sentences.map((sentence) => createEmbedding(sentence)));

  const similarities = [];
  let similaritySum = 0;

  for (let i = 0; i < embeddings.length - 1; i++) {
    let maxSimilarity = cosineSimilarity(embeddings[i], embeddings[i + 1]);

    if (logging) {
      console.log(`\nSimilarity scores for sentence ${i}:`);
      console.log(`Base similarity with next sentence: ${maxSimilarity}`);
    }

    // Offset 1 was handled above; scan the rest of the lookahead window.
    for (let j = i + 2; j <= i + numSimilaritySentencesLookahead && j < embeddings.length; j++) {
      const sim = cosineSimilarity(embeddings[i], embeddings[j]);

      if (logging) {
        console.log(`Similarity with sentence ${j}: ${sim}`);
      }

      maxSimilarity = Math.max(maxSimilarity, sim);
    }

    similarities.push(maxSimilarity);
    similaritySum += maxSimilarity;
  }

  // Zero or one sentence produces no pairs; dividing by a zero count would
  // make both statistics NaN, so report neutral values instead.
  if (similarities.length === 0) {
    return { similarities, average: 0, variance: 0 };
  }

  const average = similaritySum / similarities.length;
  const variance = similarities.reduce((acc, sim) => acc + (sim - average) ** 2, 0) / similarities.length;

  return { similarities, average, variance };
}
// -----------------------------------------------------------
// -- Function to dynamically adjust the similarity threshold --
// -----------------------------------------------------------
/**
 * Derive a similarity threshold from the spread and level of the scores.
 *
 * Starting from `baseThreshold`:
 *   - variance below 0.01 lowers it by 0.1; variance above 0.05 raises it by 0.1;
 *   - then average below 0.3 raises it by 0.05; average above 0.7 lowers it by 0.05.
 * The result is clamped to [lowerBound, upperBound].
 *
 * @param {number} average - Mean similarity score.
 * @param {number} variance - Variance of the similarity scores.
 * @param {number} [baseThreshold=0.5] - Starting threshold.
 * @param {number} [lowerBound=0.2] - Minimum value returned.
 * @param {number} [upperBound=0.8] - Maximum value returned.
 * @returns {number} The adjusted, clamped threshold. If `lowerBound` is not
 *   below `upperBound`, an error is logged and `baseThreshold` is returned
 *   unchanged.
 */
export function adjustThreshold(average, variance, baseThreshold = 0.5, lowerBound = 0.2, upperBound = 0.8) {
  if (upperBound <= lowerBound) {
    console.error("Invalid bounds: lowerBound must be less than upperBound.");
    return baseThreshold;
  }

  // Stage 1: react to how spread out the scores are.
  const afterVariance =
    variance < 0.01
      ? baseThreshold - 0.1
      : variance > 0.05
        ? baseThreshold + 0.1
        : baseThreshold;

  // Stage 2: react to the overall similarity level.
  const afterAverage =
    average < 0.3
      ? afterVariance + 0.05
      : average > 0.7
        ? afterVariance - 0.05
        : afterVariance;

  const atLeastLower = Math.max(afterAverage, lowerBound);
  return Math.min(atLeastLower, upperBound);
}