-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbk_tree.go
116 lines (106 loc) · 2.91 KB
/
bk_tree.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Package go_bk_tree is a tree data structure (implemented in Golang) specialized to index data in a metric space.
// The BK-tree data structure was proposed by Burkhard and Keller in 1973 as a solution to the problem of
// searching a set of keys to find a key which is closest to a given query key. (Doc reference: http://signal-to-noise.xyz/post/bk-tree/)
package go_bk_tree
import (
"runtime"
"time"
)
type Distance int
// MetricTensor is an interface of data that needs to be indexed
//
// Example:
// import l "github.com/texttheater/golang-levenshtein/levenshtein"
//
// type Word string
//
// func (w Word) DistanceFrom(w2 MetricTensor) Distance {
// return Distance(l.DistanceForStrings([]rune(string(w)), []rune(string(w2.(Word))), l.DefaultOptions))
// }
type MetricTensor interface {
DistanceFrom(other MetricTensor) Distance
}
type bkTreeNode struct {
MetricTensor
Children map[Distance]*bkTreeNode
}
func newbkTreeNode(v MetricTensor) *bkTreeNode {
return &bkTreeNode{
MetricTensor: v,
Children: make(map[Distance]*bkTreeNode),
}
}
type BKTree struct {
root *bkTreeNode
}
// Add a node to BK-Tree, the location of the new node
// depends on how distance between different tensors are defined
func (tree *BKTree) Add(val MetricTensor) {
node := newbkTreeNode(val)
if tree.root == nil {
tree.root = node
return
}
curNode := tree.root
for {
dist := curNode.DistanceFrom(val)
target := curNode.Children[dist]
if target == nil {
curNode.Children[dist] = node
break
}
curNode = target
}
}
func (tree *BKTree) Search(val MetricTensor, radius Distance) []MetricTensor {
candidates := make([]*bkTreeNode, 0, 10)
candidates = append(candidates, tree.root)
results := make([]MetricTensor, 0, 5)
for {
cand := candidates[0]
candidates = candidates[1:]
dist := cand.DistanceFrom(val)
if dist <= radius {
results = append(results, cand.MetricTensor)
}
low, high := dist-radius, dist+radius
for dist, child := range cand.Children {
if dist >= low && dist <= high {
candidates = append(candidates, child)
}
}
if len(candidates) == 0 {
break
}
}
return results
}
var numCPU = runtime.NumCPU()
// Notice: this is an async implementation using goroutines for fun in order to see if async will out-perform the traditional
// implementation. Turns out it DID NOT.
func (tree *BKTree) SearchAsync(val MetricTensor, radius Distance) []MetricTensor {
results := make([]MetricTensor, 0, 5)
candsChan := make(chan *bkTreeNode, 100)
candsChan <- tree.root
LOOP:
for {
select {
case cand := <-candsChan:
go func() {
dist := cand.DistanceFrom(val)
if dist <= radius {
results = append(results, cand.MetricTensor)
}
low, high := dist-radius, dist+radius
for dist, child := range cand.Children {
if dist >= low && dist <= high {
candsChan <- child
}
}
}()
case <-time.After(time.Millisecond * 1):
break LOOP
}
}
return results
}