-
Notifications
You must be signed in to change notification settings - Fork 1
/
document_test.go
72 lines (63 loc) · 2.06 KB
/
document_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package main
import (
"reflect"
"testing"
)
// Test_TokenizeFile verifies that TokenizeFile reads a corpus file and
// populates doc.tokens with the expected token slice.
func Test_TokenizeFile(t *testing.T) {
	// Table-driven cases: input file path and the tokens we expect back.
	cases := []struct {
		in   string
		want []string
	}{
		{"corpus/Test_TokenizeFile.txt", []string{"play", "sports", "today"}},
	}
	for _, c := range cases {
		doc := NewDocument()
		doc.TokenizeFile(c.in)
		// reflect.DeepEqual compares slice length, order, and contents.
		if !reflect.DeepEqual(doc.tokens, c.want) {
			t.Errorf("TokenizeFile(%s) != %v, got %v", c.in, c.want, doc.tokens)
		}
	}
}
// Test_TokenizeString verifies that TokenizeString splits an input string
// into the expected token slice on doc.tokens.
func Test_TokenizeString(t *testing.T) {
	// Table-driven cases: raw input string and the tokens we expect back.
	cases := []struct {
		in   string
		want []string
	}{
		{"play sports today", []string{"play", "sports", "today"}},
	}
	for _, c := range cases {
		doc := NewDocument()
		doc.TokenizeString(c.in)
		if !reflect.DeepEqual(doc.tokens, c.want) {
			// BUG FIX: message previously said "TokenizeFile", which would
			// mislead anyone reading a failure from this test.
			t.Errorf("TokenizeString(%s) != %v, got %v", c.in, c.want, doc.tokens)
		}
	}
}
// Test_GenerateNGrams verifies that GenerateNGrams builds the expected
// hash-keyed bigram map on doc.ngrams, including per-class counts for
// repeated n-grams (e.g. "play play" appearing twice).
func Test_GenerateNGrams(t *testing.T) {
	class := "class1"
	num := 2 // bigrams
	// Table-driven cases: input string and the full expected ngram map,
	// keyed by each n-gram's precomputed hash.
	cases := []struct {
		in   string
		want map[string]nGram
	}{
		{"play sports today", map[string]nGram{
			"8e332df73afd1944b529f1ee94eb0d7d": {Length: num, Tokens: []string{"play", "sports"}, Hash: "8e332df73afd1944b529f1ee94eb0d7d", Count: map[string]int{class: 1}},
			"d3364f66e254f86cfef25c00cb30fe59": {Length: num, Tokens: []string{"sports", "today"}, Hash: "d3364f66e254f86cfef25c00cb30fe59", Count: map[string]int{class: 1}},
		},
		},
		{"play play play sports today", map[string]nGram{
			"8e332df73afd1944b529f1ee94eb0d7d": {Length: num, Tokens: []string{"play", "sports"}, Hash: "8e332df73afd1944b529f1ee94eb0d7d", Count: map[string]int{class: 1}},
			"d3364f66e254f86cfef25c00cb30fe59": {Length: num, Tokens: []string{"sports", "today"}, Hash: "d3364f66e254f86cfef25c00cb30fe59", Count: map[string]int{class: 1}},
			// "play play" occurs twice in this input, so its count is 2.
			"ec7841687efd9cf97ac07f0c80c48e8e": {Length: num, Tokens: []string{"play", "play"}, Hash: "ec7841687efd9cf97ac07f0c80c48e8e", Count: map[string]int{class: 2}},
		},
		},
	}
	for _, c := range cases {
		doc := NewDocument()
		doc.TokenizeString(c.in)
		doc.GenerateNGrams(num, class)
		if !reflect.DeepEqual(doc.ngrams, c.want) {
			// BUG FIX: message previously said "TokenizeFile", which would
			// mislead anyone reading a failure from this test.
			t.Errorf("GenerateNGrams(%s) \n\t%v, got \n\t%v", c.in, c.want, doc.ngrams)
		}
	}
}