Skip to content

Commit

Permalink
feat: add support for ignore, change init logic
Browse files Browse the repository at this point in the history
  • Loading branch information
radulucut committed Apr 19, 2024
1 parent b359bcc commit 15679e0
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 30 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,14 @@ var items = []Book{

func main() {
// Create a new search engine
engine := search.NewEngine(items, func(item Book) (int64, string) {
return item.Id, item.Text
}, nil)
engine := search.NewEngine()
engine.SetTolerance(2)

// Add items to the search engine
for _, item := range items {
engine.SetItem(item.Id, item.Text)
}

// Search for a book
results := engine.Search("Eliade", 5)

Expand Down
51 changes: 28 additions & 23 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,50 +12,39 @@ type Engine struct {
mx sync.RWMutex
}

// NewEngine creates a new search engine.
//
// items is a slice of items to be indexed.
//
// mapFunc is a function that maps an item to an id and a string.
//
// tokenizeFunc is an optional function that tokenizes a string into words.
func NewEngine[T any](
items []T,
mapFunc func(T) (int64, string),
tokenizeFunc TokenizeFunc,
) *Engine {
func NewEngine() *Engine {
engine := &Engine{
items: make(map[int64][][]rune),
tolerance: 1,
tokenize: Tokenize,
mx: sync.RWMutex{},
}
if tokenizeFunc != nil {
engine.tokenize = tokenizeFunc
}
for i := range items {
id, s := mapFunc(items[i])
engine.items[id] = engine.tokenize(s)
}
return engine
}

// SetTolerance sets the maximum number of typos per word allowed.
// SetTokenizeFunc replaces the function the engine uses to split text into
// tokens.
//
// A nil f is ignored and the current tokenizer is kept. Storing nil would
// make the next SetItem or Search call panic, because both invoke the
// tokenizer unconditionally.
//
// This method only assigns the tokenizer; it does not re-tokenize items that
// are already stored in the engine.
func (e *Engine) SetTokenizeFunc(f TokenizeFunc) {
	if f == nil {
		return
	}
	e.mx.Lock()
	defer e.mx.Unlock()
	e.tokenize = f
}

// SetTolerance updates the maximum number of typos allowed per word.
// An engine created by NewEngine starts with a tolerance of 1.
func (e *Engine) SetTolerance(tolerance int) {
	// A single field write cannot panic, so the lock is released explicitly.
	e.mx.Lock()
	e.tolerance = tolerance
	e.mx.Unlock()
}

// SetItem adds a new item to the search engine.
// SetItem tokenizes text and stores the resulting tokens under id, replacing
// whatever was previously stored for that id.
func (e *Engine) SetItem(id int64, text string) {
	e.mx.Lock()
	// Deferred so the lock is released even if the tokenizer panics.
	defer e.mx.Unlock()

	tokens := e.tokenize(text)
	e.items[id] = tokens
}

// DeleteItem removes an item from the search engine.
// Remove an item from the search engine.
func (e *Engine) DeleteItem(id int64) {
e.mx.Lock()
defer e.mx.Unlock()
Expand All @@ -69,12 +58,28 @@ type itemScore struct {

// Search finds the most similar items to the given query.
// limit is the maximum number of items to return.
func (e *Engine) Search(query string, limit int) []int64 {
// ignore is an optional list of item ids to exclude from the results; nil or empty excludes nothing.
func (e *Engine) Search(query string, limit int, ignore []int64) []int64 {
var ignoreMap map[int64]struct{}
hasIgnore := false
if len(ignore) != 0 {
hasIgnore = true
ignoreMap = make(map[int64]struct{})
for i := range ignore {
ignoreMap[ignore[i]] = struct{}{}
}
}

q := e.tokenize(query)
e.mx.RLock()
defer e.mx.RUnlock()
scores := make([]itemScore, 0)
for id := range e.items {
if hasIgnore {
if _, ok := ignoreMap[id]; ok {
continue
}
}
score := e.score(q, e.items[id])
if score == -1 {
continue
Expand Down
35 changes: 31 additions & 4 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ var items = []Book{
}

func Test_Engine(t *testing.T) {
engine := NewEngine(items, func(item Book) (int64, string) {
return item.Id, item.Text
}, nil)
engine := NewEngine()
engine.SetTolerance(2)
for _, item := range items {
engine.SetItem(item.Id, item.Text)
}
tests := []struct {
query string
expected []int64
Expand All @@ -48,12 +49,38 @@ func Test_Engine(t *testing.T) {

for _, test := range tests {
t.Run(test.query, func(t *testing.T) {
actual := engine.Search(test.query, 5)
actual := engine.Search(test.query, 5, nil)
if diff := cmp.Diff(test.expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})
}

t.Run("Ignore ids", func(t *testing.T) {
actual := engine.Search("maitreyi", 5, []int64{4})
expected := []int64{}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})

engine.SetItem(16, "Ciocoii vechi și noi de Nicolae Filimon")
t.Run("SetItem", func(t *testing.T) {
actual := engine.Search("Ciocoii vechi", 5, nil)
expected := []int64{16}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})

engine.DeleteItem(7)
t.Run("DeleteItem", func(t *testing.T) {
actual := engine.Search("Moara", 5, nil)
expected := []int64{}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
})
}

func Test_Tokenize(t *testing.T) {
Expand Down

0 comments on commit 15679e0

Please sign in to comment.