Skip to content

Commit

Permalink
feat: add offset and update sort
Browse files Browse the repository at this point in the history
  • Loading branch information
radulucut committed Sep 4, 2024
1 parent 88feefc commit a66a920
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 46 deletions.
7 changes: 6 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,9 @@ module github.com/radulucut/search

go 1.22.1

require github.com/google/go-cmp v0.6.0
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/stretchr/testify v1.9.0
gopkg.in/yaml.v3 v3.0.1 // indirect
)
12 changes: 10 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
49 changes: 32 additions & 17 deletions search.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package search

import (
"sort"
"math"
"slices"
"sync"
)

Expand Down Expand Up @@ -55,24 +56,30 @@ type itemScore struct {
score int
}

// SearchOptions groups the parameters accepted by Engine.Search.
type SearchOptions struct {
// Query is the search text; Search tokenizes it before scoring items.
Query string
// Limit is the maximum number of item ids to return.
Limit int
// Offset is the number of leading results (in sorted order) to skip
// before ids are collected.
Offset int
// Ignore is a list of item ids to leave out of the results.
Ignore []int64
}

// Search finds the most similar items to the given query.
// opts.Limit is the maximum number of items to return.
// opts.Offset is the number of leading results to skip.
// opts.Ignore is a list of item ids to ignore.
func (e *Engine) Search(query string, limit int, ignore []int64) []int64 {
func (e *Engine) Search(opts SearchOptions) []int64 {
var ignoreMap map[int64]struct{}
hasIgnore := false
if len(ignore) != 0 {
if len(opts.Ignore) != 0 {
hasIgnore = true
ignoreMap = make(map[int64]struct{})
for i := range ignore {
ignoreMap[ignore[i]] = struct{}{}
for i := range opts.Ignore {
ignoreMap[opts.Ignore[i]] = struct{}{}
}
}

q := e.tokenize(query)
q := e.tokenize(opts.Query)
e.RLock()
defer e.RUnlock()
scores := make([]itemScore, 0)
scores := make([]*itemScore, 0)
for id := range e.items {
if hasIgnore {
if _, ok := ignoreMap[id]; ok {
Expand All @@ -83,18 +90,26 @@ func (e *Engine) Search(query string, limit int, ignore []int64) []int64 {
if score == -1 {
continue
}
scores = append(scores, itemScore{id: id, score: score})
scores = append(scores, &itemScore{id: id, score: score})
}
sort.Slice(scores, func(i, j int) bool {
if scores[i].score == scores[j].score {
return scores[i].id > scores[j].id
} else {
return scores[i].score < scores[j].score
slices.SortFunc(scores, func(a, b *itemScore) int {
if a.score < b.score {
return -1
}
if a.score > b.score {
return 1
}
if a.id > b.id {
return -1
}
if a.id < b.id {
return 1
}
return 0
})
limit = min(limit, len(scores))
limit := min(opts.Offset+opts.Limit, len(scores))
res := make([]int64, 0, limit)
for i := 0; i < limit; i++ {
for i := opts.Offset; i < limit; i++ {
res = append(res, scores[i].id)
}
return res
Expand All @@ -104,7 +119,7 @@ func (e *Engine) score(q, b [][]rune) int {
var score int
skip := true
for i := range q {
best := (1<<63 - 1)
best := math.MaxInt
for j := range b {
best = min(best, LevenshteinDistance(q[i], b[j]))
}
Expand Down
58 changes: 32 additions & 26 deletions search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package search
import (
"testing"

"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
)

type Book struct {
Expand Down Expand Up @@ -45,41 +45,51 @@ func Test_Engine(t *testing.T) {
{"spânzuraţilor", []int64{2}},
{"amintiri din copilărie", []int64{8, 11, 10, 5, 15}},
{"xyz zyx", []int64{}},
{"din", []int64{11, 8, 15, 14, 13}},
}

for _, test := range tests {
t.Run(test.query, func(t *testing.T) {
actual := engine.Search(test.query, 5, nil)
if diff := cmp.Diff(test.expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
actual := engine.Search(SearchOptions{Query: test.query, Limit: 5})
assert.Equal(t, test.expected, actual)
})
}

t.Run("offset", func(t *testing.T) {
actual := engine.Search(SearchOptions{
Query: "de",
Limit: 5,
Offset: 5,
Ignore: []int64{15},
})
assert.Equal(t, []int64{9, 8, 7, 6, 5}, actual)
})

t.Run("Ignore ids", func(t *testing.T) {
actual := engine.Search("maitreyi", 5, []int64{4})
expected := []int64{}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
actual := engine.Search(SearchOptions{
Query: "maitreyi",
Limit: 5,
Ignore: []int64{4},
})
assert.ElementsMatch(t, []int64{}, actual)
})

engine.SetItem(16, "Ciocoii vechi și noi de Nicolae Filimon")
t.Run("SetItem", func(t *testing.T) {
actual := engine.Search("Ciocoii vechi", 5, nil)
expected := []int64{16}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
actual := engine.Search(SearchOptions{
Query: "Ciocoii vechi",
Limit: 5,
})
assert.ElementsMatch(t, []int64{16}, actual)
})

engine.DeleteItem(7)
t.Run("DeleteItem", func(t *testing.T) {
actual := engine.Search("Moara", 5, nil)
expected := []int64{}
if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
actual := engine.Search(SearchOptions{
Query: "Moara",
Limit: 5,
})
assert.ElementsMatch(t, []int64{}, actual)
})
}

Expand All @@ -94,9 +104,7 @@ func Test_Tokenize(t *testing.T) {
{'4'},
{'a', 'a', 'a', 'a', 'i', 'i', 's', 's', 's', 's', 't', 't', 't', 't'},
}
if diff := cmp.Diff(expected, tokens); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
assert.Equal(t, expected, tokens)
}

func Test_LevenshteinDistance(t *testing.T) {
Expand All @@ -121,9 +129,7 @@ func Test_LevenshteinDistance(t *testing.T) {
}
for _, test := range tests {
t.Run("LevenshteinDistance", func(t *testing.T) {
if diff := cmp.Diff(test.expected, LevenshteinDistance(test.a, test.b)); diff != "" {
t.Errorf("mismatch (-want +got):\n%s", diff)
}
assert.Equal(t, test.expected, LevenshteinDistance(test.a, test.b))
})
}
}

0 comments on commit a66a920

Please sign in to comment.