diff --git a/Makefile b/Makefile index bdb50979..152af0e7 100644 --- a/Makefile +++ b/Makefile @@ -12,3 +12,6 @@ test: update: go get -u ./... go mod tidy + +lint: + golangci-lint run \ No newline at end of file diff --git a/generate/searcher/searcher.go b/generate/searcher/searcher.go index 34df4e06..24d30b68 100644 --- a/generate/searcher/searcher.go +++ b/generate/searcher/searcher.go @@ -72,6 +72,7 @@ func main() { } else { if len(tags) != 0 { log.Fatal("-tags option applies only to directories, not when files are specified") + // ^FATAL } dir = path.Dir(args[0]) } @@ -108,6 +109,7 @@ func main() { err := os.WriteFile(outputName, src, 0644) if err != nil { log.Fatalf("writing output: %s", err) + // ^FATAL } } @@ -127,9 +129,11 @@ func (g *Generator) parsePackage(patterns []string, tags []string) { pkgs, err := packages.Load(cfg, patterns...) if err != nil { log.Fatal(err) + // ^FATAL } if len(pkgs) != 1 { log.Fatalf("error: %d packages matching %v", len(pkgs), strings.Join(patterns, " ")) + // ^FATAL } g.addPackage(pkgs[0]) } @@ -167,6 +171,7 @@ func (g *Generator) generate(typeName string) { if len(values) == 0 { log.Fatalf("no values defined for type %s", typeName) + // ^FATAL } // Generate code for importing engines @@ -271,19 +276,23 @@ func (f *File) genDecl(node ast.Node) bool { obj, ok := f.pkg.defs[name] if !ok { log.Fatalf("no value for constant %s", name) + // ^FATAL } info := obj.Type().Underlying().(*types.Basic).Info() if info&types.IsInteger == 0 { log.Fatalf("can't handle non-integer constant type %s", typ) + // ^FATAL } value := obj.(*types.Const).Val() // Guaranteed to succeed as this is CONST. if value.Kind() != constant.Int { log.Fatalf("can't happen: constant is not an integer %s", name) + // ^FATAL } i64, isInt := constant.Int64Val(value) u64, isUint := constant.Uint64Val(value) if !isInt && !isUint { log.Fatalf("internal error: value of %s is not an integer: %s", name, value.String()) + // ^FATAL } if !isInt { u64 = uint64(i64) diff --git a/generate/searcher/util.go b/generate/searcher/util.go index 450bb35c..d953fb38 100644 --- a/generate/searcher/util.go +++ b/generate/searcher/util.go @@ -24,6 +24,7 @@ func isDirectoryFatal(path string) bool { info, err := os.Stat(path) if err != nil { log.Fatal(err) + // ^FATAL } return info.IsDir() } diff --git a/go.mod b/go.mod index 5b55e070..d90f6a6c 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ toolchain go1.21.3 require ( github.com/alecthomas/kong v0.8.1 - github.com/cockroachdb/pebble v0.0.0-20231106144427-a0b01b62e8f9 + github.com/cockroachdb/pebble v0.0.0-20231206044450-348b3a068f94 github.com/fxamacker/cbor/v2 v2.5.0 github.com/gin-contrib/cors v1.5.0 github.com/gin-contrib/graceful v0.1.0 @@ -50,9 +50,9 @@ require ( github.com/go-playground/validator/v10 v10.16.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/snappy v0.0.4 // indirect - github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a // indirect + github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.2 // indirect + github.com/klauspost/compress v1.17.4 // indirect github.com/klauspost/cpuid/v2 v2.2.6 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/kr/pretty v0.3.1 // indirect @@ -73,11 +73,11 @@ require ( github.com/prometheus/procfs v0.12.0 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect - github.com/ugorji/go/codec v1.2.11 // 
indirect + github.com/ugorji/go/codec v1.2.12 // indirect github.com/x448/float16 v0.8.4 // indirect golang.org/x/arch v0.6.0 // indirect golang.org/x/crypto v0.16.0 // indirect - golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect + golang.org/x/exp v0.0.0-20231127185646-65229373498e // indirect golang.org/x/mod v0.14.0 // indirect golang.org/x/sync v0.5.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect diff --git a/go.sum b/go.sum index f38c1add..505558fb 100644 --- a/go.sum +++ b/go.sum @@ -63,8 +63,10 @@ github.com/cockroachdb/errors v1.11.1 h1:xSEW75zKaKCWzR3OfxXUxgrk/NtT4G1MiOv5lWZ github.com/cockroachdb/errors v1.11.1/go.mod h1:8MUxA3Gi6b25tYlFEBGLf+D8aISL+M4MIpiWMSNRfxw= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= -github.com/cockroachdb/pebble v0.0.0-20231106144427-a0b01b62e8f9 h1:sCTdjoX7OXa5jjHNXw/jwR+gnmM/yLhwsEsAG1OFtBQ= -github.com/cockroachdb/pebble v0.0.0-20231106144427-a0b01b62e8f9/go.mod h1:acMRUGd/BK8AUmQNK3spUCCGzFLZU2bSST3NMXSq2Kc= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= +github.com/cockroachdb/pebble v0.0.0-20231206044450-348b3a068f94 h1:BnNrdQcwSlz6eJF62X/zP5gN6efSNoz0CBu1yQdpzLQ= +github.com/cockroachdb/pebble v0.0.0-20231206044450-348b3a068f94/go.mod h1:BHuaMa/lK7fUe75BlsteiiTu8ptIG+qSAuDtGMArP18= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= @@ -155,8 +157,8 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= -github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a h1:fEBsGL/sjAuJrgah5XqmmYsTLzJp/TO9Lhy39gkverk= -github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= +github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08 h1:PxlBVtIFHR/mtWk2i0gTEdCz+jBnqiuHNSki0epDbVs= +github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= @@ -167,8 +169,8 @@ github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8Nz github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= 
-github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= +github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4= +github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc= github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= @@ -278,8 +280,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY= -github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= -github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= +github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -297,8 +299,8 @@ golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf golang.org/x/crypto v0.16.0 h1:mMMrFzRSCF0GvB7Ne27XVtVAaXLrPmgPC7/v0tkwHaY= golang.org/x/crypto v0.16.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= +golang.org/x/exp v0.0.0-20231127185646-65229373498e h1:Gvh4YaCaXNs6dKTlfgismwWZKyjVZXwOPfIyUaqU3No= +golang.org/x/exp v0.0.0-20231127185646-65229373498e/go.mod h1:iRJReGqOEeBhDZGkGbynYwcHlctCvnjTYIamk7uXpHI= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= diff --git a/src/bucket/bucket.go b/src/bucket/bucket.go index f76fe0bb..095df106 100644 --- a/src/bucket/bucket.go +++ b/src/bucket/bucket.go @@ -1,6 +1,7 @@ package bucket import ( + "fmt" "sync" "github.com/gocolly/colly/v2" @@ -62,12 +63,12 @@ func AddSEResult(seResult *engines.RetrievedResult, seName engines.Name, relay * if !exists && options.VisitPages { if err := pagesCol.Visit(seResult.URL); err != nil { - log.Error().Err(err).Msgf("bucket: failed visiting %v", seResult.URL) + log.Error().Err(err).Msgf("bucket.AddSEResult(): failed visiting %v", seResult.URL) } } } -func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) { +func SetResultResponse(link string, response *colly.Response, relay *Relay, seName engines.Name) error { log.Trace().Msgf("%v: Got Response -> %v", seName, link) relay.Mutex.Lock() @@ -76,12 +77,15 @@ func SetResultResponse(link string, 
response *colly.Response, relay *Relay, seNa if !exists { relay.Mutex.Unlock() relay.Mutex.RLock() - log.Error().Msgf("URL not in map when adding response! Should not be possible. URL: %v.\nRelay: %v", link, relay) + err := fmt.Errorf("bucket.SetResultResponse(): URL not in map when adding response, should not be possible. URL: %v.\nRelay: %v", link, relay) relay.Mutex.RUnlock() + return err } else { mapRes.Response = response relay.Mutex.Unlock() } + + return nil } func MakeSEResult(urll string, title string, description string, searchEngineName engines.Name, sePage int, seOnPageRank int) *engines.RetrievedResult { diff --git a/src/cache/interfaces.go b/src/cache/interfaces.go index 74b5b680..7fadcafb 100644 --- a/src/cache/interfaces.go +++ b/src/cache/interfaces.go @@ -2,8 +2,8 @@ package cache type DB interface { Close() - Set(k string, v Value) - Get(k string, o Value) + Set(k string, v Value) error + Get(k string, o Value) error } type Value interface{} diff --git a/src/cache/nocache/nocache.go b/src/cache/nocache/nocache.go index 2f89ae6a..eed8b7b1 100644 --- a/src/cache/nocache/nocache.go +++ b/src/cache/nocache/nocache.go @@ -10,6 +10,6 @@ func New() *DB { return nil } func (db *DB) Close() {} -func (db *DB) Set(k string, v cache.Value) {} +func (db *DB) Set(k string, v cache.Value) error { return nil } -func (db *DB) Get(k string, o cache.Value) {} +func (db *DB) Get(k string, o cache.Value) error { return nil } diff --git a/src/cache/pebble/pebble.go b/src/cache/pebble/pebble.go index 2c7b439f..e8f25bcb 100644 --- a/src/cache/pebble/pebble.go +++ b/src/cache/pebble/pebble.go @@ -1,13 +1,14 @@ package pebble import ( + "fmt" "path" "time" "github.com/cockroachdb/pebble" "github.com/fxamacker/cbor/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/cache" + "github.com/rs/zerolog/log" ) type DB struct { @@ -19,7 +20,8 @@ func New(dataDirPath string) *DB { pdb, err := pebble.Open(pebblePath, &pebble.Options{}) if err != nil { - log.Fatal().Err(err).Msgf("Error opening pebble at path: %v", pebblePath) + log.Fatal().Err(err).Msgf("pebble.New(): error opening pebble at path: %v", pebblePath) + // ^FATAL } else { log.Info().Msgf("Successfully opened pebble (path: %v)", pebblePath) } @@ -29,37 +31,43 @@ func New(dataDirPath string) *DB { func (db *DB) Close() { if err := db.pdb.Close(); err != nil { - log.Fatal().Err(err).Msg("Error closing pebble") + log.Fatal().Err(err).Msg("pebble.Close(): error closing pebble") + // ^FATAL } else { log.Debug().Msg("Successfully closed pebble") } } -func (db *DB) Set(k string, v cache.Value) { +func (db *DB) Set(k string, v cache.Value) error { log.Debug().Msg("Caching...") cacheTimer := time.Now() if val, err := cbor.Marshal(v); err != nil { - log.Error().Err(err).Msg("Error marshaling value") + return fmt.Errorf("pebble.Set(): error marshaling value: %w", err) } else if err := db.pdb.Set([]byte(k), val, pebble.NoSync); err != nil { - log.Fatal().Err(err).Msg("Error setting KV to pebble") + log.Fatal().Err(err).Msg("pebble.Set(): error setting KV to pebble") + // ^FATAL } else { cacheTimeSince := time.Since(cacheTimer) log.Debug().Msgf("Cached results in %vms (%vns)", cacheTimeSince.Milliseconds(), cacheTimeSince.Nanoseconds()) } + return nil } -func (db *DB) Get(k string, o cache.Value) { +func (db *DB) Get(k string, o cache.Value) error { v, c, err := db.pdb.Get([]byte(k)) val := []byte(v) // copy data before closing, casting needed for unmarshal if err == pebble.ErrNotFound { - log.Trace().Msgf("Found no value in pebble for key 
%v", k) + log.Trace().Msgf("Found no value in pebble for key: \"%v\"", k) } else if err != nil { - log.Fatal().Err(err).Msgf("Error getting value from pebble for key %v", k) + log.Fatal().Err(err).Msgf("pebble.Get(): error getting value from pebble for key %v", k) + // ^FATAL } else if err := c.Close(); err != nil { - log.Fatal().Err(err).Msgf("Error closing io to pebble for key %v", k) + log.Fatal().Err(err).Msgf("pebble.Get(): error closing io to pebble for key %v", k) + // ^FATAL } else if err := cbor.Unmarshal(val, o); err != nil { - log.Error().Err(err).Msgf("Failed unmarshaling value from pebble for key %v", k) + return fmt.Errorf("pebble.Get(): failed unmarshaling value from pebble for key %v: %w", k, err) } + return nil } diff --git a/src/cache/redis/redis.go b/src/cache/redis/redis.go index afda3d89..731a3452 100644 --- a/src/cache/redis/redis.go +++ b/src/cache/redis/redis.go @@ -6,10 +6,10 @@ import ( "time" "github.com/fxamacker/cbor/v2" - "github.com/redis/go-redis/v9" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/cache" "github.com/hearchco/hearchco/src/config" + "github.com/redis/go-redis/v9" + "github.com/rs/zerolog/log" ) type DB struct { @@ -25,7 +25,8 @@ func New(ctx context.Context, config config.Redis) *DB { }) if err := rdb.Ping(ctx).Err(); err != nil { - log.Fatal().Err(err).Msgf("Error connecting to redis with addr: %v:%v/%v", config.Host, config.Port, config.Database) + log.Fatal().Err(err).Msgf("redis.New(): error connecting to redis with addr: %v:%v/%v", config.Host, config.Port, config.Database) + // ^FATAL } else { log.Info().Msgf("Successful connection to redis (addr: %v:%v/%v)", config.Host, config.Port, config.Database) } @@ -35,35 +36,40 @@ func New(ctx context.Context, config config.Redis) *DB { func (db *DB) Close() { if err := db.rdb.Close(); err != nil { - log.Fatal().Err(err).Msg("Error disconnecting from redis") + log.Fatal().Err(err).Msg("redis.Close(): error disconnecting from redis") + // ^FATAL } else { log.Debug().Msg("Successfully disconnected from redis") } } -func (db *DB) Set(k string, v cache.Value) { +func (db *DB) Set(k string, v cache.Value) error { log.Debug().Msg("Caching...") cacheTimer := time.Now() if val, err := cbor.Marshal(v); err != nil { - log.Error().Err(err).Msg("Error marshaling value") + return fmt.Errorf("redis.Set(): error marshaling value: %w", err) } else if err := db.rdb.Set(db.ctx, k, val, 0).Err(); err != nil { - log.Fatal().Err(err).Msg("Error setting KV to redis") + log.Fatal().Err(err).Msg("redis.Set(): error setting KV to redis") + // ^FATAL } else { cacheTimeSince := time.Since(cacheTimer) log.Debug().Msgf("Cached results in %vms (%vns)", cacheTimeSince.Milliseconds(), cacheTimeSince.Nanoseconds()) } + return nil } -func (db *DB) Get(k string, o cache.Value) { +func (db *DB) Get(k string, o cache.Value) error { v, err := db.rdb.Get(db.ctx, k).Result() val := []byte(v) // copy data before closing, casting needed for unmarshal if err == redis.Nil { - log.Trace().Msgf("Found no value in redis for key %v", k) + log.Trace().Msgf("found no value in redis for key \"%v\"", k) } else if err != nil { - log.Fatal().Err(err).Msgf("Error getting value from redis for key %v", k) + log.Fatal().Err(err).Msgf("redis.Get(): error getting value from redis for key %v", k) + // ^FATAL } else if err := cbor.Unmarshal(val, o); err != nil { - log.Error().Err(err).Msgf("Failed unmarshaling value from redis for key %v", k) + return fmt.Errorf("redis.Set(): failed unmarshaling value from redis for key %v: %w", k, err) } + 
return nil } diff --git a/src/cli/climode.go b/src/cli/climode.go index 3cf5aa90..26319c0b 100644 --- a/src/cli/climode.go +++ b/src/cli/climode.go @@ -4,13 +4,13 @@ import ( "fmt" "time" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/cache" "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search" + "github.com/rs/zerolog/log" ) func printResults(results []result.Result) { @@ -44,13 +44,23 @@ func Run(flags Flags, db cache.DB, conf *config.Config) { // todo: ctx cancelling (important since pebble is NoSync) var results []result.Result - db.Get(flags.Query, &results) + gerr := db.Get(flags.Query, &results) + if gerr != nil { + log.Fatal().Err(gerr).Msgf("cli.Run(): failed accessing cache for query %v", flags.Query) + // ^FATAL + } + if results != nil { log.Debug().Msgf("Found results for query (%v) in cache", flags.Query) } else { log.Debug().Msg("Nothing found in cache, doing a clean search") + results = search.PerformSearch(flags.Query, options, conf) - db.Set(flags.Query, results) + + serr := db.Set(flags.Query, results) + if serr != nil { + log.Error().Err(serr).Msgf("cli.Run(): error updating database with search results") + } } duration := time.Since(start) diff --git a/src/cli/setup.go b/src/cli/setup.go index 5780df86..d321a0f6 100644 --- a/src/cli/setup.go +++ b/src/cli/setup.go @@ -26,7 +26,8 @@ func Setup() Flags { ) if err := ctx.Validate(); err != nil { - log.Panic().Err(err).Msg("Failed parsing cli") + log.Panic().Err(err).Msg("cli.Setup(): failed parsing cli") // panic is also run inside the library. when does this happen? + // ^PANIC } return cli diff --git a/src/config/load.go b/src/config/load.go index e841ab51..21ab30d2 100644 --- a/src/config/load.go +++ b/src/config/load.go @@ -6,14 +6,14 @@ import ( "strings" "time" + "github.com/hearchco/hearchco/src/category" + "github.com/hearchco/hearchco/src/engines" "github.com/knadh/koanf/parsers/yaml" "github.com/knadh/koanf/providers/env" "github.com/knadh/koanf/providers/file" "github.com/knadh/koanf/providers/structs" "github.com/knadh/koanf/v2" "github.com/rs/zerolog/log" - "github.com/hearchco/hearchco/src/category" - "github.com/hearchco/hearchco/src/engines" ) var EnabledEngines []engines.Name = make([]engines.Name, 0) @@ -30,7 +30,7 @@ func (c *Config) fromReader(rc *ReaderConfig) { keyName, err := engines.NameString(key) if err != nil { log.Panic().Err(err).Msgf("failed reading config. invalid engine name: %v", key) - return + // ^PANIC } nc.Settings[keyName] = val } @@ -42,7 +42,7 @@ func (c *Config) fromReader(rc *ReaderConfig) { engineName, nameErr := engines.NameString(name) if nameErr != nil { log.Panic().Err(nameErr).Msg("failed converting string to engine name") - return + // ^PANIC } engArr = append(engArr, engineName) @@ -112,33 +112,38 @@ func (c *Config) Load(dataDirPath string, logDirPath string) { // We provide a struct along with the struct tag `koanf` to the // provider. 
if err := k.Load(structs.Provider(&rc, "koanf"), nil); err != nil { - log.Panic().Err(err).Msg("failed loading default values") + log.Panic().Err(err).Msg("config.Load(): failed loading default values") + // ^PANIC } // Load YAML config yamlPath := path.Join(dataDirPath, "hearchco.yaml") if _, err := os.Stat(yamlPath); err != nil { - log.Trace().Msgf("no yaml config present at path: %v, looking for .yml", yamlPath) + log.Trace().Msgf("config.Load(): no yaml config present at path: %v, looking for .yml", yamlPath) yamlPath = path.Join(dataDirPath, "hearchco.yml") if _, errr := os.Stat(yamlPath); errr != nil { - log.Trace().Msgf("no yaml config present at path: %v", yamlPath) + log.Trace().Msgf("config.Load(): no yaml config present at path: %v", yamlPath) } else if errr := k.Load(file.Provider(yamlPath), yaml.Parser()); errr != nil { - log.Panic().Err(err).Msg("error loading yaml config") + log.Panic().Err(err).Msg("config.Load(): error loading yaml config") + // ^PANIC } } else if err := k.Load(file.Provider(yamlPath), yaml.Parser()); err != nil { - log.Panic().Err(err).Msg("error loading yaml config") + log.Panic().Err(err).Msg("config.Load(): error loading yaml config") + // ^PANIC } // Load ENV config if err := k.Load(env.Provider("HEARCHCO_", ".", func(s string) string { return strings.Replace(strings.ToLower(strings.TrimPrefix(s, "HEARCHCO_")), "_", ".", -1) }), nil); err != nil { - log.Panic().Err(err).Msg("error loading env config") + log.Panic().Err(err).Msg("config.Load(): error loading env config") + // ^PANIC } // Unmarshal config into struct if err := k.Unmarshal("", &rc); err != nil { - log.Panic().Err(err).Msg("failed unmarshaling koanf config") + log.Panic().Err(err).Msg("config.Load(): failed unmarshaling koanf config") + // ^PANIC } c.fromReader(&rc) diff --git a/src/engines/bing/bing.go b/src/engines/bing/bing.go index ecbf5f12..1b75809b 100644 --- a/src/engines/bing/bing.go +++ b/src/engines/bing/bing.go @@ -8,12 +8,12 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -27,26 +27,26 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) col.OnHTML(dompaths.Result, func(e *colly.HTMLElement) { dom := e.DOM - linkHref, _ := dom.Find(dompaths.Link).Attr("href") + linkHref, hrefExists := dom.Find(dompaths.Link).Attr("href") linkText := parse.ParseURL(linkHref) linkText = removeTelemetry(linkText) titleText := strings.TrimSpace(dom.Find(dompaths.Title).Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && 
titleText != "" { + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { if descText == "" { descText = strings.TrimSpace(dom.Find("p.b_algoSlug").Text()) } @@ -54,8 +54,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText = strings.Split(descText, "Web")[1] } - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -68,23 +67,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+query, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("GET", Info.URL+query+"&first="+strconv.Itoa(i*10+1), nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&first="+strconv.Itoa(i*10+1), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/brave/brave.go b/src/engines/brave/brave.go index 2bfd5f55..40cd42c8 100644 --- a/src/engines/brave/brave.go +++ b/src/engines/brave/brave.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" @@ -25,24 +24,24 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) col.OnHTML(dompaths.Result, func(e *colly.HTMLElement) { dom := e.DOM - linkHref, _ := dom.Find(dompaths.Link).Attr("href") + linkHref, hrefExists := dom.Find(dompaths.Link).Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(dom.Find(dompaths.Title).Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { if descText == "" { descText = strings.TrimSpace(dom.Find("div.product > div.flex-hcenter > div > div[class=\"text-sm text-gray\"]").Text()) } @@ -50,8 +49,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi descText = 
strings.TrimSpace(dom.Find("p.snippet-description").Text()) } - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -62,23 +60,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+query+"&source=web", nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&source=web", colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("GET", Info.URL+query+"&spellcheck=0&offset="+strconv.Itoa(i), nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&spellcheck=0&offset="+strconv.Itoa(i), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/duckduckgo/duckduckgo.go b/src/engines/duckduckgo/duckduckgo.go index a07438f2..e9b1ed61 100644 --- a/src/engines/duckduckgo/duckduckgo.go +++ b/src/engines/duckduckgo/duckduckgo.go @@ -8,7 +8,6 @@ import ( "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" @@ -27,18 +26,16 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) col.OnHTML(dompaths.ResultsContainer, func(e *colly.HTMLElement) { - var linkText string - var linkScheme string - var titleText string - var descText string + var linkText, linkScheme, titleText, descText string + var hrefExists bool var rrank int var pageStr string = e.Request.Ctx.Get("page") @@ -49,7 +46,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi case 0: rankText := strings.TrimSpace(row.Children().First().Text()) fmt.Sscanf(rankText, "%d", &rrank) - linkHref, _ := row.Find(dompaths.Link).Attr("href") + var linkHref string + linkHref, hrefExists = row.Find(dompaths.Link).Attr("href") if strings.Contains(linkHref, "https") { linkScheme = "https://" } else { @@ -62,7 +60,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi rawURL := linkScheme + row.Find("td > span.link-text").Text() linkText = parse.ParseURL(rawURL) case 3: - if linkText != "" && linkText != "#" && titleText != "" { + if hrefExists && linkText != "" && 
linkText != "#" && titleText != "" { res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, (i/4 + 1)) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) } @@ -73,23 +71,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+"?q="+query, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+"?q="+query, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("POST", Info.URL, strings.NewReader("q="+query+"&dc="+strconv.Itoa(i*20)), colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with POST method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with POST method on page", Info.Name) - } + sedefaults.DoPostRequest(Info.URL, strings.NewReader("q="+query+"&dc="+strconv.Itoa(i*20)), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/etools/etools.go b/src/engines/etools/etools.go index 7687c78f..991dc63a 100644 --- a/src/engines/etools/etools.go +++ b/src/engines/etools/etools.go @@ -6,12 +6,12 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -25,12 +25,12 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -38,7 +38,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi dom := e.DOM linkEl := dom.Find(dompaths.Link) - linkHref, _ := linkEl.Attr("href") + linkHref, hrefExists := linkEl.Attr("href") var linkText string if linkHref[0] == 'h' { @@ -52,9 +52,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi titleText := strings.TrimSpace(linkEl.Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, 
Info.Name, relay, &options, pagesCol) @@ -71,12 +70,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("POST", Info.URL, strings.NewReader("query="+query+"&country=web&language=all"), colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with POST method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with POST method", Info.Name) - } + sedefaults.DoPostRequest(Info.URL, strings.NewReader("query="+query+"&country=web&language=all"), colCtx, col, Info.Name, &retError) col.Wait() //wait so I can get the JSESSION cookie back for i := 1; i < options.MaxPages; i++ { @@ -84,12 +78,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx = colly.NewContext() colCtx.Put("page", pageStr) - err := col.Request("GET", pageURL+pageStr, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(pageURL+pageStr, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/google/google.go b/src/engines/google/google.go index bd30a838..38ab7820 100644 --- a/src/engines/google/google.go +++ b/src/engines/google/google.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" @@ -25,26 +24,25 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) col.OnHTML(dompaths.Result, func(e *colly.HTMLElement) { dom := e.DOM - linkHref, _ := dom.Find(dompaths.Link).Attr("href") + linkHref, hrefExists := dom.Find(dompaths.Link).Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(dom.Find(dompaths.Title).Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -55,23 +53,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+query, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - 
log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("GET", Info.URL+query+"&start="+strconv.Itoa(i*10), nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&start="+strconv.Itoa(i*10), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/mojeek/mojeek.go b/src/engines/mojeek/mojeek.go index c6670d4f..f3903968 100644 --- a/src/engines/mojeek/mojeek.go +++ b/src/engines/mojeek/mojeek.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" @@ -25,12 +24,12 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -38,14 +37,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi dom := e.DOM titleEl := dom.Find(dompaths.Title) - linkHref, _ := titleEl.Attr("href") + linkHref, hrefExists := titleEl.Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(titleEl.Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -56,23 +54,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+query, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("GET", Info.URL+query+"&s="+strconv.Itoa(i*10+1), nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", 
Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&s="+strconv.Itoa(i*10+1), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/presearch/presearch.go b/src/engines/presearch/presearch.go index 016df247..cbb4cece 100644 --- a/src/engines/presearch/presearch.go +++ b/src/engines/presearch/presearch.go @@ -7,12 +7,12 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -26,22 +26,16 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) safeSearch := getSafeSearch(options.SafeSearch) col.OnRequest(func(r *colly.Request) { - if err := ctx.Err(); err != nil { - log.Error().Msgf("%v: SE Collector; Error OnRequest %v", Info.Name, r) - r.Abort() - retError = err - return - } - r.Headers.Add("Cookie", "use_local_search_results=false") r.Headers.Add("Cookie", "ai_results_disable=1") r.Headers.Add("Cookie", "use_safe_search="+safeSearch) @@ -93,24 +87,14 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx.Put("page", strconv.Itoa(1)) colCtx.Put("isAPI", "false") - err := col.Request("GET", Info.URL+query, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) colCtx.Put("isAPI", "false") - err := col.Request("GET", Info.URL+query+"&page="+strconv.Itoa(i+1), nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&page="+strconv.Itoa(i+1), colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/qwant/qwant.go b/src/engines/qwant/qwant.go index bcb9ca6d..28a2354d 100644 --- a/src/engines/qwant/qwant.go +++ b/src/engines/qwant/qwant.go @@ -7,12 +7,12 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -26,12 +26,12 
@@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) col.OnResponse(func(r *colly.Response) { var pageStr string = r.Ctx.Get("page") @@ -74,12 +74,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx.Put("page", strconv.Itoa(i+1)) reqString := Info.URL + query + "&count=" + strconv.Itoa(nRequested) + "&locale=" + locale + "&offset=" + strconv.Itoa(i*nRequested) + "&device=" + device + "&safesearch=" + safeSearch - err := col.Request("GET", reqString, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(reqString, colCtx, col, Info.Name, &retError) } col.Wait() @@ -117,12 +112,12 @@ col.OnHTML("div[data-testid=\"sectionWeb\"] > div > div", func(e *colly.HTMLElem dom := e.DOM baseDOM := dom.Find("div[data-testid=\"webResult\"] > div > div > div > div > div") hrefElement := baseDOM.Find("a[data-testid=\"serTitle\"]") - linkHref, _ := hrefElement.Attr("href") + linkHref, hrefExists := hrefElement.Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(hrefElement.Text()) descText := strings.TrimSpace(baseDOM.Find("div > span").Text()) - if linkText != "" && linkText != "#" && titleText != "" { + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { var pageStr string = e.Request.Ctx.Get("page") page, _ := strconv.Atoi(pageStr) diff --git a/src/engines/startpage/startpage.go b/src/engines/startpage/startpage.go index 0e837564..f907e2ba 100644 --- a/src/engines/startpage/startpage.go +++ b/src/engines/startpage/startpage.go @@ -6,12 +6,12 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -25,26 +25,25 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) col.OnHTML(dompaths.Result, func(e *colly.HTMLElement) { dom := e.DOM - linkHref, _ := dom.Find(dompaths.Link).Attr("href") + linkHref, 
hrefExists := dom.Find(dompaths.Link).Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(dom.Find(dompaths.Title).Text()) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -67,23 +66,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", strconv.Itoa(1)) - err := col.Request("GET", Info.URL+query+safeSearch, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+safeSearch, colCtx, col, Info.Name, &retError) for i := 1; i < options.MaxPages; i++ { colCtx = colly.NewContext() colCtx.Put("page", strconv.Itoa(i+1)) - err := col.Request("GET", Info.URL+query+"&page="+strconv.Itoa(i+1)+safeSearch, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name) - } + sedefaults.DoGetRequest(Info.URL+query+"&page="+strconv.Itoa(i+1)+safeSearch, colCtx, col, Info.Name, &retError) } col.Wait() diff --git a/src/engines/swisscows/authenticator.go b/src/engines/swisscows/authenticator.go index 2eed5ea5..eaa69664 100644 --- a/src/engines/swisscows/authenticator.go +++ b/src/engines/swisscows/authenticator.go @@ -1,6 +1,7 @@ package swisscows import ( + "fmt" "math/rand" "strings" "time" @@ -58,7 +59,7 @@ func rot13Switch(str string) string { return switchCapitalization(rot13(str)) } -func generateSignature(params string, nonce string) string { +func generateSignature(params string, nonce string) (string, error) { var rot13Nonce string = rot13Switch(nonce) var data string = "/web/search" + params + rot13Nonce @@ -67,14 +68,20 @@ func generateSignature(params string, nonce string) string { encData = strings.ReplaceAll(encData, "+", "-") encData = strings.ReplaceAll(encData, "/", "_") - return string(encData) + //log.Debug().Msgf("Final: %v", encData) + + return string(encData), nil } // returns nonce, signature -func generateAuth(params string) (string, string) { +func generateAuth(params string) (string, string, error) { params = strings.ReplaceAll(params, "+", " ") nonce := generateNonce(32) - auth := generateSignature(params, nonce) - return nonce, auth + auth, err := generateSignature(params, nonce) + if err != nil { + return "", "", fmt.Errorf("generateAuth(): %w", err) + } + + return nonce, auth, nil } diff --git a/src/engines/swisscows/swisscows.go b/src/engines/swisscows/swisscows.go index b560777f..3963915f 100644 --- a/src/engines/swisscows/swisscows.go +++ b/src/engines/swisscows/swisscows.go @@ -3,16 +3,15 @@ package swisscows import ( "context" "encoding/json" - "os" "strconv" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" 
"github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/parse" "github.com/hearchco/hearchco/src/sedefaults" + "github.com/rs/zerolog/log" ) func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error { @@ -26,24 +25,24 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - col.OnRequest(func(r *colly.Request) { - if err := (ctx).Err(); err != nil { - log.Error().Msgf("%v: SE Collector; Error OnRequest %v", Info.Name, r) - r.Abort() - retError = err - return - } + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) + col.OnRequest(func(r *colly.Request) { if r.Method == "OPTIONS" { return } var qry string = "?" + r.URL.RawQuery - nonce, sig := generateAuth(qry) + nonce, sig, err := generateAuth(qry) + if err != nil { + log.Error().Err(err).Msgf("swisscows.Search() -> col.OnRequest: failed building request: failed generating auth") + return + } //log.Debug().Msgf("qry: %v\nnonce: %v\nsignature: %v", qry, nonce, sig) @@ -52,18 +51,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi r.Headers.Set("Pragma", "no-cache") }) - col.OnError(func(r *colly.Response, err error) { - log.Error().Err(err).Msgf("%v: SE Collector - OnError.\nMethod: %v\nURL: %v", Info.Name, r.Request.Method, r.Request.URL.String()) - log.Error().Msgf("%v: HTML Response written to %v%v_col.log.html", Info.Name, config.LogDumpLocation, Info.Name) - writeErr := os.WriteFile(config.LogDumpLocation+string(Info.Name)+"_col.log.html", r.Body, 0644) - if writeErr != nil { - log.Error().Err(writeErr) - } - retError = err - }) - col.OnResponse(func(r *colly.Response) { - log.Trace().Msgf("URL: %v\nNonce: %v\nSig: %v", r.Request.URL.String(), r.Request.Headers.Get("X-Request-Nonce"), r.Request.Headers.Get("X-Request-Signature")) + log.Trace().Msgf("swisscows.Search() -> col.OnResponse(): url: %v | nonce: %v | signature: %v", r.Request.URL.String(), r.Request.Headers.Get("X-Request-Nonce"), r.Request.Headers.Get("X-Request-Signature")) var pageStr string = r.Ctx.Get("page") page, _ := strconv.Atoi(pageStr) @@ -71,7 +60,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi var parsedResponse SCResponse err := json.Unmarshal(r.Body, &parsedResponse) if err != nil { - log.Error().Err(err).Msgf("%v: Failed body unmarshall to json:\n%v", Info.Name, string(r.Body)) + log.Error().Err(err).Msgf("swissco Failed body unmarshall to json:\n%v", string(r.Body)) + return } counter := 1 @@ -96,12 +86,8 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi //col.Request("OPTIONS", seAPIURL+"freshness=All&itemsCount="+strconv.Itoa(sResCount)+"&offset="+strconv.Itoa(i*10)+"&query="+query+"®ion="+locale, nil, colCtx, nil) //col.Wait() - err := col.Request("GET", Info.URL+"freshness=All&itemsCount="+strconv.Itoa(settings.RequestedResultsPerPage)+"&offset="+strconv.Itoa(i*10)+"&query="+query+"®ion="+locale, nil, colCtx, nil) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name) - } else if err != nil { - log.Error().Err(err).Msgf("%v: failed 
requesting with GET method", Info.Name) - } + reqURL := Info.URL + "freshness=All&itemsCount=" + strconv.Itoa(settings.RequestedResultsPerPage) + "&offset=" + strconv.Itoa(i*10) + "&query=" + query + "®ion=" + locale + sedefaults.DoGetRequest(reqURL, colCtx, col, Info.Name, &retError) } col.Wait() @@ -119,12 +105,12 @@ var pageRankCounter []int = make([]int, options.MaxPages*Info.ResPerPage) col.OnHTML("div.web-results > article.item-web", func(e *colly.HTMLElement) { dom := e.DOM - linkHref, _ := dom.Find("a.site").Attr("href") + linkHref, hrefExists := dom.Find("a.site").Attr("href") linkText := parse.ParseURL(linkHref) titleText := strings.TrimSpace(dom.Find("h2.title").Text()) descText := strings.TrimSpace(dom.Find("p.description").Text()) - if linkText != "" && linkText != "#" && titleText != "" { + if hrefExists && linkText != "" && linkText != "#" && titleText != "" { var pageStr string = e.Request.Ctx.Get("page") page, _ := strconv.Atoi(pageStr) diff --git a/src/engines/timeout.go b/src/engines/timeout.go index 8143b67c..3ccfee77 100644 --- a/src/engines/timeout.go +++ b/src/engines/timeout.go @@ -1,8 +1,12 @@ package engines -import "net" +import ( + "net" +) func IsTimeoutError(err error) bool { - e, ok := err.(net.Error) - return ok && e.Timeout() + if perr, ok := err.(net.Error); ok && perr.Timeout() { + return true + } + return false } diff --git a/src/engines/yahoo/yahoo.go b/src/engines/yahoo/yahoo.go index b2198a26..1d6e5562 100644 --- a/src/engines/yahoo/yahoo.go +++ b/src/engines/yahoo/yahoo.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" @@ -25,12 +24,12 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings) - sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError) + sedefaults.PagesColRequest(Info.Name, pagesCol, ctx) sedefaults.PagesColError(Info.Name, pagesCol) sedefaults.PagesColResponse(Info.Name, pagesCol, relay) - sedefaults.ColRequest(Info.Name, col, &ctx, &retError) - sedefaults.ColError(Info.Name, col, &retError) + sedefaults.ColRequest(Info.Name, col, ctx) + sedefaults.ColError(Info.Name, col) var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage) @@ -38,16 +37,15 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi dom := e.DOM titleEl := dom.Find(dompaths.Title) - linkHref, _ := titleEl.Attr("href") + linkHref, hrefExists := titleEl.Attr("href") linkText := parse.ParseURL(linkHref) linkText = removeTelemetry(linkText) - titleAria, _ := titleEl.Attr("aria-label") + titleAria, labelExists := titleEl.Attr("aria-label") titleText := strings.TrimSpace(titleAria) descText := strings.TrimSpace(dom.Find(dompaths.Description).Text()) - if linkText != "" && linkText != "#" && titleText != "" { - var pageStr string = e.Request.Ctx.Get("page") - page, _ := strconv.Atoi(pageStr) + if labelExists && hrefExists && linkText != "" && linkText != "#" && titleText != "" { + page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name) res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1) bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol) @@ -58,23 +56,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi colCtx := colly.NewContext() colCtx.Put("page", 
diff --git a/src/engines/yahoo/yahoo.go b/src/engines/yahoo/yahoo.go
index b2198a26..1d6e5562 100644
--- a/src/engines/yahoo/yahoo.go
+++ b/src/engines/yahoo/yahoo.go
@@ -6,7 +6,6 @@ import (
 	"strings"
 
 	"github.com/gocolly/colly/v2"
-	"github.com/rs/zerolog/log"
 	"github.com/hearchco/hearchco/src/bucket"
 	"github.com/hearchco/hearchco/src/config"
 	"github.com/hearchco/hearchco/src/engines"
@@ -25,12 +24,12 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
 	sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
 
-	sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError)
+	sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
 	sedefaults.PagesColError(Info.Name, pagesCol)
 	sedefaults.PagesColResponse(Info.Name, pagesCol, relay)
 
-	sedefaults.ColRequest(Info.Name, col, &ctx, &retError)
-	sedefaults.ColError(Info.Name, col, &retError)
+	sedefaults.ColRequest(Info.Name, col, ctx)
+	sedefaults.ColError(Info.Name, col)
 
 	var pageRankCounter []int = make([]int, options.MaxPages*Info.ResultsPerPage)
 
@@ -38,16 +37,15 @@
 		dom := e.DOM
 		titleEl := dom.Find(dompaths.Title)
-		linkHref, _ := titleEl.Attr("href")
+		linkHref, hrefExists := titleEl.Attr("href")
 		linkText := parse.ParseURL(linkHref)
 		linkText = removeTelemetry(linkText)
-		titleAria, _ := titleEl.Attr("aria-label")
+		titleAria, labelExists := titleEl.Attr("aria-label")
 		titleText := strings.TrimSpace(titleAria)
 		descText := strings.TrimSpace(dom.Find(dompaths.Description).Text())
 
-		if linkText != "" && linkText != "#" && titleText != "" {
-			var pageStr string = e.Request.Ctx.Get("page")
-			page, _ := strconv.Atoi(pageStr)
+		if labelExists && hrefExists && linkText != "" && linkText != "#" && titleText != "" {
+			page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name)
 
 			res := bucket.MakeSEResult(linkText, titleText, descText, Info.Name, page, pageRankCounter[page]+1)
 			bucket.AddSEResult(res, Info.Name, relay, &options, pagesCol)
@@ -58,23 +56,13 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
 	colCtx := colly.NewContext()
 	colCtx.Put("page", strconv.Itoa(1))
 
-	err := col.Request("GET", Info.URL+query, nil, colCtx, nil)
-	if engines.IsTimeoutError(err) {
-		log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name)
-	} else if err != nil {
-		log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name)
-	}
+	sedefaults.DoGetRequest(Info.URL+query, colCtx, col, Info.Name, &retError)
 
 	for i := 1; i < options.MaxPages; i++ {
 		colCtx = colly.NewContext()
 		colCtx.Put("page", strconv.Itoa(i+1))
 
-		err := col.Request("GET", Info.URL+query+"&b="+strconv.Itoa((i+1)*10), nil, colCtx, nil)
-		if engines.IsTimeoutError(err) {
-			log.Trace().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name)
-		} else if err != nil {
-			log.Error().Err(err).Msgf("%v: failed requesting with GET method on page", Info.Name)
-		}
+		sedefaults.DoGetRequest(Info.URL+query+"&b="+strconv.Itoa((i+1)*10), colCtx, col, Info.Name, &retError)
 	}
 
 	col.Wait()
diff --git a/src/engines/yep/yep.go b/src/engines/yep/yep.go
index e5578f55..e6640221 100644
--- a/src/engines/yep/yep.go
+++ b/src/engines/yep/yep.go
@@ -6,7 +6,6 @@ import (
 	"strings"
 
 	"github.com/gocolly/colly/v2"
-	"github.com/rs/zerolog/log"
 	"github.com/hearchco/hearchco/src/bucket"
 	"github.com/hearchco/hearchco/src/config"
 	"github.com/hearchco/hearchco/src/engines"
@@ -25,12 +24,12 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
 	sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)
 
-	sedefaults.PagesColRequest(Info.Name, pagesCol, ctx, &retError)
+	sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
 	sedefaults.PagesColError(Info.Name, pagesCol)
 	sedefaults.PagesColResponse(Info.Name, pagesCol, relay)
 
-	sedefaults.ColRequest(Info.Name, col, &ctx, &retError)
-	sedefaults.ColError(Info.Name, col, &retError)
+	sedefaults.ColRequest(Info.Name, col, ctx)
+	sedefaults.ColError(Info.Name, col)
 
 	col.OnRequest(func(r *colly.Request) {
 		r.Headers.Del("Accept")
@@ -66,12 +65,7 @@ func Search(ctx context.Context, query string, relay *bucket.Relay, options engi
 		apiURL = Info.URL + "client=web&gl=" + locale + "&limit=" + strconv.Itoa(nRequested) + "&no_correct=false&q=" + query + "&safeSearch=" + safeSearch + "&type=web"
 	}
 
-	err := col.Request("GET", apiURL, nil, nil, nil)
-	if engines.IsTimeoutError(err) {
-		log.Trace().Err(err).Msgf("%v: failed requesting with GET method", Info.Name)
-	} else if err != nil {
-		log.Error().Err(err).Msgf("%v: failed requesting with GET method", Info.Name)
-	}
+	sedefaults.DoGetRequest(apiURL, nil, col, Info.Name, &retError)
 
 	col.Wait()
 	pagesCol.Wait()
diff --git a/src/main.go b/src/main.go
index 69995cd3..71deeee0 100644
--- a/src/main.go
+++ b/src/main.go
@@ -7,7 +7,6 @@ import (
 	"syscall"
 	"time"
 
-	"github.com/rs/zerolog/log"
 	"github.com/hearchco/hearchco/src/cache"
 	"github.com/hearchco/hearchco/src/cache/nocache"
 	"github.com/hearchco/hearchco/src/cache/pebble"
@@ -16,6 +15,7 @@ import (
 	"github.com/hearchco/hearchco/src/config"
 	"github.com/hearchco/hearchco/src/logger"
 	"github.com/hearchco/hearchco/src/router"
+	"github.com/rs/zerolog/log"
 )
 
 func main() {
@@ -54,7 +54,8 @@ func main() {
 		cli.Run(cliFlags, db, conf)
 	} else {
 		if rw, err := router.New(conf, cliFlags.Verbosity); err != nil {
-			log.Error().Err(err).Msg("Failed creating a router")
+			log.Fatal().Err(err).Msg("main.main(): failed creating a router")
+			// ^FATAL
 		} else {
 			rw.Start(ctx, db, cliFlags.ServeProfiler)
 		}
diff --git a/src/profiling.go b/src/profiling.go
index debaefb1..97bbf0fa 100644
--- a/src/profiling.go
+++ b/src/profiling.go
@@ -3,8 +3,8 @@ package main
 import (
 	"log"
 
-	"github.com/pkg/profile"
 	"github.com/hearchco/hearchco/src/cli"
+	"github.com/pkg/profile"
 )
 
 type profiler struct {
@@ -54,8 +54,8 @@ func runProfiler(cliFlags *cli.Flags) (bool, func()) {
 	profilerToRun := profiler{enabled: false}
 	for _, p := range profilers {
 		if profilerToRun.enabled && p.enabled {
-			log.Fatal("Only one profiler can be run at a time.")
-			return false, func() {}
+			log.Fatal("main.runProfiler(): only one profiler can be run at a time.")
+			// ^FATAL
 		} else if p.enabled {
 			profilerToRun = p
 		}
diff --git a/src/rank/sorting.go b/src/rank/sorting.go
index 7e71f70b..a0f9c44d 100644
--- a/src/rank/sorting.go
+++ b/src/rank/sorting.go
@@ -23,6 +23,6 @@ func (r ByRetrievedRank) Less(i, j int) bool {
 		return r[i].RetRank.OnPageRank < r[j].RetRank.OnPageRank
 	}
-	log.Error().Msgf("failed at ranking: %v, %v", r[i], r[j])
+	log.Error().Msgf("rank.(r ByRetrievedRank)Less(): failed at ranking: %v, %v", r[i], r[j])
 	return true
 }
diff --git a/src/router/router.go b/src/router/router.go
index 5d7debe2..682186db 100644
--- a/src/router/router.go
+++ b/src/router/router.go
@@ -41,7 +41,7 @@ func (rw *RouterWrapper) addCors() {
 func (rw *RouterWrapper) runWithContext(ctx context.Context) {
 	if err := rw.router.RunWithContext(ctx); err != context.Canceled {
-		log.Error().Err(err).Msg("Failed starting router")
+		log.Error().Err(err).Msg("router.runWithContext(): failed starting router")
 	} else if err != nil {
 		log.Info().Msg("Stopping router...")
 		rw.router.Close()
@@ -58,10 +58,16 @@ func (rw *RouterWrapper) Start(ctx context.Context, db cache.DB, serveProfiler b
 	// search
 	rw.router.GET("/search", func(c *gin.Context) {
-		Search(c, rw.config, db)
+		err := Search(c, rw.config, db)
+		if err != nil {
+			log.Error().Err(err).Msgf("router.Start() (.GET): failed search")
+		}
 	})
 	rw.router.POST("/search", func(c *gin.Context) {
-		Search(c, rw.config, db)
+		err := Search(c, rw.config, db)
+		if err != nil {
+			log.Error().Err(err).Msgf("router.Start() (.POST): failed search")
+		}
 	})
 
 	if serveProfiler {
diff --git a/src/router/search.go b/src/router/search.go
index b5f90f15..4cb1b639 100644
--- a/src/router/search.go
+++ b/src/router/search.go
@@ -1,6 +1,7 @@
 package router
 
 import (
+	"fmt"
 	"net/http"
 	"strconv"
@@ -15,7 +16,7 @@ import (
 	"github.com/hearchco/hearchco/src/search"
 )
 
-func Search(c *gin.Context, config *config.Config, db cache.DB) {
+func Search(c *gin.Context, config *config.Config, db cache.DB) error {
 	var query, pages, deepSearch string
 
 	switch c.Request.Method {
@@ -38,7 +39,7 @@ func Search(c *gin.Context, config *config.Config, db cache.DB) {
 	} else {
 		maxPages, err := strconv.Atoi(pages)
 		if err != nil {
-			log.Error().Err(err).Msgf("cannot convert \"%v\" to int, reverting to default value of 1", pages)
+			log.Debug().Err(err).Msgf("router.Search(): cannot convert \"%v\" to int, reverting to default value of 1", pages)
 			maxPages = 1
 		}
@@ -54,21 +55,34 @@ func Search(c *gin.Context, config *config.Config, db cache.DB) {
 	}
 
 	var results []result.Result
-	db.Get(query, &results)
-	if results != nil {
+	gerr := db.Get(query, &results)
+	if gerr != nil {
error: %w", query, gerr) + } + foundInDB := results != nil + + if foundInDB { log.Debug().Msgf("Found results for query (%v) in cache", query) } else { log.Debug().Msg("Nothing found in cache, doing a clean search") + results = search.PerformSearch(query, options, config) - defer db.Set(query, results) } resultsShort := result.Shorten(results) if resultsJson, err := json.Marshal(resultsShort); err != nil { - log.Error().Err(err).Msg("failed marshalling results") c.String(http.StatusInternalServerError, "") + return fmt.Errorf("router.Search(): failed marshalling results: %v\n with error: %w", resultsShort, err) } else { c.String(http.StatusOK, string(resultsJson)) } + + if !foundInDB { + serr := db.Set(query, results) + if serr != nil { + log.Error().Err(serr).Msgf("router.Search(): error updating database with search results") + } + } } + return nil } diff --git a/src/search/parse/parse.go b/src/search/parse/parse.go index 795d6d40..fb62ac2a 100644 --- a/src/search/parse/parse.go +++ b/src/search/parse/parse.go @@ -1,6 +1,7 @@ package parse import ( + "fmt" "net/url" "strings" @@ -10,37 +11,52 @@ import ( ) func ParseURL(rawURL string) string { + urll, err := parseURL(rawURL) + if err != nil { + log.Error().Err(err).Msgf("parse.ParseURL(): couldn't parse url(%v)", urll) + return rawURL + } + return urll +} + +func parseURL(rawURL string) (string, error) { // rawURL may be empty string, function should return empty string then. rawURL = strings.TrimSpace(rawURL) rawURL, unescErr := url.QueryUnescape(rawURL) // if the url was part of a telemetry link, this will help. if unescErr != nil { - log.Error().Err(unescErr).Msgf("Couldn't unescape URL: %v", rawURL) - return rawURL + return "", fmt.Errorf("parse.parseURL(): failed url.QueryUnescape() on url(%v). error: %w", rawURL, unescErr) } - parsedURL, err := url.Parse(rawURL) - if err != nil { - log.Error().Err(err).Msgf("Couldn't parse URL: %v", rawURL) - return rawURL + parsedURL, parseErr := url.Parse(rawURL) + if parseErr != nil { + return "", fmt.Errorf("parse.parseURL(): failed url.Parse() on url(%v). error: %w", rawURL, parseErr) } urlString := parsedURL.String() if len(urlString) != 0 && len(parsedURL.Path) == 0 { // https://example.org -> https://example.org/ urlString += "/" } - return urlString + return urlString, nil } func ParseTextWithHTML(rawHTML string) string { + text, err := parseTextWithHTML(rawHTML) + if err != nil { + log.Error().Err(err).Msgf("parse.ParseTextWithHTML(): failed parsing text with html(%v)", rawHTML) + return rawHTML + } + return text +} + +func parseTextWithHTML(rawHTML string) (string, error) { var result string = "" htmlNode, perr := html.ParseFragment(strings.NewReader(rawHTML), nil) if perr != nil { - log.Error().Err(perr).Msgf("Couldn't utility.ParseTextWithHTML: %v", rawHTML) - return "" + return "", fmt.Errorf("parse.parseTextWithHTML(): failed html.ParseFragment on %v. 
error: %w", rawHTML, perr) } for _, el := range htmlNode { sel := goquery.NewDocumentFromNode(el) result += sel.Text() } - return result + return result, nil } diff --git a/src/search/search.go b/src/search/search.go index fb58c42d..516c5a9d 100644 --- a/src/search/search.go +++ b/src/search/search.go @@ -6,14 +6,14 @@ import ( "strings" "time" - "github.com/rs/zerolog/log" - "github.com/sourcegraph/conc" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/bucket/result" "github.com/hearchco/hearchco/src/category" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/rank" + "github.com/rs/zerolog/log" + "github.com/sourcegraph/conc" ) func PerformSearch(query string, options engines.Options, conf *config.Config) []result.Result { @@ -57,11 +57,10 @@ func runEngines(engs []engines.Name, timings config.Timings, settings map[engine for i := range engs { eng := engs[i] // dont change for to `for _, eng := range engs {`, eng retains the same address throughout the whole loop worker.Go(func() { + // if an error can be handled inside, it wont be returned err := engineStarter[eng](context.Background(), query, relay, options, settings[eng], timings) - if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("failed searching %v", eng) - } else if err != nil { - log.Error().Err(err).Msgf("failed searching %v", eng) + if err != nil { + log.Error().Err(err).Msgf("search.runEngines(): error while searching %v", eng) } }) } @@ -71,7 +70,8 @@ func procBang(query *string, options *engines.Options, conf *config.Config) (con useSpec, specEng := procSpecificEngine(*query, options, conf) goodCat := procCategory(*query, options) if !goodCat && !useSpec && (*query)[0] == '!' { - log.Error().Msgf("invalid bang (not category or engine shortcut). query: %v", *query) + // options.category is set to GENERAL + log.Debug().Msgf("search.procBang(): invalid bang (not category or engine shortcut). 
query: %v", *query) } trimBang(query) diff --git a/src/sedefaults/sedefaults.go b/src/sedefaults/sedefaults.go index 57235942..88e44358 100644 --- a/src/sedefaults/sedefaults.go +++ b/src/sedefaults/sedefaults.go @@ -2,26 +2,28 @@ package sedefaults import ( "context" + "fmt" + "io" "os" + "strconv" "github.com/gocolly/colly/v2" - "github.com/rs/zerolog/log" "github.com/hearchco/hearchco/src/bucket" "github.com/hearchco/hearchco/src/config" "github.com/hearchco/hearchco/src/engines" "github.com/hearchco/hearchco/src/search/useragent" + "github.com/rs/zerolog/log" ) -func PagesColRequest(seName engines.Name, pagesCol *colly.Collector, ctx context.Context, retError *error) { +func PagesColRequest(seName engines.Name, pagesCol *colly.Collector, ctx context.Context) { pagesCol.OnRequest(func(r *colly.Request) { if err := ctx.Err(); err != nil { if engines.IsTimeoutError(err) { - log.Trace().Msgf("%v: Pages Collector; Error OnRequest %v", seName, r) + log.Trace().Err(err).Msgf("sedefaults.PagesColRequest() from %v -> pagesCol.OnRequest(): context timeout error", seName) } else { - log.Error().Msgf("%v: Pages Collector; Error OnRequest %v", seName, r) + log.Error().Err(err).Msgf("sedefaults.PagesColRequest() from %v -> pagesCol.OnRequest(): context error", seName) } r.Abort() - *retError = err return } r.Ctx.Put("originalURL", r.URL.String()) @@ -30,10 +32,11 @@ func PagesColRequest(seName engines.Name, pagesCol *colly.Collector, ctx context func PagesColError(seName engines.Name, pagesCol *colly.Collector) { pagesCol.OnError(func(r *colly.Response, err error) { + urll := r.Ctx.Get("originalURL") if engines.IsTimeoutError(err) { - log.Trace().Err(err).Msgf("%v: Pages Collector - OnError.\nURL: %v", seName, r.Ctx.Get("originalURL")) + log.Trace().Err(err).Msgf("sedefaults.PagesColError() from %v -> pagesCol.OnError(): request timeout error for %v", seName, urll) } else { - log.Error().Err(err).Msgf("%v: Pages Collector - OnError.\nURL: %v", seName, r.Ctx.Get("originalURL")) + log.Trace().Err(err).Msgf("sedefaults.PagesColError() from %v -> pagesCol.OnError(): request error for %v\nresponse: %v", seName, urll, r) } }) } @@ -41,55 +44,58 @@ func PagesColError(seName engines.Name, pagesCol *colly.Collector) { func PagesColResponse(seName engines.Name, pagesCol *colly.Collector, relay *bucket.Relay) { pagesCol.OnResponse(func(r *colly.Response) { urll := r.Ctx.Get("originalURL") - bucket.SetResultResponse(urll, r, relay, seName) + err := bucket.SetResultResponse(urll, r, relay, seName) + if err != nil { + log.Error().Err(err).Msg("sedefaults.PagesColResponse(): error setting result") + } }) } -func ColRequest(seName engines.Name, col *colly.Collector, ctx *context.Context, retError *error) { +func ColRequest(seName engines.Name, col *colly.Collector, ctx context.Context) { col.OnRequest(func(r *colly.Request) { - if err := (*ctx).Err(); err != nil { + if err := ctx.Err(); err != nil { if engines.IsTimeoutError(err) { - log.Trace().Msgf("%v: SE Collector; Error OnRequest %v", seName, r) + log.Trace().Err(err).Msgf("sedefaults.ColRequest() from %v -> col.OnRequest(): context timeout error", seName) } else { - log.Error().Msgf("%v: SE Collector; Error OnRequest %v", seName, r) + log.Error().Err(err).Msgf("sedefaults.ColRequest() from %v -> col.OnRequest(): context error", seName) } r.Abort() - *retError = err return } }) } -func ColError(seName engines.Name, col *colly.Collector, retError *error) { +func ColError(seName engines.Name, col *colly.Collector) { col.OnError(func(r *colly.Response, err error) 
 	col.OnError(func(r *colly.Response, err error) {
+		urll := r.Request.URL.String()
 		if engines.IsTimeoutError(err) {
-			log.Trace().Err(err).Msgf("%v: SE Collector - OnError.\nURL: %v", seName, r.Request.URL.String())
+			log.Trace().Err(err).Msgf("sedefaults.ColError() from %v -> col.OnError(): request timeout error for %v", seName, urll)
 		} else {
-			log.Error().Err(err).Msgf("%v: SE Collector - OnError.\nURL: %v", seName, r.Request.URL.String())
-			log.Debug().Msgf("%v: HTML Response written to %v%v_col.log.html", seName, config.LogDumpLocation, seName)
-			writeErr := os.WriteFile(config.LogDumpLocation+string(seName)+"_col.log.html", r.Body, 0644)
-			if writeErr != nil {
-				log.Error().Err(writeErr)
+			log.Error().Err(err).Msgf("sedefaults.ColError() from %v -> col.OnError(): request error for %v\nresponse(%v): %v", seName, urll, r.StatusCode, string(r.Body))
+			log.Debug().Msgf("sedefaults.ColError() from %v -> col.OnError(): html response written to %v%v_col.log.html", seName, config.LogDumpLocation, seName)
+
+			bodyWriteErr := os.WriteFile(config.LogDumpLocation+seName.String()+"_col.log.html", r.Body, 0644)
+			if bodyWriteErr != nil {
+				log.Error().Err(bodyWriteErr).Msgf("sedefaults.ColError() from %v -> col.OnError(): error writing html response body to file", seName)
 			}
 		}
-		*retError = err
 	})
 }
 
 func Prepare(seName engines.Name, options *engines.Options, settings *config.Settings, support *engines.SupportedSettings, info *engines.Info, ctx *context.Context) error {
 	if ctx == nil {
 		*ctx = context.Background()
-	} //^ not necessary as ctx is always passed in search.go, branch predictor will skip this if
+	}
 
 	if options.UserAgent == "" {
 		options.UserAgent = useragent.RandomUserAgent()
 	}
 	log.Trace().Msgf("%v: UserAgent: %v", seName, options.UserAgent)
 
-	// These two ifs, could be moved to config.SetupConfig
+	// TODO: move to config.SetupConfig
 	if settings.RequestedResultsPerPage != 0 && !support.RequestedResultsPerPage {
-		log.Error().Msgf("%v: Variable settings.RequestedResultsPerPage is set, but not supported in this search engine. Its value is: %v", seName, settings.RequestedResultsPerPage)
-		panic("sedefaults.Prepare(): Setting not supported.")
+		log.Panic().Msgf("sedefaults.Prepare() from %v: setting not supported. variable settings.RequestedResultsPerPage is set in the config for %v. that setting is not supported for this search engine. the settings value is: %v", seName, seName, settings.RequestedResultsPerPage)
+		// ^PANIC
 	}
 
 	if settings.RequestedResultsPerPage == 0 && support.RequestedResultsPerPage {
 		// If its used in the code but not set, give it the default value.
@@ -116,11 +122,7 @@ func Prepare(seName engines.Name, options *engines.Options, settings *config.Set
 }
 
 func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collector, options *engines.Options, timings *config.Timings) {
-	if options.MaxPages == 1 {
-		*colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent)) // so there is no thread creation overhead
-	} else {
-		*colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async())
-	}
+	*colPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async())
 	*pagesColPtr = colly.NewCollector(colly.MaxDepth(1), colly.UserAgent(options.UserAgent), colly.Async())
 
 	if timings != nil {
@@ -132,7 +134,7 @@ func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collecto
 		}
 
 		if err := (*colPtr).Limit(limitRule); err != nil {
-			log.Error().Err(err).Msg("sedefaults: failed adding a new limit rule")
+			log.Error().Err(err).Msgf("sedefaults.InitializeCollectors(): failed adding new limit rule: %v", limitRule)
 		}
 		if timings.Timeout != 0 {
 			(*colPtr).SetRequestTimeout(timings.Timeout)
@@ -142,3 +144,27 @@ func InitializeCollectors(colPtr **colly.Collector, pagesColPtr **colly.Collecto
 		}
 	}
 }
+
+func DoGetRequest(urll string, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) {
+	err := collector.Request("GET", urll, nil, colCtx, nil)
+	if err != nil {
+		*retError = fmt.Errorf("%v.Search(): failed GET request to %v with %w", packageName.ToLower(), urll, err)
+	}
+}
+
+func DoPostRequest(urll string, requestData io.Reader, colCtx *colly.Context, collector *colly.Collector, packageName engines.Name, retError *error) {
+	err := collector.Request("POST", urll, requestData, colCtx, nil)
+	if err != nil {
+		*retError = fmt.Errorf("%v.Search(): failed POST request to %v and body %v. error %w", packageName.ToLower(), urll, requestData, err)
+	}
+}
+
+func PageFromContext(ctx *colly.Context, seName engines.Name) int {
+	var pageStr string = ctx.Get("page")
+	page, converr := strconv.Atoi(pageStr)
+	if converr != nil {
+		log.Panic().Err(converr).Msgf("sedefaults.PageFromContext from %v: failed to convert page number to int. pageStr: %v", seName, pageStr)
+		// ^PANIC
+	}
+	return page
+}
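// Illustrative sketch, not part of the patch: how an engine's Search() is expected to wire
// the new sedefaults helpers introduced above, mirroring the yahoo.go and yep.go hunks.
// The package name, the Info placeholder and the URL below are hypothetical; only the
// sedefaults, engines, bucket and config identifiers are taken from the diff itself.
package example

import (
	"context"
	"strconv"

	"github.com/gocolly/colly/v2"
	"github.com/hearchco/hearchco/src/bucket"
	"github.com/hearchco/hearchco/src/config"
	"github.com/hearchco/hearchco/src/engines"
	"github.com/hearchco/hearchco/src/sedefaults"
)

// Placeholder for the per-engine metadata every engine package defines.
var Info = engines.Info{URL: "https://search.example.invalid/?q="}

func Search(ctx context.Context, query string, relay *bucket.Relay, options engines.Options, settings config.Settings, timings config.Timings) error {
	// retError is only written by DoGetRequest/DoPostRequest when a request cannot be
	// issued at all; handler-level failures are logged inside sedefaults instead.
	var retError error

	var col *colly.Collector
	var pagesCol *colly.Collector
	sedefaults.InitializeCollectors(&col, &pagesCol, &options, &timings)

	// The collector callbacks no longer take &ctx or &retError.
	sedefaults.PagesColRequest(Info.Name, pagesCol, ctx)
	sedefaults.PagesColError(Info.Name, pagesCol)
	sedefaults.PagesColResponse(Info.Name, pagesCol, relay)
	sedefaults.ColRequest(Info.Name, col, ctx)
	sedefaults.ColError(Info.Name, col)

	col.OnHTML("div.result", func(e *colly.HTMLElement) {
		// PageFromContext replaces the manual strconv.Atoi(e.Request.Ctx.Get("page")) pattern.
		page := sedefaults.PageFromContext(e.Request.Ctx, Info.Name)
		_ = page // result extraction omitted in this sketch
	})

	for i := 0; i < options.MaxPages; i++ {
		colCtx := colly.NewContext()
		colCtx.Put("page", strconv.Itoa(i+1))
		sedefaults.DoGetRequest(Info.URL+query+"&page="+strconv.Itoa(i+1), colCtx, col, Info.Name, &retError)
	}

	col.Wait()
	pagesCol.Wait()

	return retError
}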