Allow for paging through more results than the limit set by index.max_result_window
#67
Used this script to bulk load 15,000 occurrences and try to page through them:

bulk.go:

```go
package main

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"time"

	"github.com/brianvoe/gofakeit/v6"
	"github.com/grafeas/grafeas/proto/v1beta1/build_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/common_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/grafeas_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/provenance_go_proto"
	"github.com/grafeas/grafeas/proto/v1beta1/source_go_proto"
	"google.golang.org/grpc"
)

const (
	chunkSize           = 1000
	numberOfOccurrences = 15000
	project             = "rode"
	grafeasUrl          = "localhost:8080"
)

var (
	fake = gofakeit.New(0)
)

func main() {
	conn, client := createGrafeasClient()
	defer conn.Close()

	createProject()
	log.Println("created project")

	loadOccurrences(client)
	log.Println("loaded occurrences")

	pageThroughOccurrences(client)
}

// createProject creates the test project via the Grafeas REST API.
func createProject() {
	client := http.Client{
		Timeout: time.Minute,
	}
	projectPayload := map[string]string{
		"name": "projects/" + project,
	}
	response, err := client.Post(fmt.Sprintf("%s/v1beta1/projects", "http://"+grafeasUrl), "application/json", jsonBody(&projectPayload))
	if err != nil {
		log.Fatal("error creating project", err)
	}
	if response.StatusCode != http.StatusOK {
		log.Fatal("unexpected response creating project", response.StatusCode)
	}
}

func createGrafeasClient() (*grpc.ClientConn, grafeas_go_proto.GrafeasV1Beta1Client) {
	connection, err := grpc.DialContext(context.Background(), grafeasUrl, grpc.WithInsecure(), grpc.WithBlock())
	if err != nil {
		log.Fatal("error creating grafeas client", err)
	}
	grafeasClient := grafeas_go_proto.NewGrafeasV1Beta1Client(connection)

	return connection, grafeasClient
}

// loadOccurrences generates numberOfOccurrences random build occurrences and
// creates them in batches of chunkSize.
func loadOccurrences(client grafeas_go_proto.GrafeasV1Beta1Client) {
	occurrences := make([]*grafeas_go_proto.Occurrence, numberOfOccurrences)
	for i := 0; i < len(occurrences); i++ {
		occurrences[i] = createRandomBuildOccurrence()
	}

	var occurrenceChunks [][]*grafeas_go_proto.Occurrence
	for i := 0; i < len(occurrences); i += chunkSize {
		end := i + chunkSize
		if end > len(occurrences) {
			end = len(occurrences)
		}
		occurrenceChunks = append(occurrenceChunks, occurrences[i:end])
	}

	for i := range occurrenceChunks {
		o := occurrenceChunks[i]
		_, err := client.BatchCreateOccurrences(context.Background(), &grafeas_go_proto.BatchCreateOccurrencesRequest{
			Parent:      "projects/" + project,
			Occurrences: o,
		})
		if err != nil {
			log.Fatal("error batch creating occurrences", err)
		}
	}
}

// pageThroughOccurrences lists occurrences 1000 at a time, following the
// next page token until an empty page comes back.
func pageThroughOccurrences(client grafeas_go_proto.GrafeasV1Beta1Client) {
	currentPage := 1
	pageToken := ""
	for {
		log.Println("requesting page", currentPage)
		request := &grafeas_go_proto.ListOccurrencesRequest{
			Parent:    "projects/" + project,
			Filter:    "",
			PageSize:  1000,
			PageToken: pageToken,
		}
		response, err := client.ListOccurrences(context.Background(), request)
		if err != nil {
			log.Fatal("failed to list occurrences", err)
		}
		currentPage++
		pageToken = response.NextPageToken
		log.Printf("got %d occurrences\n", len(response.Occurrences))

		if len(response.Occurrences) == 0 {
			log.Println("reached the end of the result set")
			break
		}
	}
}

func createRandomBuildOccurrence() *grafeas_go_proto.Occurrence {
	return &grafeas_go_proto.Occurrence{
		Name: fake.Name(),
		Resource: &grafeas_go_proto.Resource{
			Uri: fake.URL(),
		},
		NoteName:    fmt.Sprintf("projects/%s/notes/%s", project, fake.UUID()),
		Kind:        common_go_proto.NoteKind_BUILD,
		Remediation: "",
		CreateTime:  nil,
		UpdateTime:  nil,
		Details: &grafeas_go_proto.Occurrence_Build{
			Build: &build_go_proto.Details{
				Provenance: &provenance_go_proto.BuildProvenance{
					Id:        fake.UUID(),
					ProjectId: "projects/rode",
					Commands:  nil,
					BuiltArtifacts: []*provenance_go_proto.Artifact{
						{
							Checksum: fake.LetterN(5),
							Id:       fake.UUID(),
							Names: []string{
								fake.URL(),
								fake.URL(),
							},
						},
					},
					SourceProvenance: &provenance_go_proto.Source{
						ArtifactStorageSourceUri: fake.URL(),
						Context: &source_go_proto.SourceContext{
							Context: &source_go_proto.SourceContext_Git{
								Git: &source_go_proto.GitSourceContext{
									Url:        fake.URL(),
									RevisionId: fake.LetterN(7),
								},
							},
							Labels: nil,
						},
					},
				},
			},
		},
	}
}

func jsonBody(val interface{}) io.Reader {
	jsonBytes, err := json.Marshal(val)
	if err != nil {
		log.Fatal("serialization error", err)
	}
	return bytes.NewReader(jsonBytes)
}
```

Output:

```
$ go run bulk.go
WARNING: Package "github.com/golang/protobuf/protoc-gen-go/generator" is deprecated.
A future release of golang/protobuf will delete this package,
which has long been excluded from the compatibility promise.
2021/04/09 16:52:11 requesting page 1
2021/04/09 16:52:11 got 1000 occurrences
2021/04/09 16:52:11 requesting page 2
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 3
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 4
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 5
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 6
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 7
2021/04/09 16:52:12 got 1000 occurrences
2021/04/09 16:52:12 requesting page 8
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 9
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 10
2021/04/09 16:52:13 got 1000 occurrences
2021/04/09 16:52:13 requesting page 11
2021/04/09 16:52:13 failed to list occurrencesrpc error: code = Internal desc = unexpected response from elasticsearch
exit status 1
```

On the 11th page (from = 10,000 plus size = 1,000 gives 11,000, past the 10,000 default window), this error is returned from Elasticsearch:

```json
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
      }
    ],
    "type": "search_phase_execution_exception",
    "reason": "all shards failed",
    "phase": "query",
    "grouped": true,
    "failed_shards": [
      {
        "shard": 0,
        "index": "grafeas-v1beta2-rode-occurrences",
        "node": "y40fPpNDRm648olC-Ut-tA",
        "reason": {
          "type": "illegal_argument_exception",
          "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
        }
      }
    ],
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting.",
      "caused_by": {
        "type": "illegal_argument_exception",
        "reason": "Result window is too large, from + size must be less than or equal to: [10000] but was [11000]. See the scroll api for a more efficient way to request large data sets. This limit can be set by changing the [index.max_result_window] index level setting."
      }
    }
  },
  "status": 400
}
```
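For reference, the error message itself points at one escape hatch: `index.max_result_window` can be raised per index, at the cost of more memory spent on deep pages. A minimal sketch of bumping it for the occurrences index named in the error, assuming an unauthenticated Elasticsearch on localhost:9200 (adjust the host, index name, and new limit for your cluster):

```go
// One-off sketch: raise index.max_result_window for the occurrences index
// from the error above. Host, index name, and the 20000 value are
// assumptions for illustration.
package main

import (
	"bytes"
	"log"
	"net/http"
)

func main() {
	settings := []byte(`{"index": {"max_result_window": 20000}}`)
	req, err := http.NewRequest(
		http.MethodPut,
		"http://localhost:9200/grafeas-v1beta2-rode-occurrences/_settings",
		bytes.NewReader(settings),
	)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	log.Println("update settings status:", resp.Status)
}
```

Raising the window only moves the cliff, though: any page past the new limit fails the same way, which is why the `search_after` approach described below scales better.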
Came out of this discussion.
The documentation that Elasticsearch provides on pagination makes it sound like there is a hard cap on the number of results that can be paged through using `from` and `size`. We need to determine whether that's the case by loading a number of notes or occurrences greater than `index.max_result_window` and attempting to page through them.

If it is, we'll need to make some changes to grab the `sort` value from the last `hit` in the results, encode that in the page token, and send it along in future requests as the `search_after` parameter.
parameter.The text was updated successfully, but these errors were encountered: