Skip to content

Commit

Permalink
Add additional latency metrics to query service
Browse files Browse the repository at this point in the history
Adding some additional metrics to the query service. This will allow us to see which operations take a long time. If the query service takes a long time but the indexer/store are fast, then something within the RunQuery body is taking too long.
  • Loading branch information
Jordan Pellizzari committed Dec 14, 2023
1 parent dbfa652 commit 1d69664
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 0 deletions.
61 changes: 61 additions & 0 deletions pkg/query/internal/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package metrics

import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus"
)

const (
	// queryServiceSubSystem is the Prometheus subsystem under which the query
	// service metrics of this package are declared.
	queryServiceSubSystem = "query"

	// RunQueryAction is the value of the "action" label for RunQuery calls.
	RunQueryAction = "RunQuery"

	// FailedLabel is the value of the "status" label for failed operations.
	FailedLabel = "error"
	// SuccessLabel is the value of the "status" label for successful operations.
	SuccessLabel = "success"
)

// QueryServiceLatencyHistogram records how long query service operations take,
// in seconds, partitioned by the "action" and "status" labels.
//
// Buckets are linear: ten buckets starting at 0.01s, each 0.01s wide, so the
// largest finite upper bound is roughly 0.1s; slower observations only show up
// in the implicit +Inf bucket.
var QueryServiceLatencyHistogram = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: queryServiceSubSystem,
		Name:      "latency_seconds",
		Help:      "query service latency",
		Buckets:   prometheus.LinearBuckets(0.01, 0.01, 10),
	},
	[]string{"action", "status"},
)

// QueryServiceSetLatency adds one observation of duration, converted to
// seconds, to QueryServiceLatencyHistogram under the given action and status
// label values.
//
// Despite the "Set" in its name this does not overwrite anything: each call
// contributes a new sample to the histogram.
func QueryServiceSetLatency(action string, status string, duration time.Duration) {
	observer := QueryServiceLatencyHistogram.WithLabelValues(action, status)
	observer.Observe(duration.Seconds())
}

// init registers QueryServiceLatencyHistogram through prometheus.MustRegister
// when the package is loaded, so importing this package is enough to expose
// the metric. MustRegister panics if the registration fails.
func init() {
prometheus.MustRegister(QueryServiceLatencyHistogram)
}

// AssertMetrics fetches the page served at ts.URL and reports a test error for
// every entry of expMetrics that does not occur in the response body.
//
// Matching is a plain substring check against the raw response text, so each
// expected entry must be spelled exactly as it is rendered (label order
// included). Failures are reported with t.Error/t.Errorf, which means the
// calling test keeps running after a mismatch.
func AssertMetrics(t *testing.T, ts *httptest.Server, expMetrics []string) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	resp, err := http.Get(ts.URL)
	if err != nil {
		t.Error(fmt.Errorf("fetching metrics from %s: %w", ts.URL, err))
		return
	}
	// Always release the response body so the underlying connection is not
	// leaked for the lifetime of the test binary.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		t.Errorf("fetching metrics from %s: unexpected status %s", ts.URL, resp.Status)
		return
	}

	b, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Error(fmt.Errorf("reading metrics response from %s: %w", ts.URL, err))
		return
	}

	metrics := string(b)

	// Log through t instead of stdout: the full page is only shown when the
	// test fails or runs with -v.
	t.Log(metrics)

	for _, expMetric := range expMetrics {
		if !strings.Contains(metrics, expMetric) {
			t.Errorf("Expected metric not found: %s", expMetric)
		}
	}
}
14 changes: 14 additions & 0 deletions pkg/query/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ package query
import (
"context"
"fmt"
"time"

"github.com/weaveworks/weave-gitops/core/logger"

"github.com/go-logr/logr"
"github.com/weaveworks/weave-gitops-enterprise/pkg/query/configuration"
querymetrics "github.com/weaveworks/weave-gitops-enterprise/pkg/query/internal/metrics"
"github.com/weaveworks/weave-gitops-enterprise/pkg/query/internal/models"
store "github.com/weaveworks/weave-gitops-enterprise/pkg/query/store"
"github.com/weaveworks/weave-gitops/pkg/server/auth"
Expand Down Expand Up @@ -72,6 +74,9 @@ type qs struct {
}

func (q *qs) RunQuery(ctx context.Context, query store.Query, opts store.QueryOption) ([]models.Object, error) {
var err error
defer recordMetrics(querymetrics.RunQueryAction, time.Now(), err)

principal := auth.Principal(ctx)
if principal == nil {
return nil, fmt.Errorf("principal not found")
Expand Down Expand Up @@ -150,3 +155,12 @@ func (q *qs) GetAccessRules(ctx context.Context) ([]models.AccessRule, error) {
// ListFacets returns the facets for the given object category by delegating
// directly to q.index.ListFacets; the result and error are passed through
// unchanged.
func (q *qs) ListFacets(ctx context.Context, cat configuration.ObjectCategory) (store.Facets, error) {
return q.index.ListFacets(ctx, cat)
}

// recordMetrics reports the time elapsed since start for the given action to
// the query service latency histogram. The observation is labelled with
// querymetrics.FailedLabel when err is non-nil and querymetrics.SuccessLabel
// otherwise.
//
// NOTE(review): arguments of a deferred call are evaluated when the defer
// statement executes, not when the function returns. A call written as
// `defer recordMetrics(action, time.Now(), err)` therefore captures whatever
// err holds at that point (typically nil). To label the outcome correctly,
// defer a closure that reads the final error and calls recordMetrics from it.
func recordMetrics(action string, start time.Time, err error) {
	if err != nil {
		querymetrics.QueryServiceSetLatency(action, querymetrics.FailedLabel, time.Since(start))
		return
	}

	querymetrics.QueryServiceSetLatency(action, querymetrics.SuccessLabel, time.Since(start))
}
71 changes: 71 additions & 0 deletions pkg/query/query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"net/http/httptest"
"os"
"testing"

Expand All @@ -14,8 +15,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

sourcev1beta2 "github.com/fluxcd/source-controller/api/v1beta2"

"github.com/weaveworks/weave-gitops-enterprise/pkg/query/configuration"
"github.com/weaveworks/weave-gitops-enterprise/pkg/query/internal/metrics"
"github.com/weaveworks/weave-gitops-enterprise/pkg/query/internal/models"

wegometrics "github.com/weaveworks/weave-gitops-enterprise/pkg/monitoring/metrics"
"github.com/weaveworks/weave-gitops-enterprise/pkg/query/store"
"github.com/weaveworks/weave-gitops/pkg/server/auth"
)
Expand Down Expand Up @@ -769,6 +774,72 @@ func TestQueryOrdering_Realistic(t *testing.T) {

}

// TestQuery_Metrics checks that a successful RunQuery call is reflected in the
// query service latency histogram exposed by the Prometheus handler.
//
// It seeds a SQLite store and an indexer with two objects, runs an unfiltered
// query as an authenticated principal, then scrapes the metrics endpoint and
// expects a "RunQuery"/"success" bucket line to be present.
func TestQuery_Metrics(t *testing.T) {
	g := NewGomegaWithT(t)

	dir, err := os.MkdirTemp("", "test")
	g.Expect(err).NotTo(HaveOccurred())
	// Remove the scratch directory once the test finishes so repeated runs do
	// not pile up temp dirs. The error is deliberately ignored: removal is
	// best-effort housekeeping and must not change the test outcome.
	t.Cleanup(func() { _ = os.RemoveAll(dir) })

	db, err := store.CreateSQLiteDB(dir)
	g.Expect(err).NotTo(HaveOccurred())

	s, err := store.NewSQLiteStore(db, logr.Discard())
	g.Expect(err).NotTo(HaveOccurred())

	idx, err := store.NewIndexer(s, dir, logr.Discard())
	g.Expect(err).NotTo(HaveOccurred())

	// RunQuery rejects contexts without a principal, so attach one.
	ctx := auth.WithPrincipal(context.Background(), &auth.UserPrincipal{
		ID: "test",
	})

	objects := []models.Object{
		{
			Cluster:    "test-cluster-1",
			Name:       "obj-1",
			Namespace:  "namespace-a",
			Kind:       "Deployment",
			APIGroup:   "apps",
			APIVersion: "v1",
		},
		{
			Cluster:    "test-cluster-1",
			Name:       "obj-2",
			Namespace:  "namespace-b",
			Kind:       "Deployment",
			APIGroup:   "apps",
			APIVersion: "v1",
		},
	}

	g.Expect(store.SeedObjects(db, objects)).To(Succeed())
	g.Expect(idx.Add(context.Background(), objects)).To(Succeed())

	q := &qs{
		log:        logr.Discard(),
		debug:      logr.Discard(),
		r:          s,
		index:      idx,
		authorizer: allowAll,
	}

	qy := &query{
		terms: "",
		limit: 3,
	}

	// Serve the Prometheus metrics over HTTP so they can be scraped below.
	_, h := wegometrics.NewDefaultPrometheusHandler()
	ts := httptest.NewServer(h)
	defer ts.Close()

	_, err = q.RunQuery(ctx, qy, qy)
	g.Expect(err).NotTo(HaveOccurred())

	metrics.AssertMetrics(t, ts, []string{
		`query_latency_seconds_bucket{action="RunQuery",status="success",le="0.01"}`,
	})
}

type query struct {
terms string
filters []string
Expand Down

0 comments on commit 1d69664

Please sign in to comment.