Skip to content

Commit

Permalink
feat(metrics): add scheduler related metrics (#2076)
Browse files Browse the repository at this point in the history
Signed-off-by: Alexei Dodon <[email protected]>
  • Loading branch information
adodon2go authored Dec 4, 2023
1 parent 8bac653 commit 2e733b3
Show file tree
Hide file tree
Showing 21 changed files with 564 additions and 67 deletions.
2 changes: 1 addition & 1 deletion pkg/api/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ func (c *Controller) Shutdown() {
}

func (c *Controller) StartBackgroundTasks(reloadCtx context.Context) {
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Log)
c.taskScheduler = scheduler.NewScheduler(c.Config, c.Metrics, c.Log)
c.taskScheduler.RunScheduler(reloadCtx)

// Enable running garbage-collect periodically for DefaultStore
Expand Down
10 changes: 10 additions & 0 deletions pkg/api/cookiestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package api
import (
"context"
"encoding/gob"
"fmt"
"io/fs"
"os"
"path"
Expand Down Expand Up @@ -157,3 +158,12 @@ func (cleanTask *CleanTask) DoWork(ctx context.Context) error {

return nil
}

// String renders the task as "{Name: <name>, sessions: <sessions>}", built
// from the task's Name() and its sessions field.
func (cleanTask *CleanTask) String() string {
	taskName := cleanTask.Name()
	pending := cleanTask.sessions

	return fmt.Sprintf("{Name: %s, sessions: %s}", taskName, pending)
}

// Name returns the fixed identifier of this task type.
func (cleanTask *CleanTask) Name() string {
	const taskName = "SessionCleanupTask"

	return taskName
}
12 changes: 12 additions & 0 deletions pkg/exporter/api/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"fmt"
"math/big"
"net/http"
"runtime"
"strings"
"sync"
"testing"
Expand All @@ -25,6 +26,7 @@ import (
zotcfg "zotregistry.io/zot/pkg/api/config"
"zotregistry.io/zot/pkg/exporter/api"
"zotregistry.io/zot/pkg/extensions/monitoring"
"zotregistry.io/zot/pkg/scheduler"
. "zotregistry.io/zot/pkg/test/common"
)

Expand Down Expand Up @@ -69,12 +71,22 @@ func readDefaultMetrics(collector *api.Collector, chMetric chan prometheus.Metri
So(err, ShouldBeNil)
So(*metric.Gauge.Value, ShouldEqual, 1)

pmMetric = <-chMetric
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_scheduler_workers_total"].String())

err = pmMetric.Write(&metric)
So(err, ShouldBeNil)
So(*metric.Gauge.Value, ShouldEqual, runtime.NumCPU()*scheduler.NumWorkersMultiplier)

pmMetric = <-chMetric
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_info"].String())

err = pmMetric.Write(&metric)
So(err, ShouldBeNil)
So(*metric.Gauge.Value, ShouldEqual, 0)

pmMetric = <-chMetric
So(pmMetric.Desc().String(), ShouldEqual, collector.MetricsDesc["zot_scheduler_generators_total"].String())
}

func TestNewExporter(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/extensions/extension_userprefs_disable.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ func IsBuiltWithUserPrefsExtension() bool {
// SetupUserPreferencesRoutes only logs a warning that the userprefs extension
// is disabled in this binary; the config, router and metaDB arguments are not
// used here.
func SetupUserPreferencesRoutes(config *config.Config, router *mux.Router,
metaDB mTypes.MetaDB, log log.Logger,
) {
log.Warn().Msg("userprefs extension is disabled because given zot binary doesn't" +
log.Warn().Msg("userprefs extension is disabled because given zot binary doesn't " +
"include this feature please build a binary that does so")
}
11 changes: 11 additions & 0 deletions pkg/extensions/imagetrust/image_trust.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package imagetrust

import (
"context"
"fmt"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
Expand Down Expand Up @@ -274,3 +275,13 @@ func (validityT *validityTask) DoWork(ctx context.Context) error {

return nil
}

// String renders the task as
// "{sigValidityTaskGenerator: <purpose>, repo: <repo name>}", where the
// purpose is a fixed description and the repo name comes from validityT.repo.
func (validityT *validityTask) String() string {
	// description of generator's task purpose
	const purpose = "signatures validity task"

	repoName := validityT.repo.Name

	return fmt.Sprintf("{sigValidityTaskGenerator: %s, repo: %s}", purpose, repoName)
}

// Name returns the fixed identifier of this task type.
func (validityT *validityTask) Name() string {
	const taskName = "SignatureValidityTask"

	return taskName
}
93 changes: 92 additions & 1 deletion pkg/extensions/monitoring/extension.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,53 @@ var (
},
[]string{"storageName", "lockType"},
)
// schedulerGenerators is a counter named "scheduler_generators_total" under
// metricsNamespace; it is incremented by IncSchedulerGenerators.
schedulerGenerators = promauto.NewCounter( //nolint: gochecknoglobals
prometheus.CounterOpts{
Namespace: metricsNamespace,
Name: "scheduler_generators_total",
Help: "Total number of generators registered in scheduler",
},
)
// schedulerGeneratorsStatus is a gauge vector named
// "scheduler_generators_status" under metricsNamespace, labelled by
// "priority" and "state"; it is set by SetSchedulerGenerators.
schedulerGeneratorsStatus = promauto.NewGaugeVec( //nolint: gochecknoglobals
prometheus.GaugeOpts{
Namespace: metricsNamespace,
Name: "scheduler_generators_status",
Help: "Scheduler generators by priority & state",
},
[]string{"priority", "state"},
)
// schedulerNumWorkers is a gauge named "scheduler_workers_total" under
// metricsNamespace; it is set by SetSchedulerNumWorkers.
schedulerNumWorkers = promauto.NewGauge( //nolint: gochecknoglobals
prometheus.GaugeOpts{ //nolint: promlinter
Namespace: metricsNamespace,
Name: "scheduler_workers_total",
Help: "Total number of available workers to perform scheduler tasks",
},
)
// schedulerWorkers is a gauge vector named "scheduler_workers" under
// metricsNamespace, labelled by "state"; it is set by SetSchedulerWorkers.
schedulerWorkers = promauto.NewGaugeVec( //nolint: gochecknoglobals
prometheus.GaugeOpts{
Namespace: metricsNamespace,
Name: "scheduler_workers",
Help: "Scheduler workers state",
},
[]string{"state"},
)
// schedulerTasksQueue is a gauge vector named "scheduler_tasksqueue_length"
// under metricsNamespace, labelled by "priority"; it is set by
// SetSchedulerTasksQueue.
schedulerTasksQueue = promauto.NewGaugeVec( //nolint: gochecknoglobals
	prometheus.GaugeOpts{
		Namespace: metricsNamespace,
		Name:      "scheduler_tasksqueue_length",
		Help:      "Number of tasks waiting in the queue to be processed by scheduler workers",
	},
	[]string{"priority"},
)
// workersTasksDuration is a histogram vector named
// "scheduler_workers_tasks_duration_seconds" under metricsNamespace, labelled
// by "name" and bucketed with GetDefaultBuckets(); observations are recorded
// by ObserveWorkersTasksDuration.
workersTasksDuration = promauto.NewHistogramVec( //nolint: gochecknoglobals
prometheus.HistogramOpts{
Namespace: metricsNamespace,
Name: "scheduler_workers_tasks_duration_seconds",
Help: "How long it takes for a worker to execute a task",
Buckets: GetDefaultBuckets(),
},
[]string{"name"},
)
)

type metricServer struct {
Expand Down Expand Up @@ -169,7 +216,7 @@ func IncDownloadCounter(ms MetricServer, repo string) {
}

func SetStorageUsage(ms MetricServer, rootDir, repo string) {
ms.SendMetric(func() {
ms.ForceSendMetric(func() {
dir := path.Join(rootDir, repo)
repoSize, err := GetDirSize(dir)

Expand All @@ -196,3 +243,47 @@ func ObserveStorageLockLatency(ms MetricServer, latency time.Duration, storageNa
storageLockLatency.WithLabelValues(storageName, lockType).Observe(latency.Seconds())
})
}

// IncSchedulerGenerators hands ms.ForceSendMetric a callback that increments
// the schedulerGenerators counter by one.
func IncSchedulerGenerators(ms MetricServer) {
	increment := func() {
		schedulerGenerators.Inc()
	}

	ms.ForceSendMetric(increment)
}

// SetSchedulerGenerators hands ms.SendMetric a callback that walks gen
// (outer key: priority, inner key: state) and sets the matching
// schedulerGeneratorsStatus gauge to each value.
func SetSchedulerGenerators(ms MetricServer, gen map[string]map[string]uint64) {
	publish := func() {
		for prio, byState := range gen {
			for st, count := range byState {
				gauge := schedulerGeneratorsStatus.WithLabelValues(prio, st)
				gauge.Set(float64(count))
			}
		}
	}

	ms.SendMetric(publish)
}

// SetSchedulerNumWorkers hands ms.SendMetric a callback that sets the
// schedulerNumWorkers gauge to total.
func SetSchedulerNumWorkers(ms MetricServer, total int) {
	update := func() {
		workers := float64(total)
		schedulerNumWorkers.Set(workers)
	}

	ms.SendMetric(update)
}

// SetSchedulerWorkers hands ms.SendMetric a callback that sets the
// schedulerWorkers gauge for every state key in w to its value.
func SetSchedulerWorkers(ms MetricServer, w map[string]int) {
	publish := func() {
		for st, count := range w {
			gauge := schedulerWorkers.WithLabelValues(st)
			gauge.Set(float64(count))
		}
	}

	ms.SendMetric(publish)
}

// SetSchedulerTasksQueue hands ms.SendMetric a callback that sets the
// schedulerTasksQueue gauge for every priority key in tq to its value.
func SetSchedulerTasksQueue(ms MetricServer, tq map[string]int) {
	publish := func() {
		for prio, length := range tq {
			gauge := schedulerTasksQueue.WithLabelValues(prio)
			gauge.Set(float64(length))
		}
	}

	ms.SendMetric(publish)
}

// ObserveWorkersTasksDuration hands ms.SendMetric a callback that records
// duration, in seconds, on the workersTasksDuration histogram labelled with
// taskName.
func ObserveWorkersTasksDuration(ms MetricServer, taskName string, duration time.Duration) {
	record := func() {
		histogram := workersTasksDuration.WithLabelValues(taskName)
		histogram.Observe(duration.Seconds())
	}

	ms.SendMetric(record)
}
Loading

0 comments on commit 2e733b3

Please sign in to comment.