Skip to content

Commit

Permalink
Collect garden condition metrics (#93)
Browse files Browse the repository at this point in the history
* Collect garden condition and operation metrics

A future Gardener release will provide garden metrics via
kube-state-metrics-runtime, which we want to switch to eventually (probably
not before release 1.103).

* feedback

* Change operation metric

* add test for errored garden
  • Loading branch information
maboehm authored Aug 16, 2024
1 parent 39e4dd1 commit b04e74d
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cmd/gardener-operator/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (
"github.com/gardener/gardener/pkg/operator/apis/config"
operatorclient "github.com/gardener/gardener/pkg/operator/client"
"github.com/gardener/gardener/pkg/operator/controller"
"github.com/gardener/gardener/pkg/operator/metrics"
"github.com/gardener/gardener/pkg/operator/webhook"
)

Expand Down Expand Up @@ -185,6 +186,11 @@ func run(ctx context.Context, log logr.Logger, cfg *config.OperatorConfiguration
return fmt.Errorf("failed adding webhook handlers to manager: %w", err)
}

log.Info("Adding custom metrics to manager")
if err := metrics.AddToManager(ctx, mgr); err != nil {
return fmt.Errorf("failed adding metrics to manager: %w", err)
}

log.Info("Adding controllers to manager")
if err := controller.AddToManager(ctx, mgr, cfg); err != nil {
return fmt.Errorf("failed adding controllers to manager: %w", err)
Expand Down
123 changes: 123 additions & 0 deletions pkg/operator/metrics/garden.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package metrics

import (
"context"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/client"

gardencorev1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1"
operatorv1alpha1 "github.com/gardener/gardener/pkg/apis/operator/v1alpha1"
)

// gardenSubsystem is the subsystem segment passed to prometheus.BuildFQName
// for the Garden metric descriptors defined in this file.
const gardenSubsystem = "garden"

// gardenCollector is a Prometheus collector that lists Garden resources on
// every collection and reports their conditions and last operation state.
type gardenCollector struct {
// runtimeClient is the reader used to list Garden resources in Collect.
runtimeClient client.Reader
// log receives an error entry when listing gardens fails.
log logr.Logger

// condition describes the gauge emitted per garden, condition type and status.
condition *prometheus.Desc
// operationSucceeded describes the gauge that is 1 if the last operation state is Succeeded.
operationSucceeded *prometheus.Desc
}

// newGardenCollector returns a gardenCollector that reads Garden resources
// through k8sClient, logs through log, and has its metric descriptors set up.
func newGardenCollector(k8sClient client.Reader, log logr.Logger) *gardenCollector {
	collector := new(gardenCollector)
	collector.runtimeClient = k8sClient
	collector.log = log

	// The descriptors must exist before the collector is handed to a registry.
	collector.setMetricDefinitions()

	return collector
}

// setMetricDefinitions builds the descriptors of both Garden metrics and
// stores them on the collector. Metric names are composed from metricPrefix
// and gardenSubsystem; neither metric carries constant labels.
func (c *gardenCollector) setMetricDefinitions() {
	var (
		conditionName = prometheus.BuildFQName(metricPrefix, gardenSubsystem, "condition")
		operationName = prometheus.BuildFQName(metricPrefix, gardenSubsystem, "operation_succeeded")
	)

	// Label order here must match the label value order used when emitting samples.
	conditionLabels := []string{"name", "condition", "status"}
	c.condition = prometheus.NewDesc(conditionName, "Condition state of the Garden.", conditionLabels, nil)

	operationLabels := []string{"name"}
	c.operationSucceeded = prometheus.NewDesc(operationName, "Returns 1 if the last operation state is Succeeded.", operationLabels, nil)
}

// Describe sends the descriptors of all metrics emitted by this collector to
// ch, condition first and operationSucceeded second.
func (c *gardenCollector) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range []*prometheus.Desc{c.condition, c.operationSucceeded} {
		ch <- desc
	}
}

// Collect lists all Garden resources and sends their condition and operation
// metrics to ch. If listing fails, the error is logged and no metrics are
// emitted for this collection.
func (c *gardenCollector) Collect(ch chan<- prometheus.Metric) {
	// Collect receives no context from its caller, so a background context is used.
	ctx := context.Background()

	gardenList := &operatorv1alpha1.GardenList{}
	if err := c.runtimeClient.List(ctx, gardenList); err != nil {
		c.log.Error(err, "Failed to list gardens")
		return
	}

	// Index into the slice instead of ranging by value: this avoids copying
	// every Garden object and avoids taking the address of the loop variable.
	for i := range gardenList.Items {
		garden := &gardenList.Items[i]
		c.collectConditionMetric(ch, garden)
		c.collectOperationMetric(ch, garden)
	}
}

// collectConditionMetric emits the condition gauge for every condition of the
// given garden. For each condition with a non-empty type, one sample is sent
// per known condition status (False, True, Progressing, Unknown): the sample
// whose status matches the condition's current status has value 1, all others
// have value 0.
//
// The method uses a pointer receiver like all other gardenCollector methods,
// so the collector is not copied on every call.
func (c *gardenCollector) collectConditionMetric(ch chan<- prometheus.Metric, garden *operatorv1alpha1.Garden) {
	// The set of reported statuses is loop-invariant, so build it once.
	statuses := []gardencorev1beta1.ConditionStatus{
		gardencorev1beta1.ConditionFalse,
		gardencorev1beta1.ConditionTrue,
		gardencorev1beta1.ConditionProgressing,
		gardencorev1beta1.ConditionUnknown,
	}

	for _, condition := range garden.Status.Conditions {
		// Conditions without a type are skipped.
		if condition.Type == "" {
			continue
		}

		for _, status := range statuses {
			val := float64(0)
			if condition.Status == status {
				val = 1
			}

			// Label values follow the order of the descriptor: name, condition, status.
			ch <- prometheus.MustNewConstMetric(
				c.condition,
				prometheus.GaugeValue,
				val,
				garden.Name,
				string(condition.Type),
				string(status),
			)
		}
	}
}

// collectOperationMetric emits the operation_succeeded gauge for the given
// garden: 1 if its last operation state is Succeeded, 0 otherwise. Gardens
// without a recorded last operation produce no sample.
func (c *gardenCollector) collectOperationMetric(ch chan<- prometheus.Metric, garden *operatorv1alpha1.Garden) {
	lastOperation := garden.Status.LastOperation
	if lastOperation == nil {
		return
	}

	var succeeded float64
	if lastOperation.State == gardencorev1beta1.LastOperationStateSucceeded {
		succeeded = 1
	}

	ch <- prometheus.MustNewConstMetric(c.operationSucceeded, prometheus.GaugeValue, succeeded, garden.Name)
}
106 changes: 106 additions & 0 deletions pkg/operator/metrics/garden_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package metrics

import (
"context"
"strings"

"github.com/go-logr/logr"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

gardencorev1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1"
operatorv1alpha1 "github.com/gardener/gardener/pkg/apis/operator/v1alpha1"
)

// Specs for the garden collector: a Garden named "foo" with a succeeded last
// operation and two conditions is stored in a fake client, and the collector's
// output is compared against the expected Prometheus text exposition.
var _ = Describe("Garden metrics", func() {
	var (
		// ctx must be non-nil; it is passed to every client call below.
		ctx       = context.Background()
		k8sClient client.Client

		c      prometheus.Collector
		garden *operatorv1alpha1.Garden
	)

	BeforeEach(func() {
		testScheme := runtime.NewScheme()
		Expect(operatorv1alpha1.AddToScheme(testScheme)).To(Succeed())
		k8sClient = fake.NewClientBuilder().
			WithScheme(testScheme).
			WithStatusSubresource(&operatorv1alpha1.Garden{}).
			Build()

		c = newGardenCollector(k8sClient, logr.Discard())

		garden = &operatorv1alpha1.Garden{
			ObjectMeta: metav1.ObjectMeta{
				Name: "foo",
			},
		}
		Expect(k8sClient.Create(ctx, garden)).To(Succeed())

		// The status is written in a separate update because Garden is
		// registered with a status subresource on the fake client.
		garden.Status = operatorv1alpha1.GardenStatus{
			LastOperation: &gardencorev1beta1.LastOperation{
				State: gardencorev1beta1.LastOperationStateSucceeded,
			},
			Conditions: []gardencorev1beta1.Condition{
				{Type: operatorv1alpha1.RuntimeComponentsHealthy, Status: gardencorev1beta1.ConditionTrue},
				{Type: operatorv1alpha1.VirtualComponentsHealthy, Status: gardencorev1beta1.ConditionFalse},
			},
		}
		Expect(k8sClient.Status().Update(ctx, garden)).To(Succeed())
	})

	It("should collect condition metrics", func() {
		expected := strings.NewReader(`# HELP gardener_operator_garden_condition Condition state of the Garden.
# TYPE gardener_operator_garden_condition gauge
gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="False"} 0
gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="Progressing"} 0
gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="True"} 1
gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="Unknown"} 0
gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="False"} 1
gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="Progressing"} 0
gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="True"} 0
gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="Unknown"} 0
`)

		Expect(
			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_condition"),
		).To(Succeed())
	})

	It("should collect operation succeeded metrics", func() {
		expected := strings.NewReader(`# HELP gardener_operator_garden_operation_succeeded Returns 1 if the last operation state is Succeeded.
# TYPE gardener_operator_garden_operation_succeeded gauge
gardener_operator_garden_operation_succeeded{name="foo"} 1
`)

		Expect(
			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_operation_succeeded"),
		).To(Succeed())
	})

	It("should collect the metric for not succeeded gardens", func() {
		garden.Status.LastOperation.State = gardencorev1beta1.LastOperationStateError
		Expect(k8sClient.Status().Update(ctx, garden)).To(Succeed())

		expected := strings.NewReader(`# HELP gardener_operator_garden_operation_succeeded Returns 1 if the last operation state is Succeeded.
# TYPE gardener_operator_garden_operation_succeeded gauge
gardener_operator_garden_operation_succeeded{name="foo"} 0
`)

		Expect(
			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_operation_succeeded"),
		).To(Succeed())
	})
})
35 changes: 35 additions & 0 deletions pkg/operator/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package metrics

import (
"context"

"sigs.k8s.io/controller-runtime/pkg/manager"
runtimemetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)

const (
// metricPrefix is the namespace segment used when building the fully
// qualified names of the operator's custom metrics.
metricPrefix = "gardener_operator"
)

// runnable is added to the manager by AddToManager; its Start method registers
// the held collector with the controller-runtime metrics registry.
type runnable struct {
// gardenCollector is the collector registered when the runnable starts.
gardenCollector *gardenCollector
}

// AddToManager adds a runnable to the manager which registers the custom metrics collectors with the metrics
// registry once it is started. Registration is done "inside" a `manager.Runnable`, because that guarantees that the
// cache informers are synced, before the metrics are added / scraped for the first time.
func AddToManager(_ context.Context, mgr manager.Manager) error {
	collector := newGardenCollector(mgr.GetClient(), mgr.GetLogger().WithName("operator-metrics"))
	return mgr.Add(&runnable{gardenCollector: collector})
}

// Start registers the garden collector with the controller-runtime metrics
// registry and returns immediately. A registration failure (for example, a
// collector that is already registered) is returned as an error to the caller
// instead of causing a panic.
func (r *runnable) Start(_ context.Context) error {
	return runtimemetrics.Registry.Register(r.gardenCollector)
}
17 changes: 17 additions & 0 deletions pkg/operator/metrics/metrics_suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
//
// SPDX-License-Identifier: Apache-2.0

package metrics_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// TestMetrics is the `go test` entry point that runs the Ginkgo suite named
// "Operator Metrics Suite".
func TestMetrics(t *testing.T) {
// Route Gomega assertion failures to Ginkgo's Fail before running the specs.
RegisterFailHandler(Fail)
RunSpecs(t, "Operator Metrics Suite")
}
1 change: 1 addition & 0 deletions skaffold-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ build:
- pkg/operator/controller/garden/garden
- pkg/operator/controller/garden/reference
- pkg/operator/features
- pkg/operator/metrics
- pkg/operator/webhook
- pkg/operator/webhook/defaulting
- pkg/operator/webhook/validation
Expand Down

0 comments on commit b04e74d

Please sign in to comment.