diff --git a/cmd/gardener-operator/app/app.go b/cmd/gardener-operator/app/app.go
index bb385999ffe..8db1bb48050 100644
--- a/cmd/gardener-operator/app/app.go
+++ b/cmd/gardener-operator/app/app.go
@@ -42,6 +42,7 @@ import (
 	"github.com/gardener/gardener/pkg/operator/apis/config"
 	operatorclient "github.com/gardener/gardener/pkg/operator/client"
 	"github.com/gardener/gardener/pkg/operator/controller"
+	"github.com/gardener/gardener/pkg/operator/metrics"
 	"github.com/gardener/gardener/pkg/operator/webhook"
 )
 
@@ -185,6 +186,11 @@ func run(ctx context.Context, log logr.Logger, cfg *config.OperatorConfiguration
 		return fmt.Errorf("failed adding webhook handlers to manager: %w", err)
 	}
 
+	log.Info("Adding custom metrics to manager")
+	if err := metrics.AddToManager(ctx, mgr); err != nil {
+		return fmt.Errorf("failed adding metrics to manager: %w", err)
+	}
+
 	log.Info("Adding controllers to manager")
 	if err := controller.AddToManager(ctx, mgr, cfg); err != nil {
 		return fmt.Errorf("failed adding controllers to manager: %w", err)
diff --git a/pkg/operator/metrics/garden.go b/pkg/operator/metrics/garden.go
new file mode 100644
index 00000000000..bd9c727edcc
--- /dev/null
+++ b/pkg/operator/metrics/garden.go
@@ -0,0 +1,137 @@
+// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package metrics
+
+import (
+	"context"
+
+	"github.com/go-logr/logr"
+	"github.com/prometheus/client_golang/prometheus"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	gardencorev1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1"
+	operatorv1alpha1 "github.com/gardener/gardener/pkg/apis/operator/v1alpha1"
+)
+
+// gardenSubsystem is the subsystem part of the fully-qualified name of every Garden metric.
+const gardenSubsystem = "garden"
+
+// gardenConditionStatuses lists the condition statuses for which a series is emitted per Garden condition.
+var gardenConditionStatuses = []gardencorev1beta1.ConditionStatus{
+	gardencorev1beta1.ConditionFalse,
+	gardencorev1beta1.ConditionTrue,
+	gardencorev1beta1.ConditionProgressing,
+	gardencorev1beta1.ConditionUnknown,
+}
+
+// gardenCollector is a prometheus.Collector that lists the Garden objects on every collection and turns their
+// conditions and last operation into gauge metrics.
+type gardenCollector struct {
+	runtimeClient client.Reader
+	log           logr.Logger
+
+	condition          *prometheus.Desc
+	operationSucceeded *prometheus.Desc
+}
+
+// newGardenCollector returns a gardenCollector that reads Gardens via k8sClient and logs list failures to log.
+func newGardenCollector(k8sClient client.Reader, log logr.Logger) *gardenCollector {
+	c := &gardenCollector{
+		runtimeClient: k8sClient,
+		log:           log,
+	}
+	c.setMetricDefinitions()
+	return c
+}
+
+// setMetricDefinitions initializes the descriptors of the metrics exposed by the collector.
+func (c *gardenCollector) setMetricDefinitions() {
+	c.condition = prometheus.NewDesc(
+		prometheus.BuildFQName(metricPrefix, gardenSubsystem, "condition"),
+		"Condition state of the Garden.",
+		[]string{
+			"name",
+			"condition",
+			"status",
+		},
+		nil,
+	)
+	c.operationSucceeded = prometheus.NewDesc(
+		prometheus.BuildFQName(metricPrefix, gardenSubsystem, "operation_succeeded"),
+		"Returns 1 if the last operation state is Succeeded.",
+		[]string{
+			"name",
+		},
+		nil,
+	)
+}
+
+// Describe implements prometheus.Collector by sending the descriptors of all Garden metrics to ch.
+func (c *gardenCollector) Describe(ch chan<- *prometheus.Desc) {
+	ch <- c.condition
+	ch <- c.operationSucceeded
+}
+
+// Collect implements prometheus.Collector. It lists all Gardens and sends their condition and last operation metrics
+// to ch. If listing fails, the error is logged and no metrics are sent.
+func (c *gardenCollector) Collect(ch chan<- prometheus.Metric) {
+	// The Collector interface does not hand in a context, so a background context is used for the list call.
+	ctx := context.Background()
+
+	gardenList := &operatorv1alpha1.GardenList{}
+	if err := c.runtimeClient.List(ctx, gardenList); err != nil {
+		c.log.Error(err, "Failed to list gardens")
+		return
+	}
+
+	// Iterate by index so that the helpers receive a pointer to the list item instead of to a per-iteration copy.
+	for i := range gardenList.Items {
+		garden := &gardenList.Items[i]
+		c.collectConditionMetric(ch, garden)
+		c.collectOperationMetric(ch, garden)
+	}
+}
+
+// collectConditionMetric sends one gauge per condition of the given Garden and known condition status. The gauge is 1
+// for the status the condition currently has and 0 for all others. Conditions without a type are skipped.
+func (c *gardenCollector) collectConditionMetric(ch chan<- prometheus.Metric, garden *operatorv1alpha1.Garden) {
+	for _, condition := range garden.Status.Conditions {
+		if condition.Type == "" {
+			continue
+		}
+		for _, status := range gardenConditionStatuses {
+			val := float64(0)
+			if condition.Status == status {
+				val = 1
+			}
+			ch <- prometheus.MustNewConstMetric(
+				c.condition,
+				prometheus.GaugeValue,
+				val,
+				garden.Name,
+				string(condition.Type),
+				string(status),
+			)
+		}
+	}
+}
+
+// collectOperationMetric sends a gauge that is 1 if the last operation of the given Garden succeeded and 0 otherwise.
+// Nothing is sent for Gardens without a last operation.
+func (c *gardenCollector) collectOperationMetric(ch chan<- prometheus.Metric, garden *operatorv1alpha1.Garden) {
+	if garden.Status.LastOperation == nil {
+		return
+	}
+	val := float64(0)
+	if garden.Status.LastOperation.State == gardencorev1beta1.LastOperationStateSucceeded {
+		val = 1
+	}
+	ch <- prometheus.MustNewConstMetric(
+		c.operationSucceeded,
+		prometheus.GaugeValue,
+		val,
+		garden.Name,
+	)
+}
diff --git a/pkg/operator/metrics/garden_test.go b/pkg/operator/metrics/garden_test.go
new file mode 100644
index 00000000000..db591f14357
--- /dev/null
+++ b/pkg/operator/metrics/garden_test.go
@@ -0,0 +1,106 @@
+// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package metrics
+
+import (
+	"context"
+	"strings"
+
+	"github.com/go-logr/logr"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	gardencorev1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1"
+	operatorv1alpha1 "github.com/gardener/gardener/pkg/apis/operator/v1alpha1"
+)
+
+var _ = Describe("Garden metrics", func() {
+	var (
+		ctx       = context.Background()
+		k8sClient client.Client
+
+		c      prometheus.Collector
+		garden *operatorv1alpha1.Garden
+	)
+
+	BeforeEach(func() {
+		testScheme := runtime.NewScheme()
+		Expect(operatorv1alpha1.AddToScheme(testScheme)).To(Succeed())
+		k8sClient = fake.NewClientBuilder().
+			WithScheme(testScheme).
+			WithStatusSubresource(&operatorv1alpha1.Garden{}).
+			Build()
+
+		c = newGardenCollector(k8sClient, logr.Discard())
+
+		garden = &operatorv1alpha1.Garden{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: "foo",
+			},
+		}
+		Expect(k8sClient.Create(ctx, garden)).To(Succeed())
+
+		// The fake client is built with a status subresource for Garden, so the status is written separately.
+		garden.Status = operatorv1alpha1.GardenStatus{
+			LastOperation: &gardencorev1beta1.LastOperation{
+				State: gardencorev1beta1.LastOperationStateSucceeded,
+			},
+			Conditions: []gardencorev1beta1.Condition{
+				{Type: operatorv1alpha1.RuntimeComponentsHealthy, Status: gardencorev1beta1.ConditionTrue},
+				{Type: operatorv1alpha1.VirtualComponentsHealthy, Status: gardencorev1beta1.ConditionFalse},
+			},
+		}
+		Expect(k8sClient.Status().Update(ctx, garden)).To(Succeed())
+	})
+
+	It("should collect condition metrics", func() {
+		expected := strings.NewReader(`# HELP gardener_operator_garden_condition Condition state of the Garden.
+# TYPE gardener_operator_garden_condition gauge
+gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="False"} 0
+gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="Progressing"} 0
+gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="True"} 1
+gardener_operator_garden_condition{condition="RuntimeComponentsHealthy",name="foo",status="Unknown"} 0
+gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="False"} 1
+gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="Progressing"} 0
+gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="True"} 0
+gardener_operator_garden_condition{condition="VirtualComponentsHealthy",name="foo",status="Unknown"} 0
+`)
+
+		Expect(
+			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_condition"),
+		).To(Succeed())
+	})
+
+	It("should collect operation succeeded metrics", func() {
+		expected := strings.NewReader(`# HELP gardener_operator_garden_operation_succeeded Returns 1 if the last operation state is Succeeded.
+# TYPE gardener_operator_garden_operation_succeeded gauge
+gardener_operator_garden_operation_succeeded{name="foo"} 1
+`)
+
+		Expect(
+			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_operation_succeeded"),
+		).To(Succeed())
+	})
+
+	It("should collect the metric for not succeeded gardens", func() {
+		garden.Status.LastOperation.State = gardencorev1beta1.LastOperationStateError
+		Expect(k8sClient.Status().Update(ctx, garden)).To(Succeed())
+
+		expected := strings.NewReader(`# HELP gardener_operator_garden_operation_succeeded Returns 1 if the last operation state is Succeeded.
+# TYPE gardener_operator_garden_operation_succeeded gauge
+gardener_operator_garden_operation_succeeded{name="foo"} 0
+`)
+
+		Expect(
+			testutil.CollectAndCompare(c, expected, "gardener_operator_garden_operation_succeeded"),
+		).To(Succeed())
+	})
+})
diff --git a/pkg/operator/metrics/metrics.go b/pkg/operator/metrics/metrics.go
new file mode 100644
index 00000000000..196c25b224b
--- /dev/null
+++ b/pkg/operator/metrics/metrics.go
@@ -0,0 +1,37 @@
+// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package metrics
+
+import (
+	"context"
+
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	runtimemetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
+)
+
+const (
+	// metricPrefix is the namespace part of the fully-qualified name of every gardener-operator metric.
+	metricPrefix = "gardener_operator"
+)
+
+// runnable registers the garden collector with the controller-runtime metrics registry when the manager starts it.
+type runnable struct {
+	gardenCollector *gardenCollector
+}
+
+// AddToManager adds the custom metrics collectors to the metrics registry. This is done "inside" a `manager.Runnable`,
+// because that guarantees that the cache informers are synced, before the metrics are added / scraped for the first
+// time.
+func AddToManager(_ context.Context, mgr manager.Manager) error {
+	k8sClient := mgr.GetClient()
+	return mgr.Add(&runnable{
+		gardenCollector: newGardenCollector(k8sClient, mgr.GetLogger().WithName("operator-metrics")),
+	})
+}
+
+// Start registers the garden collector and returns a registration error instead of panicking on it.
+func (r *runnable) Start(_ context.Context) error {
+	return runtimemetrics.Registry.Register(r.gardenCollector)
+}
diff --git a/pkg/operator/metrics/metrics_suite_test.go b/pkg/operator/metrics/metrics_suite_test.go
new file mode 100644
index 00000000000..6d03963d547
--- /dev/null
+++ b/pkg/operator/metrics/metrics_suite_test.go
@@ -0,0 +1,18 @@
+// SPDX-FileCopyrightText: SAP SE or an SAP affiliate company and Gardener contributors
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package metrics_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// TestMetrics is the `go test` entry point that runs the Ginkgo specs under the "Operator Metrics Suite" name.
+func TestMetrics(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Operator Metrics Suite")
+}
diff --git a/skaffold-operator.yaml b/skaffold-operator.yaml
index aa89c049017..f9ce10ab345 100644
--- a/skaffold-operator.yaml
+++ b/skaffold-operator.yaml
@@ -158,6 +158,7 @@ build:
             - pkg/operator/controller/garden/garden
             - pkg/operator/controller/garden/reference
             - pkg/operator/features
+            - pkg/operator/metrics
             - pkg/operator/webhook
             - pkg/operator/webhook/defaulting
             - pkg/operator/webhook/validation