diff --git a/Makefile b/Makefile index c1760cc..81da5f1 100644 --- a/Makefile +++ b/Makefile @@ -74,5 +74,5 @@ $(BIN_DIR): $(TOOLING): $(BIN_DIR) @echo Installing tools from hack/tools.go - @cd scripts && go list -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) % + @cd scripts && go list -mod=mod -e -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) % diff --git a/config.libsonnet b/config.libsonnet index f2d5764..29ec593 100644 --- a/config.libsonnet +++ b/config.libsonnet @@ -16,9 +16,9 @@ local annotation = g.dashboard.annotation; tags: ['ci/cd', 'argo-cd'], - argoCdAppOutOfSyncFor: '10m', - argoCdAppUnhealthyFor: '10m', - argoCdAppAutoSyncDisabledFor: '1h', + argoCdAppOutOfSyncFor: '15m', + argoCdAppUnhealthyFor: '15m', + argoCdAppAutoSyncDisabledFor: '2h', argoCdAppSyncInterval: '10m', argoCdNotificationDeliveryInterval: '10m', diff --git a/prometheus_alerts.yaml b/prometheus_alerts.yaml index 72b0bb7..5b5ac8e 100644 --- a/prometheus_alerts.yaml +++ b/prometheus_alerts.yaml @@ -4,7 +4,7 @@ - "alert": "ArgoCdAppOutOfSync" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" - "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 10m." + "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 15m." "summary": "An ArgoCD Application is Out Of Sync." "expr": | sum( @@ -14,13 +14,13 @@ } ) by (job, dest_server, project, name, sync_status) > 0 - "for": "10m" + "for": "15m" "labels": "severity": "warning" - "alert": "ArgoCdAppUnhealthy" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" - "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is unhealthy with the health status {{ $labels.health_status }} for the past 10m." + "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is unhealthy with the health status {{ $labels.health_status }} for the past 15m." "summary": "An ArgoCD Application is Unhealthy." "expr": | sum( @@ -30,13 +30,13 @@ } ) by (job, dest_server, project, name, health_status) > 0 - "for": "10m" + "for": "15m" "labels": "severity": "warning" - "alert": "ArgoCdAppAutoSyncDisabled" "annotations": "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" - "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past 1h." + "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past 2h." "summary": "An ArgoCD Application has AutoSync Disabled." "expr": | sum( @@ -47,7 +47,7 @@ } ) by (job, dest_server, project, name, autosync_enabled) > 0 - "for": "1h" + "for": "2h" "labels": "severity": "warning" - "alert": "ArgoCdAppSyncFailed" diff --git a/tests.yaml b/tests.yaml index 7d35e59..4906f29 100644 --- a/tests.yaml +++ b/tests.yaml @@ -23,7 +23,7 @@ tests: sync_status: OutOfSync exp_annotations: summary: "An ArgoCD Application is Out Of Sync." - description: "The application https://kubernetes.default.svc/ops/ci-cd is out of sync with the sync status OutOfSync for the past 10m." + description: "The application https://kubernetes.default.svc/ops/ci-cd is out of sync with the sync status OutOfSync for the past 15m." dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" - interval: 5m input_series: @@ -44,7 +44,7 @@ tests: health_status: Degraded exp_annotations: summary: "An ArgoCD Application is Unhealthy." - description: "The application https://kubernetes.default.svc/ops/ci-cd is unhealthy with the health status Degraded for the past 10m." + description: "The application https://kubernetes.default.svc/ops/ci-cd is unhealthy with the health status Degraded for the past 15m." dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" - interval: 5m input_series: @@ -53,7 +53,7 @@ tests: - series: 'argocd_app_info{autosync_enabled="true", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd-sync-enabled", namespace="ci-cd", project="ops", sync_status="Synced"}' values: "1+0x40" alert_rule_test: - - eval_time: 2h + - eval_time: 3h alertname: ArgoCdAppAutoSyncDisabled exp_alerts: - exp_labels: @@ -65,7 +65,7 @@ tests: autosync_enabled: false exp_annotations: summary: "An ArgoCD Application has AutoSync Disabled." - description: "The application https://kubernetes.default.svc/ops/ci-cd has autosync disabled for the past 1h." + description: "The application https://kubernetes.default.svc/ops/ci-cd has autosync disabled for the past 2h." dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" - interval: 5m input_series: