Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(Mixin): Fix per_namespace_label not used and add per_instance_label support #15258

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions production/loki-mixin/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
per_namespace_label: 'namespace',
per_job_label: 'job',

// The Log Formater that is used within the Dashboards (logfmt,json)
log_format: 'json',

// Grouping labels, to uniquely identify and group by {jobs, clusters}
job_labels: [$._config.per_cluster_label, $._config.per_namespace_label, $._config.per_job_label],
cluster_labels: [$._config.per_cluster_label, $._config.per_namespace_label],
Expand Down
10 changes: 5 additions & 5 deletions production/loki-mixin/dashboards/dashboard-utils.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ local utils = import 'mixin-utils/utils.libsonnet';

addNamespace(multi=false)::
if multi then
self.addMultiTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', 'namespace')
self.addMultiTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', $._config.per_namespace_label)
else
self.addTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', 'namespace'),
self.addTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', $._config.per_namespace_label),

addTag()::
self + {
Expand Down Expand Up @@ -75,17 +75,17 @@ local utils = import 'mixin-utils/utils.libsonnet';

if multi then
d.addMultiTemplate('cluster', 'loki_build_info', $._config.per_cluster_label)
.addMultiTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', 'namespace')
.addMultiTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', $._config.per_namespace_label)
else
d.addTemplate('cluster', 'loki_build_info', $._config.per_cluster_label)
.addTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', 'namespace'),
.addTemplate('namespace', 'loki_build_info{' + $._config.per_cluster_label + '=~"$cluster"}', $._config.per_namespace_label),
},

jobMatcher(job)::
$._config.per_cluster_label + '=~"$cluster", job=~"($namespace)/%s"' % job,

namespaceMatcher()::
$._config.per_cluster_label + '=~"$cluster", namespace=~"$namespace"',
$._config.per_cluster_label + '=~"$cluster", ' + $._config.per_namespace_label + '=~"$namespace"',

logPanel(title, selector, datasource='$loki_datasource'):: {
title: title,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ local template = import 'grafonnet/template.libsonnet';
template.new(
'tenant',
'$datasource',
'label_values(loki_bloomplanner_tenant_tasks_planned{cluster="$cluster", namespace="$namespace"}, tenant)',
'label_values(loki_bloomplanner_tenant_tasks_planned{cluster="$cluster", ' + $._config.per_namespace_label + '="$namespace"}, tenant)',
label='Tenant',
sort=3, // numerical ascending
includeAll=true,
Expand Down
42 changes: 21 additions & 21 deletions production/loki-mixin/dashboards/loki-canary-dashboard.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -49,60 +49,60 @@ local grafana = import 'grafonnet/grafana.libsonnet';
panels: [
// grid row 1
dashboard.panel('Canary Entries Total') +
dashboard.newStatPanel('sum(count(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster", namespace=~"$namespace"}))', unit='short') +
dashboard.newStatPanel('sum(count(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster", ' + $._config.per_namespace_label + '=~"$namespace"}))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 0 } },

dashboard.panel('Canary Logs Total') +
dashboard.newStatPanel('sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
dashboard.newStatPanel('sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 0 } },

dashboard.panel('Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
dashboard.newStatPanel('sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 0 } },

dashboard.panel('Spotcheck Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 9, y: 0 } },

// grid row 2
dashboard.panel('Spotcheck Total') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
dashboard.newStatPanel('sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 4 } },

dashboard.panel('Metric Test Error %') +
dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}) - sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}))/(sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}))) * 100') +
dashboard.newStatPanel('((sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}) - sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}))/(sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}))) * 100') +
{ gridPos: { h: 4, w: 3, x: 3, y: 4 } },

dashboard.panel('Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
dashboard.newStatPanel('(sum(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 6, y: 4 } },

dashboard.panel('Spotcheck Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))) * 100') +
dashboard.newStatPanel('(sum(increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))) * 100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 4 } },

// grid row 3
dashboard.panel('Metric Test Expected') +
dashboard.newStatPanel('sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"})', unit='short') +
dashboard.newStatPanel('sum(loki_canary_metric_test_expected{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 0, y: 8 } },

dashboard.panel('Metric Test Actual') +
dashboard.newStatPanel('sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"})', unit='short') +
dashboard.newStatPanel('sum(loki_canary_metric_test_actual{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"})', unit='short') +
{ gridPos: { h: 4, w: 3, x: 3, y: 8 } },

dashboard.panel('Websocket Missing') +
dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))', unit='short') +
dashboard.newStatPanel('sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))', unit='short') +
{ gridPos: { h: 4, w: 3, x: 6, y: 8 } },

dashboard.panel('Websocket Missing %') +
dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__range])))*100') +
dashboard.newStatPanel('(sum(increase(loki_canary_websocket_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range]))/sum(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__range])))*100') +
{ gridPos: { h: 4, w: 3, x: 9, y: 8 } },
// end of grid

dashboard.panel('Log Write to read Latency Percentiles') +
dashboard.queryPanel([
'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) by (le))',
], ['p95', 'p50']) +
{ gridPos: { h: 6, w: 12, x: 12, y: 0 } },

Expand All @@ -115,7 +115,7 @@ local grafana = import 'grafonnet/grafana.libsonnet';
).addTargets(
[
grafana.prometheus.target(
'sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le)',
'sum(rate(loki_canary_response_latency_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) by (le)',
legendFormat='{{le}}',
format='heatmap',
),
Expand All @@ -125,24 +125,24 @@ local grafana = import 'grafonnet/grafana.libsonnet';

dashboard.panel('Spot Check Query') +
dashboard.queryPanel([
'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) by (le))',
], ['p99', 'p95']) +
{ gridPos: { h: 6, w: 12, x: 0, y: 14 } },

dashboard.panel('Metric Test Query') +
dashboard.queryPanel([
'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[15m])) by (le))',
'histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[15m])) by (le))',
'histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[15m])) by (le))',
], ['p99', 'p95'],) +
{ gridPos: { h: 6, w: 12, x: 12, y: 14 } },

dashboard.panel('Spot Check Missing %') +
dashboard.queryPanel('topk(20, (sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
dashboard.queryPanel('topk(20, (sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod) (increase(loki_canary_spot_check_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])) * 100)) > 0', '') +
{ gridPos: { h: 6, w: 12, x: 0, y: 20 } },

g.panel('Missing logs') +
g.queryPanel('topk(20,(sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",namespace=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ ' + $._config.per_cluster_label + ' }} {{ pod }}') +
g.queryPanel('topk(20,(sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_missing_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval]))/sum by (' + $._config.per_cluster_label + ', pod)(increase(loki_canary_entries_total{' + $._config.per_cluster_label + '=~"$cluster",' + $._config.per_namespace_label + '=~"$namespace"}[$__rate_interval])))*100) > 0', 'Missing {{ ' + $._config.per_cluster_label + ' }} {{ pod }}') +
{ gridPos: { h: 6, w: 12, x: 12, y: 20 } },

],
Expand Down
6 changes: 3 additions & 3 deletions production/loki-mixin/dashboards/loki-deletion.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
local utils = import 'mixin-utils/utils.libsonnet';

(import 'dashboard-utils.libsonnet') {
local compactor_matcher = 'pod=~"(compactor|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher,
local compactor_matcher = $._config.per_instance_label + '=~"(compactor|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher,
grafanaDashboards+::
{
'loki-deletion.json':
Expand Down Expand Up @@ -67,10 +67,10 @@ local utils = import 'mixin-utils/utils.libsonnet';
).addRow(
g.row('List of deletion requests')
.addPanel(
$.logPanel('In progress/finished', '{%s, %s} |~ "Started processing delete request|delete request for user marked as processed" | logfmt | line_format "{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}" ' % [$.namespaceMatcher(), compactor_matcher]),
$.logPanel('In progress/finished', '{%s, %s} |~ "Started processing delete request|delete request for user marked as processed" | %s | line_format "{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}" ' % [$.namespaceMatcher(), compactor_matcher,$._config.log_format]),
)
.addPanel(
$.logPanel('Requests', '{%s, %s} |~ "delete request for user added" | logfmt | line_format "{{.ts}} user={{.user}} query=\'{{.query}}\'"' % [$.namespaceMatcher(), compactor_matcher]),
$.logPanel('Requests', '{%s, %s} |~ "delete request for user added" | %s | line_format "{{.ts}} user={{.user}} query=\'{{.query}}\'"' % [$.namespaceMatcher(), compactor_matcher,$._config.log_format]),
)
),
},
Expand Down
6 changes: 3 additions & 3 deletions production/loki-mixin/dashboards/loki-logs.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ local template = import 'grafonnet/template.libsonnet';
template.new(
'deployment',
'$datasource',
'label_values(kube_deployment_created{' + $._config.per_cluster_label + '="$cluster", namespace="$namespace"}, deployment)',
'label_values(kube_deployment_created{' + $._config.per_cluster_label + '="$cluster", ' + $._config.per_namespace_label + '="$namespace"}, deployment)',
sort=1,
),

local podTemplate =
template.new(
'pod',
'$datasource',
'label_values(kube_pod_container_info{' + $._config.per_cluster_label + '="$cluster", namespace="$namespace", pod=~"$deployment.*"}, pod)',
'label_values(kube_pod_container_info{' + $._config.per_cluster_label + '="$cluster", ' + $._config.per_namespace_label + '="$namespace", ' + $._config.per_instance_label + '=~"$deployment.*"}, pod)',
sort=1,
),

local containerTemplate =
template.new(
'container',
'$datasource',
'label_values(kube_pod_container_info{' + $._config.per_cluster_label + '="$cluster", namespace="$namespace", pod=~"$pod", pod=~"$deployment.*"}, container)',
'label_values(kube_pod_container_info{' + $._config.per_cluster_label + '="$cluster", ' + $._config.per_namespace_label + '="$namespace", ' + $._config.per_instance_label + '=~"$pod", ' + $._config.per_instance_label + '=~"$deployment.*"}, container)',
sort=1,
),

Expand Down
Loading
Loading