From 9ff68687775fba3111ffb97bd88cc2c7151afe78 Mon Sep 17 00:00:00 2001 From: Rahul Date: Fri, 11 Aug 2023 21:55:39 +0530 Subject: [PATCH] feat: add workload panels in workload dashboard (#2100) * feat: add workload panels in workload dashboard * feat: add workload panels in workload dashboard * feat: workload dashboard * feat: workload dashboard * feat: workload dashboard * feat: workload dashboard * feat: workload dashboard changes * feat: workload dashboard changes * feat: make workload_class configurable * feat: add workload panels * feat: add workload panels * feat: add workload panels * feat: add workload panels * feat: add workload panels * feat: add workload panels * feat: add workload panels * feat: add service center view * feat: add service center view * feat: add service center view * feat: add service center view * feat: add comments for detail workload * feat: add comments for detail workload * feat: address review comments * feat: address review comments * feat: address review comments * feat: address review comments * feat: remove workload class from templates --------- Co-authored-by: Chris Grindstaff --- cmd/collectors/restperf/restperf.go | 190 +- cmd/collectors/zapiperf/zapiperf.go | 176 +- cmd/tools/generate/counter.yaml | 14 - conf/rest/9.12.0/qos_workload.yaml | 15 + conf/rest/default.yaml | 1 + conf/restperf/9.12.0/workload.yaml | 2 + conf/restperf/9.12.0/workload_detail.yaml | 1 + .../9.12.0/workload_detail_volume.yaml | 3 +- conf/restperf/9.12.0/workload_volume.yaml | 4 +- conf/restperf/default.yaml | 6 +- conf/zapi/cdot/9.8.0/qos_workload.yaml | 18 + conf/zapi/default.yaml | 1 + conf/zapiperf/cdot/9.8.0/workload.yaml | 1 + conf/zapiperf/cdot/9.8.0/workload_detail.yaml | 1 + .../cdot/9.8.0/workload_detail_volume.yaml | 3 +- conf/zapiperf/cdot/9.8.0/workload_volume.yaml | 4 +- conf/zapiperf/default.yaml | 12 +- grafana/dashboards/cmode/lun.json | 66 +- grafana/dashboards/cmode/volume.json | 114 +- grafana/dashboards/cmode/workload.json | 
6020 ++++++++++++++--- integration/test/dashboard_json_test.go | 3 +- 21 files changed, 5369 insertions(+), 1286 deletions(-) create mode 100644 conf/rest/9.12.0/qos_workload.yaml create mode 100644 conf/zapi/cdot/9.8.0/qos_workload.yaml diff --git a/cmd/collectors/restperf/restperf.go b/cmd/collectors/restperf/restperf.go index 3e63571f1..121140c90 100644 --- a/cmd/collectors/restperf/restperf.go +++ b/cmd/collectors/restperf/restperf.go @@ -19,6 +19,7 @@ import ( "github.com/netapp/harvest/v2/pkg/errs" "github.com/netapp/harvest/v2/pkg/matrix" "github.com/netapp/harvest/v2/pkg/set" + "github.com/netapp/harvest/v2/pkg/tree/node" "github.com/netapp/harvest/v2/pkg/util" "github.com/tidwall/gjson" "path" @@ -28,9 +29,11 @@ import ( ) const ( - latencyIoReqd = 10 - BILLION = 1_000_000_000 - arrayKeyToken = "#" + latencyIoReqd = 10 + BILLION = 1_000_000_000 + arrayKeyToken = "#" + objWorkloadClass = "user_defined|system_defined" + objWorkloadVolumeClass = "autovolume" ) var qosQuery = "api/cluster/counter/tables/qos" @@ -39,6 +42,8 @@ var qosDetailQuery = "api/cluster/counter/tables/qos_detail" var qosDetailVolumeQuery = "api/cluster/counter/tables/qos_detail_volume" var qosWorkloadQuery = "api/storage/qos/workloads" +var workloadDetailMetrics = []string{"resource_latency", "service_time_latency"} + var qosQueries = map[string]string{ qosQuery: qosQuery, qosVolumeQuery: qosVolumeQuery, @@ -171,6 +176,36 @@ func (r *RestPerf) InitMatrix() error { return nil } +// load workload_class or use defaultValue +func (r *RestPerf) loadWorkloadClassQuery(defaultValue string) string { + + var x *node.Node + + name := "workload_class" + + if x = r.Params.GetChildS(name); x != nil { + v := x.GetAllChildContentS() + if len(v) == 0 { + r.Logger.Debug(). + Str("name", name). + Str("defaultValue", defaultValue). + Send() + return defaultValue + } + s := strings.Join(v, "|") + r.Logger.Debug(). + Str("name", name). + Str("value", s). + Send() + return s + } + r.Logger.Debug(). 
+ Str("name", name). + Str("defaultValue", defaultValue). + Send() + return defaultValue +} + // load an int parameter or use defaultValue func (r *RestPerf) loadParamInt(name string, defaultValue int) int { @@ -549,25 +584,27 @@ func (r *RestPerf) processWorkLoadCounter() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrMissingParam, "resource_map") } else { for _, x := range resourceMap.GetChildren() { - name := x.GetNameS() - resource := x.GetContentS() + for _, wm := range workloadDetailMetrics { + name := x.GetNameS() + wm + resource := x.GetContentS() - if m := mat.GetMetric(name); m != nil { - continue - } - if m, err := mat.NewMetricFloat64(name, "resource_latency"); err != nil { - return nil, err - } else { - r.perfProp.counterInfo[name] = &counter{ - name: "resource_latency", - description: "", - counterType: r.perfProp.counterInfo[service.GetName()].counterType, - unit: r.perfProp.counterInfo[service.GetName()].unit, - denominator: "ops", + if m := mat.GetMetric(name); m != nil { + continue } - m.SetLabel("resource", resource) + if m, err := mat.NewMetricFloat64(name, wm); err != nil { + return nil, err + } else { + r.perfProp.counterInfo[name] = &counter{ + name: wm, + description: "", + counterType: r.perfProp.counterInfo[service.GetName()].counterType, + unit: r.perfProp.counterInfo[service.GetName()].unit, + denominator: "ops", + } + m.SetLabel("resource", resource) - r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric") + r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric") + } } } } @@ -611,16 +648,15 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) { func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) (map[string]*matrix.Matrix, error) { var ( - count, numRecords uint64 - apiD, parseD time.Duration - err error - instanceKeys []string - resourceLatency *matrix.Metric // for workload* objects - 
skips int - instIndex int - ts float64 - prevMat *matrix.Matrix - curMat *matrix.Matrix + count uint64 + apiD, parseD time.Duration + err error + instanceKeys []string + skips int + instIndex int + ts float64 + prevMat *matrix.Matrix + curMat *matrix.Matrix ) prevMat = r.Matrix[r.Object] @@ -683,12 +719,12 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) } } + var layer = "" // latency layer (resource) for workloads + // special case for these two objects // we need to process each latency layer for each instance/counter if isWorkloadDetailObject(r.Prop.Query) { - layer := "" // latency layer (resource) for workloads - // example instanceKey : umeng-aff300-02:test-wid12022.CPU_dblade i := strings.Index(instanceKey, ":") instanceKey = instanceKey[i+1:] @@ -703,11 +739,14 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) return true } - if resourceLatency = curMat.GetMetric(layer); resourceLatency == nil { - r.Logger.Trace(). - Str("layer", layer). - Msg("Resource-latency metric missing in cache") - return true + for _, wm := range workloadDetailMetrics { + mLayer := layer + wm + if l := curMat.GetMetric(mLayer); l == nil { + r.Logger.Trace(). + Str("layer", layer). + Msg("Resource-latency metric missing in cache") + return true + } } } @@ -770,27 +809,61 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) if ok { // special case for workload_detail if isWorkloadDetailObject(r.Prop.Query) { - if name == "wait_time" || name == "service_time" { - if err := resourceLatency.AddValueString(instance, f.value); err != nil { - r.Logger.Error(). - Stack(). - Err(err). - Str("name", name). - Str("value", f.value). - Msg("Add resource-latency failed") - } else { - r.Logger.Trace(). - Str("name", name). - Str("value", f.value). - Msg("Add resource-latency") - count++ + for _, wm := range workloadDetailMetrics { + // "visits" are ignored. 
This counter is only used to set properties of ops counter + if name == "visits" { + continue + } + wMetric := curMat.GetMetric(layer + wm) + if wm == "resource_latency" && (name == "wait_time" || name == "service_time") { + if err := wMetric.AddValueString(instance, f.value); err != nil { + r.Logger.Error(). + Stack(). + Err(err). + Str("name", name). + Str("value", f.value). + Msg("Add resource_latency failed") + } else { + r.Logger.Trace(). + Str("name", name). + Str("value", f.value). + Msg("Add resource_latency") + count++ + } + continue + } else if wm == "service_time_latency" && name == "service_time" { + if err = wMetric.SetValueString(instance, f.value); err != nil { + r.Logger.Error(). + Stack(). + Err(err). + Str("name", name). + Str("value", f.value). + Msg("Add service_time_latency failed") + } else { + r.Logger.Trace(). + Str("name", name). + Str("value", f.value). + Msg("Add service_time_latency") + count++ + } + } else if wm == "wait_time_latency" && name == "wait_time" { + if err = wMetric.SetValueString(instance, f.value); err != nil { + r.Logger.Error(). + Stack(). + Err(err). + Str("name", name). + Str("value", f.value). + Msg("Add wait_time_latency failed") + } else { + r.Logger.Trace(). + Str("name", name). + Str("value", f.value). + Msg("Add wait_time_latency") + count++ + } } - continue - } - // "visits" are ignored. 
This counter is only used to set properties of ops counter - if name == "visits" { - continue } + continue } else { if f.isArray { labels := strings.Split(f.label, ",") @@ -917,7 +990,6 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) r.Logger.Error().Err(err).Msg("Failed to set timestamp") } - numRecords += 1 return true }) } @@ -933,7 +1005,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) _ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds()) _ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) _ = r.Metadata.LazySetValueUint64("metrics", "data", count) - _ = r.Metadata.LazySetValueUint64("instances", "data", numRecords) + _ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(curMat.GetInstances()))) r.AddCollectCount(count) // skip calculating from delta if no data from previous poll @@ -1255,9 +1327,9 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) { fields = "*" dataQuery = qosWorkloadQuery if r.Prop.Query == qosVolumeQuery || r.Prop.Query == qosDetailVolumeQuery { - filter = append(filter, "workload-class=autovolume|user_defined|system_defined") + filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadVolumeClass)) } else { - filter = append(filter, "workload-class=user_defined|system_defined") + filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadClass)) } } diff --git a/cmd/collectors/zapiperf/zapiperf.go b/cmd/collectors/zapiperf/zapiperf.go index 3585a4325..35bf4926e 100644 --- a/cmd/collectors/zapiperf/zapiperf.go +++ b/cmd/collectors/zapiperf/zapiperf.go @@ -60,9 +60,13 @@ const ( objWorkloadDetail = "workload_detail" objWorkloadVolume = "workload_volume" objWorkloadDetailVolume = "workload_detail_volume" + objWorkloadClass = "user_defined|system_defined" + objWorkloadVolumeClass = "autovolume" BILLION = 1_000_000_000 ) +var workloadDetailMetrics = 
[]string{"resource_latency", "service_time_latency"} + type ZapiPerf struct { *zapi.Zapi // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType object string @@ -173,6 +177,36 @@ func (z *ZapiPerf) loadParamStr(name, defaultValue string) string { return defaultValue } +// load workload_class or use defaultValue +func (z *ZapiPerf) loadWorkloadClassQuery(defaultValue string) string { + + var x *node.Node + + name := "workload_class" + + if x = z.Params.GetChildS(name); x != nil { + v := x.GetAllChildContentS() + if len(v) == 0 { + z.Logger.Debug(). + Str("name", name). + Str("defaultValue", defaultValue). + Send() + return defaultValue + } + s := strings.Join(v, "|") + z.Logger.Debug(). + Str("name", name). + Str("value", s). + Send() + return s + } + z.Logger.Debug(). + Str("name", name). + Str("defaultValue", defaultValue). + Send() + return defaultValue +} + // load an int parameter or use defaultValue func (z *ZapiPerf) loadParamInt(name string, defaultValue int) int { @@ -199,10 +233,9 @@ func (z *ZapiPerf) loadParamInt(name string, defaultValue int) int { func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { var ( - instanceKeys []string - resourceLatency *matrix.Metric // for workload* objects - err error - skips int + instanceKeys []string + err error + skips int ) z.Logger.Trace().Msg("updating data cache") @@ -338,12 +371,12 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { key := i.GetChildContentS(z.instanceKey) + var layer = "" // latency layer (resource) for workloads + // special case for these two objects // we need to process each latency layer for each instance/counter if z.Query == objWorkloadDetail || z.Query == objWorkloadDetailVolume { - layer := "" // latency layer (resource) for workloads - if x := strings.Split(key, "."); len(x) == 2 { key = x[0] layer = x[1] @@ -354,11 +387,14 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { continue } - if resourceLatency = 
curMat.GetMetric(layer); resourceLatency == nil { - z.Logger.Warn(). - Str("layer", layer). - Msg("Resource-latency metric missing in cache") - continue + for _, wm := range workloadDetailMetrics { + mLayer := layer + wm + if l := curMat.GetMetric(mLayer); l == nil { + z.Logger.Warn(). + Str("layer", mLayer). + Msg("metric missing in cache") + continue + } } } @@ -475,29 +511,68 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { // special case for workload_detail if z.Query == objWorkloadDetail || z.Query == objWorkloadDetailVolume { - if name == "wait_time" || name == "service_time" { - if err := resourceLatency.AddValueString(instance, value); err != nil { - z.Logger.Error(). - Stack(). - Err(err). - Str("name", name). - Str("value", value). - Int("instIndex", instIndex). - Msg("Add resource-latency failed") - } else { - z.Logger.Trace(). - Str("name", name). - Str("value", value). - Int("instIndex", instIndex). - Msg("Add resource-latency") - count++ + for _, wm := range workloadDetailMetrics { + // "visits" are ignored + if name == "visits" { + continue + } + + wMetric := curMat.GetMetric(layer + wm) + + if wm == "resource_latency" && (name == "wait_time" || name == "service_time") { + if err := wMetric.AddValueString(instance, value); err != nil { + z.Logger.Error(). + Err(err). + Str("name", name). + Str("value", value). + Int("instIndex", instIndex). + Msg("Add resource_latency failed") + } else { + z.Logger.Trace(). + Str("name", name). + Str("value", value). + Int("instIndex", instIndex). + Msg("Add resource_latency") + count++ + } + continue + } else if wm == "service_time_latency" && name == "service_time" { + if err = wMetric.SetValueString(instance, value); err != nil { + z.Logger.Error(). + Err(err). + Str("name", name). + Str("value", value). + Int("instIndex", instIndex). + Msg("Add service_time_latency failed") + } else { + z.Logger.Trace(). + Int("instIndex", instIndex). + Str("name", name). + Str("value", value). 
+ Int("instIndex", instIndex). + Msg("Add service_time_latency") + count++ + } + } else if wm == "wait_time_latency" && name == "wait_time" { + if err = wMetric.SetValueString(instance, value); err != nil { + z.Logger.Error(). + Err(err). + Str("name", name). + Str("value", value). + Int("instIndex", instIndex). + Msg("Add wait_time_latency failed") + } else { + z.Logger.Trace(). + Int("instIndex", instIndex). + Str("name", name). + Str("value", value). + Int("instIndex", instIndex). + Msg("Add wait_time_latency") + count++ + } } - continue - } - // "visits" are ignored - if name == "visits" { - continue } + continue } // store as scalar metric @@ -1038,23 +1113,26 @@ func (z *ZapiPerf) PollCounter() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrMissingParam, "resource_map") } else { for _, x := range resourceMap.GetChildren() { - name := x.GetNameS() - resource := x.GetContentS() + for _, wm := range workloadDetailMetrics { - if m := mat.GetMetric(name); m != nil { - oldMetrics.Remove(name) - continue - } - if m, err := mat.NewMetricFloat64(name, "resource_latency"); err != nil { - return nil, err - } else { - m.SetLabel("resource", resource) - m.SetProperty(service.GetProperty()) - // base counter is the ops of the same resource - m.SetComment("ops") + name := x.GetNameS() + wm + resource := x.GetContentS() - oldMetrics.Remove(name) - z.Logger.Debug().Msgf("+ [%s] (=> %s) added workload latency metric", name, resource) + if m := mat.GetMetric(name); m != nil { + oldMetrics.Remove(name) + continue + } + if m, err := mat.NewMetricFloat64(name, wm); err != nil { + return nil, err + } else { + m.SetLabel("resource", resource) + m.SetProperty(service.GetProperty()) + // base counter is the ops of the same resource + m.SetComment("ops") + + oldMetrics.Remove(name) + z.Logger.Debug().Msgf("+ [%s] (=> %s) added workload latency metric", name, resource) + } } } } @@ -1323,9 +1401,9 @@ func (z *ZapiPerf) PollInstance() (map[string]*matrix.Matrix, error) 
{ queryElem := request.NewChildS("query", "") infoElem := queryElem.NewChildS("qos-workload-info", "") if z.Query == objWorkloadVolume || z.Query == objWorkloadDetailVolume { - infoElem.NewChildS("workload-class", "autovolume|user_defined|system_defined") + infoElem.NewChildS("workload-class", z.loadWorkloadClassQuery(objWorkloadVolumeClass)) } else { - infoElem.NewChildS("workload-class", "user_defined|system_defined") + infoElem.NewChildS("workload-class", z.loadWorkloadClassQuery(objWorkloadClass)) } instancesAttr = "attributes-list" diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index 1be133f45..f11369684 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -487,20 +487,6 @@ counters: Template: conf/zapiperf/9.12.0/workload_detail.yaml Unit: microseconds - - Name: qos_detail_volume_resource_latency - Description: average latency for volume on Data ONTAP subsystems - APIs: - - API: REST - Endpoint: api/cluster/counter/tables/qos_detail_volume - ONTAPCounter: Harvest generated - Template: conf/restperf/9.12.0/workload_detail_volume.yaml - Unit: microseconds - - API: ZAPI - Endpoint: perf-object-get-instances workload_detail_volume - ONTAPCounter: Harvest generated - Template: conf/zapiperf/9.12.0/workload_detail_volume.yaml - Unit: microseconds - - Name: quota_disk_limit Description: Maximum amount of disk space, in kilobytes, allowed for the quota target (hard disk space limit). The value is -1 if the limit is unlimited. 
diff --git a/conf/rest/9.12.0/qos_workload.yaml b/conf/rest/9.12.0/qos_workload.yaml new file mode 100644 index 000000000..c4d2ca723 --- /dev/null +++ b/conf/rest/9.12.0/qos_workload.yaml @@ -0,0 +1,15 @@ +name: QosWorkload +query: api/storage/qos/workloads +object: qos_workload + +counters: + - ^^uuid => uuid + - ^name => workload + - ^workload_class => class + +export_options: + instance_keys: + - uuid + instance_labels: + - class + - workload \ No newline at end of file diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml index 05c758b9f..22b1b9d74 100644 --- a/conf/rest/default.yaml +++ b/conf/rest/default.yaml @@ -29,6 +29,7 @@ objects: OntapS3Policy: ontap_s3_policy.yaml QosPolicyAdaptive: qos_policy_adaptive.yaml QosPolicyFixed: qos_policy_fixed.yaml + QosWorkload: qos_workload.yaml Qtree: qtree.yaml Security: security.yaml SecurityAccount: security_account.yaml diff --git a/conf/restperf/9.12.0/workload.yaml b/conf/restperf/9.12.0/workload.yaml index ba598908d..92587264a 100644 --- a/conf/restperf/9.12.0/workload.yaml +++ b/conf/restperf/9.12.0/workload.yaml @@ -6,6 +6,7 @@ object: qos # recommended to use large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s @@ -17,6 +18,7 @@ counters: - concurrency - latency - ops + - other_ops - read_data - read_io_type_percent => read_io_type - read_latency diff --git a/conf/restperf/9.12.0/workload_detail.yaml b/conf/restperf/9.12.0/workload_detail.yaml index 104d5898e..bc1faeade 100644 --- a/conf/restperf/9.12.0/workload_detail.yaml +++ b/conf/restperf/9.12.0/workload_detail.yaml @@ -7,6 +7,7 @@ object: qos_detail # recommended to use large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s diff --git a/conf/restperf/9.12.0/workload_detail_volume.yaml b/conf/restperf/9.12.0/workload_detail_volume.yaml index 7ab7ac16d..a5c57ce1f 100644 --- 
a/conf/restperf/9.12.0/workload_detail_volume.yaml +++ b/conf/restperf/9.12.0/workload_detail_volume.yaml @@ -2,10 +2,11 @@ # object provides latency breakdown per service or delay center per volume name: WorkloadDetailVolume query: api/cluster/counter/tables/qos_detail_volume -object: qos_detail_volume +object: qos_detail # recommended to use large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s diff --git a/conf/restperf/9.12.0/workload_volume.yaml b/conf/restperf/9.12.0/workload_volume.yaml index e06f54afa..f628bb359 100644 --- a/conf/restperf/9.12.0/workload_volume.yaml +++ b/conf/restperf/9.12.0/workload_volume.yaml @@ -4,10 +4,11 @@ name: WorkloadVolume query: api/cluster/counter/tables/qos_volume -object: qos_volume +object: qos # recommended to use large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s @@ -15,6 +16,7 @@ schedule: counters: - ^^uuid + - concurrency - latency - ops - read_data diff --git a/conf/restperf/default.yaml b/conf/restperf/default.yaml index 58ebc6f0c..b3d59f1eb 100644 --- a/conf/restperf/default.yaml +++ b/conf/restperf/default.yaml @@ -52,8 +52,10 @@ objects: Vscan: vscan.yaml VscanSVM: vscan_svm.yaml -# Uncomment to collect workload/QOS counters. They are disabled by default because they typically slow down data collection due to a high number of metrics. +# Uncomment to collect workload/QOS counters. # Workload: workload.yaml -# WorkloadDetail: workload_detail.yaml # WorkloadVolume: workload_volume.yaml + +# The following workload templates may slow down data collection due to a high number of metrics. 
+# WorkloadDetail: workload_detail.yaml # WorkloadDetailVolume: workload_detail_volume.yaml \ No newline at end of file diff --git a/conf/zapi/cdot/9.8.0/qos_workload.yaml b/conf/zapi/cdot/9.8.0/qos_workload.yaml new file mode 100644 index 000000000..e003c068e --- /dev/null +++ b/conf/zapi/cdot/9.8.0/qos_workload.yaml @@ -0,0 +1,18 @@ +name: QosWorkload +query: qos-workload-get-iter +object: qos_workload + +counters: + qos-workload-info: + - ^^workload-uuid => uuid + - ^workload-class => class + - ^workload-name => workload + +collect_only_labels: true + +export_options: + instance_keys: + - uuid + instance_labels: + - class + - workload \ No newline at end of file diff --git a/conf/zapi/default.yaml b/conf/zapi/default.yaml index caf17fa86..0477410f1 100644 --- a/conf/zapi/default.yaml +++ b/conf/zapi/default.yaml @@ -20,6 +20,7 @@ objects: NtpServer: ntpserver.yaml QosPolicyAdaptive: qos_policy_adaptive.yaml QosPolicyFixed: qos_policy_fixed.yaml + QosWorkload: qos_workload.yaml Qtree: qtree.yaml Security: security.yaml SecurityAccount: security_account.yaml diff --git a/conf/zapiperf/cdot/9.8.0/workload.yaml b/conf/zapiperf/cdot/9.8.0/workload.yaml index 037e12cb2..d561a725a 100644 --- a/conf/zapiperf/cdot/9.8.0/workload.yaml +++ b/conf/zapiperf/cdot/9.8.0/workload.yaml @@ -20,6 +20,7 @@ counters: - instance_uuid - latency - ops + - other_ops - read_data - read_io_type - read_latency diff --git a/conf/zapiperf/cdot/9.8.0/workload_detail.yaml b/conf/zapiperf/cdot/9.8.0/workload_detail.yaml index c7fcd28dd..f79d4188b 100644 --- a/conf/zapiperf/cdot/9.8.0/workload_detail.yaml +++ b/conf/zapiperf/cdot/9.8.0/workload_detail.yaml @@ -9,6 +9,7 @@ instance_key: name # recommended to use a large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s diff --git a/conf/zapiperf/cdot/9.8.0/workload_detail_volume.yaml b/conf/zapiperf/cdot/9.8.0/workload_detail_volume.yaml index 081991924..8482b3c27 100644 --- 
a/conf/zapiperf/cdot/9.8.0/workload_detail_volume.yaml +++ b/conf/zapiperf/cdot/9.8.0/workload_detail_volume.yaml @@ -2,12 +2,13 @@ # object provides latency breakdown per service or delay center per volume name: WorkloadDetailVolume query: workload_detail_volume -object: qos_detail_volume +object: qos_detail instance_key: name # recommended to use a large interval, since workload objects are expensive client_timeout: 1m30s + schedule: - counter: 1200s - instance: 600s diff --git a/conf/zapiperf/cdot/9.8.0/workload_volume.yaml b/conf/zapiperf/cdot/9.8.0/workload_volume.yaml index 4291e041c..e26f938d3 100644 --- a/conf/zapiperf/cdot/9.8.0/workload_volume.yaml +++ b/conf/zapiperf/cdot/9.8.0/workload_volume.yaml @@ -4,7 +4,7 @@ name: WorkloadVolume query: workload_volume -object: qos_volume +object: qos # recommended to use large interval, since workload objects are expensive client_timeout: 1m30s @@ -16,10 +16,12 @@ schedule: instance_key: name counters: + - concurrency - instance_name - instance_uuid - latency - ops + - other_ops - read_data - read_io_type - read_latency diff --git a/conf/zapiperf/default.yaml b/conf/zapiperf/default.yaml index ae68ea82f..8eaad696b 100644 --- a/conf/zapiperf/default.yaml +++ b/conf/zapiperf/default.yaml @@ -59,8 +59,10 @@ objects: Vscan: vscan.yaml VscanSVM: vscan_svm.yaml -# Uncomment to collect workload/QOS counters. They are disabled by default because they typically slow down data collection due to a high number of metrics. -# Workload: workload.yaml -# WorkloadDetail: workload_detail.yaml -# WorkloadVolume: workload_volume.yaml -# WorkloadDetailVolume: workload_detail_volume.yaml \ No newline at end of file +# Uncomment to collect workload/QOS counters. +# Workload: workload.yaml +# WorkloadVolume: workload_volume.yaml + +# The following workload templates may slow down data collection due to a high number of metrics. 
+# WorkloadDetail: workload_detail.yaml +# WorkloadDetailVolume: workload_detail_volume.yaml \ No newline at end of file diff --git a/grafana/dashboards/cmode/lun.json b/grafana/dashboards/cmode/lun.json index 5d08e5a5f..896335bc6 100644 --- a/grafana/dashboards/cmode/lun.json +++ b/grafana/dashboards/cmode/lun.json @@ -2409,7 +2409,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyNetwork\",resource=\"network\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyNetwork\",resource=\"network\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2500,7 +2500,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyThrottle\",resource=\"throttle\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyThrottle\",resource=\"throttle\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2591,7 +2591,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyMin\",resource=\"qos_min\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyMin\",resource=\"qos_min\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", 
"refId": "A" @@ -2681,7 +2681,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCluster\",resource=\"cluster\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCluster\",resource=\"cluster\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2771,7 +2771,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyBackend\",resource=\"backend\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyBackend\",resource=\"backend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2862,7 +2862,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCP\",resource=\"cp\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCP\",resource=\"cp\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2953,7 +2953,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencySuspend\",resource=\"suspend\"})", + "expr": "topk($TopResources, 
qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencySuspend\",resource=\"suspend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -3044,7 +3044,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCloud\",resource=\"cloud\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyCloud\",resource=\"cloud\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -3134,7 +3134,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyFrontend\",resource=\"frontend\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyFrontend\",resource=\"frontend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3224,7 +3224,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyNvlog\",resource=\"nvlog\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyNvlog\",resource=\"nvlog\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -3314,7 +3314,7 @@ "targets": [ { "exemplar": 
false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyDisk\",resource=\"disk\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopQosDetailVolumeResourceLatencyDisk\",resource=\"disk\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -5107,7 +5107,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5117,7 +5117,7 @@ "name": "TopQosDetailVolumeResourceLatencyNetwork", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5130,7 +5130,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + 
"definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5140,7 +5140,7 @@ "name": "TopQosDetailVolumeResourceLatencyThrottle", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5153,7 +5153,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5163,7 +5163,7 @@ "name": "TopQosDetailVolumeResourceLatencyFrontend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\",volume!=\"\"}[${__range}])))", 
"refId": "StandardVariableQuery" }, "refresh": 2, @@ -5176,7 +5176,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5186,7 +5186,7 @@ "name": "TopQosDetailVolumeResourceLatencyCluster", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5199,7 +5199,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5209,7 +5209,7 @@ "name": "TopQosDetailVolumeResourceLatencyBackend", "options": [], "query": { - "query": "query_result(topk($TopResources, 
avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5222,7 +5222,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5232,7 +5232,7 @@ "name": "TopQosDetailVolumeResourceLatencyDisk", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5245,7 +5245,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, 
avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5255,7 +5255,7 @@ "name": "TopQosDetailVolumeResourceLatencySuspend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5268,7 +5268,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5278,7 +5278,7 @@ "name": "TopQosDetailVolumeResourceLatencyCloud", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5291,7 +5291,7 
@@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5301,7 +5301,7 @@ "name": "TopQosDetailVolumeResourceLatencyNvlog", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5314,7 +5314,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5324,7 +5324,7 @@ "name": "TopQosDetailVolumeResourceLatencyMin", "options": [], "query": { - "query": "query_result(topk($TopResources, 
avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -5337,7 +5337,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -5347,7 +5347,7 @@ "name": "TopQosDetailVolumeResourceLatencyCP", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/grafana/dashboards/cmode/volume.json b/grafana/dashboards/cmode/volume.json index cb98c96a4..29d209b28 100644 --- a/grafana/dashboards/cmode/volume.json +++ b/grafana/dashboards/cmode/volume.json @@ -2619,7 +2619,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, 
qos_volume_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadLatency\"})", + "expr": "topk($TopResources, qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadLatency\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2709,7 +2709,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteLatency\"})", + "expr": "topk($TopResources, qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteLatency\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2799,7 +2799,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadOps\"})", + "expr": "topk($TopResources, qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadOps\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2889,7 +2889,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteOps\"})", + "expr": "topk($TopResources, qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteOps\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -2979,7 +2979,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadData\"})", + "expr": "topk($TopResources, 
qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSReadData\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3069,7 +3069,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteData\"})", + "expr": "topk($TopResources, qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSWriteData\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3160,7 +3160,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSequentialReads\"})", + "expr": "topk($TopResources, qos_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSequentialReads\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3251,7 +3251,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_volume_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSequentialWrites\"})", + "expr": "topk($TopResources, qos_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSequentialWrites\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3373,7 +3373,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSNetwork\",resource=\"network\"})", + "expr": "topk($TopResources, 
qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSNetwork\",resource=\"network\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3464,7 +3464,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSThrottle\",resource=\"throttle\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSThrottle\",resource=\"throttle\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3555,7 +3555,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSMin\",resource=\"qos_min\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSMin\",resource=\"qos_min\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3645,7 +3645,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCluster\",resource=\"cluster\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCluster\",resource=\"cluster\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3735,7 +3735,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, 
qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSBackend\",resource=\"backend\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSBackend\",resource=\"backend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3826,7 +3826,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCP\",resource=\"cp\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCP\",resource=\"cp\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -3917,7 +3917,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSuspend\",resource=\"suspend\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSSuspend\",resource=\"suspend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -4008,7 +4008,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCloud\",resource=\"cloud\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSCloud\",resource=\"cloud\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -4098,7 +4098,7 @@ 
"targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSfrontend\",resource=\"frontend\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSfrontend\",resource=\"frontend\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -4188,7 +4188,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSNvlog\",resource=\"nvlog\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSNvlog\",resource=\"nvlog\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}} ", "refId": "A" @@ -4278,7 +4278,7 @@ "targets": [ { "exemplar": false, - "expr": "topk($TopResources, qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSDisk\",resource=\"disk\"})", + "expr": "topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$TopVolumeQOSDisk\",resource=\"disk\",volume!=\"\"})", "interval": "", "legendFormat": "{{svm}} / {{workload}} / {{volume}}", "refId": "A" @@ -6253,7 +6253,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, 
avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6263,7 +6263,7 @@ "name": "TopVolumeQOSfrontend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"frontend\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6276,7 +6276,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6286,7 +6286,7 @@ "name": "TopVolumeQOSNetwork", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"network\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6299,7 +6299,7 @@ "allValue": null, 
"current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6309,7 +6309,7 @@ "name": "TopVolumeQOSCluster", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cluster\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6322,7 +6322,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6332,7 +6332,7 @@ "name": "TopVolumeQOSDisk", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "query": 
"query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"disk\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6345,7 +6345,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6355,7 +6355,7 @@ "name": "TopVolumeQOSSuspend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"suspend\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6368,7 +6368,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ 
-6378,7 +6378,7 @@ "name": "TopVolumeQOSCloud", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cloud\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6391,7 +6391,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6401,7 +6401,7 @@ "name": "TopVolumeQOSNvlog", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"nvlog\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6414,7 +6414,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, 
avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6424,7 +6424,7 @@ "name": "TopVolumeQOSThrottle", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"throttle\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6437,7 +6437,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6447,7 +6447,7 @@ "name": "TopVolumeQOSBackend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "query": "query_result(topk($TopResources, 
avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"backend\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6460,7 +6460,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6470,7 +6470,7 @@ "name": "TopVolumeQOSReadLatency", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6483,7 +6483,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6493,7 +6493,7 @@ "name": "TopVolumeQOSReadData", "options": [], "query": { - "query": "query_result(topk($TopResources, 
avg_over_time(qos_volume_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6506,7 +6506,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6516,7 +6516,7 @@ "name": "TopVolumeQOSReadOps", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6529,7 +6529,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6539,7 +6539,7 @@ "name": "TopVolumeQOSWriteLatency", "options": [], "query": { - 
"query": "query_result(topk($TopResources, avg_over_time(qos_volume_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6552,7 +6552,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6562,7 +6562,7 @@ "name": "TopVolumeQOSWriteData", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6575,7 +6575,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6585,7 +6585,7 @@ "name": 
"TopVolumeQOSWriteOps", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6598,7 +6598,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6608,7 +6608,7 @@ "name": "TopVolumeQOSSequentialReads", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_sequential_reads{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6621,7 +6621,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_volume_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "definition": "query_result(topk($TopResources, 
avg_over_time(qos_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6631,7 +6631,7 @@ "name": "TopVolumeQOSSequentialWrites", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_volume_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_sequential_writes{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6644,7 +6644,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6654,7 +6654,7 @@ "name": "TopVolumeQOSMin", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"qos_min\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -6667,7 +6667,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": 
"query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\",volume!=\"\"}[${__range}])))", "description": null, "error": null, "hide": 2, @@ -6677,7 +6677,7 @@ "name": "TopVolumeQOSCP", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",svm=~\"$SVM\",volume=~\"$Volume\",resource=\"cp\",volume!=\"\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/grafana/dashboards/cmode/workload.json b/grafana/dashboards/cmode/workload.json index b10bb54f5..df590395d 100644 --- a/grafana/dashboards/cmode/workload.json +++ b/grafana/dashboards/cmode/workload.json @@ -59,7 +59,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1679501540219, + "iteration": 1690889588293, "links": [ { "asDropdown": true, @@ -94,14 +94,14 @@ { "datasource": "${DS_PROMETHEUS}", "gridPos": { - "h": 4, + "h": 6, "w": 24, "x": 0, "y": 1 }, "id": 107, "options": { - "content": "This dashboard requires Workload/QOS counter templates. They are disabled by default because they typically slow down data collection due to a high number of metrics. Two actions are required to use this dashboard:
\n1. If you are using the ZapiPerf collector, then you must enable the Workload/QOS counters in $HARVEST/conf/zapiperf/default.yaml.
\n2. If you are using the RestPerf Collector, then you must enable the Workload/QOS counters in $HARVEST/conf/restperf/default.yaml.
\n\nMore information about [ONTAP Performance](https://kb.netapp.com/Advice_and_Troubleshooting/Data_Storage_Software/ONTAP_OS/ONTAP_9_Performance_-_Resolution_Guide).", + "content": "This dashboard requires Workload/QOS counter templates. They are disabled by default because they typically slow down data collection due to a high number of metrics. The following actions are required to use this dashboard:
\n1. If you are using the ZapiPerf collector, then you must enable the Workload/QOS counters in $HARVEST/conf/zapiperf/default.yaml.
\n2. If you are using the RestPerf Collector, then you must enable the Workload/QOS counters in $HARVEST/conf/restperf/default.yaml.
\n3. To access the panels under `Highlights` and `Workload Read IO Type`, please enable the `workload.yaml` and `workload_volume.yaml` templates. If you're interested in viewing panels within the `Service Center` and `Latency Breakdown`, enable the `workload_detail.yaml` and `workload_detail_volume.yaml` templates.\n\n\nMore information about [ONTAP Performance](https://kb.netapp.com/Advice_and_Troubleshooting/Data_Storage_Software/ONTAP_OS/ONTAP_9_Performance_-_Resolution_Guide).", "mode": "markdown" }, "pluginVersion": "8.1.8", @@ -109,20 +109,21 @@ }, { "collapsed": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 7 }, - "id": 123, + "id": 161, "panels": [], - "title": "Latency", + "title": "Highlights", "type": "row" }, { "datasource": "${DS_PROMETHEUS}", + "description": "This is the rate of this workload's read operations that completed during the measurement interval.", "fieldConfig": { "defaults": { "color": { @@ -156,112 +157,8 @@ "mode": "off" } }, - "decimals": 2, "mappings": [], "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "µs" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 103, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ - { - "exemplar": false, - "expr": "(avg(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\"}) by (resource))", - "interval": "", - "legendFormat": "{{resource}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Latency by Resources", - "type": "timeseries" - }, - { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": 
{ - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, - "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -271,7 +168,7 @@ } ] }, - "unit": "percent" + "unit": "iops" }, "overrides": [] }, @@ -279,9 +176,9 @@ "h": 10, "w": 12, "x": 0, - "y": 16 + "y": 8 }, - "id": 104, + "id": 163, "options": { "legend": { "calcs": [ @@ -300,8 +197,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSBackend\",resource=\"backend\"}) by (workload)/ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSBackend\",resource=\"backend\"}))", - "instant": false, + "expr": "topk($TopResources, qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadOps\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -309,11 +205,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from backend", + "title": "Top $TopResources Workloads by Read IOPS", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", + "description": "This is the workload's write operations that completed 
during the measurement interval; measured per second.", "fieldConfig": { "defaults": { "color": { @@ -347,20 +244,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -371,7 +255,7 @@ } ] }, - "unit": "percent" + "unit": "iops" }, "overrides": [] }, @@ -379,9 +263,9 @@ "h": 10, "w": 12, "x": 12, - "y": 16 + "y": 8 }, - "id": 105, + "id": 164, "options": { "legend": { "calcs": [ @@ -400,8 +284,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFrontend\",resource=\"frontend\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFrontend\",resource=\"frontend\"})", - "instant": false, + "expr": "topk($TopResources, qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSWriteOps\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -409,12 +292,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from frontend", + "title": "Top $TopResources Workloads by Write IOPS", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", - "description": "Represents the cables and adapters with which clustered nodes are physically connected. 
If the cluster interconnect component is in contention, it means high wait time for I/O requests at the cluster interconnect is impacting the latency of one or more workloads.", + "description": "This is the workload's other operations that completed during the measurement interval measured per second.", "fieldConfig": { "defaults": { "color": { @@ -448,20 +331,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -469,14 +339,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "iops" }, "overrides": [] }, @@ -484,9 +350,9 @@ "h": 10, "w": 12, "x": 0, - "y": 26 + "y": 18 }, - "id": 110, + "id": 166, "options": { "legend": { "calcs": [ @@ -505,8 +371,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCluster\",resource=\"cluster\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCluster\",resource=\"cluster\"})", - "instant": false, + "expr": "topk($TopResources, qos_other_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSOtherOps\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -514,11 +379,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from cluster", + "title": "Top $TopResources Workloads by Other IOPS", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", + "description": "Workload operations executed per second.", "fieldConfig": { "defaults": { "color": { @@ -552,20 +418,7 @@ "mode": "off" } }, - 
"decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -573,14 +426,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "iops" }, "overrides": [] }, @@ -588,9 +437,9 @@ "h": 10, "w": 12, "x": 12, - "y": 26 + "y": 18 }, - "id": 111, + "id": 165, "options": { "legend": { "calcs": [ @@ -609,8 +458,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCP\",resource=\"cp\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCP\",resource=\"cp\"})", - "instant": false, + "expr": "topk($TopResources, qos_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSOps\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -618,11 +466,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from cp", + "title": "Top $TopResources Workloads by Total IOPS", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", + "description": "This is the amount of data read per second from the filer by the workload.", "fieldConfig": { "defaults": { "color": { @@ -656,20 +505,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -677,14 +513,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "Bps" }, "overrides": [] }, @@ 
-692,9 +524,9 @@ "h": 10, "w": 12, "x": 0, - "y": 36 + "y": 28 }, - "id": 114, + "id": 167, "options": { "legend": { "calcs": [ @@ -713,8 +545,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSDisk\",resource=\"disk\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSDisk\",resource=\"disk\"})", - "instant": false, + "expr": "topk($TopResources, qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadData\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -722,12 +553,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from disk", + "title": "Top $TopResources Workloads by Read Throughput", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", - "description": "Represents the wait time of I/O requests by the external networking protocols on the cluster. The wait time is time spent waiting for transfer ready transactions to finish before the cluster can respond to an I/O request. 
If the network component is in contention, it means high wait time at the protocol layer is impacting the latency of one or more workloads.", + "description": "This is the amount of data written per second to the filer by the workload.", "fieldConfig": { "defaults": { "color": { @@ -761,20 +592,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -782,14 +600,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "Bps" }, "overrides": [] }, @@ -797,9 +611,9 @@ "h": 10, "w": 12, "x": 12, - "y": 36 + "y": 28 }, - "id": 108, + "id": 168, "options": { "legend": { "calcs": [ @@ -818,8 +632,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSNetwork\",resource=\"network\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSNetwork\",resource=\"network\"})", - "instant": false, + "expr": "topk($TopResources, qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSwriteData\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -827,11 +640,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from network", + "title": "Top $TopResources Workloads by Write Throughput", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", + "description": "This is the average response time for read requests that were initiated by the workload.", "fieldConfig": { "defaults": { "color": { @@ -865,20 +679,7 @@ "mode": "off" } }, - "decimals": 1, - 
"mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -886,14 +687,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "µs" }, "overrides": [] }, @@ -901,9 +698,9 @@ "h": 10, "w": 12, "x": 0, - "y": 46 + "y": 38 }, - "id": 116, + "id": 169, "options": { "legend": { "calcs": [ @@ -922,8 +719,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSNVLog\",resource=\"nvlog\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSNVLog\",resource=\"nvlog\"})", - "instant": false, + "expr": "topk($TopResources, qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadLatency\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -931,11 +727,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from nvlog", + "title": "Top $TopResources Workloads by Average Read Latency", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", + "description": "This is the average response time for write requests that were initiated by the workload.", "fieldConfig": { "defaults": { "color": { @@ -969,20 +766,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -990,14 +774,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + 
"unit": "µs" }, "overrides": [] }, @@ -1005,9 +785,9 @@ "h": 10, "w": 12, "x": 12, - "y": 46 + "y": 38 }, - "id": 112, + "id": 171, "options": { "legend": { "calcs": [ @@ -1026,8 +806,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSSuspend\",resource=\"suspend\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSSuspend\",resource=\"suspend\"})", - "instant": false, + "expr": "topk($TopResources, qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSWriteLatency\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -1035,12 +814,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from WAFL suspend", + "title": "Top $TopResources Workloads by Average Write Latency", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", - "description": "Represents the throughput maximum (peak) setting of the storage Quality of Service (QoS) policy group assigned to the workload. 
If the policy group component is in contention, it means all workloads in the policy group are being throttled by the set throughput limit, which is impacting the latency of one or more of those workloads.", + "description": "This is the average response time for requests that were initiated by the workload.", "fieldConfig": { "defaults": { "color": { @@ -1074,20 +853,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -1095,14 +861,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "µs" }, "overrides": [] }, @@ -1110,9 +872,9 @@ "h": 10, "w": 12, "x": 0, - "y": 56 + "y": 48 }, - "id": 109, + "id": 170, "options": { "legend": { "calcs": [ @@ -1131,8 +893,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSThrottle\",resource=\"throttle\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSThrottle\",resource=\"throttle\"})", - "instant": false, + "expr": "topk($TopResources, qos_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSLatency\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -1140,12 +901,12 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from Qos throttle", + "title": "Top $TopResources Workloads by Average Latency", "type": "timeseries" }, { "datasource": "${DS_PROMETHEUS}", - "description": "Represents the latency to a workload that is being caused by QoS throughput minimum (expected) setting assigned 
to other workloads. If the QoS minimum set on certain workloads use the majority of the bandwidth to guarantee the promised throughput, other workloads will be throttled and see more latency.", + "description": "This is the average number of concurrent requests for the workload.", "fieldConfig": { "defaults": { "color": { @@ -1179,20 +940,7 @@ "mode": "off" } }, - "decimals": 1, - "mappings": [ - { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" - } - }, - "type": "special" - } - ], - "max": 100, + "mappings": [], "min": 0, "thresholds": { "mode": "absolute", @@ -1200,14 +948,10 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "percent" + "unit": "locale" }, "overrides": [] }, @@ -1215,9 +959,9 @@ "h": 10, "w": 12, "x": 12, - "y": 56 + "y": 48 }, - "id": 113, + "id": 173, "options": { "legend": { "calcs": [ @@ -1236,8 +980,7 @@ "targets": [ { "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSQosMin\",resource=\"qos_min\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSQosMin\",resource=\"qos_min\"})", - "instant": false, + "expr": "topk($TopResources, qos_concurrency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSConcurrency\"})", "interval": "", "legendFormat": "{{workload}}", "refId": "A" @@ -1245,879 +988,5034 @@ ], "timeFrom": null, "timeShift": null, - "title": "Top $TopResources Workloads by Latency from qos_min", + "title": "Top $TopResources Workloads by Concurrency", "type": "timeseries" }, { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + 
"x": 0, + "y": 58 + }, + "id": 175, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from bamboo_ssd component", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 58 + }, + "id": 172, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeBamboo\", metric=\"bamboo_ssd\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type bamboo_ssd", + "type": "timeseries" + }, + { + 
"datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from cache", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 58 + }, + "id": 176, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeCache\", metric=\"cache\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type cache", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from cloud", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "type": "special" + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 68 + }, + "id": 177, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeCloud\", metric=\"cloud\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type cloud", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from cloud_s2c", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + 
}, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 68 }, - "unit": "percent" + "id": 178, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeClouds2c\", metric=\"cloud_s2c\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type cloud_s2c", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 66 - }, - "id": 118, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from disk", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, 
+ "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 78 + }, + "id": 179, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeDisk\", metric=\"disk\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type disk", + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFlexcacheSpinhi\",resource=\"flexcache_spinhi\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFlexcacheSpinhi\",resource=\"flexcache_spinhi\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from flexcache_spinhi", - "type": "timeseries" - }, - { - "datasource": "${DS_PROMETHEUS}", - "description": "Represents the software component in the cluster involved with I/O processing between the cluster and the cloud tier on which user data is stored. 
If the cloud latency component is in contention, it means that a large amount of reads from volumes that are hosted on the cloud tier are impacting the latency of one or more workloads.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from ext_cache", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 78 + }, + "id": 180, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeExtCache\", metric=\"ext_cache\"})", + "interval": "", + 
"legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type ext_cache", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from fc_miss", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 88 + }, + "id": 181, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeFcmiss\", metric=\"fc_miss\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type fc_miss", + "type": 
"timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from hya_cache", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "type": "special" + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 88 + }, + "id": 182, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeHyaCache\", metric=\"hya_cache\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type hya_cache", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from hya_hdd", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { 
+ "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 66 - }, - "id": 115, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 98 + }, + "id": 183, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeHyahdd\", metric=\"hya_hdd\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type hya_hdd", + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCloud\",resource=\"cloud\"}) by (workload)) / ignoring(workload) group_left 
sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCloud\",resource=\"cloud\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" + "datasource": "${DS_PROMETHEUS}", + "description": "This is the percentage of read requests served from hya_non_cache", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 98 + }, + "id": 184, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "topk($TopResources, qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSReadIOTypeHyaNon\", metric=\"hya_non_cache\"})", + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Read IO Type hya_non_cache", + "type": "timeseries" } ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from cloud", - "type": "timeseries" + "title": "Read 
IO Type", + "type": "row" }, { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 186, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "µs" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 187, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "(avg(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}) by (resource))", + "interval": "", + "legendFormat": "{{resource}}", + "refId": "A" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Service Latency by Resources", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the delays in the network layer of ONTAP.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 189, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "100 * topk($TopResources, (qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceFrontend\",resource=\"frontend\"}/on() group_left 
sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"frontend\"})))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from frontend", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the delays in the data/WAFL layer of ONTAP.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] }, - "type": "special" + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 191, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceBackend\",resource=\"backend\"}/on() group_left 
sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"backend\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from backend", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays caused by the cluster switches, cables, and adapters which physically connect clustered nodes. \n\nIf the cluster interconnect component is in contention, it means high wait time for I/O requests at the cluster interconnect is impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 80 + }, + "id": 193, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + 
"placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "percent" + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceCluster\",resource=\"cluster\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cluster\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from cluster", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 76 - }, - "id": 120, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to buffered write flushes, called consistency points (cp).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + 
"x": 12, + "y": 80 + }, + "id": 195, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceCP\",resource=\"cp\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cp\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from cp", + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCOP\",resource=\"cop\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSCOP\",resource=\"cop\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from cop", - "type": "timeseries" - }, - { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "datasource": "${DS_PROMETHEUS}", + "description": "Represents slowness due to attached hard drives or solid state drives.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 
0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 90 + }, + "id": 197, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceDisk\",resource=\"disk\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"disk\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from disk", + "type": "timeseries" + }, + { + "datasource": 
"${DS_PROMETHEUS}", + "description": "`Note:` Typically these latencies only apply to SAN not NAS.\n\nRepresents the wait time of I/O requests by the external networking protocols on the cluster. The wait time is time spent waiting for transfer ready transactions to finish before the cluster can respond to an I/O request. If the network component is in contention, it means high wait time at the protocol layer is impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 90 + }, + "id": 199, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "100 * 
topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceNetwork\",resource=\"network\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"network\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from network", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to mirroring writes to the NVRAM/NVLOG memory and to the HA partner NVRAM/NVLOG memory.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "type": "special" + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 100 + }, + "id": 201, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * 
topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceNVLog\",resource=\"nvlog\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"nvlog\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from nvlog", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to operations suspending on a delay mechanism. Typically this is diagnosed by NetApp Support.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 76 - }, - "id": 117, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + 
"gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 100 + }, + "id": 203, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceSuspend\",resource=\"suspend\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"suspend\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from WAFL suspend", + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFlexcacheRAL\",resource=\"flexcache_ral\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSFlexcacheRAL\",resource=\"flexcache_ral\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from flexcache_ral", - "type": "timeseries" - }, - { - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the throughput maximum (ceiling) setting of the storage Quality of Service (QoS) policy group assigned to the 
workload. If the policy group component is in contention, it means all workloads in the policy group are being throttled by the set throughput limit, which is impacting the latency of one or more of those workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 110 + }, + "id": 205, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, 
qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceThrottle\",resource=\"throttle\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"throttle\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from QoS throttle", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the latency to a workload that is being caused by the QoS throughput floor (expected) setting assigned to other workloads. If the QoS floor set on certain workloads uses the majority of the bandwidth to guarantee the promised throughput, other workloads will be throttled and see more latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 110 + }, + "id": 207, +
"options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceQosMin\",resource=\"qos_min\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"qos_min\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from qos_min", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the software component in the cluster involved with I/O processing between the cluster and the cloud tier on which user data is stored. 
If the cloud latency component is in contention, it means that a large amount of reads from volumes that are hosted on the cloud tier are impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - "type": "special" + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 120 + }, + "id": 209, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceCloud\",resource=\"cloud\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cloud\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + 
"timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from cloud", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 120 + }, + "id": 211, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "percent" + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceSyncRepl\",resource=\"sync_repl\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"sync_repl\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by 
Service Time from sync_repl", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 86 - }, - "id": 119, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 130 + }, + "id": 213, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceFlexcacheRal\",resource=\"flexcache_ral\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"flexcache_ral\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } ], - "displayMode": "table", - 
"placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from flexcache_ral", + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.8", - "targets": [ { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSSyncRepl\",resource=\"sync_repl\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSSyncRepl\",resource=\"sync_repl\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 130 + }, + "id": 215, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSServiceFlexcacheSpinhi\",resource=\"flexcache_spinhi\"}/on() group_left sum(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"flexcache_spinhi\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Service Time from flexcache_spinhi", + "type": "timeseries" } ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from sync_repl", - "type": "timeseries" + "title": "Service Center", + "type": "row" }, { + "collapsed": true, "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 127, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "µs" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 129, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "(avg(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}) by (resource))", + "interval": "", + "legendFormat": "{{resource}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Latency by Resources", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the delays in the network layer of ONTAP.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" }, - "showPoints": "auto", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" 
+ "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 12 + }, + "id": 133, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, - "decimals": 1, - "mappings": [ + "pluginVersion": "8.1.8", + "targets": [ { - "options": { - "match": "null+nan", - "result": { - "index": 0, - "text": "0%" + "exemplar": false, + "expr": "100 * topk($TopResources, (qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSFrontend\",resource=\"frontend\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"frontend\"})))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from frontend", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the delays in the data/WAFL layer of ONTAP.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] }, - "type": 
"special" + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 131, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSBackend\",resource=\"backend\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"backend\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" } ], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from backend", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays caused by the cluster switches, cables, and adapters which physically connect clustered nodes. 
\n\nIf the cluster interconnect component is in contention, it means high wait time for I/O requests at the cluster interconnect is impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 135, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSCluster\",resource=\"cluster\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cluster\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from cluster", + "type": 
"timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to buffered write flushes, called consistency points (cp).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 137, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSCP\",resource=\"cp\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cp\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from cp", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": 
"Represents slowness due to attached hard drives or solid state drives.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 139, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSDisk\",resource=\"disk\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"disk\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from disk", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "`Note:` Typically these latencies only apply to SAN not NAS.\n\nRepresents the wait 
time of I/O requests by the external networking protocols on the cluster. The wait time is time spent waiting for transfer ready transactions to finish before the cluster can respond to an I/O request. If the network component is in contention, it means high wait time at the protocol layer is impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 141, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSNetwork\",resource=\"network\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"network\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + 
], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from network", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to mirroring writes to the NVRAM/NVLOG memory and to the HA partner NVRAM/NVLOG memory.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 143, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSNVLog\",resource=\"nvlog\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"nvlog\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": 
null, + "title": "Top $TopResources Workloads by Latency from nvlog", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents delays due to operations suspending on a delay mechanism. Typically this is diagnosed by NetApp Support.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 145, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSSuspend\",resource=\"suspend\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"suspend\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top 
$TopResources Workloads by Latency from WAFL suspend", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the throughput maximum (ceiling) setting of the storage Quality of Service (QoS) policy group assigned to the workload. If the policy group component is in contention, it means all workloads in the policy group are being throttled by the set throughput limit, which is impacting the latency of one or more of those workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 147, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSThrottle\",resource=\"throttle\"}/on() group_left 
sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"throttle\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from Qos throttle", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the latency to a workload that is being caused by the QoS throughput floor (expected) setting assigned to other workloads. If the QoS floor set on certain workloads uses the majority of the bandwidth to guarantee the promised throughput, other workloads will be throttled and see more latency.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 149, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "percent" + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, +
"expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSQosMin\",resource=\"qos_min\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"qos_min\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from qos_min", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Represents the software component in the cluster involved with I/O processing between the cluster and the cloud tier on which user data is stored. If the cloud latency component is in contention, it means that a large amount of reads from volumes that are hosted on the cloud tier are impacting the latency of one or more workloads.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 62 + }, + "id": 153, + "options": { + "legend": { + "calcs": [ + "mean", + 
"lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSCloud\",resource=\"cloud\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"cloud\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from cloud", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 62 + }, + "id": 159, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { 
+ "exemplar": false, + "expr": "100 * topk($TopResources, qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSSyncRepl\",resource=\"sync_repl\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"sync_repl\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from sync_repl", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 72 + }, + "id": 157, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, 
qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSFlexcacheRal\",resource=\"flexcache_ral\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"flexcache_ral\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from flexcache_ral", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 1, + "mappings": [ + { + "options": { + "match": "null+nan", + "result": { + "index": 0, + "text": "0%" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 72 + }, + "id": 151, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "100 * topk($TopResources, 
qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$TopQOSFlexcacheSpinhi\",resource=\"flexcache_spinhi\"}/on() group_left sum(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",resource=\"flexcache_spinhi\"}))", + "instant": false, + "interval": "", + "legendFormat": "{{workload}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Top $TopResources Workloads by Latency from flexcache_spinhi", + "type": "timeseries" + } + ], + "title": "Latency Breakdown", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "harvest", + "ontap", + "cdot" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": "Data Source", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(volume_labels{system_type!=\"7mode\"}, datacenter)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "Datacenter", + "options": [], + "query": { + "query": "label_values(volume_labels{system_type!=\"7mode\"}, datacenter)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(volume_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"}, cluster)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": true, + "name": "Cluster", + 
"options": [], + "query": { + "query": "label_values(volume_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"}, cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Workload Class", + "multi": true, + "name": "WorkloadClass", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "user_defined", + "value": "user_defined" + }, + { + "selected": false, + "text": "system_defined", + "value": "system_defined" + }, + { + "selected": false, + "text": "autovolume", + "value": "autovolume" + } + ], + "query": "user_defined,system_defined,autovolume", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(qos_workload_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\", class=~\"$WorkloadClass\"}, workload)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Workload", + "options": [], + "query": { + "query": "label_values(qos_workload_labels{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\", class=~\"$WorkloadClass\"}, workload)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "5", + "value": "5" + }, + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "TopResources", + "options": [ 
+ { + "selected": false, + "text": "1", + "value": "1" + }, + { + "selected": false, + "text": "2", + "value": "2" + }, + { + "selected": false, + "text": "3", + "value": "3" + }, + { + "selected": false, + "text": "4", + "value": "4" + }, + { + "selected": true, + "text": "5", + "value": "5" + }, + { + "selected": false, + "text": "6", + "value": "6" + }, + { + "selected": false, + "text": "8", + "value": "8" + }, + { + "selected": false, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "15", + "value": "15" + }, + { + "selected": false, + "text": "25", + "value": "25" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + }, + { + "selected": false, + "text": "250", + "value": "250" + }, + { + "selected": false, + "text": "500", + "value": "500" + } + ], + "query": "1,2,3,4,5,6,8,10,15,25,50,100,250,500", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"backend\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSBackend", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"backend\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, 
avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"frontend\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSFrontend", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"frontend\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"throttle\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSThrottle", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"throttle\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"network\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSNetwork", + "options": [], + "query": { + "query": "query_result(topk($TopResources, 
avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"network\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cluster\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSCluster", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cluster\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cp\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSCP", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cp\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": 
"query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"qos_min\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSQosMin", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"qos_min\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"suspend\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSSuspend", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"suspend\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"disk\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSDisk", + "options": [], + "query": { + "query": 
"query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"disk\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cloud\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSCloud", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cloud\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"nvlog\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSNVLog", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"nvlog\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": 
"${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_ral\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSFlexcacheRal", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_ral\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_spinhi\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSFlexcacheSpinhi", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_spinhi\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"sync_repl\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": 
"TopQOSSyncRepl", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"sync_repl\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadOps", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSWriteOps", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_write_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": 
"query_result(topk($TopResources, avg_over_time(qos_other_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSOtherOps", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_other_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSOps", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_ops{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadData", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + 
"regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSwriteData", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_write_data{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadLatency", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSWriteLatency", + "options": [], + 
"query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_write_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSLatency", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_concurrency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSConcurrency", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_concurrency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}[${__range}])))", + "refId": "StandardVariableQuery" }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 86 + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" }, - "id": 121, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": 
"table", - "placement": "bottom" + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"bamboo_ssd\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadIOTypeBamboo", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"bamboo_ssd\"}[${__range}])))", + "refId": "StandardVariableQuery" }, - "tooltip": { - "mode": "single" - } + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" }, - "pluginVersion": "8.1.8", - "targets": [ - { - "exemplar": false, - "expr": "100 * topk($TopResources, sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSWaflAdmission\",resource=\"admission\"}) by (workload)) / ignoring(workload) group_left sum(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$TopVolumeQOSWaflAdmission\",resource=\"admission\"})", - "instant": false, - "interval": "", - "legendFormat": "{{workload}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Top $TopResources Workloads by Latency from admission", - "type": "timeseries" - } - ], - "refresh": "", - "schemaVersion": 30, - "style": "dark", - "tags": [ - "harvest", - "ontap", - "cdot" - ], - "templating": { - "list": [ { - "current": { - "selected": false, - "text": "Prometheus", - "value": "Prometheus" - }, + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, 
avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cache\"}[${__range}])))", "description": null, "error": null, "hide": 2, - "includeAll": false, - "label": "Data Source", - "multi": false, - "name": "DS_PROMETHEUS", + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadIOTypeCache", "options": [], - "query": "prometheus", + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cache\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, "refresh": 2, - "regex": "", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "type": "datasource" + "sort": 0, + "type": "query" }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(volume_labels{system_type!=\"7mode\"}, datacenter)", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cloud\"}[${__range}])))", "description": null, "error": null, - "hide": 0, - "includeAll": false, + "hide": 2, + "includeAll": true, "label": null, "multi": true, - "name": "Datacenter", + "name": "TopQOSReadIOTypeCloud", "options": [], "query": { - "query": "label_values(volume_labels{system_type!=\"7mode\"}, datacenter)", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cloud\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 0, + "type": "query" }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": 
"label_values(volume_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"}, cluster)", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cloud_s2c\"}[${__range}])))", "description": null, "error": null, - "hide": 0, - "includeAll": false, + "hide": 2, + "includeAll": true, "label": null, "multi": true, - "name": "Cluster", + "name": "TopQOSReadIOTypeClouds2c", "options": [], "query": { - "query": "label_values(volume_labels{system_type!=\"7mode\",datacenter=~\"$Datacenter\"}, cluster)", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"cloud_s2c\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 0, + "type": "query" }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}, workload)", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"disk\"}[${__range}])))", "description": null, "error": null, - "hide": 0, + "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "Workload", + "name": "TopQOSReadIOTypeDisk", "options": [], "query": { - "query": "label_values(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}, workload)", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"disk\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - 
"regex": "", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 0, + "type": "query" }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}, volume)", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"ext_cache\"}[${__range}])))", "description": null, "error": null, - "hide": 0, + "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "Volume", + "name": "TopQOSReadIOTypeExtCache", "options": [], "query": { - "query": "label_values(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\"}, volume)", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"ext_cache\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": "", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 0, + "type": "query" }, { "allValue": null, - "current": { - "selected": false, - "text": "5", - "value": "5" + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"fc_miss\"}[${__range}])))", + "description": null, + "error": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": true, + "name": "TopQOSReadIOTypeFcmiss", + "options": [], + "query": { + "query": "query_result(topk($TopResources, 
avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"fc_miss\"}[${__range}])))", + "refId": "StandardVariableQuery" }, + "refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_cache\"}[${__range}])))", "description": null, "error": null, - "hide": 0, - "includeAll": false, + "hide": 2, + "includeAll": true, "label": null, - "multi": false, - "name": "TopResources", - "options": [ - { - "selected": false, - "text": "1", - "value": "1" - }, - { - "selected": false, - "text": "2", - "value": "2" - }, - { - "selected": false, - "text": "3", - "value": "3" - }, - { - "selected": false, - "text": "4", - "value": "4" - }, - { - "selected": true, - "text": "5", - "value": "5" - }, - { - "selected": false, - "text": "6", - "value": "6" - }, - { - "selected": false, - "text": "8", - "value": "8" - }, - { - "selected": false, - "text": "10", - "value": "10" - }, - { - "selected": false, - "text": "15", - "value": "15" - }, - { - "selected": false, - "text": "25", - "value": "25" - }, - { - "selected": false, - "text": "50", - "value": "50" - }, - { - "selected": false, - "text": "100", - "value": "100" - }, - { - "selected": false, - "text": "250", - "value": "250" - }, - { - "selected": false, - "text": "500", - "value": "500" - } - ], - "query": "1,2,3,4,5,6,8,10,15,25,50,100,250,500", - "queryValue": "", + "multi": true, + "name": "TopQOSReadIOTypeHyaCache", + "options": [], + "query": { + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_cache\"}[${__range}])))", + "refId": "StandardVariableQuery" + }, + 
"refresh": 2, + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, - "type": "custom" + "sort": 0, + "type": "query" }, { "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_hdd\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSBackend", + "name": "TopQOSReadIOTypeHyahdd", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"backend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_hdd\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2126,21 +6024,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_non_cache\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, 
"label": null, "multi": true, - "name": "TopVolumeQOSFrontend", + "name": "TopQOSReadIOTypeHyaNon", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"frontend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_read_io_type{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",metric=\"hya_non_cache\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2149,21 +6047,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"backend\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSThrottle", + "name": "TopQOSServiceBackend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"throttle\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"backend\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": 
".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2172,21 +6070,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"frontend\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSNetwork", + "name": "TopQOSServiceFrontend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"network\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"frontend\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2195,21 +6093,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"throttle\"}[${__range}])))", "description": null, "error": null, "hide": 2, 
"includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSCluster", + "name": "TopQOSServiceThrottle", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cluster\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"throttle\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2218,21 +6116,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"network\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSCP", + "name": "TopQOSServiceNetwork", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cp\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"network\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": 
".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2241,21 +6139,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cluster\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSQosMin", + "name": "TopQOSServiceCluster", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"qos_min\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cluster\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2264,21 +6162,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cp\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": 
true, "label": null, "multi": true, - "name": "TopVolumeQOSSuspend", + "name": "TopQOSServiceCP", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"suspend\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cp\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2287,21 +6185,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"qos_min\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSDisk", + "name": "TopQOSServiceQosMin", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"disk\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"qos_min\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": 
".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2310,21 +6208,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"suspend\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSCloud", + "name": "TopQOSServiceSuspend", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cloud\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"suspend\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2333,21 +6231,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"disk\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, 
"label": null, "multi": true, - "name": "TopVolumeQOSNVLog", + "name": "TopQOSServiceDisk", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"nvlog\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"disk\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2356,21 +6254,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"flexcache_ral\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cloud\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSFlexcacheRAL", + "name": "TopQOSServiceCloud", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"flexcache_ral\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"cloud\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": 
".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2379,21 +6277,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"flexcache_spinhi\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"nvlog\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSFlexcacheSpinhi", + "name": "TopQOSServiceNVLog", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"flexcache_spinhi\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"nvlog\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2402,21 +6300,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"sync_repl\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_ral\"}[${__range}])))", "description": null, 
"error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSSyncRepl", + "name": "TopQOSServiceFlexcacheRal", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"sync_repl\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_ral\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2425,21 +6323,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cop\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_spinhi\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSCOP", + "name": "TopQOSServiceFlexcacheSpinhi", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"cop\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"flexcache_spinhi\"}[${__range}])))", "refId": "StandardVariableQuery" }, 
"refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2448,21 +6346,21 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"admission\"}[${__range}])))", + "definition": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"sync_repl\"}[${__range}])))", "description": null, "error": null, "hide": 2, "includeAll": true, "label": null, "multi": true, - "name": "TopVolumeQOSWaflAdmission", + "name": "TopQOSServiceSyncRepl", "options": [], "query": { - "query": "query_result(topk($TopResources, avg_over_time(qos_detail_volume_resource_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",volume=~\"$Volume\",resource=\"admission\"}[${__range}])))", + "query": "query_result(topk($TopResources, avg_over_time(qos_detail_service_time_latency{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\",workload=~\"$Workload\",resource=\"sync_repl\"}[${__range}])))", "refId": "StandardVariableQuery" }, "refresh": 2, - "regex": ".*volume=\\\"(.*?)\\\".*", + "regex": ".*workload=\\\"(.*?)\\\".*", "skipUrlSync": false, "sort": 0, "type": "query" @@ -2489,5 +6387,5 @@ "timezone": "", "title": "ONTAP: Workload", "uid": "", - "version": 1 + "version": 2 } diff --git a/integration/test/dashboard_json_test.go b/integration/test/dashboard_json_test.go index 68acf4bde..b23d7b488 100644 --- a/integration/test/dashboard_json_test.go +++ b/integration/test/dashboard_json_test.go @@ -76,7 +76,6 @@ var excludeCounters = []string{ "path_", "poller", "qos_detail_resource_latency", - "qos_detail_volume_resource_latency", "quota_disk_used_pct_disk_limit", 
"quota_files_used_pct_file_limit", "security_login", @@ -130,7 +129,7 @@ func TestJsonExpression(t *testing.T) { now := time.Now() // QoS counters have the longest schedule so check for them before checking for any of the other counters - precheckCounters := []string{"qos_read_data", "qos_volume_read_data"} + precheckCounters := []string{"qos_read_data"} for _, counter := range precheckCounters { if counterIsMissing(rest, counter, 7*time.Minute) { t.Fatalf("rest qos counters not found dur=%s", time.Since(now).Round(time.Millisecond).String())