Skip to content

Commit

Permalink
feat: add workload panels in workload dashboard (#2100)
Browse files Browse the repository at this point in the history
* feat: add workload panels in workload dashboard

* feat: add workload panels in workload dashboard

* feat: workload dashboard

* feat: workload dashboard

* feat: workload dashboard

* feat: workload dashboard

* feat: workload dashboard changes

* feat: workload dashboard changes

* feat: make workload_class configurable

* feat: add workload panels

* feat: add workload panels

* feat: add workload panels

* feat: add workload panels

* feat: add workload panels

* feat: add workload panels

* feat: add workload panels

* feat: add service center view

* feat: add service center view

* feat: add service center view

* feat: add service center view

* feat: add comments for detail workload

* feat: add comments for detail workload

* feat: address review comments

* feat: address review comments

* feat: address review comments

* feat: address review comments

* feat: remove workload class from templates

---------

Co-authored-by: Chris Grindstaff <[email protected]>
  • Loading branch information
rahulguptajss and cgrinds authored Aug 11, 2023
1 parent b81c82d commit 9ff6868
Show file tree
Hide file tree
Showing 21 changed files with 5,369 additions and 1,286 deletions.
190 changes: 131 additions & 59 deletions cmd/collectors/restperf/restperf.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/netapp/harvest/v2/pkg/errs"
"github.com/netapp/harvest/v2/pkg/matrix"
"github.com/netapp/harvest/v2/pkg/set"
"github.com/netapp/harvest/v2/pkg/tree/node"
"github.com/netapp/harvest/v2/pkg/util"
"github.com/tidwall/gjson"
"path"
Expand All @@ -28,9 +29,11 @@ import (
)

const (
latencyIoReqd = 10
BILLION = 1_000_000_000
arrayKeyToken = "#"
latencyIoReqd = 10
BILLION = 1_000_000_000
arrayKeyToken = "#"
objWorkloadClass = "user_defined|system_defined"
objWorkloadVolumeClass = "autovolume"
)

var qosQuery = "api/cluster/counter/tables/qos"
Expand All @@ -39,6 +42,8 @@ var qosDetailQuery = "api/cluster/counter/tables/qos_detail"
var qosDetailVolumeQuery = "api/cluster/counter/tables/qos_detail_volume"
var qosWorkloadQuery = "api/storage/qos/workloads"

var workloadDetailMetrics = []string{"resource_latency", "service_time_latency"}

var qosQueries = map[string]string{
qosQuery: qosQuery,
qosVolumeQuery: qosVolumeQuery,
Expand Down Expand Up @@ -171,6 +176,36 @@ func (r *RestPerf) InitMatrix() error {
return nil
}

// load workload_class or use defaultValue
func (r *RestPerf) loadWorkloadClassQuery(defaultValue string) string {

var x *node.Node

name := "workload_class"

if x = r.Params.GetChildS(name); x != nil {
v := x.GetAllChildContentS()
if len(v) == 0 {
r.Logger.Debug().
Str("name", name).
Str("defaultValue", defaultValue).
Send()
return defaultValue
}
s := strings.Join(v, "|")
r.Logger.Debug().
Str("name", name).
Str("value", s).
Send()
return s
}
r.Logger.Debug().
Str("name", name).
Str("defaultValue", defaultValue).
Send()
return defaultValue
}

// load an int parameter or use defaultValue
func (r *RestPerf) loadParamInt(name string, defaultValue int) int {

Expand Down Expand Up @@ -549,25 +584,27 @@ func (r *RestPerf) processWorkLoadCounter() (map[string]*matrix.Matrix, error) {
return nil, errs.New(errs.ErrMissingParam, "resource_map")
} else {
for _, x := range resourceMap.GetChildren() {
name := x.GetNameS()
resource := x.GetContentS()
for _, wm := range workloadDetailMetrics {
name := x.GetNameS() + wm
resource := x.GetContentS()

if m := mat.GetMetric(name); m != nil {
continue
}
if m, err := mat.NewMetricFloat64(name, "resource_latency"); err != nil {
return nil, err
} else {
r.perfProp.counterInfo[name] = &counter{
name: "resource_latency",
description: "",
counterType: r.perfProp.counterInfo[service.GetName()].counterType,
unit: r.perfProp.counterInfo[service.GetName()].unit,
denominator: "ops",
if m := mat.GetMetric(name); m != nil {
continue
}
m.SetLabel("resource", resource)
if m, err := mat.NewMetricFloat64(name, wm); err != nil {
return nil, err
} else {
r.perfProp.counterInfo[name] = &counter{
name: wm,
description: "",
counterType: r.perfProp.counterInfo[service.GetName()].counterType,
unit: r.perfProp.counterInfo[service.GetName()].unit,
denominator: "ops",
}
m.SetLabel("resource", resource)

r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric")
r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric")
}
}
}
}
Expand Down Expand Up @@ -611,16 +648,15 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {

func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) (map[string]*matrix.Matrix, error) {
var (
count, numRecords uint64
apiD, parseD time.Duration
err error
instanceKeys []string
resourceLatency *matrix.Metric // for workload* objects
skips int
instIndex int
ts float64
prevMat *matrix.Matrix
curMat *matrix.Matrix
count uint64
apiD, parseD time.Duration
err error
instanceKeys []string
skips int
instIndex int
ts float64
prevMat *matrix.Matrix
curMat *matrix.Matrix
)

prevMat = r.Matrix[r.Object]
Expand Down Expand Up @@ -683,12 +719,12 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
}
}

var layer = "" // latency layer (resource) for workloads

// special case for these two objects
// we need to process each latency layer for each instance/counter
if isWorkloadDetailObject(r.Prop.Query) {

layer := "" // latency layer (resource) for workloads

// example instanceKey : umeng-aff300-02:test-wid12022.CPU_dblade
i := strings.Index(instanceKey, ":")
instanceKey = instanceKey[i+1:]
Expand All @@ -703,11 +739,14 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
return true
}

if resourceLatency = curMat.GetMetric(layer); resourceLatency == nil {
r.Logger.Trace().
Str("layer", layer).
Msg("Resource-latency metric missing in cache")
return true
for _, wm := range workloadDetailMetrics {
mLayer := layer + wm
if l := curMat.GetMetric(mLayer); l == nil {
r.Logger.Trace().
Str("layer", layer).
Msg("Resource-latency metric missing in cache")
return true
}
}
}

Expand Down Expand Up @@ -770,27 +809,61 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
if ok {
// special case for workload_detail
if isWorkloadDetailObject(r.Prop.Query) {
if name == "wait_time" || name == "service_time" {
if err := resourceLatency.AddValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add resource-latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add resource-latency")
count++
for _, wm := range workloadDetailMetrics {
// "visits" are ignored. This counter is only used to set properties of ops counter
if name == "visits" {
continue
}
wMetric := curMat.GetMetric(layer + wm)
if wm == "resource_latency" && (name == "wait_time" || name == "service_time") {
if err := wMetric.AddValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add resource_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add resource_latency")
count++
}
continue
} else if wm == "service_time_latency" && name == "service_time" {
if err = wMetric.SetValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add service_time_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add service_time_latency")
count++
}
} else if wm == "wait_time_latency" && name == "wait_time" {
if err = wMetric.SetValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add wait_time_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add wait_time_latency")
count++
}
}
continue
}
// "visits" are ignored. This counter is only used to set properties of ops counter
if name == "visits" {
continue
}
continue
} else {
if f.isArray {
labels := strings.Split(f.label, ",")
Expand Down Expand Up @@ -917,7 +990,6 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
r.Logger.Error().Err(err).Msg("Failed to set timestamp")
}

numRecords += 1
return true
})
}
Expand All @@ -933,7 +1005,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", numRecords)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(curMat.GetInstances())))
r.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
Expand Down Expand Up @@ -1255,9 +1327,9 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) {
fields = "*"
dataQuery = qosWorkloadQuery
if r.Prop.Query == qosVolumeQuery || r.Prop.Query == qosDetailVolumeQuery {
filter = append(filter, "workload-class=autovolume|user_defined|system_defined")
filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadVolumeClass))
} else {
filter = append(filter, "workload-class=user_defined|system_defined")
filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadClass))
}
}

Expand Down
Loading

0 comments on commit 9ff6868

Please sign in to comment.