Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add workload panels in workload dashboard #2100

Merged
merged 35 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
9c12d51
feat: add workload panels in workload dashboard
rahulguptajss May 19, 2023
6443fbe
feat: add workload panels in workload dashboard
rahulguptajss May 19, 2023
b4703ff
Merge branch 'main' into rg2-workload-dash
rahulguptajss May 22, 2023
4d5b123
feat: workload dashboard
rahulguptajss May 23, 2023
a988bee
feat: workload dashboard
rahulguptajss May 23, 2023
104925d
feat: workload dashboard
rahulguptajss May 25, 2023
56cc6ca
feat: workload dashboard
rahulguptajss May 25, 2023
55503b9
Merge branch 'main' into rg2-workload-dash
rahulguptajss May 30, 2023
73c059e
feat: workload dashboard changes
rahulguptajss May 30, 2023
7beda41
feat: workload dashboard changes
rahulguptajss May 30, 2023
ee6c1b6
Merge branch 'main' into rg2-workload-dash
rahulguptajss Jun 13, 2023
5e4c10c
feat: make workload_class configurable
rahulguptajss Jun 14, 2023
fffd388
Merge branch 'main' into rg2-workload-dash
cgrinds Jul 19, 2023
fdaeac1
Merge remote-tracking branch 'origin/main' into rg2-workload-dash
rahulguptajss Jul 26, 2023
fa508c0
feat: add workload panels
rahulguptajss Jul 26, 2023
1f3a7ec
feat: add workload panels
rahulguptajss Jul 26, 2023
61f3af0
feat: add workload panels
rahulguptajss Jul 26, 2023
8d60d3f
feat: add workload panels
rahulguptajss Jul 26, 2023
7347639
feat: add workload panels
rahulguptajss Jul 26, 2023
a4a38cc
feat: add workload panels
rahulguptajss Jul 28, 2023
38adfdb
Merge remote-tracking branch 'origin/main' into rg2-workload-dash
rahulguptajss Jul 28, 2023
9f528e9
feat: add workload panels
rahulguptajss Jul 28, 2023
c0f0c5e
Merge remote-tracking branch 'origin/main' into rg2-workload-dash
rahulguptajss Jul 31, 2023
62307f4
feat: add service center view
rahulguptajss Aug 1, 2023
7486a7c
feat: add service center view
rahulguptajss Aug 1, 2023
2513a1b
feat: add service center view
rahulguptajss Aug 1, 2023
fa095d7
feat: add service center view
rahulguptajss Aug 1, 2023
f35e155
Merge remote-tracking branch 'origin/main' into rg2-workload-dash
rahulguptajss Aug 4, 2023
8e940e9
feat: add comments for detail workload
rahulguptajss Aug 4, 2023
ec74b50
feat: add comments for detail workload
rahulguptajss Aug 8, 2023
62836d9
feat: address review comments
rahulguptajss Aug 9, 2023
9f35b44
feat: address review comments
rahulguptajss Aug 9, 2023
dda803c
feat: address review comments
rahulguptajss Aug 9, 2023
04ee524
feat: address review comments
rahulguptajss Aug 9, 2023
df4c7db
feat: remove workload class from templates
rahulguptajss Aug 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 131 additions & 59 deletions cmd/collectors/restperf/restperf.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/netapp/harvest/v2/pkg/errs"
"github.com/netapp/harvest/v2/pkg/matrix"
"github.com/netapp/harvest/v2/pkg/set"
"github.com/netapp/harvest/v2/pkg/tree/node"
"github.com/netapp/harvest/v2/pkg/util"
"github.com/tidwall/gjson"
"path"
Expand All @@ -28,9 +29,11 @@ import (
)

const (
latencyIoReqd = 10
BILLION = 1_000_000_000
arrayKeyToken = "#"
latencyIoReqd = 10
BILLION = 1_000_000_000
arrayKeyToken = "#"
objWorkloadClass = "user_defined|system_defined"
objWorkloadVolumeClass = "autovolume"
)

var qosQuery = "api/cluster/counter/tables/qos"
Expand All @@ -39,6 +42,8 @@ var qosDetailQuery = "api/cluster/counter/tables/qos_detail"
var qosDetailVolumeQuery = "api/cluster/counter/tables/qos_detail_volume"
var qosWorkloadQuery = "api/storage/qos/workloads"

var workloadDetailMetrics = []string{"resource_latency", "service_time_latency"}

var qosQueries = map[string]string{
qosQuery: qosQuery,
qosVolumeQuery: qosVolumeQuery,
Expand Down Expand Up @@ -171,6 +176,36 @@ func (r *RestPerf) InitMatrix() error {
return nil
}

// loadWorkloadClassQuery builds the value used for the workload_class filter.
// It reads the "workload_class" child of the collector params and joins all of
// its child contents with "|". When the param is absent, or present but without
// any child content, defaultValue is returned instead. The chosen value is
// logged at debug level in either case.
func (r *RestPerf) loadWorkloadClassQuery(defaultValue string) string {
	const name = "workload_class"

	var (
		param   *node.Node
		classes []string
	)

	// Collect the configured classes, if any; a missing param leaves the slice empty.
	if param = r.Params.GetChildS(name); param != nil {
		classes = param.GetAllChildContentS()
	}

	// Nothing configured (param missing or empty): fall back to the default.
	if len(classes) == 0 {
		r.Logger.Debug().
			Str("name", name).
			Str("defaultValue", defaultValue).
			Send()
		return defaultValue
	}

	joined := strings.Join(classes, "|")
	r.Logger.Debug().
		Str("name", name).
		Str("value", joined).
		Send()
	return joined
}

// load an int parameter or use defaultValue
func (r *RestPerf) loadParamInt(name string, defaultValue int) int {

Expand Down Expand Up @@ -549,25 +584,27 @@ func (r *RestPerf) processWorkLoadCounter() (map[string]*matrix.Matrix, error) {
return nil, errs.New(errs.ErrMissingParam, "resource_map")
} else {
for _, x := range resourceMap.GetChildren() {
name := x.GetNameS()
resource := x.GetContentS()
for _, wm := range workloadDetailMetrics {
name := x.GetNameS() + wm
resource := x.GetContentS()

if m := mat.GetMetric(name); m != nil {
continue
}
if m, err := mat.NewMetricFloat64(name, "resource_latency"); err != nil {
return nil, err
} else {
r.perfProp.counterInfo[name] = &counter{
name: "resource_latency",
description: "",
counterType: r.perfProp.counterInfo[service.GetName()].counterType,
unit: r.perfProp.counterInfo[service.GetName()].unit,
denominator: "ops",
if m := mat.GetMetric(name); m != nil {
continue
}
m.SetLabel("resource", resource)
if m, err := mat.NewMetricFloat64(name, wm); err != nil {
return nil, err
} else {
r.perfProp.counterInfo[name] = &counter{
name: wm,
description: "",
counterType: r.perfProp.counterInfo[service.GetName()].counterType,
unit: r.perfProp.counterInfo[service.GetName()].unit,
denominator: "ops",
}
m.SetLabel("resource", resource)

r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric")
r.Logger.Debug().Str("name", name).Str("resource", resource).Msg("added workload latency metric")
}
}
}
}
Expand Down Expand Up @@ -611,16 +648,15 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) {

func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) (map[string]*matrix.Matrix, error) {
var (
count, numRecords uint64
apiD, parseD time.Duration
err error
instanceKeys []string
resourceLatency *matrix.Metric // for workload* objects
skips int
instIndex int
ts float64
prevMat *matrix.Matrix
curMat *matrix.Matrix
count uint64
apiD, parseD time.Duration
err error
instanceKeys []string
skips int
instIndex int
ts float64
prevMat *matrix.Matrix
curMat *matrix.Matrix
)

prevMat = r.Matrix[r.Object]
Expand Down Expand Up @@ -683,12 +719,12 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
}
}

var layer = "" // latency layer (resource) for workloads

// special case for these two objects
// we need to process each latency layer for each instance/counter
if isWorkloadDetailObject(r.Prop.Query) {

layer := "" // latency layer (resource) for workloads

// example instanceKey : umeng-aff300-02:test-wid12022.CPU_dblade
i := strings.Index(instanceKey, ":")
instanceKey = instanceKey[i+1:]
Expand All @@ -703,11 +739,14 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
return true
}

if resourceLatency = curMat.GetMetric(layer); resourceLatency == nil {
r.Logger.Trace().
Str("layer", layer).
Msg("Resource-latency metric missing in cache")
return true
for _, wm := range workloadDetailMetrics {
mLayer := layer + wm
if l := curMat.GetMetric(mLayer); l == nil {
r.Logger.Trace().
Str("layer", layer).
Msg("Resource-latency metric missing in cache")
return true
}
}
}

Expand Down Expand Up @@ -770,27 +809,61 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
if ok {
// special case for workload_detail
if isWorkloadDetailObject(r.Prop.Query) {
if name == "wait_time" || name == "service_time" {
if err := resourceLatency.AddValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add resource-latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add resource-latency")
count++
for _, wm := range workloadDetailMetrics {
// "visits" are ignored. This counter is only used to set properties of ops counter
if name == "visits" {
continue
}
wMetric := curMat.GetMetric(layer + wm)
if wm == "resource_latency" && (name == "wait_time" || name == "service_time") {
if err := wMetric.AddValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add resource_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add resource_latency")
count++
}
continue
} else if wm == "service_time_latency" && name == "service_time" {
if err = wMetric.SetValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add service_time_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add service_time_latency")
count++
}
} else if wm == "wait_time_latency" && name == "wait_time" {
if err = wMetric.SetValueString(instance, f.value); err != nil {
r.Logger.Error().
Stack().
Err(err).
Str("name", name).
Str("value", f.value).
Msg("Add wait_time_latency failed")
} else {
r.Logger.Trace().
Str("name", name).
Str("value", f.value).
Msg("Add wait_time_latency")
count++
}
}
continue
}
// "visits" are ignored. This counter is only used to set properties of ops counter
if name == "visits" {
continue
}
continue
} else {
if f.isArray {
labels := strings.Split(f.label, ",")
Expand Down Expand Up @@ -917,7 +990,6 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
r.Logger.Error().Err(err).Msg("Failed to set timestamp")
}

numRecords += 1
return true
})
}
Expand All @@ -933,7 +1005,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord)
_ = r.Metadata.LazySetValueInt64("api_time", "data", apiD.Microseconds())
_ = r.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds())
_ = r.Metadata.LazySetValueUint64("metrics", "data", count)
_ = r.Metadata.LazySetValueUint64("instances", "data", numRecords)
_ = r.Metadata.LazySetValueUint64("instances", "data", uint64(len(curMat.GetInstances())))
r.AddCollectCount(count)

// skip calculating from delta if no data from previous poll
Expand Down Expand Up @@ -1255,9 +1327,9 @@ func (r *RestPerf) PollInstance() (map[string]*matrix.Matrix, error) {
fields = "*"
dataQuery = qosWorkloadQuery
if r.Prop.Query == qosVolumeQuery || r.Prop.Query == qosDetailVolumeQuery {
filter = append(filter, "workload-class=autovolume|user_defined|system_defined")
filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadVolumeClass))
} else {
filter = append(filter, "workload-class=user_defined|system_defined")
filter = append(filter, "workload_class="+r.loadWorkloadClassQuery(objWorkloadClass))
}
}

Expand Down
Loading
Loading