Skip to content

Commit

Permalink
Merge pull request #849 from abestel/fix/generic_rules_bool_gauge
Browse files Browse the repository at this point in the history
Generic Rules for Bool Gauge SLOs
  • Loading branch information
metalmatze authored Aug 16, 2023
2 parents 35a7cd3 + 627aa2f commit e5897fe
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 31 deletions.
1 change: 0 additions & 1 deletion config/crd/bases/pyrra.dev_servicelevelobjectives.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@
}
},
"required": [
"grouping",
"metric"
],
"type": "object"
Expand Down
5 changes: 2 additions & 3 deletions config/crd/bases/pyrra.dev_servicelevelobjectives.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ spec:
with specific selectors for your service.
properties:
bool_gauge:
description: BoolGauge is the indicator that measures whether a
boolean gauge is successful.
description: BoolGauge is the indicator that measures whether
a boolean gauge is successful.
properties:
grouping:
description: Total is the metric that returns how many requests
Expand All @@ -86,7 +86,6 @@ spec:
metric:
type: string
required:
- grouping
- metric
type: object
latency:
Expand Down
2 changes: 2 additions & 0 deletions kubernetes/api/v1alpha1/servicelevelobjective_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ type NativeLatencyIndicator struct {

// BoolGaugeIndicator is the indicator that measures whether a boolean gauge
// is successful.
type BoolGaugeIndicator struct {
// Query is embedded so that its fields are serialized at the same level as
// the fields declared here.
Query `json:",inline"`

// +optional
// Grouping is an optional list of strings.
// NOTE(review): presumably the label names the objective is grouped by;
// confirm against the code that consumes this field.
Grouping []string `json:"grouping"`
}
Expand Down
28 changes: 27 additions & 1 deletion kubernetes/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

132 changes: 107 additions & 25 deletions slo/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -1151,25 +1151,25 @@ func (o Objective) GenericRules() (monitoringv1.RuleGroup, error) {
sloName := o.Labels.Get(labels.MetricName)
var rules []monitoringv1.Rule

ruleLabels := o.commonRuleLabels(sloName)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_objective",
Expr: intstr.FromString(strconv.FormatFloat(o.Target, 'f', -1, 64)),
Labels: ruleLabels,
})
rules = append(rules, monitoringv1.Rule{
Record: "pyrra_window",
Expr: intstr.FromInt(int(time.Duration(o.Window).Seconds())),
Labels: ruleLabels,
})

switch o.IndicatorType() {
case Ratio:
if len(o.Indicator.Ratio.Grouping) > 0 {
return monitoringv1.RuleGroup{}, ErrGroupingUnsupported
}

ruleLabels := o.commonRuleLabels(sloName)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_objective",
Expr: intstr.FromString(strconv.FormatFloat(o.Target, 'f', -1, 64)),
Labels: ruleLabels,
})
rules = append(rules, monitoringv1.Rule{
Record: "pyrra_window",
Expr: intstr.FromInt(int(time.Duration(o.Window).Seconds())),
Labels: ruleLabels,
})

availability, err := parser.ParseExpr(`1 - sum(errorMetric{matchers="errors"} or vector(0)) / sum(metric{matchers="total"})`)
if err != nil {
return monitoringv1.RuleGroup{}, err
Expand Down Expand Up @@ -1270,18 +1270,6 @@ func (o Objective) GenericRules() (monitoringv1.RuleGroup, error) {
return monitoringv1.RuleGroup{}, ErrGroupingUnsupported
}

ruleLabels := o.commonRuleLabels(sloName)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_objective",
Expr: intstr.FromString(strconv.FormatFloat(o.Target, 'f', -1, 64)),
Labels: ruleLabels,
})
rules = append(rules, monitoringv1.Rule{
Record: "pyrra_window",
Expr: intstr.FromInt(int(time.Duration(o.Window).Seconds())),
Labels: ruleLabels,
})
// availability
{
expr, err := parser.ParseExpr(`sum(errorMetric{matchers="errors"} or vector(0)) / sum(metric{matchers="total"})`)
Expand Down Expand Up @@ -1396,6 +1384,100 @@ func (o Objective) GenericRules() (monitoringv1.RuleGroup, error) {
Labels: ruleLabels,
})
}

case BoolGauge:
if len(o.Indicator.BoolGauge.Grouping) > 0 {
return monitoringv1.RuleGroup{}, ErrGroupingUnsupported
}

totalMetric := countName(o.Indicator.BoolGauge.Metric.Name, o.Window)
totalMatchers := cloneMatchers(o.Indicator.BoolGauge.Metric.LabelMatchers)
for _, m := range totalMatchers {
if m.Name == labels.MetricName {
m.Value = totalMetric
break
}
}
totalMatchers = append(totalMatchers, &labels.Matcher{
Type: labels.MatchEqual,
Name: "slo",
Value: o.Name(),
})

successMetric := sumName(o.Indicator.BoolGauge.Metric.Name, o.Window)
successMatchers := cloneMatchers(o.Indicator.BoolGauge.Metric.LabelMatchers)
for _, m := range successMatchers {
if m.Name == labels.MetricName {
m.Value = successMetric
break
}
}
successMatchers = append(successMatchers, &labels.Matcher{
Type: labels.MatchEqual,
Name: "slo",
Value: o.Name(),
})

// availability
{
expr, err := parser.ParseExpr(`sum(errorMetric{matchers="errors"}) / sum(metric{matchers="total"})`)
if err != nil {
return monitoringv1.RuleGroup{}, err
}

objectiveReplacer{
metric: totalMetric,
matchers: totalMatchers,
errorMetric: successMetric,
errorMatchers: successMatchers,
}.replace(expr)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_availability",
Expr: intstr.FromString(expr.String()),
Labels: ruleLabels,
})
}

// rate
{
rate, err := parser.ParseExpr(`sum(metric{matchers="total"})`)
if err != nil {
return monitoringv1.RuleGroup{}, err
}

objectiveReplacer{
metric: totalMetric,
matchers: totalMatchers,
}.replace(rate)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_requests_total",
Expr: intstr.FromString(rate.String()),
Labels: ruleLabels,
})
}

// errors
{
rate, err := parser.ParseExpr(`sum(metric{matchers="total"}) - sum(errorMetric{matchers="errors"})`)
if err != nil {
return monitoringv1.RuleGroup{}, err
}

objectiveReplacer{
metric: totalMetric,
matchers: totalMatchers,
errorMetric: successMetric,
errorMatchers: successMatchers,
}.replace(rate)

rules = append(rules, monitoringv1.Rule{
Record: "pyrra_errors_total",
Expr: intstr.FromString(rate.String()),
Labels: ruleLabels,
})
}
}

return monitoringv1.RuleGroup{
Expand Down
34 changes: 33 additions & 1 deletion slo/rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1821,9 +1821,41 @@ func TestObjective_GrafanaRules(t *testing.T) {
name: "apiserver-read-resource-latency",
slo: objectiveAPIServerLatency(),
err: ErrGroupingUnsupported,
}, {
name: "up-targets",
slo: objectiveUpTargets(),
rules: monitoringv1.RuleGroup{
Name: "up-targets-generic",
Interval: monitoringDuration("30s"),
Rules: []monitoringv1.Rule{{
Record: "pyrra_objective",
Expr: intstr.FromString(`0.99`),
Labels: map[string]string{"slo": "up-targets"},
}, {
Record: "pyrra_window",
Expr: intstr.FromInt(int((28 * 24 * time.Hour).Seconds())),
Labels: map[string]string{"slo": "up-targets"},
}, {
Record: "pyrra_availability",
Expr: intstr.FromString(`sum(up:sum4w{slo="up-targets"}) / sum(up:count4w{slo="up-targets"})`),
Labels: map[string]string{"slo": "up-targets"},
}, {
Record: "pyrra_requests_total",
Expr: intstr.FromString(`sum(up:count4w{slo="up-targets"})`),
Labels: map[string]string{"slo": "up-targets"},
}, {
Record: "pyrra_errors_total",
Expr: intstr.FromString(`sum(up:count4w{slo="up-targets"}) - sum(up:sum4w{slo="up-targets"})`),
Labels: map[string]string{"slo": "up-targets"},
}},
},
}, {
name: "up-targets-grouping-regex",
slo: objectiveUpTargetsGroupingRegex(),
err: ErrGroupingUnsupported,
}}

require.Len(t, testcases, 14)
require.Len(t, testcases, 16)

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
Expand Down

0 comments on commit e5897fe

Please sign in to comment.