Skip to content

Commit

Permalink
fix: db errors should be reported as internal errors
Browse files (browse the repository at this point in the history)
* feat: handle db errors during check runs

* feat: add a new check error metric
  • Loading branch information
adityathebe authored Nov 6, 2024
1 parent 394c1d7 commit e918219
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 3 deletions.
4 changes: 4 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/flanksource/duty"
"github.com/flanksource/duty/connection"
"github.com/flanksource/duty/context"
"github.com/flanksource/duty/db"
"github.com/flanksource/duty/query"
"github.com/flanksource/duty/shutdown"
"github.com/spf13/cobra"
Expand All @@ -38,6 +39,9 @@ func InitContext() (context.Context, error) {
}

ctx.WithTracer(otel.GetTracerProvider().Tracer(app))
if err := ctx.DB().Use(db.NewOopsPlugin()); err != nil {
return ctx, fmt.Errorf("failed to use oops gorm plugin: %w", err)
}

return ctx, nil
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ require (
github.com/robertkrimen/otto v0.3.0
github.com/robfig/cron/v3 v3.0.1
github.com/samber/lo v1.47.0
github.com/samber/oops v1.13.1
github.com/sethvargo/go-retry v0.3.0
github.com/sevennt/echo-pprof v0.1.1-0.20220616082843-66a461746b5f
github.com/spf13/cobra v1.8.0
Expand Down Expand Up @@ -253,7 +254,6 @@ require (
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rodaine/table v1.3.0 // indirect
github.com/rogpeppe/go-internal v1.12.0 // indirect
github.com/samber/oops v1.13.1 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/shirou/gopsutil/v3 v3.24.5 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
Expand Down
2 changes: 2 additions & 0 deletions pkg/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ type CheckResult struct {
// ParentCheck is the parent check of a transformed check
ParentCheck external.Check `json:"-"`
ErrorObject error `json:"-"`

InternalError bool `json:"-"`
}

func (result CheckResult) LoggerName() string {
Expand Down
19 changes: 17 additions & 2 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,15 @@ func setupMetrics() {
checkLabels,
)

prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsFailedCount, RequestLatency)
OpsErrorCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "canary_check_error_count",
Help: "The total number of checks that resulted in error",
},
checkLabels,
)

prometheus.MustRegister(Gauge, CanaryCheckInfo, OpsCount, OpsSuccessCount, OpsInvalidCount, OpsErrorCount, OpsFailedCount, RequestLatency)
}

var (
Expand All @@ -115,6 +123,7 @@ var (
OpsCount *prometheus.CounterVec
OpsFailedCount *prometheus.CounterVec
OpsSuccessCount *prometheus.CounterVec
OpsErrorCount *prometheus.CounterVec
RequestLatency *prometheus.HistogramVec
)

Expand Down Expand Up @@ -278,7 +287,13 @@ func Record(
Gauge.WithLabelValues(gaugeLabels...).Set(1)

CanaryCheckInfo.WithLabelValues(checkMetricLabels...).Set(1)
OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc()

if result.InternalError {
OpsErrorCount.WithLabelValues(checkMetricLabels...).Inc()
} else {
fail.Append(1)
OpsFailedCount.WithLabelValues(checkMetricLabels...).Inc()
}
}

_uptime = types.Uptime{Passed: int(pass.Reduce(rolling.Sum)), Failed: int(fail.Reduce(rolling.Sum))}
Expand Down
4 changes: 4 additions & 0 deletions pkg/results.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/flanksource/canary-checker/api/external"
v1 "github.com/flanksource/canary-checker/api/v1"
"github.com/flanksource/duty/db"
)

type Results []*CheckResult
Expand Down Expand Up @@ -91,6 +92,9 @@ func (result *CheckResult) Failf(message string, args ...interface{}) *CheckResu
if result.Error != "" {
result.Error += ", "
}

result.InternalError = db.IsDBError(fmt.Errorf(message, args...))

result.Pass = false
result.Error += fmt.Sprintf(message, args...)
return result
Expand Down

0 comments on commit e918219

Please sign in to comment.