From e5b95456df944656d9df71332e75fbc382628aa4 Mon Sep 17 00:00:00 2001 From: Tom Coupland Date: Tue, 25 Sep 2018 11:13:16 +0100 Subject: [PATCH 1/3] Span to View converter We have added open census span calls in interesting parts of the code, but without creating a jaeger or zipkin cluster they are not going anywhere. Here we add a SpanConverter that converts our Spans into OpenCensus distribution Views. It contains an in memory map of the Measures for these Views, recording values for the length of time reported to be spent in a Span. This list has a hard limit of 100, as there is a risk of a metric explosion should an element of the system include some ID value in it's Span names. Gin does this and those are discarded, but there might be others that we'll have to deal with in the future when discovered. --- api/server/server.go | 4 ++ api/server/spanconverter.go | 129 ++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 api/server/spanconverter.go diff --git a/api/server/server.go b/api/server/server.go index e20c47faa2..eca2f4be78 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -739,6 +739,10 @@ func WithPrometheus() Option { s.promExporter = exporter view.RegisterExporter(exporter) + converter, _ := NewSpanConverter(Options{Namespace: "fn"}) + trace.RegisterExporter(converter) + trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) + return nil } } diff --git a/api/server/spanconverter.go b/api/server/spanconverter.go new file mode 100644 index 0000000000..86ed92a604 --- /dev/null +++ b/api/server/spanconverter.go @@ -0,0 +1,129 @@ +package server + +import ( + "context" + "strings" + "sync" + "time" + "unicode" + + "go.opencensus.io/stats" + view "go.opencensus.io/stats/view" + "go.opencensus.io/trace" +) + +// SpanConverter registers as a opencensus Trace Exporter, +// but it converts all the Spans in Views and registers them as such +// A View exporter will then export them as normal. +type SpanConverter struct { + opts Options + measures map[string]*stats.Float64Measure + viewsMu sync.Mutex + e view.Exporter +} + +// Options contains options for configuring the exporter. +type Options struct { + Namespace string +} + +func NewSpanConverter(o Options) (*SpanConverter, error) { + c := &SpanConverter{ + opts: o, + measures: make(map[string]*stats.Float64Measure), + } + return c, nil +} + +var maxViews = 100 + +// Spans are rejected if there are already maxViews (100) or they are +// prefixed with '/', gin as been observed creating Span id specific +// named Spans. +func (c *SpanConverter) rejectSpan(sd *trace.SpanData) bool { + return len(c.measures) > maxViews || urlName(sd) +} + +// ExportSpan creates a Measure and View once per Span.Name, registering +// the View with the opencensus register. The length of time reported +// by the span is then recorded using the measure. +func (c *SpanConverter) ExportSpan(sd *trace.SpanData) { + if c.rejectSpan(sd) { + return + } + m := c.getMeasure(sd) + + spanTimeNanos := sd.EndTime.Sub(sd.StartTime) + spanTimeMillis := float64(int64(spanTimeNanos / time.Millisecond)) + + stats.Record(context.Background(), m.M(spanTimeMillis)) +} + +func (c *SpanConverter) getMeasure(span *trace.SpanData) *stats.Float64Measure { + sig := sanitize(span.Name) + c.viewsMu.Lock() + m, ok := c.measures[sig] + c.viewsMu.Unlock() + + if !ok { + m = stats.Float64(sig+"_span_time", "The span length in milliseconds", "ms") + v := &view.View{ + Name: sanitize(span.Name), + Description: sanitize(span.Name), + Measure: m, + Aggregation: view.Distribution(1, + 10, + 50, + 100, + 250, + 500, + 1000, + 10000, + 60000, + 120000), + } + + c.viewsMu.Lock() + c.measures[sig] = m + view.Register(v) + c.viewsMu.Unlock() + } + + return m +} + +const labelKeySizeLimit = 100 + +// sanitize returns a string that is trunacated to 100 characters if it's too +// long, and replaces non-alphanumeric characters to underscores. +func sanitize(s string) string { + if len(s) == 0 { + return s + } + if len(s) > labelKeySizeLimit { + s = s[:labelKeySizeLimit] + } + s = strings.Map(sanitizeRune, s) + if unicode.IsDigit(rune(s[0])) { + s = "key_" + s + } + if s[0] == '_' { + s = "key" + s + } + return s +} + +// converts anything that is not a letter or digit to an underscore +func sanitizeRune(r rune) rune { + if unicode.IsLetter(r) || unicode.IsDigit(r) { + return r + } + // Everything else turns into an underscore + return '_' +} + +//Gin creates spans for all paths, containing ID values. +//We can safely discard these, as other histograms are being created for them. +func urlName(sd *trace.SpanData) bool { + return strings.HasPrefix(sd.Name, "/") +} From 8950ffdc8cc0c5de901aa98d46d1b21917d4af41 Mon Sep 17 00:00:00 2001 From: Tom Coupland Date: Thu, 27 Sep 2018 11:49:21 +0100 Subject: [PATCH 2/3] Tidy up comment and dist creation --- api/server/spanconverter.go | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/api/server/spanconverter.go b/api/server/spanconverter.go index 86ed92a604..6dca6766c8 100644 --- a/api/server/spanconverter.go +++ b/api/server/spanconverter.go @@ -59,6 +59,8 @@ func (c *SpanConverter) ExportSpan(sd *trace.SpanData) { stats.Record(context.Background(), m.M(spanTimeMillis)) } +var latencyDist = []float64{1, 10, 50, 100, 250, 500, 1000, 10000, 60000, 120000} + func (c *SpanConverter) getMeasure(span *trace.SpanData) *stats.Float64Measure { sig := sanitize(span.Name) c.viewsMu.Lock() @@ -71,16 +73,7 @@ func (c *SpanConverter) getMeasure(span *trace.SpanData) *stats.Float64Measure { Name: sanitize(span.Name), Description: sanitize(span.Name), Measure: m, - Aggregation: view.Distribution(1, - 10, - 50, - 100, - 250, - 500, - 1000, - 10000, - 60000, - 120000), + Aggregation: view.Distribution(latencyDist...), } c.viewsMu.Lock() @@ -122,8 +115,8 @@ func sanitizeRune(r rune) rune { return '_' } -//Gin creates spans for all paths, containing ID values. -//We can safely discard these, as other histograms are being created for them. +// Gin creates spans for all paths, containing ID values. +// We can safely discard these, as other histograms are being created for them. func urlName(sd *trace.SpanData) bool { return strings.HasPrefix(sd.Name, "/") } From e21000e334680c09f85bf9253c75d40201ec1abc Mon Sep 17 00:00:00 2001 From: Tom Coupland Date: Thu, 27 Sep 2018 11:57:47 +0100 Subject: [PATCH 3/3] Seperate Converter injection from Prometheus exporter --- api/server/server.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/api/server/server.go b/api/server/server.go index eca2f4be78..f2c498e8f0 100644 --- a/api/server/server.go +++ b/api/server/server.go @@ -250,7 +250,8 @@ func NewFromEnv(ctx context.Context, opts ...Option) *Server { opts = append(opts, WithLogDest(getEnv(EnvLogDest, DefaultLogDest), getEnv(EnvLogPrefix, ""))) opts = append(opts, WithZipkin(getEnv(EnvZipkinURL, ""))) opts = append(opts, WithJaeger(getEnv(EnvJaegerURL, ""))) - opts = append(opts, WithPrometheus()) // TODO option to turn this off? + opts = append(opts, WithPrometheus()) // TODO option to turn this off? + opts = append(opts, WithSpanConverter()) // TODO option to turn this off? opts = append(opts, WithDBURL(getEnv(EnvDBURL, defaultDB))) opts = append(opts, WithMQURL(getEnv(EnvMQURL, defaultMQ))) opts = append(opts, WithLogURL(getEnv(EnvLogDBURL, ""))) @@ -739,14 +740,18 @@ func WithPrometheus() Option { s.promExporter = exporter view.RegisterExporter(exporter) - converter, _ := NewSpanConverter(Options{Namespace: "fn"}) - trace.RegisterExporter(converter) - trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) - return nil } } +func WithSpanConverter() Option { + converter, _ := NewSpanConverter(Options{Namespace: "fn"}) + trace.RegisterExporter(converter) + trace.ApplyConfig(trace.Config{DefaultSampler: trace.AlwaysSample()}) + + return nil +} + // WithoutHTTPTriggerEndpoints optionally disables the trigger and route endpoints from a LB -supporting server, allowing extensions to replace them with their own versions func WithoutHTTPTriggerEndpoints() Option { return func(ctx context.Context, s *Server) error {