From a3256fb59414a4ea53ef2c1fb56c96d01f6fe491 Mon Sep 17 00:00:00 2001 From: chankyin Date: Fri, 11 Aug 2023 14:09:31 +0800 Subject: [PATCH 01/16] refactor!: rename zconstants.NestLevel to PseudoType --- hack/tfconfig.yaml | 13 +- pkg/aggregator/aggregator.go | 346 +++++------------- .../backend/jaeger-storage/backend.go | 4 +- .../tf/defaults/step/collapse_nesting.go | 7 +- .../tf/defaults/step/compact_duration.go | 2 +- .../tf/defaults/step/extract_nesting.go | 11 +- .../tf/defaults/step/group_by_trace_source.go | 10 +- pkg/frontend/tf/defaults/step/object_tags.go | 14 +- .../tf/defaults/step/prune_childless.go | 2 +- pkg/frontend/tf/extension.go | 2 +- pkg/frontend/tf/tree/grouping.go | 9 - pkg/util/zconstants/zconstants.go | 42 ++- 12 files changed, 166 insertions(+), 296 deletions(-) diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index 65f4f874..35552233 100644 --- a/hack/tfconfig.yaml +++ b/hack/tfconfig.yaml @@ -12,9 +12,8 @@ configs: - kind: Batch batchName: initial - kind: ExtractNestingVisitor - matchesNestLevel: - oneOf: [] - negate: true + matchesPseudoType: + oneOf: ["object"] - kind: Batch batchName: final - id: "20000000" @@ -22,10 +21,6 @@ configs: steps: - kind: Batch batchName: initial - - kind: ExtractNestingVisitor - matchesNestLevel: - oneOf: ["object"] - negate: true - kind: Batch batchName: collapse - kind: CompactDurationVisitor @@ -36,10 +31,6 @@ configs: steps: - kind: Batch batchName: initial - - kind: ExtractNestingVisitor - matchesNestLevel: - oneOf: ["object"] - negate: true - kind: Batch batchName: collapse - kind: GroupByTraceSourceVisitor diff --git a/pkg/aggregator/aggregator.go b/pkg/aggregator/aggregator.go index d19acbb0..57a1ab2a 100644 --- a/pkg/aggregator/aggregator.go +++ b/pkg/aggregator/aggregator.go @@ -21,7 +21,6 @@ import ( "github.com/sirupsen/logrus" "github.com/spf13/pflag" - "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" "k8s.io/utils/clock" @@ -42,14 +41,11 @@ func init() { } type options 
struct { - reserveTtl time.Duration - spanTtl time.Duration - spanFollowTtl time.Duration - spanExtraTtl time.Duration - globalPseudoTags map[string]string - globalEventTags map[string]string - subObjectPrimaryPollInterval time.Duration - subObjectPrimaryPollTimeout time.Duration + reserveTtl time.Duration + spanTtl time.Duration + spanExtraTtl time.Duration + globalPseudoTags map[string]string + globalEventTags map[string]string } func (options *options) Setup(fs *pflag.FlagSet) { @@ -65,15 +61,10 @@ func (options *options) Setup(fs *pflag.FlagSet) { time.Minute*30, "duration of each span", ) - fs.DurationVar(&options.spanFollowTtl, - "aggregator-span-follow-ttl", - 0, - "duration after expiry of previous span within which new spans are considered FollowsFrom", - ) fs.DurationVar(&options.spanExtraTtl, "aggregator-span-extra-ttl", 0, - "duration for which an object span is retained after the FollowsFrom period has ended", + "duration for which an object span is retained in cache after its duration has elapsed", ) fs.StringToStringVar(&options.globalPseudoTags, "aggregator-pseudo-span-global-tags", @@ -85,17 +76,6 @@ func (options *options) Setup(fs *pflag.FlagSet) { map[string]string{}, "tags applied to all event spans", ) - fs.DurationVar(&options.subObjectPrimaryPollInterval, - "aggregator-sub-object-primary-poll-interval", - time.Second*5, - "interval to poll primary event before promoting non-primary events", - ) - fs.DurationVar(&options.subObjectPrimaryPollTimeout, - "aggregator-sub-object-primary-poll-timeout", - time.Second*5, - "timeout to wait for primary event before promoting non-primary events "+ - "(increasing this timeout may lead to indefinite consumer lag", - ) } func (options *options) EnableFlag() *bool { return nil } @@ -169,13 +149,7 @@ func (aggregator *aggregator) Options() manager.Options { return &aggregator.options } -func (aggregator *aggregator) Init() error { - if aggregator.options.spanFollowTtl > aggregator.options.spanTtl { - return 
fmt.Errorf("invalid option: --span-ttl must not be shorter than --span-follow-ttl") - } - - return nil -} +func (aggregator *aggregator) Init() error { return nil } func (aggregator *aggregator) Start(ctx context.Context) error { return nil } @@ -202,100 +176,6 @@ func (aggregator *aggregator) Send( } var reservedPrimary *primaryReservation - if subObjectId != nil { - sendMetric.HasSubObjectId = true - sendMetric.Primary = subObjectId.Primary - - cacheKey := aggregator.spanCacheKey(object, subObjectId.Id) - - if !subObjectId.Primary { - pollCtx, cancelFunc := context.WithTimeout(ctx, aggregator.options.subObjectPrimaryPollTimeout) - defer cancelFunc() - - if err := wait.PollUntilContextCancel( - pollCtx, - aggregator.options.subObjectPrimaryPollInterval, - true, - func(context.Context) (done bool, err error) { - entry, err := aggregator.SpanCache.Fetch(pollCtx, cacheKey) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "PrimaryEventPoll") - return false, fmt.Errorf("%w during primary event poll", err) - } - - if entry != nil { - parentSpan, err = aggregator.Tracer.ExtractCarrier(entry.Value) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "ExtractPrimaryCarrier") - return false, fmt.Errorf("%w during decoding primary span", err) - } - - return true, nil - } - - return false, nil - }, - ); err != nil { - if !wait.Interrupted(err) { - if sendMetric.Error == nil { - sendMetric.Error = metrics.LabelError(err, "UnknownPrimaryPoll") - aggregator.Logger. - WithFields(object.AsFields("object")). - WithField("event", event.Title). - WithError(err). 
- Warn("Unknown error for primary poll") - } - return err - } - - sendMetric.PrimaryChanged = parentSpan == nil - - // primary poll timeout, parentSpan == nil, so promote to primary - sendMetric.Error = nil - } - } - - if parentSpan == nil { - // either object ID is primary, or primary poll expired, in which case we should promote - if err := retry.OnError(retry.DefaultBackoff, spancache.ShouldRetry, func() error { - entry, err := aggregator.SpanCache.FetchOrReserve(ctx, cacheKey, aggregator.options.reserveTtl) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "PrimaryReserve") - return fmt.Errorf("%w during primary event fetch-or-reserve", err) - } - - if entry.Value != nil { - // another primary event was sent, demote this one - sendMetric.PrimaryChanged = true - event.Log( - zconstants.LogTypeRealError, - fmt.Sprintf("Kelemetry: multiple primary events for %s sent, demoted later event", subObjectId.Id), - ) - - parentSpan, err = aggregator.Tracer.ExtractCarrier(entry.Value) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "ExtractAltPrimaryCarrier") - return fmt.Errorf("%w during decoding primary span", err) - } - - return nil - } - - reservedPrimary = &primaryReservation{ - cacheKey: cacheKey, - uid: entry.LastUid, - } - - return nil - }); err != nil { - if wait.Interrupted(err) { - sendMetric.Error = metrics.LabelError(err, "PrimaryReserveTimeout") - } - return err - } - } - } - if parentSpan == nil { // there is no primary span to fallback to, so we are the primary parentSpan, err = aggregator.ensureObjectSpan(ctx, object, event.Time) @@ -368,181 +248,151 @@ func (aggregator *aggregator) Send( return nil } -func (aggregator *aggregator) ensureObjectSpan( +func (agg *aggregator) ensureObjectSpan( ctx context.Context, object util.ObjectRef, eventTime time.Time, ) (tracer.SpanContext, error) { - return aggregator.getOrCreateSpan(ctx, object, eventTime, func() (_ tracer.SpanContext, err error) { - // try to associate a parent object - 
var parent *util.ObjectRef - - for _, linker := range aggregator.Linkers.Impls { - parent = linker.Lookup(ctx, object) - if parent != nil { - break + span, isNew, err := agg.getOrCreateObjectSpan(ctx, object, eventTime) + if err != nil { + return nil, err + } + + if isNew { + // call linkers + for _, linker := range agg.Linkers.Impls { + _ = linker.Lookup(ctx, object) + if err != nil { + agg.Logger.WithError(err).WithField("linker", fmt.Sprintf("%T", linker)).Error("calling linker") + continue } } + } + + return span, nil +} - if parent == nil { - return nil, nil +type spanCreator struct { + cacheKey string + + retries int32 + fetchedSpan tracer.SpanContext + reserveUid spancache.Uid +} + +func (c *spanCreator) fetchOrReserve( + ctx context.Context, + agg *aggregator, +) error { + c.retries += 1 + + entry, err := agg.SpanCache.FetchOrReserve(ctx, c.cacheKey, agg.options.reserveTtl) + if err != nil { + return metrics.LabelError(fmt.Errorf("%w during fetch-or-reserve of object span", err), "FetchOrReserve") + } + + if entry.Value != nil { + // the entry already exists, no additional logic required + span, err := agg.Tracer.ExtractCarrier(entry.Value) + if err != nil { + return metrics.LabelError(fmt.Errorf("persisted span contains invalid data: %w", err), "BadCarrier") } - // ensure parent object has a span - return aggregator.ensureObjectSpan(ctx, *parent, eventTime) - }) + c.fetchedSpan = span + return nil + } + + // else, a new reservation was created + c.reserveUid = entry.LastUid + return nil } -func (aggregator *aggregator) getOrCreateSpan( +func (agg *aggregator) getOrCreateObjectSpan( ctx context.Context, object util.ObjectRef, eventTime time.Time, - parentGetter func() (tracer.SpanContext, error), -) (tracer.SpanContext, error) { +) (_span tracer.SpanContext, _isNew bool, _err error) { lazySpanMetric := &lazySpanMetric{ Cluster: object.Cluster, Result: "error", } - defer aggregator.LazySpanMetric.DeferCount(aggregator.Clock.Now(), lazySpanMetric) + defer 
agg.LazySpanMetric.DeferCount(agg.Clock.Now(), lazySpanMetric) - cacheKey := aggregator.expiringSpanCacheKey(object, eventTime) + cacheKey := agg.expiringSpanCacheKey(object, eventTime) - logger := aggregator.Logger. - WithField("step", "getOrCreateSpan"). + logger := agg.Logger. + WithField("step", "getOrCreateObjectSpan"). WithFields(object.AsFields("object")) - var reserveUid spancache.Uid - var returnSpan tracer.SpanContext - var followsFrom tracer.SpanContext - defer func() { - logger.WithField("cacheKey", cacheKey).WithField("result", lazySpanMetric.Result).Debug("getOrCreateSpan") + logger.WithField("cacheKey", cacheKey).WithField("result", lazySpanMetric.Result).Debug("getOrCreateObjectSpan") }() - retries := int64(0) - if err := retry.OnError(retry.DefaultBackoff, spancache.ShouldRetry, func() error { - retries += 1 - entry, err := aggregator.SpanCache.FetchOrReserve(ctx, cacheKey, aggregator.options.reserveTtl) - if err != nil { - return metrics.LabelError(fmt.Errorf("%w during initial fetch-or-reserve", err), "FetchOrReserve") - } - - if entry.Value != nil { - // the entry already exists, no additional logic required - reserveUid = []byte{} - followsFrom = nil - returnSpan, err = aggregator.Tracer.ExtractCarrier(entry.Value) - if err != nil { - return metrics.LabelError(fmt.Errorf("persisted span contains invalid data: %w", err), "BadCarrier") - } - - return nil - } - - // we created a new reservation - reserveUid = entry.LastUid - returnSpan = nil - followsFrom = nil - - // check if this new span is a follower of the previous one - followsTime := eventTime.Add(-aggregator.options.spanFollowTtl) - followsKey := aggregator.expiringSpanCacheKey(object, followsTime) - - if followsKey == cacheKey { - // previous span expired - return nil - } - - followsEntry, err := aggregator.SpanCache.Fetch(ctx, followsKey) - if err != nil { - return metrics.LabelError(fmt.Errorf("error fetching followed entry: %w", err), "FetchFollow") - } - - if followsEntry == nil { - 
// no following target - return nil - } + creator := &spanCreator{cacheKey: cacheKey} - if followsEntry.Value == nil { - return metrics.LabelError(spancache.ErrAlreadyReserved, "FollowPending") // trigger retry - } - - // we have a following target - followsFrom, err = aggregator.Tracer.ExtractCarrier(followsEntry.Value) - if err != nil { - return metrics.LabelError(fmt.Errorf("followed persisted span contains invalid data: %w", err), "BadFollowCarrier") - } - - return nil - }); err != nil { - return nil, metrics.LabelError(fmt.Errorf("cannot reserve or fetch span %q: %w", cacheKey, err), "ReserveRetryLoop") + if err := retry.OnError( + retry.DefaultBackoff, + spancache.ShouldRetry, + func() error { return creator.fetchOrReserve(ctx, agg) }, + ); err != nil { + return nil, false, metrics.LabelError(fmt.Errorf("cannot reserve or fetch span %q: %w", cacheKey, err), "ReserveRetryLoop") } retryCountMetric := lazySpanRetryCountMetric(*lazySpanMetric) defer func() { - aggregator.LazySpanRetryCountMetric.With(&retryCountMetric).Summary(float64(retries)) + agg.LazySpanRetryCountMetric.With(&retryCountMetric).Summary(float64(creator.retries)) }() // take the value of lazySpanMetric later logger = logger. - WithField("returnSpan", returnSpan != nil). - WithField("reserveUid", reserveUid). - WithField("followsFrom", followsFrom != nil) + WithField("returnSpan", creator.fetchedSpan != nil). 
+ WithField("reserveUid", creator.reserveUid) - if returnSpan != nil { + if creator.fetchedSpan != nil { lazySpanMetric.Result = "fetch" - return returnSpan, nil + return creator.fetchedSpan, false, nil } // we have a new reservation, need to initialize it now - startTime := aggregator.Clock.Now() + startTime := agg.Clock.Now() - parent, err := parentGetter() + span, err := agg.createPseudoSpan(ctx, object, zconstants.TraceSourceObject, eventTime, nil, nil) if err != nil { - return nil, fmt.Errorf("cannot fetch parent object: %w", err) + return nil, false, metrics.LabelError(fmt.Errorf("cannot create span: %w", err), "CreateSpan") } - span, err := aggregator.createSpan(ctx, object, zconstants.NestLevelObject, eventTime, parent, followsFrom) + entryValue, err := agg.Tracer.InjectCarrier(span) if err != nil { - return nil, metrics.LabelError(fmt.Errorf("cannot create span: %w", err), "CreateSpan") + return nil, false, metrics.LabelError(fmt.Errorf("cannot serialize span context: %w", err), "InjectCarrier") } - entryValue, err := aggregator.Tracer.InjectCarrier(span) + totalTtl := agg.options.spanTtl + agg.options.spanExtraTtl + err = agg.SpanCache.SetReserved(ctx, cacheKey, entryValue, creator.reserveUid, totalTtl) if err != nil { - return nil, metrics.LabelError(fmt.Errorf("cannot serialize span context: %w", err), "InjectCarrier") + return nil, false, metrics.LabelError(fmt.Errorf("cannot persist reserved value: %w", err), "PersistCarrier") } - totalTtl := aggregator.options.spanTtl + aggregator.options.spanFollowTtl + aggregator.options.spanExtraTtl - err = aggregator.SpanCache.SetReserved(ctx, cacheKey, entryValue, reserveUid, totalTtl) - if err != nil { - return nil, metrics.LabelError(fmt.Errorf("cannot persist reserved value: %w", err), "PersistCarrier") - } + logger.WithField("duration", agg.Clock.Since(startTime)).Debug("Created new span") - logger.WithField("duration", aggregator.Clock.Since(startTime)).Debug("Created new span") - - if followsFrom != nil { 
- lazySpanMetric.Result = "renew" - } else { - lazySpanMetric.Result = "create" - } + lazySpanMetric.Result = "create" - return span, nil + return span, true, nil } -func (aggregator *aggregator) createSpan( +func (agg *aggregator) createPseudoSpan( ctx context.Context, object util.ObjectRef, - nestLevel string, + pseudoType zconstants.PseudoTypeValue, eventTime time.Time, parent tracer.SpanContext, followsFrom tracer.SpanContext, ) (tracer.SpanContext, error) { - remainderSeconds := eventTime.Unix() % int64(aggregator.options.spanTtl.Seconds()) + remainderSeconds := eventTime.Unix() % int64(agg.options.spanTtl.Seconds()) startTime := eventTime.Add(-time.Duration(remainderSeconds) * time.Second) span := tracer.Span{ - Type: nestLevel, - Name: fmt.Sprintf("%s/%s %s", object.Resource, object.Name, nestLevel), + Type: string(pseudoType), + Name: fmt.Sprintf("%s/%s", object.Resource, object.Name), StartTime: startTime, - FinishTime: startTime.Add(aggregator.options.spanTtl), + FinishTime: startTime.Add(agg.options.spanTtl), Parent: parent, Follows: followsFrom, Tags: map[string]string{ @@ -552,27 +402,27 @@ func (aggregator *aggregator) createSpan( "group": object.Group, "version": object.Version, "resource": object.Resource, - zconstants.NestLevel: nestLevel, - zconstants.TraceSource: zconstants.TraceSourceObject, "timeStamp": startTime.Format(time.RFC3339), + zconstants.TraceSource: zconstants.TraceSourceObject, + zconstants.PseudoType: string(pseudoType), }, } - for tagKey, tagValue := range aggregator.options.globalPseudoTags { + for tagKey, tagValue := range agg.options.globalPseudoTags { span.Tags[tagKey] = tagValue } - if nestLevel == zconstants.NestLevelObject { - for _, decorator := range aggregator.ObjectSpanDecorators.Impls { + if pseudoType == zconstants.PseudoTypeObject { + for _, decorator := range agg.ObjectSpanDecorators.Impls { decorator.Decorate(ctx, object, span.Type, span.Tags) } } - spanContext, err := aggregator.Tracer.CreateSpan(span) + 
spanContext, err := agg.Tracer.CreateSpan(span) if err != nil { return nil, metrics.LabelError(fmt.Errorf("cannot create span: %w", err), "CreateSpan") } - aggregator.Logger. + agg.Logger. WithFields(object.AsFields("object")). WithField("parent", parent). Debug("CreateSpan") diff --git a/pkg/frontend/backend/jaeger-storage/backend.go b/pkg/frontend/backend/jaeger-storage/backend.go index f454c6e2..40658ec7 100644 --- a/pkg/frontend/backend/jaeger-storage/backend.go +++ b/pkg/frontend/backend/jaeger-storage/backend.go @@ -203,8 +203,8 @@ func (backend *Backend) List( return false // not a root } - field, hasField := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel) - if !hasField || field.VStr != zconstants.NestLevelObject { + field, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) + if !isPseudo || field.VStr != string(zconstants.PseudoTypeObject) { return false // not an object root } diff --git a/pkg/frontend/tf/defaults/step/collapse_nesting.go b/pkg/frontend/tf/defaults/step/collapse_nesting.go index 9d6bfb46..fac402a7 100644 --- a/pkg/frontend/tf/defaults/step/collapse_nesting.go +++ b/pkg/frontend/tf/defaults/step/collapse_nesting.go @@ -35,7 +35,7 @@ func init() { ) } -// Deletes child spans with a traceSource and injects them as logs in the nesting span. +// Deletes child spans with a non-pseudo trace source and injects them as logs in the nesting span. // // Multiple logs of the same span are aggregated into one log, flattening them into a field. 
// @@ -104,7 +104,7 @@ func (classes *AuditDiffClassification) Get(prefix string) *AuditDiffClass { } func (visitor CollapseNestingVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { - if _, hasTag := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel); !hasTag { + if _, hasTag := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); !hasTag { return visitor } @@ -119,9 +119,10 @@ func (visitor CollapseNestingVisitor) Exit(tree *tftree.SpanTree, span *model.Sp func (visitor CollapseNestingVisitor) processChild(tree *tftree.SpanTree, span *model.Span, childId model.SpanID) { childSpan := tree.Span(childId) - if _, childHasTag := model.KeyValues(childSpan.Tags).FindByKey(zconstants.NestLevel); childHasTag { + if _, childIsPseudo := model.KeyValues(childSpan.Tags).FindByKey(zconstants.PseudoType); childIsPseudo { return } + traceSourceKv, hasTraceSource := model.KeyValues(childSpan.Tags).FindByKey(zconstants.TraceSource) if !hasTraceSource { return diff --git a/pkg/frontend/tf/defaults/step/compact_duration.go b/pkg/frontend/tf/defaults/step/compact_duration.go index 129cd7ed..4aa6b800 100644 --- a/pkg/frontend/tf/defaults/step/compact_duration.go +++ b/pkg/frontend/tf/defaults/step/compact_duration.go @@ -45,7 +45,7 @@ func (visitor CompactDurationVisitor) Enter(tree *tftree.SpanTree, span *model.S func (visitor CompactDurationVisitor) Exit(tree *tftree.SpanTree, span *model.Span) { // use exit hook to use compact results of children - if _, hasNestLevel := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel); !hasNestLevel { + if _, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); !isPseudo { return } diff --git a/pkg/frontend/tf/defaults/step/extract_nesting.go b/pkg/frontend/tf/defaults/step/extract_nesting.go index 533e9297..21e92974 100644 --- a/pkg/frontend/tf/defaults/step/extract_nesting.go +++ b/pkg/frontend/tf/defaults/step/extract_nesting.go @@ -31,11 +31,10 @@ func init() { ) } -// 
Deletes spans matching MatchesNestLevel and brings their children one level up. +// Deletes spans matching MatchesPseudoType and brings their children one level up. type ExtractNestingVisitor struct { - // NestLevels returns true if the span should be deleted. - // It is only called on spans with the tag zconstants.Nesting - MatchesNestLevel StringFilter `json:"matchesNestLevel"` + // Filters the trace sources to delete. + MatchesPseudoType StringFilter `json:"matchesPseudoType"` } func (ExtractNestingVisitor) Kind() string { return "ExtractNestingVisitor" } @@ -46,8 +45,8 @@ func (visitor ExtractNestingVisitor) Enter(tree *tftree.SpanTree, span *model.Sp return visitor } - if nestLevel, ok := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel); ok { - if visitor.MatchesNestLevel.Test(nestLevel.AsString()) { + if pseudoType, ok := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); ok { + if visitor.MatchesPseudoType.Test(pseudoType.AsString()) { childrenMap := tree.Children(span.SpanID) childrenCopy := make([]model.SpanID, 0, len(childrenMap)) for childId := range childrenMap { diff --git a/pkg/frontend/tf/defaults/step/group_by_trace_source.go b/pkg/frontend/tf/defaults/step/group_by_trace_source.go index b376495e..2bf84931 100644 --- a/pkg/frontend/tf/defaults/step/group_by_trace_source.go +++ b/pkg/frontend/tf/defaults/step/group_by_trace_source.go @@ -33,7 +33,7 @@ func init() { ) } -const pseudoSpanNestLevel = "groupByTraceSource" +const myPseudoType = "groupByTraceSource" // Splits span logs into pseudospans grouped by traceSource. 
type GroupByTraceSourceVisitor struct { @@ -43,8 +43,8 @@ type GroupByTraceSourceVisitor struct { func (GroupByTraceSourceVisitor) Kind() string { return "GroupByTraceSourceVisitor" } func (visitor GroupByTraceSourceVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { - nestLevel, hasNestLevel := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel) - if hasNestLevel && nestLevel.AsString() == pseudoSpanNestLevel { + pseudoType, hasPseudoType := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) + if hasPseudoType && pseudoType.AsString() == myPseudoType { // already grouped, don't recurse return visitor } @@ -74,9 +74,9 @@ func (visitor GroupByTraceSourceVisitor) Enter(tree *tftree.SpanTree, span *mode Duration: span.Duration, Tags: []model.KeyValue{ { - Key: zconstants.NestLevel, + Key: zconstants.PseudoType, VType: model.StringType, - VStr: pseudoSpanNestLevel, + VStr: myPseudoType, }, }, Logs: logs, diff --git a/pkg/frontend/tf/defaults/step/object_tags.go b/pkg/frontend/tf/defaults/step/object_tags.go index 46694609..134a6bb1 100644 --- a/pkg/frontend/tf/defaults/step/object_tags.go +++ b/pkg/frontend/tf/defaults/step/object_tags.go @@ -31,6 +31,7 @@ func init() { ) } +// Copy tags from child spans to the object. 
type ObjectTagsVisitor struct { ResourceTags []string `json:"resourceTags"` } @@ -38,7 +39,8 @@ type ObjectTagsVisitor struct { func (ObjectTagsVisitor) Kind() string { return "ObjectTagsVisitor" } func (visitor ObjectTagsVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { - if tagKv, hasTag := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel); !hasTag || tagKv.VStr != zconstants.NestLevelObject { + if tagKv, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); !isPseudo || + tagKv.VStr != string(zconstants.PseudoTypeObject) { return visitor } if _, hasTag := model.KeyValues(span.Tags).FindByKey("resource"); !hasTag { @@ -61,10 +63,14 @@ func (visitor ObjectTagsVisitor) findTagRecursively(tree *tftree.SpanTree, span for childId := range tree.Children(span.SpanID) { childSpan := tree.Span(childId) - if tagKv, hasTag := model.KeyValues(childSpan.Tags).FindByKey(zconstants.NestLevel); hasTag && - tagKv.VStr == zconstants.NestLevelObject { - continue + { + tagKv, isPseudo := model.KeyValues(childSpan.Tags).FindByKey(zconstants.PseudoType) + if isPseudo && tagKv.VStr == string(zconstants.PseudoTypeObject) { + // do not copy from another object + continue + } } + kv := visitor.findTagRecursively(tree, childSpan, tagKey) if len(kv.Key) > 0 { span.Tags = append(span.Tags, kv) diff --git a/pkg/frontend/tf/defaults/step/prune_childless.go b/pkg/frontend/tf/defaults/step/prune_childless.go index ed15d006..26742542 100644 --- a/pkg/frontend/tf/defaults/step/prune_childless.go +++ b/pkg/frontend/tf/defaults/step/prune_childless.go @@ -41,7 +41,7 @@ func (visitor PruneChildlessVisitor) Enter(tree *tftree.SpanTree, span *model.Sp // Prune in postorder traversal to recursively remove higher pseudospans without leaves. 
func (visitor PruneChildlessVisitor) Exit(tree *tftree.SpanTree, span *model.Span) { - if _, hasTag := model.KeyValues(span.Tags).FindByKey(zconstants.NestLevel); hasTag { + if _, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); isPseudo { if len(tree.Children(span.SpanID)) == 0 && span.SpanID != tree.Root.SpanID { tree.Delete(span.SpanID) } diff --git a/pkg/frontend/tf/extension.go b/pkg/frontend/tf/extension.go index 48cffd4b..13e81d10 100644 --- a/pkg/frontend/tf/extension.go +++ b/pkg/frontend/tf/extension.go @@ -96,7 +96,7 @@ func (x *FetchExtensionsAndStoreCache) ProcessExtensions( span := span tags := model.KeyValues(span.Tags) - if tag, exists := tags.FindByKey(zconstants.NestLevel); exists && tag.VStr == zconstants.NestLevelObject { + if tag, isPseudo := tags.FindByKey(zconstants.PseudoType); isPseudo && tag.VStr == string(zconstants.PseudoTypeObject) { for extId, ext := range extensions { ext := ext diff --git a/pkg/frontend/tf/tree/grouping.go b/pkg/frontend/tf/tree/grouping.go index 8536ed1d..f7da22da 100644 --- a/pkg/frontend/tf/tree/grouping.go +++ b/pkg/frontend/tf/tree/grouping.go @@ -27,7 +27,6 @@ type GroupingKey struct { Resource string `json:"resource"` Namespace string `json:"namespace"` Name string `json:"name"` - Field string `json:"field"` } func GroupingKeyFromMap(tags map[string]string) (key GroupingKey, ok bool) { @@ -44,12 +43,6 @@ func GroupingKeyFromMap(tags map[string]string) (key GroupingKey, ok bool) { } } - if field, hasField := tags["field"]; hasField { - key.Field = field - } else { - key.Field = "object" - } - return key, true } @@ -65,14 +58,12 @@ func GroupingKeyFromSpan(span *model.Span) (GroupingKey, bool) { resource, _ := tags.FindByKey("resource") namespace, _ := tags.FindByKey("namespace") name, _ := tags.FindByKey("name") - field, _ := tags.FindByKey(zconstants.NestLevel) key := GroupingKey{ Cluster: cluster.VStr, Group: group.VStr, Resource: resource.VStr, Namespace: namespace.VStr, Name: 
name.VStr, - Field: field.VStr, } return key, true } diff --git a/pkg/util/zconstants/zconstants.go b/pkg/util/zconstants/zconstants.go index bfac7185..f0e8ac89 100644 --- a/pkg/util/zconstants/zconstants.go +++ b/pkg/util/zconstants/zconstants.go @@ -26,10 +26,12 @@ const SpanName = Prefix + "kelemetryName" // Indicates that the current span is a pseudospan that can be folded or flattened. // The value is the folding type. -const NestLevel = Prefix + "nestingLevel" +const PseudoType = Prefix + "pseudoType" + +type PseudoTypeValue string const ( - NestLevelObject = "object" + PseudoTypeObject PseudoTypeValue = "object" ) // Identifies that the span represents an actual event (rather than as a pseudospan). @@ -37,14 +39,19 @@ const TraceSource = Prefix + "traceSource" const ( TraceSourceObject = "object" - TraceSourceAudit = "audit" - TraceSourceEvent = "event" + TraceSourceLink = "link" + + TraceSourceAudit = "audit" + TraceSourceEvent = "event" ) func KnownTraceSources(withPseudo bool) []string { + numPseudoTraceSources := 2 + traceSources := []string{ // pseudo TraceSourceObject, + TraceSourceLink, // real TraceSourceAudit, @@ -52,12 +59,37 @@ func KnownTraceSources(withPseudo bool) []string { } if !withPseudo { - traceSources = traceSources[1:] + traceSources = traceSources[numPseudoTraceSources:] } return traceSources } +// Tags for TraceSourceLink spans that indicate the linked object. +const ( + LinkedObjectCluster = "linkedCluster" + LinkedObjectGroup = "linkedGroup" + LinkedObjectResource = "linkedResource" + LinkedObjectNamespace = "linkedNamespace" + LinkedObjectName = "linkedName" + + // Indicates how the linked trace interacts with the current trace. + LinkRole = "linkRole" + + // If this tag is nonempty, a virtual span is inserted between the linked objects with the tag value as the name. 
+ LinkClass = "linkClass" +) + +type LinkRoleValue string + +const ( + // The current trace is a child trace under the linked trace + LinkRoleParent LinkRoleValue = "parent" + + // The linked trace is a child trace under the current trace. + LinkRoleChild LinkRoleValue = "child" +) + // Classifies the type of a log line. // Logs without this attribute will not have special treatment. const LogTypeAttr = Prefix + "logType" From 0d9aa6e9e07c80083703b6bfa183e5d50b743423 Mon Sep 17 00:00:00 2001 From: chankyin Date: Fri, 11 Aug 2023 18:25:40 +0800 Subject: [PATCH 02/16] feat(aggregator): generate link span --- Makefile | 3 + pkg/aggregator/aggregator.go | 147 +++++++++------------ pkg/aggregator/linker/job/interface.go | 63 +++++++++ pkg/aggregator/linker/job/local/local.go | 98 ++++++++++++++ pkg/aggregator/linker/job/worker/worker.go | 142 ++++++++++++++++++++ pkg/aggregator/linker/linker.go | 9 +- pkg/annotationlinker/linker.go | 22 +-- pkg/audit/consumer/consumer.go | 10 +- pkg/event/controller.go | 2 +- pkg/imports.go | 2 + pkg/ownerlinker/linker.go | 24 ++-- pkg/util/zconstants/zconstants.go | 29 +++- 12 files changed, 440 insertions(+), 111 deletions(-) create mode 100644 pkg/aggregator/linker/job/interface.go create mode 100644 pkg/aggregator/linker/job/local/local.go create mode 100644 pkg/aggregator/linker/job/worker/worker.go diff --git a/Makefile b/Makefile index 6d92e630..6e640b8b 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,8 @@ else LOG_FILE_ARG ?= endif +LINKER_WORKER_COUNT ?= 1 + CONTROLLERS ?= audit-consumer,audit-producer,audit-webhook,event-informer,annotation-linker,owner-linker,resource-object-tag,resource-event-tag,diff-decorator,diff-controller,diff-api,pprof,jaeger-storage-plugin,jaeger-redirect-server,kelemetrix ifeq ($(CONTROLLERS),) ENABLE_ARGS ?= @@ -85,6 +87,7 @@ run: output/kelemetry $(DUMP_ROTATE_DEP) --log-file=$(LOG_FILE) \ --aggregator-pseudo-span-global-tags=runId=$(RUN_ID) \ --aggregator-event-span-global-tags=run=$(RUN_ID) \ + 
--linker-worker-count=$(LINKER_WORKER_COUNT) \ --pprof-addr=:6030 \ --diff-cache=$(ETCD_OR_LOCAL) \ --diff-cache-etcd-endpoints=127.0.0.1:2379 \ diff --git a/pkg/aggregator/aggregator.go b/pkg/aggregator/aggregator.go index 57a1ab2a..138cf08c 100644 --- a/pkg/aggregator/aggregator.go +++ b/pkg/aggregator/aggregator.go @@ -26,7 +26,7 @@ import ( "github.com/kubewharf/kelemetry/pkg/aggregator/aggregatorevent" "github.com/kubewharf/kelemetry/pkg/aggregator/eventdecorator" - "github.com/kubewharf/kelemetry/pkg/aggregator/linker" + linkjob "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job" "github.com/kubewharf/kelemetry/pkg/aggregator/objectspandecorator" "github.com/kubewharf/kelemetry/pkg/aggregator/spancache" "github.com/kubewharf/kelemetry/pkg/aggregator/tracer" @@ -84,27 +84,35 @@ type Aggregator interface { manager.Component // Send sends an event to the tracer backend. - // The sub-object ID is an optional identifier that associates the event with an object-scoped context (e.g. resource version). - // If an event is created with the same sub-object ID with Primary=false, - // it waits for the primary event to be created and takes it as the parent. - // If the primary event does not get created after options.subObjectPrimaryBackoff, this event is promoted as primary. - // If multiple primary events are sent, the slower one (by SpanCache-authoritative timing) is demoted. - Send(ctx context.Context, object util.ObjectRef, event *aggregatorevent.Event, subObjectId *SubObjectId) error -} - -type SubObjectId struct { - Id string - Primary bool + Send(ctx context.Context, object util.ObjectRef, event *aggregatorevent.Event) error + + // EnsureObjectSpan creates a pseudospan for the object, and triggers any possible relevant linkers. + EnsureObjectSpan( + ctx context.Context, + object util.ObjectRef, + eventTime time.Time, + ) (tracer.SpanContext, error) + + // GetOrCreatePseudoSpan creates a span following the pseudospan standard with the required tags. 
+ GetOrCreatePseudoSpan( + ctx context.Context, + object util.ObjectRef, + pseudoType zconstants.PseudoTypeValue, + eventTime time.Time, + parent tracer.SpanContext, + followsFrom tracer.SpanContext, + extraTags map[string]string, + ) (span tracer.SpanContext, isNew bool, err error) } type aggregator struct { - options options - Clock clock.Clock - Linkers *manager.List[linker.Linker] - Logger logrus.FieldLogger - SpanCache spancache.Cache - Tracer tracer.Tracer - Metrics metrics.Client + options options + Clock clock.Clock + Logger logrus.FieldLogger + SpanCache spancache.Cache + Tracer tracer.Tracer + Metrics metrics.Client + LinkJobPublisher linkjob.Publisher EventDecorators *manager.List[eventdecorator.Decorator] ObjectSpanDecorators *manager.List[objectspandecorator.Decorator] @@ -116,13 +124,10 @@ type aggregator struct { } type sendMetric struct { - Cluster string - TraceSource string - HasSubObjectId bool - Primary bool // whether the subObjectId is primary or not - PrimaryChanged bool // whether the primary got demoted or non-primary got promoted - Success bool - Error metrics.LabeledError + Cluster string + TraceSource string + Success bool + Error metrics.LabeledError } func (*sendMetric) MetricName() string { return "aggregator_send" } @@ -159,7 +164,6 @@ func (aggregator *aggregator) Send( ctx context.Context, object util.ObjectRef, event *aggregatorevent.Event, - subObjectId *SubObjectId, ) (err error) { sendMetric := &sendMetric{Cluster: object.Cluster, TraceSource: event.TraceSource} defer aggregator.SendMetric.DeferCount(aggregator.Clock.Now(), sendMetric) @@ -168,21 +172,10 @@ func (aggregator *aggregator) Send( With(&sinceEventMetric{Cluster: object.Cluster, TraceSource: event.TraceSource}). 
Summary(float64(aggregator.Clock.Since(event.Time).Nanoseconds())) - var parentSpan tracer.SpanContext - - type primaryReservation struct { - cacheKey string - uid spancache.Uid - } - var reservedPrimary *primaryReservation - - if parentSpan == nil { - // there is no primary span to fallback to, so we are the primary - parentSpan, err = aggregator.ensureObjectSpan(ctx, object, event.Time) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "EnsureObjectSpan") - return fmt.Errorf("%w during fetching field span for primary span", err) - } + parentSpan, err := aggregator.EnsureObjectSpan(ctx, object, event.Time) + if err != nil { + sendMetric.Error = metrics.LabelError(err, "EnsureObjectSpan") + return fmt.Errorf("%w during ensuring object span", err) } for _, decorator := range aggregator.EventDecorators.Impls { @@ -213,31 +206,12 @@ func (aggregator *aggregator) Send( span.Tags[tagKey] = tagValue } - sentSpan, err := aggregator.Tracer.CreateSpan(span) + _, err = aggregator.Tracer.CreateSpan(span) if err != nil { sendMetric.Error = metrics.LabelError(err, "CreateSpan") return fmt.Errorf("cannot create span: %w", err) } - if reservedPrimary != nil { - sentSpanRaw, err := aggregator.Tracer.InjectCarrier(sentSpan) - if err != nil { - sendMetric.Error = metrics.LabelError(err, "InjectCarrier") - return fmt.Errorf("%w during serializing sent span ID", err) - } - - if err := aggregator.SpanCache.SetReserved( - ctx, - reservedPrimary.cacheKey, - sentSpanRaw, - reservedPrimary.uid, - aggregator.options.spanTtl, - ); err != nil { - sendMetric.Error = metrics.LabelError(err, "SetReserved") - return fmt.Errorf("%w during persisting primary span ID", err) - } - } - sendMetric.Success = true aggregator.Logger.WithFields(object.AsFields("object")). 
@@ -248,25 +222,22 @@ func (aggregator *aggregator) Send( return nil } -func (agg *aggregator) ensureObjectSpan( +func (agg *aggregator) EnsureObjectSpan( ctx context.Context, object util.ObjectRef, eventTime time.Time, ) (tracer.SpanContext, error) { - span, isNew, err := agg.getOrCreateObjectSpan(ctx, object, eventTime) + span, isNew, err := agg.GetOrCreatePseudoSpan(ctx, object, zconstants.PseudoTypeObject, eventTime, nil, nil, nil) if err != nil { return nil, err } if isNew { - // call linkers - for _, linker := range agg.Linkers.Impls { - _ = linker.Lookup(ctx, object) - if err != nil { - agg.Logger.WithError(err).WithField("linker", fmt.Sprintf("%T", linker)).Error("calling linker") - continue - } - } + agg.LinkJobPublisher.Publish(&linkjob.LinkJob{ + Object: object, + EventTime: eventTime, + Span: span, + }) } return span, nil @@ -307,10 +278,14 @@ func (c *spanCreator) fetchOrReserve( return nil } -func (agg *aggregator) getOrCreateObjectSpan( +func (agg *aggregator) GetOrCreatePseudoSpan( ctx context.Context, object util.ObjectRef, + pseudoType zconstants.PseudoTypeValue, eventTime time.Time, + parent tracer.SpanContext, + followsFrom tracer.SpanContext, + extraTags map[string]string, ) (_span tracer.SpanContext, _isNew bool, _err error) { lazySpanMetric := &lazySpanMetric{ Cluster: object.Cluster, @@ -318,14 +293,14 @@ func (agg *aggregator) getOrCreateObjectSpan( } defer agg.LazySpanMetric.DeferCount(agg.Clock.Now(), lazySpanMetric) - cacheKey := agg.expiringSpanCacheKey(object, eventTime) + cacheKey := agg.expiringSpanCacheKey(object, eventTime, pseudoType) logger := agg.Logger. - WithField("step", "getOrCreateObjectSpan"). + WithField("step", "GetOrCreatePseudoSpan"). 
WithFields(object.AsFields("object")) defer func() { - logger.WithField("cacheKey", cacheKey).WithField("result", lazySpanMetric.Result).Debug("getOrCreateObjectSpan") + logger.WithField("cacheKey", cacheKey).WithField("result", lazySpanMetric.Result).Debug("GetOrCreatePseudoSpan") }() creator := &spanCreator{cacheKey: cacheKey} @@ -355,7 +330,7 @@ func (agg *aggregator) getOrCreateObjectSpan( // we have a new reservation, need to initialize it now startTime := agg.Clock.Now() - span, err := agg.createPseudoSpan(ctx, object, zconstants.TraceSourceObject, eventTime, nil, nil) + span, err := agg.CreatePseudoSpan(ctx, object, pseudoType, eventTime, parent, followsFrom, extraTags) if err != nil { return nil, false, metrics.LabelError(fmt.Errorf("cannot create span: %w", err), "CreateSpan") } @@ -378,13 +353,14 @@ func (agg *aggregator) getOrCreateObjectSpan( return span, true, nil } -func (agg *aggregator) createPseudoSpan( +func (agg *aggregator) CreatePseudoSpan( ctx context.Context, object util.ObjectRef, pseudoType zconstants.PseudoTypeValue, eventTime time.Time, parent tracer.SpanContext, followsFrom tracer.SpanContext, + extraTags map[string]string, ) (tracer.SpanContext, error) { remainderSeconds := eventTime.Unix() % int64(agg.options.spanTtl.Seconds()) startTime := eventTime.Add(-time.Duration(remainderSeconds) * time.Second) @@ -410,6 +386,9 @@ func (agg *aggregator) createPseudoSpan( for tagKey, tagValue := range agg.options.globalPseudoTags { span.Tags[tagKey] = tagValue } + for tagKey, tagValue := range extraTags { + span.Tags[tagKey] = tagValue + } if pseudoType == zconstants.PseudoTypeObject { for _, decorator := range agg.ObjectSpanDecorators.Impls { @@ -430,11 +409,15 @@ func (agg *aggregator) createPseudoSpan( return spanContext, nil } -func (aggregator *aggregator) expiringSpanCacheKey(object util.ObjectRef, timestamp time.Time) string { +func (aggregator *aggregator) expiringSpanCacheKey( + object util.ObjectRef, + timestamp time.Time, + pseudoType 
zconstants.PseudoTypeValue, +) string { expiringWindow := timestamp.Unix() / int64(aggregator.options.spanTtl.Seconds()) - return aggregator.spanCacheKey(object, fmt.Sprintf("field=object,window=%d", expiringWindow)) + return aggregator.spanCacheKey(object, fmt.Sprintf("field=%s,window=%d", pseudoType, expiringWindow)) } -func (aggregator *aggregator) spanCacheKey(object util.ObjectRef, subObjectId string) string { - return fmt.Sprintf("%s/%s", object.String(), subObjectId) +func (aggregator *aggregator) spanCacheKey(object util.ObjectRef, window string) string { + return fmt.Sprintf("%s/%s", object.String(), window) } diff --git a/pkg/aggregator/linker/job/interface.go b/pkg/aggregator/linker/job/interface.go new file mode 100644 index 00000000..32e43d53 --- /dev/null +++ b/pkg/aggregator/linker/job/interface.go @@ -0,0 +1,63 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linkjob + +import ( + "context" + "time" + + "github.com/kubewharf/kelemetry/pkg/aggregator/tracer" + "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/util" +) + +type Publisher interface { + Publish(job *LinkJob) +} + +type Subscriber interface { + Subscribe(ctx context.Context) <-chan *LinkJob +} + +func init() { + manager.Global.Provide("linker-job-publisher", manager.Ptr[Publisher](&publisherMux{ + Mux: manager.NewMux("linker-job-publisher", false), + })) + manager.Global.Provide("linker-job-subscriber", manager.Ptr[Subscriber](&subscriberMux{ + Mux: manager.NewMux("linker-job-subscriber", false), + })) +} + +type publisherMux struct { + *manager.Mux +} + +func (mux *publisherMux) Publish(job *LinkJob) { + mux.Impl().(Publisher).Publish(job) +} + +type subscriberMux struct { + *manager.Mux +} + +func (mux *subscriberMux) Subscribe(ctx context.Context) <-chan *LinkJob { + return mux.Impl().(Subscriber).Subscribe(ctx) +} + +type LinkJob struct { + Object util.ObjectRef + EventTime time.Time + Span tracer.SpanContext +} diff --git a/pkg/aggregator/linker/job/local/local.go b/pkg/aggregator/linker/job/local/local.go new file mode 100644 index 00000000..4b5296b7 --- /dev/null +++ b/pkg/aggregator/linker/job/local/local.go @@ -0,0 +1,98 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linkerjoblocal + +import ( + "context" + "sync" + + "github.com/sirupsen/logrus" + + linkjob "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job" + "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/util/channel" +) + +func init() { + manager.Global.Provide("linker-job-local-queue", manager.Ptr(&queue{})) + manager.Global.ProvideMuxImpl("linker-job-local-publisher", manager.Ptr(&publisher{}), linkjob.Publisher.Publish) + manager.Global.ProvideMuxImpl("linker-job-local-subscriber", manager.Ptr(&subscriber{}), linkjob.Subscriber.Subscribe) +} + +type queue struct { + Logger logrus.FieldLogger + + subscribersMu sync.RWMutex + subscribers map[*subscriberKey]*channel.UnboundedQueue[*linkjob.LinkJob] +} + +type subscriberKey struct{} + +func (queue *queue) Options() manager.Options { return &manager.NoOptions{} } +func (queue *queue) Init() error { + queue.subscribers = map[*subscriberKey]*channel.UnboundedQueue[*linkjob.LinkJob]{} + return nil +} +func (queue *queue) Start(ctx context.Context) error { return nil } +func (queue *queue) Close(ctx context.Context) error { return nil } + +type publisher struct { + Queue *queue + manager.MuxImplBase +} + +func (publisher *publisher) Options() manager.Options { return &manager.NoOptions{} } +func (publisher *publisher) Init() error { return nil } +func (publisher *publisher) Start(ctx context.Context) error { return nil } +func (publisher *publisher) Close(ctx context.Context) error { return nil } +func (publisher *publisher) MuxImplName() (name string, isDefault bool) { return "local", true } +func (publisher *publisher) Publish(job *linkjob.LinkJob) { + publisher.Queue.subscribersMu.RLock() + defer publisher.Queue.subscribersMu.RUnlock() + + for _, sub := range publisher.Queue.subscribers { + sub.Send(job) + } +} + +type subscriber struct { + Queue *queue + manager.MuxImplBase +} + +func (subscriber *subscriber) Options() manager.Options { return &manager.NoOptions{} } +func 
(subscriber *subscriber) Init() error { return nil } +func (subscriber *subscriber) Start(ctx context.Context) error { return nil } +func (subscriber *subscriber) Close(ctx context.Context) error { return nil } +func (subscriber *subscriber) MuxImplName() (name string, isDefault bool) { return "local", true } +func (subscriber *subscriber) Subscribe(ctx context.Context) <-chan *linkjob.LinkJob { + queue := channel.NewUnboundedQueue[*linkjob.LinkJob](16) + + subscriber.Queue.subscribersMu.Lock() + defer subscriber.Queue.subscribersMu.Unlock() + + key := &subscriberKey{} + subscriber.Queue.subscribers[key] = queue + + go func() { + <-ctx.Done() + + subscriber.Queue.subscribersMu.Lock() + defer subscriber.Queue.subscribersMu.Unlock() + delete(subscriber.Queue.subscribers, key) + }() + + return queue.Receiver() +} diff --git a/pkg/aggregator/linker/job/worker/worker.go b/pkg/aggregator/linker/job/worker/worker.go new file mode 100644 index 00000000..05f465ea --- /dev/null +++ b/pkg/aggregator/linker/job/worker/worker.go @@ -0,0 +1,142 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package linkjobworker + +import ( + "context" + "fmt" + + "github.com/sirupsen/logrus" + "github.com/spf13/pflag" + "k8s.io/utils/pointer" + + "github.com/kubewharf/kelemetry/pkg/aggregator" + "github.com/kubewharf/kelemetry/pkg/aggregator/linker" + linkjob "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job" + "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/metrics" + "github.com/kubewharf/kelemetry/pkg/util/shutdown" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" +) + +func init() { + manager.Global.Provide("linker-job-worker", manager.Ptr(&worker{})) +} + +type workerOptions struct { + WorkerCount int +} + +func (options *workerOptions) Setup(fs *pflag.FlagSet) { + fs.IntVar(&options.WorkerCount, "linker-worker-count", 0, "Number of workers to execute link jobs") +} +func (options *workerOptions) EnableFlag() *bool { return pointer.Bool(options.WorkerCount > 0) } + +type worker struct { + options workerOptions + Logger logrus.FieldLogger + Linkers *manager.List[linker.Linker] + Subscriber linkjob.Subscriber + Aggregator aggregator.Aggregator + + ch <-chan *linkjob.LinkJob +} + +func (worker *worker) Options() manager.Options { return &worker.options } +func (worker *worker) Init() error { + worker.ch = worker.Subscriber.Subscribe(context.Background()) // never unsubscribe + return nil +} + +func (worker *worker) Start(ctx context.Context) error { + for workerId := 0; workerId < worker.options.WorkerCount; workerId++ { + go func(workerId int) { + defer shutdown.RecoverPanic(worker.Logger) + + for { + select { + case <-ctx.Done(): + return + case job := <-worker.ch: + worker.executeJob(ctx, worker.Logger.WithFields(job.Object.AsFields("job")), job) + } + } + }(workerId) + } + + return nil +} +func (worker *worker) Close(ctx context.Context) error { return nil } + +func (worker *worker) executeJob(ctx context.Context, logger logrus.FieldLogger, job *linkjob.LinkJob) { + for _, linker := range worker.Linkers.Impls 
{ + linkerLogger := logger.WithField("linker", fmt.Sprintf("%T", linker)) + if err := worker.execute(ctx, linkerLogger, linker, job); err != nil { + logger.WithError(err).Error("generating links") + } + } +} + +func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, linker linker.Linker, job *linkjob.LinkJob) error { + links, err := linker.Lookup(ctx, job.Object) + if err != nil { + return metrics.LabelError(fmt.Errorf("calling linker: %w", err), "CallLinker") + } + + for _, link := range links { + linkedSpan, err := worker.Aggregator.EnsureObjectSpan(ctx, link.Object, job.EventTime) + if err != nil { + return metrics.LabelError(fmt.Errorf("creating object span: %w", err), "CreateLinkedObjectSpan") + } + + forwardTags := map[string]string{} + zconstants.TagLinkedObject(forwardTags, link.Object, link.Role, link.Class) + _, _, err = worker.Aggregator.GetOrCreatePseudoSpan( + ctx, + job.Object, + zconstants.PseudoTypeLink, + job.EventTime, + job.Span, + nil, + forwardTags, + ) + if err != nil { + return metrics.LabelError( + fmt.Errorf("creating link span from source object to linked object: %w", err), + "CreateForwardLinkSpan", + ) + } + + backwardTags := map[string]string{} + zconstants.TagLinkedObject(backwardTags, job.Object, zconstants.ReverseLinkRole(link.Role), link.Class) + _, _, err = worker.Aggregator.GetOrCreatePseudoSpan( + ctx, + link.Object, + zconstants.PseudoTypeLink, + job.EventTime, + linkedSpan, + nil, + backwardTags, + ) + if err != nil { + return metrics.LabelError( + fmt.Errorf("creating link span from linked object to source object: %w", err), + "CreateBackwardLinkSpan", + ) + } + } + + return nil +} diff --git a/pkg/aggregator/linker/linker.go b/pkg/aggregator/linker/linker.go index 4cf2458f..69b2d399 100644 --- a/pkg/aggregator/linker/linker.go +++ b/pkg/aggregator/linker/linker.go @@ -18,8 +18,15 @@ import ( "context" "github.com/kubewharf/kelemetry/pkg/util" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) type 
Linker interface { - Lookup(ctx context.Context, object util.ObjectRef) *util.ObjectRef + Lookup(ctx context.Context, object util.ObjectRef) ([]LinkerResult, error) +} + +type LinkerResult struct { + Object util.ObjectRef + Role zconstants.LinkRoleValue + Class string } diff --git a/pkg/annotationlinker/linker.go b/pkg/annotationlinker/linker.go index c2b5a362..cf390ef0 100644 --- a/pkg/annotationlinker/linker.go +++ b/pkg/annotationlinker/linker.go @@ -17,6 +17,7 @@ package annotationlinker import ( "context" "encoding/json" + "fmt" "github.com/sirupsen/logrus" "github.com/spf13/pflag" @@ -27,7 +28,9 @@ import ( "github.com/kubewharf/kelemetry/pkg/k8s/discovery" "github.com/kubewharf/kelemetry/pkg/k8s/objectcache" "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/metrics" "github.com/kubewharf/kelemetry/pkg/util" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) func init() { @@ -59,7 +62,7 @@ func (ctrl *controller) Init() error { return nil } func (ctrl *controller) Start(ctx context.Context) error { return nil } func (ctrl *controller) Close(ctx context.Context) error { return nil } -func (ctrl *controller) Lookup(ctx context.Context, object util.ObjectRef) *util.ObjectRef { +func (ctrl *controller) Lookup(ctx context.Context, object util.ObjectRef) ([]linker.LinkerResult, error) { raw := object.Raw logger := ctrl.Logger.WithFields(object.AsFields("object")) @@ -71,13 +74,12 @@ func (ctrl *controller) Lookup(ctx context.Context, object util.ObjectRef) *util raw, err = ctrl.ObjectCache.Get(ctx, object) if err != nil { - logger.WithError(err).Error("cannot fetch object value") - return nil + return nil, metrics.LabelError(fmt.Errorf("cannot fetch object value: %w", err), "FetchCache") } if raw == nil { logger.Debug("object no longer exists") - return nil + return nil, nil } } @@ -85,15 +87,14 @@ func (ctrl *controller) Lookup(ctx context.Context, object util.ObjectRef) *util ref := &ParentLink{} err := 
json.Unmarshal([]byte(ann), ref) if err != nil { - logger.WithError(err).Error("cannot parse ParentLink annotation") - return nil + return nil, metrics.LabelError(fmt.Errorf("cannot parse ParentLink annotation: %w", err), "ParseAnnotation") } if ref.Cluster == "" { ref.Cluster = object.Cluster } - objectRef := &util.ObjectRef{ + objectRef := util.ObjectRef{ Cluster: ref.Cluster, GroupVersionResource: schema.GroupVersionResource{ Group: ref.GroupVersionResource.Group, @@ -106,8 +107,11 @@ func (ctrl *controller) Lookup(ctx context.Context, object util.ObjectRef) *util } logger.WithField("parent", objectRef).Debug("Resolved parent") - return objectRef + return []linker.LinkerResult{{ + Object: objectRef, + Role: zconstants.LinkRoleParent, + }}, nil } - return nil + return nil, nil } diff --git a/pkg/audit/consumer/consumer.go b/pkg/audit/consumer/consumer.go index 94ec24d3..fd5c270a 100644 --- a/pkg/audit/consumer/consumer.go +++ b/pkg/audit/consumer/consumer.go @@ -284,15 +284,7 @@ func (recv *receiver) handleItem( Resource: objectRef.Resource, }).Summary(float64(e2eLatency.Nanoseconds())) - var subObjectId *aggregator.SubObjectId - if recv.options.enableSubObject && (message.Verb == audit.VerbUpdate || message.Verb == audit.VerbPatch) { - subObjectId = &aggregator.SubObjectId{ - Id: fmt.Sprintf("rv=%s", message.ObjectRef.ResourceVersion), - Primary: message.ResponseStatus.Code < 300, - } - } - - err := recv.Aggregator.Send(ctx, objectRef, event, subObjectId) + err := recv.Aggregator.Send(ctx, objectRef, event) if err != nil { fieldLogger.WithError(err).Error() } else { diff --git a/pkg/event/controller.go b/pkg/event/controller.go index cc286db6..411f88f7 100644 --- a/pkg/event/controller.go +++ b/pkg/event/controller.go @@ -318,7 +318,7 @@ func (ctrl *controller) handleEvent(ctx context.Context, event *corev1.Event) { Namespace: event.InvolvedObject.Namespace, Name: event.InvolvedObject.Name, Uid: event.InvolvedObject.UID, - }, aggregatorEvent, nil); err != nil { 
+ }, aggregatorEvent); err != nil { logger.WithError(err).Error("Cannot send trace") metric.Error = metrics.LabelError(err, "SendTrace") return diff --git a/pkg/imports.go b/pkg/imports.go index f309a279..f7a779ff 100644 --- a/pkg/imports.go +++ b/pkg/imports.go @@ -18,6 +18,8 @@ package kelemetry_pkg import ( _ "github.com/kubewharf/kelemetry/pkg/aggregator/aggregatorevent" _ "github.com/kubewharf/kelemetry/pkg/aggregator/eventdecorator/eventtagger" + _ "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job/local" + _ "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job/worker" _ "github.com/kubewharf/kelemetry/pkg/aggregator/objectspandecorator/resourcetagger" _ "github.com/kubewharf/kelemetry/pkg/aggregator/spancache/etcd" _ "github.com/kubewharf/kelemetry/pkg/aggregator/spancache/local" diff --git a/pkg/ownerlinker/linker.go b/pkg/ownerlinker/linker.go index 03d1ceea..e05bf58b 100644 --- a/pkg/ownerlinker/linker.go +++ b/pkg/ownerlinker/linker.go @@ -16,6 +16,7 @@ package ownerlinker import ( "context" + "fmt" "github.com/sirupsen/logrus" "github.com/spf13/pflag" @@ -26,7 +27,9 @@ import ( "github.com/kubewharf/kelemetry/pkg/k8s/discovery" "github.com/kubewharf/kelemetry/pkg/k8s/objectcache" "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/metrics" "github.com/kubewharf/kelemetry/pkg/util" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) func init() { @@ -58,7 +61,7 @@ func (ctrl *Controller) Init() error { return nil } func (ctrl *Controller) Start(ctx context.Context) error { return nil } func (ctrl *Controller) Close(ctx context.Context) error { return nil } -func (ctrl *Controller) Lookup(ctx context.Context, object util.ObjectRef) *util.ObjectRef { +func (ctrl *Controller) Lookup(ctx context.Context, object util.ObjectRef) ([]linker.LinkerResult, error) { raw := object.Raw logger := ctrl.Logger.WithFields(object.AsFields("object")) @@ -70,16 +73,17 @@ func (ctrl *Controller) Lookup(ctx context.Context, 
object util.ObjectRef) *util raw, err = ctrl.ObjectCache.Get(ctx, object) if err != nil { - logger.WithError(err).Error("cannot fetch object value") - return nil + return nil, metrics.LabelError(fmt.Errorf("cannot fetch object value from cache: %w", err), "FetchCache") } if raw == nil { logger.Debug("object no longer exists") - return nil + return nil, nil } } + var results []linker.LinkerResult + for _, owner := range raw.GetOwnerReferences() { if owner.Controller != nil && *owner.Controller { groupVersion, err := schema.ParseGroupVersion(owner.APIVersion) @@ -101,18 +105,22 @@ func (ctrl *Controller) Lookup(ctx context.Context, object util.ObjectRef) *util continue } - ret := &util.ObjectRef{ + parentRef := util.ObjectRef{ Cluster: object.Cluster, // inherited from the same cluster GroupVersionResource: gvr, Namespace: object.Namespace, Name: owner.Name, Uid: owner.UID, } - logger.WithField("owner", ret).Debug("Resolved owner") + logger.WithField("owner", parentRef).Debug("Resolved owner") - return ret + results = append(results, linker.LinkerResult{ + Object: parentRef, + Role: zconstants.LinkRoleParent, + Class: "children", + }) } } - return nil + return results, nil } diff --git a/pkg/util/zconstants/zconstants.go b/pkg/util/zconstants/zconstants.go index f0e8ac89..82127bae 100644 --- a/pkg/util/zconstants/zconstants.go +++ b/pkg/util/zconstants/zconstants.go @@ -16,7 +16,11 @@ // for span transformation in the frontend storage plugin. package zconstants -import "time" +import ( + "time" + + "github.com/kubewharf/kelemetry/pkg/util" +) // All tags with this prefix are not rendered. const Prefix = "zzz-" @@ -32,6 +36,7 @@ type PseudoTypeValue string const ( PseudoTypeObject PseudoTypeValue = "object" + PseudoTypeLink PseudoTypeValue = "link" ) // Identifies that the span represents an actual event (rather than as a pseudospan). 
@@ -80,6 +85,16 @@ const ( LinkClass = "linkClass" ) +func TagLinkedObject(tags map[string]string, object util.ObjectRef, role LinkRoleValue, class string) { + tags[LinkedObjectCluster] = object.Cluster + tags[LinkedObjectGroup] = object.Group + tags[LinkedObjectResource] = object.Resource + tags[LinkedObjectNamespace] = object.Namespace + tags[LinkedObjectName] = object.Name + tags[LinkRole] = string(role) + tags[LinkClass] = class +} + type LinkRoleValue string const ( @@ -90,6 +105,18 @@ const ( LinkRoleChild LinkRoleValue = "child" ) +// Determines the role of the reverse link. +func ReverseLinkRole(role LinkRoleValue) LinkRoleValue { + switch role { + case LinkRoleParent: + return LinkRoleChild + case LinkRoleChild: + return LinkRoleParent + default: + return role + } +} + // Classifies the type of a log line. // Logs without this attribute will not have special treatment. const LogTypeAttr = Prefix + "logType" From e47e63fe5369b42af74754e5079be32c225ea0c5 Mon Sep 17 00:00:00 2001 From: chankyin Date: Tue, 15 Aug 2023 18:19:24 +0800 Subject: [PATCH 03/16] feat: support frontend-side object linking --- Makefile | 1 + pkg/aggregator/aggregator.go | 1 + pkg/frontend/backend/interface.go | 10 +- .../backend/jaeger-storage/backend.go | 109 +--- pkg/frontend/reader/merge.go | 89 ---- pkg/frontend/reader/merge/merge.go | 465 ++++++++++++++++++ pkg/frontend/reader/reader.go | 213 ++++++-- pkg/frontend/tf/config/config.go | 19 +- pkg/frontend/tf/config/file/file.go | 16 +- .../tf/defaults/modifier/exclusive.go | 30 +- pkg/frontend/tf/transform.go | 27 - pkg/frontend/tf/tree/grouping.go | 32 ++ pkg/frontend/tf/tree/tree.go | 55 ++- pkg/util/zconstants/zconstants.go | 14 +- 14 files changed, 797 insertions(+), 284 deletions(-) delete mode 100644 pkg/frontend/reader/merge.go create mode 100644 pkg/frontend/reader/merge/merge.go diff --git a/Makefile b/Makefile index 6e640b8b..5e7b8ed6 100644 --- a/Makefile +++ b/Makefile @@ -98,6 +98,7 @@ run: output/kelemetry 
$(DUMP_ROTATE_DEP) --span-cache-etcd-endpoints=127.0.0.1:2379 \ --tracer-otel-endpoint=$(OTEL_EXPORTER_OTLP_ENDPOINT) \ --tracer-otel-insecure \ + --object-cache-size=16777216 \ --jaeger-cluster-names=$(CLUSTER_NAME) \ --jaeger-storage-plugin-address=0.0.0.0:17271 \ --jaeger-backend=jaeger-storage \ diff --git a/pkg/aggregator/aggregator.go b/pkg/aggregator/aggregator.go index 138cf08c..10c44f13 100644 --- a/pkg/aggregator/aggregator.go +++ b/pkg/aggregator/aggregator.go @@ -195,6 +195,7 @@ func (aggregator *aggregator) Send( "group": object.Group, "version": object.Version, "resource": object.Resource, + zconstants.NotPseudo: zconstants.NotPseudo, zconstants.TraceSource: event.TraceSource, }, Logs: event.Logs, diff --git a/pkg/frontend/backend/interface.go b/pkg/frontend/backend/interface.go index 82aedcff..50906f7b 100644 --- a/pkg/frontend/backend/interface.go +++ b/pkg/frontend/backend/interface.go @@ -23,6 +23,7 @@ import ( "github.com/jaegertracing/jaeger/storage/spanstore" "k8s.io/utils/clock" + tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" "github.com/kubewharf/kelemetry/pkg/manager" "github.com/kubewharf/kelemetry/pkg/metrics" ) @@ -38,7 +39,6 @@ type Backend interface { List( ctx context.Context, query *spanstore.TraceQueryParameters, - exclusive bool, ) ([]*TraceThumbnail, error) // Gets the full tree of a trace based on the identifier returned from a prvious call to List. @@ -59,9 +59,12 @@ type TraceThumbnail struct { // Identifier is a serializable object that identifies the trace in GetTrace calls. 
Identifier any - Spans []*model.Span + Spans *tftree.SpanTree } +func (tt *TraceThumbnail) GetSpans() *tftree.SpanTree { return tt.Spans } +func (tt *TraceThumbnail) GetMetadata() any { return tt.Identifier } + type mux struct { *manager.Mux Clock clock.Clock @@ -81,10 +84,9 @@ func (*getMetric) MetricName() string { return "jaeger_backend_get" } func (mux *mux) List( ctx context.Context, query *spanstore.TraceQueryParameters, - exclusive bool, ) ([]*TraceThumbnail, error) { defer mux.ListMetric.DeferCount(mux.Clock.Now(), &listMetric{}) - return mux.Impl().(Backend).List(ctx, query, exclusive) + return mux.Impl().(Backend).List(ctx, query) } func (mux *mux) Get( diff --git a/pkg/frontend/backend/jaeger-storage/backend.go b/pkg/frontend/backend/jaeger-storage/backend.go index 40658ec7..64b67727 100644 --- a/pkg/frontend/backend/jaeger-storage/backend.go +++ b/pkg/frontend/backend/jaeger-storage/backend.go @@ -31,7 +31,6 @@ import ( "github.com/spf13/pflag" "github.com/spf13/viper" "go.uber.org/zap" - "k8s.io/apimachinery/pkg/util/sets" jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" @@ -151,9 +150,10 @@ func (backend *Backend) Close(ctx context.Context) error { return nil } func (backend *Backend) List( ctx context.Context, params *spanstore.TraceQueryParameters, - exclusive bool, ) ([]*jaegerbackend.TraceThumbnail, error) { - filterTags := map[string]string{} + filterTags := map[string]string{ + zconstants.NotPseudo: zconstants.NotPseudo, + } for key, val := range params.Tags { filterTags[key] = val } @@ -161,10 +161,9 @@ func (backend *Backend) List( filterTags["cluster"] = params.OperationName } - // TODO support additional user-defined trace sources - var traces []*model.Trace + traceThumbnails := []*jaegerbackend.TraceThumbnail{} for _, traceSource := range zconstants.KnownTraceSources(false) { - if len(traces) >= params.NumTraces { + if len(traceThumbnails) >= params.NumTraces { 
break } @@ -175,99 +174,33 @@ func (backend *Backend) List( StartTimeMax: params.StartTimeMax, DurationMin: params.DurationMin, DurationMax: params.DurationMax, - NumTraces: params.NumTraces - len(traces), + NumTraces: params.NumTraces - len(traceThumbnails), } - newTraces, err := backend.reader.FindTraces(ctx, newParams) + traces, err := backend.reader.FindTraces(ctx, newParams) if err != nil { return nil, fmt.Errorf("find traces from backend err: %w", err) } - traces = append(traces, newTraces...) - } - - var traceThumbnails []*jaegerbackend.TraceThumbnail - - // a stateful function that determines only returns true for each valid resultant root span the first time - var deduplicator func(*model.Span) bool - if exclusive { - // exclusive mode, each object under trace should have a list entry - type objectInTrace struct { - traceId model.TraceID - key tftree.GroupingKey - } - seenObjects := sets.New[objectInTrace]() - deduplicator = func(span *model.Span) bool { - key, hasKey := tftree.GroupingKeyFromSpan(span) - if !hasKey { - return false // not a root + for _, trace := range traces { + if len(trace.Spans) == 0 { + continue } - field, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) - if !isPseudo || field.VStr != string(zconstants.PseudoTypeObject) { - return false // not an object root - } + tree := tftree.NewSpanTree(trace.Spans) - fullKey := objectInTrace{ - traceId: span.TraceID, - key: key, + thumbnail := &jaegerbackend.TraceThumbnail{ + Identifier: identifier{ + TraceId: tree.Root.TraceID, + SpanId: tree.Root.SpanID, + }, + Spans: tree, } - if seenObjects.Has(fullKey) { - return false // a known root - } - - for reqKey, reqValue := range filterTags { - if value, exists := model.KeyValues(span.Tags).FindByKey(reqKey); !exists || value.VStr != reqValue { - return false // not a matched root - } - } - - seenObjects.Insert(fullKey) - return true - } - } else { - // non exclusive mode, display full trace, so we want each full trace to 
display exactly once. - seenTraces := sets.New[model.TraceID]() - deduplicator = func(span *model.Span) bool { - if len(span.References) > 0 { - return false // we only want the root - } - - if seenTraces.Has(span.TraceID) { - return false - } - - seenTraces.Insert(span.TraceID) - return true - } - } - - for _, trace := range traces { - if len(trace.Spans) == 0 { - continue - } - - for _, span := range trace.Spans { - if deduplicator(span) { - tree := tftree.NewSpanTree(trace.Spans) - if err := tree.SetRoot(span.SpanID); err != nil { - return nil, fmt.Errorf("unexpected SetRoot error for span ID from trace: %w", err) - } - - thumbnail := &jaegerbackend.TraceThumbnail{ - Identifier: identifier{ - TraceId: span.TraceID, - SpanId: span.SpanID, - }, - Spans: tree.GetSpans(), - } - traceThumbnails = append(traceThumbnails, thumbnail) - - backend.Logger.WithField("ident", thumbnail.Identifier). - WithField("filteredSpans", len(thumbnail.Spans)). - Debug("matched trace") - } + traceThumbnails = append(traceThumbnails, thumbnail) + backend.Logger.WithField("ident", thumbnail.Identifier). + WithField("filteredSpans", len(thumbnail.Spans.GetSpans())). + Debug("matched trace") } } diff --git a/pkg/frontend/reader/merge.go b/pkg/frontend/reader/merge.go deleted file mode 100644 index 3a95e551..00000000 --- a/pkg/frontend/reader/merge.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2023 The Kelemetry Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package jaegerreader - -import ( - "github.com/jaegertracing/jaeger/model" - "k8s.io/apimachinery/pkg/util/sets" - - jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" - tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" -) - -type mergeMap struct { - ptrSet sets.Set[*mergeEntry] - fromKeys map[tftree.GroupingKey]*mergeEntry -} - -type mergeEntry struct { - keys sets.Set[tftree.GroupingKey] - identifiers []any - spans []*model.Span -} - -func singletonMerged(keys sets.Set[tftree.GroupingKey], thumbnail *jaegerbackend.TraceThumbnail) *mergeEntry { - return &mergeEntry{ - keys: keys, - identifiers: []any{thumbnail.Identifier}, - spans: thumbnail.Spans, - } -} - -func (entry *mergeEntry) join(other *mergeEntry) { - for key := range other.keys { - entry.keys.Insert(key) - } - - entry.identifiers = append(entry.identifiers, other.identifiers...) - entry.spans = append(entry.spans, other.spans...) -} - -// add a thumbnail with a preferred root key. -func (m *mergeMap) add(keys sets.Set[tftree.GroupingKey], thumbnail *jaegerbackend.TraceThumbnail) { - entry := singletonMerged(keys.Clone(), thumbnail) - m.ptrSet.Insert(entry) - - dups := sets.New[*mergeEntry]() - - for key := range keys { - if prev, hasPrev := m.fromKeys[key]; hasPrev { - dups.Insert(prev) - } - } - - for dup := range dups { - entry.join(dup) - m.ptrSet.Delete(dup) - } - - for key := range entry.keys { - // including all new and joined keys - m.fromKeys[key] = entry - } -} - -func mergeSegments(thumbnails []*jaegerbackend.TraceThumbnail) []*mergeEntry { - m := mergeMap{ - ptrSet: sets.New[*mergeEntry](), - fromKeys: map[tftree.GroupingKey]*mergeEntry{}, - } - - for _, thumbnail := range thumbnails { - keys := tftree.GroupingKeysFromSpans(thumbnail.Spans) - m.add(keys, thumbnail) - } - - return m.ptrSet.UnsortedList() -} diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go new file mode 100644 index 00000000..3d4e3163 --- /dev/null +++ 
b/pkg/frontend/reader/merge/merge.go @@ -0,0 +1,465 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package merge + +import ( + "fmt" + "math/rand" + + "github.com/jaegertracing/jaeger/model" + "k8s.io/apimachinery/pkg/util/sets" + + tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" + reflectutil "github.com/kubewharf/kelemetry/pkg/util/reflect" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" +) + +type Merger[R RawTrace[M], M any] struct{} + +type RawTrace[M any] interface { + GetSpans() *tftree.SpanTree + GetMetadata() M +} + +type objKey = tftree.GroupingKey + +func (merger Merger[R, M]) MergeTraces(rawTraces []R) ([]*MergeTree[M], error) { + objects, err := merger.groupByKey(rawTraces) + if err != nil { + return nil, err + } + + abLinks := abLinkMap{} + + for _, obj := range objects { + obj.identifyLinks() + + for _, link := range obj.links { + abLink := abLinkFromTargetLink(obj.key, link) + abLinks.insert(abLink) + } + } + + var mergeTrees []*MergeTree[M] + for _, keys := range merger.findConnectedComponents(objects, abLinks) { + var members []*object[R, M] + for _, key := range keys { + members = append(members, objects[key]) + } + + mergeTree, err := newMergeTree(members, abLinks) + if err != nil { + return nil, err + } + + mergeTrees = append(mergeTrees, mergeTree) + } + + return mergeTrees, nil +} + +func (merger Merger[R, M]) groupByKey(rawTraces []R) (map[objKey]*object[R, M], 
error) { + objects := map[objKey]*object[R, M]{} + + for _, trace := range rawTraces { + key, _ := tftree.GroupingKeyFromSpan(trace.GetSpans().Root) + + if obj, hasPrev := objects[key]; hasPrev { + if err := obj.merge(trace); err != nil { + return nil, err + } + } else { + obj, err := newObject[R, M](key, trace) + if err != nil { + return nil, err + } + + objects[key] = obj + } + } + + return objects, nil +} + +type object[R RawTrace[M], M any] struct { + key objKey + metadata []M + tree *tftree.SpanTree + + links []targetLink +} + +func newObject[R RawTrace[M], M any](key objKey, trace R) (*object[R, M], error) { + clonedTree, err := trace.GetSpans().Clone() + if err != nil { + return nil, fmt.Errorf("clone spans: %w", err) + } + obj := &object[R, M]{ + key: key, + metadata: []M{trace.GetMetadata()}, + tree: clonedTree, + } + return obj, nil +} + +func (obj *object[R, M]) merge(trace R) error { + obj.metadata = append(obj.metadata, trace.GetMetadata()) + + mergeRoot(obj.tree.Root, trace.GetSpans().Root) + + copyVisitor := ©TreeVisitor{to: obj.tree} + trace.GetSpans().Visit(copyVisitor) + if copyVisitor.err != nil { + return copyVisitor.err + } + + return nil +} + +func mergeRoot(base *model.Span, tail *model.Span) { + mergeRootInterval(base, tail) + mergeRootTags(base, tail) +} + +func mergeRootInterval(base *model.Span, tail *model.Span) { + startTime := base.StartTime + if tail.StartTime.Before(startTime) { + startTime = tail.StartTime + } + + endTime := base.StartTime.Add(base.Duration) + tailEndTime := tail.StartTime.Add(tail.Duration) + if tailEndTime.After(endTime) { + endTime = tailEndTime + } + + base.StartTime = startTime + base.Duration = endTime.Sub(startTime) +} + +func mergeRootTags(base *model.Span, tail *model.Span) { + tagPos := map[string]int{} + for pos, tag := range base.Tags { + tagPos[tag.Key] = pos + } + + for _, tag := range tail.Tags { + if pos, hasTag := tagPos[tag.Key]; hasTag { + if tail.StartTime.After(base.StartTime) { + // the newer 
value wins + base.Tags[pos] = tag + } + } else { + base.Tags = append(base.Tags, tag) + } + } +} + +func (obj *object[R, M]) identifyLinks() { + for spanId := range obj.tree.Children(obj.tree.Root.SpanID) { + span := obj.tree.Span(spanId) + pseudoType, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) + if !(isPseudo && pseudoType.VStr == string(zconstants.PseudoTypeLink)) { + continue + } + + target, hasTarget := tftree.LinkGroupingKeyFromSpan(span) + if !hasTarget { + continue + } + + linkRoleTag, hasLinkRole := model.KeyValues(span.Tags).FindByKey(zconstants.LinkRole) + if !hasLinkRole { + continue + } + linkRole := linkRoleTag.VStr + + linkClassTag, hasLinkClass := model.KeyValues(span.Tags).FindByKey(zconstants.LinkClass) + linkClass := "" + if hasLinkClass { + linkClass = linkClassTag.VStr + } + + obj.links = append(obj.links, targetLink{ + key: target, + role: zconstants.LinkRoleValue(linkRole), + class: linkClass, + }) + } +} + +type targetLink struct { + key objKey + role zconstants.LinkRoleValue + class string +} + +type copyTreeVisitor struct { + to *tftree.SpanTree + err error +} + +func (visitor *copyTreeVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { + if span.SpanID != tree.Root.SpanID { + spanCopy, err := tftree.CopySpan(span) + if err != nil { + visitor.err = err + return nil + } + + visitor.to.Add(spanCopy, span.ParentSpanID()) + } + + return visitor +} + +func (visitor *copyTreeVisitor) Exit(tree *tftree.SpanTree, span *model.Span) {} + +type abLink struct { + alpha, beta objKey + + alphaIsParent bool // this needs to be changed if there are link roles other than parent and child + class string +} + +func (link abLink) isParent(key objKey) bool { + if link.alphaIsParent { + return link.alpha == key + } else { + return link.beta == key + } +} + +func abLinkFromTargetLink(subject objKey, link targetLink) abLink { + if groupingKeyLess(subject, link.key) { + return abLink{ + alpha: subject, + beta: 
link.key,
+			alphaIsParent: link.role == zconstants.LinkRoleChild,
+			class:         link.class,
+		}
+	} else {
+		return abLink{
+			beta:          subject,
+			alpha:         link.key,
+			alphaIsParent: link.role != zconstants.LinkRoleChild,
+			class:         link.class,
+		}
+	}
+}
+
+func groupingKeyLess(left, right objKey) bool {
+	if left.Group != right.Group {
+		return left.Group < right.Group
+	}
+
+	if left.Resource != right.Resource {
+		return left.Resource < right.Resource
+	}
+
+	if left.Cluster != right.Cluster {
+		return left.Cluster < right.Cluster
+	}
+
+	if left.Namespace != right.Namespace {
+		return left.Namespace < right.Namespace
+	}
+
+	if left.Name != right.Name {
+		return left.Name < right.Name
+	}
+
+	return false
+}
+
+type abLinkMap map[objKey]map[objKey]abLink
+
+func (m abLinkMap) insert(link abLink) {
+	m.insertDirected(link.alpha, link.beta, link)
+	m.insertDirected(link.beta, link.alpha, link)
+}
+
+func (m abLinkMap) insertDirected(k1, k2 objKey, link abLink) {
+	if m[k1] == nil {
+		m[k1] = map[objKey]abLink{}
+	}
+	v1 := m[k1]
+	v1[k2] = link
+}
+
+func (m abLinkMap) detectRoot(seed objKey) (_root objKey, _hasCycle bool) {
+	visited := sets.New[objKey]()
+	return m.dfsRoot(visited, seed)
+}
+
+func (m abLinkMap) dfsRoot(visited sets.Set[objKey], key objKey) (_root objKey, _hasCycle bool) {
+	if visited.Has(key) {
+		return key, true
+	}
+	visited.Insert(key) // avoid infinite recursion
+
+	for peer, link := range m[key] {
+		if link.isParent(peer) {
+			return m.dfsRoot(visited, peer)
+		}
+	}
+
+	return key, false // key has no parent, so key is root
+}
+
+type componentTaint = int
+
+type connectedComponent = []objKey
+
+func (Merger[R, M]) findConnectedComponents(objects map[objKey]*object[R, M], abLinks abLinkMap) []connectedComponent {
+	objectKeys := make(sets.Set[objKey], len(objects))
+	for gk := range objects {
+		objectKeys.Insert(gk)
+	}
+
+	var taintCounter componentTaint
+
+	taints := map[objKey]componentTaint{}
+
+	for {
+		seed, hasMore := 
peekArbitraryFromSet(objectKeys)
+		if !hasMore {
+			break
+		}
+
+		dfsTaint(objectKeys, abLinks, taints, taintCounter, seed)
+		taintCounter += 1
+	}
+
+	components := make([]connectedComponent, taintCounter)
+	for key, taint := range taints {
+		components[taint] = append(components[taint], key)
+	}
+
+	return components
+}
+
+func peekArbitraryFromSet[T comparable](set sets.Set[T]) (T, bool) {
+	for value := range set {
+		return value, true
+	}
+
+	return reflectutil.ZeroOf[T](), false
+}
+
+func dfsTaint(
+	keys sets.Set[objKey],
+	abLinks abLinkMap,
+	taints map[objKey]componentTaint,
+	taintId componentTaint,
+	seed objKey,
+) {
+	taints[seed] = taintId
+	delete(keys, seed) // delete before diving in to avoid recursing backwards
+
+	for peer := range abLinks[seed] {
+		if _, remaining := keys[peer]; !remaining {
+			continue // peer was already tainted through another path
+		}
+
+		dfsTaint(keys, abLinks, taints, taintId, peer)
+	}
+}
+
+type MergeTree[M any] struct {
+	Metadata []M
+
+	Tree *tftree.SpanTree
+}
+
+func newMergeTree[R RawTrace[M], M any](
+	members []*object[R, M],
+	abLinks abLinkMap,
+) (*MergeTree[M], error) {
+	metadata := []M{}
+
+	for _, member := range members {
+		metadata = append(metadata, member.metadata...) 
+ } + + merged, err := mergeLinkedTraces(members, abLinks) + if err != nil { + return nil, err + } + + return &MergeTree[M]{ + Metadata: metadata, + Tree: merged, + }, nil +} + +func mergeLinkedTraces[R RawTrace[M], M any](objects []*object[R, M], abLinks abLinkMap) (*tftree.SpanTree, error) { + trees := make(map[objKey]*object[R, M], len(objects)) + for _, obj := range objects { + trees[obj.key] = obj + } + + rootKey, _ := abLinks.detectRoot(objects[0].key) + + tree := trees[rootKey].tree + + pendingObjects := []objKey{rootKey} + for len(pendingObjects) > 0 { + subj := pendingObjects[len(pendingObjects)-1] + pendingObjects = pendingObjects[:len(pendingObjects)-1] + + for _, link := range trees[subj].links { + if link.role != zconstants.LinkRoleChild { + continue + } + + parentSpan := trees[subj].tree.Root + if link.class != "" { + virtualSpan := createVirtualSpan(tree.Root.TraceID, parentSpan, link.class) + tree.Add(virtualSpan, parentSpan.SpanID) + parentSpan = virtualSpan + } + + tree.AddTree(trees[link.key].tree, parentSpan.SpanID) + pendingObjects = append(pendingObjects, link.key) + } + } + + return tree, nil +} + +func createVirtualSpan(traceId model.TraceID, span *model.Span, class string) *model.Span { + spanId := model.SpanID(rand.Uint64()) + + return &model.Span{ + TraceID: traceId, + SpanID: spanId, + OperationName: class, + Flags: 0, + StartTime: span.StartTime, + Duration: span.Duration, + Tags: []model.KeyValue{ + { + Key: zconstants.PseudoType, + VType: model.StringType, + VStr: string(zconstants.PseudoTypeLinkClass), + }, + }, + Process: &model.Process{ + ServiceName: class, + }, + ProcessID: "1", + } +} diff --git a/pkg/frontend/reader/reader.go b/pkg/frontend/reader/reader.go index 6484016a..a3559f9e 100644 --- a/pkg/frontend/reader/reader.go +++ b/pkg/frontend/reader/reader.go @@ -26,14 +26,17 @@ import ( "github.com/jaegertracing/jaeger/storage/spanstore" "github.com/sirupsen/logrus" "github.com/spf13/pflag" + 
"k8s.io/apimachinery/pkg/util/sets" jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" "github.com/kubewharf/kelemetry/pkg/frontend/clusterlist" + "github.com/kubewharf/kelemetry/pkg/frontend/reader/merge" transform "github.com/kubewharf/kelemetry/pkg/frontend/tf" tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" "github.com/kubewharf/kelemetry/pkg/frontend/tracecache" "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/util/semaphore" "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) @@ -46,7 +49,8 @@ type Interface interface { } type options struct { - cacheExtensions bool + cacheExtensions bool + followLinkConcurrency int } func (options *options) Setup(fs *pflag.FlagSet) { @@ -56,6 +60,12 @@ func (options *options) Setup(fs *pflag.FlagSet) { false, "cache extension trace search result, otherwise trace is searched again every time result is reloaded", ) + fs.IntVar( + &options.followLinkConcurrency, + "frontend-follow-link-concurrency", + 5, + "number of concurrent trace per request to follow links", + ) } func (options *options) EnableFlag() *bool { return nil } @@ -127,78 +137,45 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace } reader.Logger.WithField("query", query). - WithField("exclusive", config.UseSubtree). + WithField("links", config.FollowLinks). WithField("config", config.Name). 
Debug("start trace list") - thumbnails, err := reader.Backend.List(ctx, query, config.UseSubtree) + thumbnails, err := reader.Backend.List(ctx, query) if err != nil { return nil, err } + if err := reader.followLinks(ctx, &thumbnails, config.FollowLinks, query); err != nil { + return nil, fmt.Errorf("resolving linked objects: %w", err) + } + var rootKey *tftree.GroupingKey if rootKeyValue, ok := tftree.GroupingKeyFromMap(query.Tags); ok { rootKey = &rootKeyValue } - mergedEntries := mergeSegments(thumbnails) + mergeTrees, err := merge.Merger[*jaegerbackend.TraceThumbnail, any]{}.MergeTraces(thumbnails) + if err != nil { + return nil, fmt.Errorf("merging split and linked traces: %w", err) + } cacheEntries := []tracecache.Entry{} traces := []*model.Trace{} - for _, entry := range mergedEntries { + for _, mergeTree := range mergeTrees { cacheId := generateCacheId(config.Id) - for _, span := range entry.spans { - span.TraceID = cacheId - for i := range span.References { - span.References[i].TraceID = cacheId - } - } - - entry.spans = filterTimeRange(entry.spans, query.StartTimeMin, query.StartTimeMax) - - trace := &model.Trace{ - ProcessMap: []model.Trace_ProcessMapping{{ - ProcessID: "0", - Process: model.Process{}, - }}, - Spans: entry.spans, + trace, extensionCache, err := reader.prepareEntry(ctx, config, rootKey, query, mergeTree.Tree, cacheId) + if err != nil { + return nil, err } - displayMode := extractDisplayMode(cacheId) - - extensions := &transform.FetchExtensionsAndStoreCache{} - - if err := reader.Transformer.Transform( - ctx, trace, rootKey, displayMode, - extensions, - query.StartTimeMin, query.StartTimeMax, - ); err != nil { - return nil, fmt.Errorf("trace transformation failed: %w", err) - } traces = append(traces, trace) - identifiers := make([]json.RawMessage, len(entry.identifiers)) - for i, identifier := range entry.identifiers { - idJson, err := json.Marshal(identifier) - if err != nil { - return nil, fmt.Errorf("thumbnail identifier marshal: %w", 
err) - } - - identifiers[i] = json.RawMessage(idJson) + cacheEntry, err := reader.storeCache(ctx, rootKey, query, mergeTree.Metadata, cacheId, extensionCache) + if err != nil { + return nil, err } - cacheEntry := tracecache.Entry{ - LowId: cacheId.Low, - Value: tracecache.EntryValue{ - Identifiers: identifiers, - StartTime: query.StartTimeMin, - EndTime: query.StartTimeMax, - RootObject: rootKey, - }, - } - if reader.options.cacheExtensions { - cacheEntry.Value.Extensions = extensions.Cache - } cacheEntries = append(cacheEntries, cacheEntry) } @@ -213,6 +190,140 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace return traces, nil } +func (reader *spanReader) followLinks( + ctx context.Context, + thumbnailsPtr *[]*jaegerbackend.TraceThumbnail, + followLinks sets.Set[string], + query *spanstore.TraceQueryParameters, +) error { + knownKeys := sets.New[tftree.GroupingKey]() + for _, thumbnail := range *thumbnailsPtr { + key, _ := tftree.GroupingKeyFromSpan(thumbnail.Spans.Root) + knownKeys.Insert(key) + } + + sem := semaphore.New(reader.options.followLinkConcurrency) + + var scheduleFollow func(thumbnails []*jaegerbackend.TraceThumbnail) + scheduleFollow = func(thumbnails []*jaegerbackend.TraceThumbnail) { + for _, thumbnail := range thumbnails { + for _, span := range thumbnail.Spans.GetSpans() { + span := span + linkKey, hasLinkKey := tftree.LinkGroupingKeyFromSpan(span) + if hasLinkKey && !knownKeys.Has(linkKey) { + linkRole, hasLinkRole := model.KeyValues(span.Tags).FindByKey(zconstants.LinkRole) + if !hasLinkRole || !followLinks.Has(linkRole.VStr) { + continue + } + + sem.Schedule(func(ctx context.Context) (semaphore.Publish, error) { + newThumbnails, err := reader.Backend.List(ctx, &spanstore.TraceQueryParameters{ + ServiceName: zconstants.TraceSourceObject, + Tags: linkKey.AsSpanTags(), + StartTimeMin: query.StartTimeMin, + StartTimeMax: query.StartTimeMax, + NumTraces: query.NumTraces, + }) + if err != nil { + return nil, 
fmt.Errorf("fetching linked traces: %w", err) + } + + scheduleFollow(newThumbnails) + return func() error { + *thumbnailsPtr = append(*thumbnailsPtr, newThumbnails...) + return nil + }, nil + }) + } + } + } + } + + scheduleFollow(*thumbnailsPtr) + + if err := sem.Run(ctx); err != nil { + return err + } + + return nil +} + +func (reader *spanReader) prepareEntry( + ctx context.Context, + config *tfconfig.Config, + rootKey *tftree.GroupingKey, + query *spanstore.TraceQueryParameters, + tree *tftree.SpanTree, + cacheId model.TraceID, +) (*model.Trace, []tracecache.ExtensionCache, error) { + spans := tree.GetSpans() + + for _, span := range spans { + span.TraceID = cacheId + for i := range span.References { + span.References[i].TraceID = cacheId + } + } + + spans = filterTimeRange(spans, query.StartTimeMin, query.StartTimeMax) + + trace := &model.Trace{ + ProcessMap: []model.Trace_ProcessMapping{{ + ProcessID: "0", + Process: model.Process{}, + }}, + Spans: spans, + } + + displayMode := extractDisplayMode(cacheId) + + extensions := &transform.FetchExtensionsAndStoreCache{} + + if err := reader.Transformer.Transform( + ctx, trace, rootKey, displayMode, + extensions, + query.StartTimeMin, query.StartTimeMax, + ); err != nil { + return nil, nil, fmt.Errorf("trace transformation failed: %w", err) + } + + return trace, extensions.Cache, nil +} + +func (reader *spanReader) storeCache( + ctx context.Context, + rootKey *tftree.GroupingKey, + query *spanstore.TraceQueryParameters, + identifiers []any, + cacheId model.TraceID, + extensionCache []tracecache.ExtensionCache, +) (tracecache.Entry, error) { + identifiersJson := make([]json.RawMessage, len(identifiers)) + for i, identifier := range identifiers { + idJson, err := json.Marshal(identifier) + if err != nil { + return tracecache.Entry{}, fmt.Errorf("thumbnail identifier marshal: %w", err) + } + + identifiersJson[i] = json.RawMessage(idJson) + } + + cacheEntry := tracecache.Entry{ + LowId: cacheId.Low, + Value: 
tracecache.EntryValue{ + Identifiers: identifiersJson, + StartTime: query.StartTimeMin, + EndTime: query.StartTimeMax, + RootObject: rootKey, + }, + } + if reader.options.cacheExtensions { + cacheEntry.Value.Extensions = extensionCache + } + + return cacheEntry, nil +} + func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) (*model.Trace, error) { entry, err := reader.TraceCache.Fetch(ctx, cacheId.Low) if err != nil { diff --git a/pkg/frontend/tf/config/config.go b/pkg/frontend/tf/config/config.go index b5bd4849..8c0bf49f 100644 --- a/pkg/frontend/tf/config/config.go +++ b/pkg/frontend/tf/config/config.go @@ -17,6 +17,8 @@ package tfconfig import ( "strconv" + "k8s.io/apimachinery/pkg/util/sets" + "github.com/kubewharf/kelemetry/pkg/frontend/extension" "github.com/kubewharf/kelemetry/pkg/manager" ) @@ -51,9 +53,8 @@ type Config struct { Id Id // The config name, used in search page display. Name string - // If true, only displays the spans below the matched span. - // If false, displays the whole trace including parent and sibling spans. - UseSubtree bool + // Only links with roles in this set are followed. + FollowLinks sets.Set[string] // The extension traces for this config. 
Extensions []extension.Provider // The steps to transform the tree @@ -64,11 +65,15 @@ func (config *Config) Clone() *Config { steps := make([]Step, len(config.Steps)) copy(steps, config.Steps) // no need to deep clone each step + extensions := make([]extension.Provider, len(config.Extensions)) + copy(extensions, config.Extensions) + return &Config{ - Id: config.Id, - Name: config.Name, - UseSubtree: config.UseSubtree, - Steps: steps, + Id: config.Id, + Name: config.Name, + FollowLinks: config.FollowLinks.Clone(), + Extensions: extensions, + Steps: steps, } } diff --git a/pkg/frontend/tf/config/file/file.go b/pkg/frontend/tf/config/file/file.go index f65ac5ad..d98e5d08 100644 --- a/pkg/frontend/tf/config/file/file.go +++ b/pkg/frontend/tf/config/file/file.go @@ -115,10 +115,10 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { Modifiers map[tfconfig.Id]modifierConfig `json:"modifiers"` Batches []Batch `json:"batches"` Configs []struct { - Id tfconfig.Id `json:"id"` - Name string `json:"name"` - UseSubtree bool `json:"useSubtree"` - Steps json.RawMessage `json:"steps"` + Id tfconfig.Id `json:"id"` + Name string `json:"name"` + FollowLinks []string `json:"followLinks"` + Steps json.RawMessage `json:"steps"` } } if err := json.Unmarshal(jsonBytes, &file); err != nil { @@ -180,10 +180,10 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { } config := &tfconfig.Config{ - Id: raw.Id, - Name: raw.Name, - UseSubtree: raw.UseSubtree, - Steps: steps, + Id: raw.Id, + Name: raw.Name, + FollowLinks: sets.New(raw.FollowLinks...), + Steps: steps, } p.register(registeredConfig{config: config, modifierClasses: sets.New[string]()}) diff --git a/pkg/frontend/tf/defaults/modifier/exclusive.go b/pkg/frontend/tf/defaults/modifier/exclusive.go index 7b141675..83559335 100644 --- a/pkg/frontend/tf/defaults/modifier/exclusive.go +++ b/pkg/frontend/tf/defaults/modifier/exclusive.go @@ -16,6 +16,8 @@ package tfmodifier import ( "context" + "encoding/json" + "fmt" 
"github.com/spf13/pflag" @@ -36,7 +38,7 @@ type ExclusiveModifierOptions struct { } func (options *ExclusiveModifierOptions) Setup(fs *pflag.FlagSet) { - fs.BoolVar(&options.enable, "jaeger-tf-exclusive-modifier-enable", true, "enable exclusive modifier and list it in frontend") + fs.BoolVar(&options.enable, "jaeger-tf-exclusive-modifier-enable", true, "enable exclusive modifiers and list it in frontend") } func (options *ExclusiveModifierOptions) EnableFlag() *bool { return &options.enable } @@ -55,13 +57,31 @@ func (m *ExclusiveModifierFactory) Close(ctx context.Context) error { return nil func (*ExclusiveModifierFactory) ListIndex() string { return "exclusive" } func (*ExclusiveModifierFactory) Build(jsonBuf []byte) (tfconfig.Modifier, error) { + modifier := &ExclusiveModifier{} + + if err := json.Unmarshal(jsonBuf, &modifier); err != nil { + return nil, fmt.Errorf("parse exclusive modifier config: %w", err) + } + return &ExclusiveModifier{}, nil } -type ExclusiveModifier struct{} +type ExclusiveModifier struct { + Class string `json:"modifierClass"` + AddRoles []string `json:"addRoles"` + RemoveRoles []string `json:"removeRoles"` +} + +func (modifier *ExclusiveModifier) ModifierClass() string { + return fmt.Sprintf("kelemetry.kubewharf.io/exclusive/%s", modifier.Class) +} -func (*ExclusiveModifier) ModifierClass() string { return "kelemetry.kubewharf.io/exclusive" } +func (modifier *ExclusiveModifier) Modify(config *tfconfig.Config) { + for _, role := range modifier.AddRoles { + config.FollowLinks.Insert(role) + } -func (*ExclusiveModifier) Modify(config *tfconfig.Config) { - config.UseSubtree = true + for _, role := range modifier.RemoveRoles { + config.FollowLinks.Delete(role) + } } diff --git a/pkg/frontend/tf/transform.go b/pkg/frontend/tf/transform.go index 2af85caa..8ff176f4 100644 --- a/pkg/frontend/tf/transform.go +++ b/pkg/frontend/tf/transform.go @@ -16,7 +16,6 @@ package transform import ( "context" - "errors" "fmt" "time" @@ -75,32 +74,6 @@ func 
(transformer *Transformer) Transform( transformer.groupDuplicates(tree) - if config.UseSubtree && rootObject != nil { - var rootSpan model.SpanID - hasRootSpan := false - - for _, span := range tree.GetSpans() { - if key, hasKey := tftree.GroupingKeyFromSpan(span); hasKey && key == *rootObject { - rootSpan = span.SpanID - hasRootSpan = true - } - } - - if hasRootSpan { - if err := tree.SetRoot(rootSpan); err != nil { - if errors.Is(err, tftree.ErrRootDoesNotExist) { - return fmt.Errorf( - "trace data does not contain desired root span %v as indicated by the exclusive flag (%w)", - rootSpan, - err, - ) - } - - return fmt.Errorf("cannot set root: %w", err) - } - } - } - newSpans, err := extensionProcessor.ProcessExtensions(ctx, transformer, config.Extensions, trace.Spans, start, end) if err != nil { return fmt.Errorf("cannot prepare extension trace: %w", err) diff --git a/pkg/frontend/tf/tree/grouping.go b/pkg/frontend/tf/tree/grouping.go index f7da22da..2b58c678 100644 --- a/pkg/frontend/tf/tree/grouping.go +++ b/pkg/frontend/tf/tree/grouping.go @@ -68,6 +68,28 @@ func GroupingKeyFromSpan(span *model.Span) (GroupingKey, bool) { return key, true } +func LinkGroupingKeyFromSpan(span *model.Span) (GroupingKey, bool) { + tags := model.KeyValues(span.Tags) + pseudoType, isPseudo := tags.FindByKey(zconstants.PseudoType) + if !isPseudo || pseudoType.VStr != string(zconstants.PseudoTypeLink) { + return GroupingKey{}, false + } + + cluster, _ := tags.FindByKey(zconstants.LinkedObjectCluster) + group, _ := tags.FindByKey(zconstants.LinkedObjectGroup) + resource, _ := tags.FindByKey(zconstants.LinkedObjectResource) + namespace, _ := tags.FindByKey(zconstants.LinkedObjectNamespace) + name, _ := tags.FindByKey(zconstants.LinkedObjectName) + key := GroupingKey{ + Cluster: cluster.VStr, + Group: group.VStr, + Resource: resource.VStr, + Namespace: namespace.VStr, + Name: name.VStr, + } + return key, true +} + func GroupingKeysFromSpans(spans []*model.Span) sets.Set[GroupingKey] { 
keys := sets.New[GroupingKey]()

@@ -78,3 +100,13 @@
 	}
 	return keys
 }
+
+func (key GroupingKey) AsSpanTags() map[string]string {
+	return map[string]string{
+		"cluster":   key.Cluster,
+		"group":     key.Group,
+		"resource":  key.Resource,
+		"namespace": key.Namespace,
+		"name":      key.Name,
+	}
+}
diff --git a/pkg/frontend/tf/tree/tree.go b/pkg/frontend/tf/tree/tree.go
index c8327f42..8495dbee 100644
--- a/pkg/frontend/tf/tree/tree.go
+++ b/pkg/frontend/tf/tree/tree.go
@@ -15,6 +15,7 @@
 package tftree
 
 import (
+	"encoding/json"
 	"fmt"
 
 	"github.com/jaegertracing/jaeger/model"
@@ -68,6 +69,34 @@ func NewSpanTree(spans []*model.Span) *SpanTree {
 	return tree
 }
 
+func (tree *SpanTree) Clone() (*SpanTree, error) {
+	copiedSpans := make([]*model.Span, 0, len(tree.spanMap))
+	for _, span := range tree.spanMap {
+		spanCopy, err := CopySpan(span)
+		if err != nil {
+			return nil, err
+		}
+
+		copiedSpans = append(copiedSpans, spanCopy)
+	}
+
+	return NewSpanTree(copiedSpans), nil
+}
+
+func CopySpan(span *model.Span) (*model.Span, error) {
+	spanJson, err := json.Marshal(span)
+	if err != nil {
+		return nil, err
+	}
+
+	var spanCopy *model.Span
+	if err := json.Unmarshal(spanJson, &spanCopy); err != nil {
+		return nil, err
+	}
+
+	return spanCopy, nil
+}
+
 func (tree *SpanTree) Span(id model.SpanID) *model.Span { return tree.spanMap[id] }
 
 func (tree *SpanTree) Children(id model.SpanID) map[model.SpanID]struct{} {
 	return tree.childrenMap[id]
@@ -133,7 +162,10 @@ func (subtree spanNode) visit(visitor TreeVisitor) {
 		panic("cannot visit nonexistent node in tree")
 	}
 
-	subvisitor := visitor.Enter(subtree.tree, subtree.node)
+	var subvisitor TreeVisitor
+	if visitor != nil {
+		subvisitor = visitor.Enter(subtree.tree, subtree.node)
+	}
 
 	// enter before visitorStack is populated to allow removal
 	if _, stillExists := subtree.tree.spanMap[subtree.node.SpanID]; !stillExists {
@@ -169,7 +201,10 @@ func (subtree spanNode) visit(visitor 
TreeVisitor) { delete(subtree.tree.visitorStack, subtree.node.SpanID) - visitor.Exit(subtree.tree, subtree.node) + if visitor != nil { + visitor.Exit(subtree.tree, subtree.node) + } + if _, stillExists := subtree.tree.spanMap[subtree.node.SpanID]; !stillExists { // deleted during exit return @@ -282,6 +317,22 @@ func (tree *SpanTree) Delete(spanId model.SpanID) { } } +// Adds all spans in a tree as a subtree in this span. +func (tree *SpanTree) AddTree(childTree *SpanTree, parentId model.SpanID) { + if tree == childTree { + panic("cannot add tree to itself") + } + + tree.addSubtree(parentId, childTree, childTree.Root.SpanID) +} + +func (tree *SpanTree) addSubtree(parentId model.SpanID, otherTree *SpanTree, subroot model.SpanID) { + tree.Add(otherTree.Span(subroot), parentId) + for child := range otherTree.Children(subroot) { + tree.addSubtree(subroot, otherTree, child) + } +} + type TreeVisitor interface { // Called before entering the descendents of the span. // diff --git a/pkg/util/zconstants/zconstants.go b/pkg/util/zconstants/zconstants.go index 82127bae..37173367 100644 --- a/pkg/util/zconstants/zconstants.go +++ b/pkg/util/zconstants/zconstants.go @@ -32,11 +32,21 @@ const SpanName = Prefix + "kelemetryName" // The value is the folding type. const PseudoType = Prefix + "pseudoType" +// Indicates that the current span is not pseudo. +// Used to optimize trace listing. +// +// This constant is used as both tag key and value. +const NotPseudo = Prefix + "notPseudo" + type PseudoTypeValue string const ( + // Root span in an object trace. PseudoTypeObject PseudoTypeValue = "object" - PseudoTypeLink PseudoTypeValue = "link" + // Indicate that another trace shall be included. + PseudoTypeLink PseudoTypeValue = "link" + // A virtual span synthesized in the frontend when link class is nonempty. + PseudoTypeLinkClass PseudoTypeValue = "linkClass" ) // Identifies that the span represents an actual event (rather than as a pseudospan). 
@@ -44,7 +54,6 @@ const TraceSource = Prefix + "traceSource" const ( TraceSourceObject = "object" - TraceSourceLink = "link" TraceSourceAudit = "audit" TraceSourceEvent = "event" @@ -56,7 +65,6 @@ func KnownTraceSources(withPseudo bool) []string { traceSources := []string{ // pseudo TraceSourceObject, - TraceSourceLink, // real TraceSourceAudit, From 6e6c908fec65f5d7faf313950a43496d194a5856 Mon Sep 17 00:00:00 2001 From: chankyin Date: Wed, 16 Aug 2023 18:11:22 +0800 Subject: [PATCH 04/16] fix(aggregator): backlinks use a different field for dedup in spancache --- Makefile | 8 +- hack/tfconfig.yaml | 23 +- pkg/aggregator/aggregator.go | 10 +- pkg/aggregator/linker/job/worker/worker.go | 14 +- pkg/aggregator/linker/linker.go | 7 +- pkg/annotationlinker/linker.go | 5 +- pkg/frontend/backend/interface.go | 5 +- .../backend/jaeger-storage/backend.go | 12 +- pkg/frontend/reader/merge/merge.go | 310 +++++++++++++----- pkg/frontend/reader/merge/merge_test.go | 122 +++++++ pkg/frontend/reader/reader.go | 168 +++++----- pkg/frontend/tf/config/config.go | 14 +- pkg/frontend/tf/config/file/file.go | 15 +- pkg/frontend/tf/config/link_selector.go | 87 +++++ .../tf/defaults/modifier/exclusive.go | 87 ----- .../tf/defaults/modifier/link_selector.go | 157 +++++++++ pkg/frontend/tf/tree/tree.go | 2 +- pkg/ownerlinker/linker.go | 9 +- pkg/util/marshal/marshal.go | 136 ++++++++ pkg/util/reflect/reflect.go | 2 + pkg/util/semaphore/semaphore.go | 2 + pkg/util/zconstants/link.go | 134 ++++++++ pkg/util/zconstants/zconstants.go | 123 +------ 23 files changed, 1034 insertions(+), 418 deletions(-) create mode 100644 pkg/frontend/reader/merge/merge_test.go create mode 100644 pkg/frontend/tf/config/link_selector.go delete mode 100644 pkg/frontend/tf/defaults/modifier/exclusive.go create mode 100644 pkg/frontend/tf/defaults/modifier/link_selector.go create mode 100644 pkg/util/marshal/marshal.go create mode 100644 pkg/util/zconstants/link.go diff --git a/Makefile b/Makefile index 
5e7b8ed6..5b928d5a 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,7 @@ endif .PHONY: run dump-rotate test usage dot kind stack pre-commit run: output/kelemetry $(DUMP_ROTATE_DEP) GIN_MODE=debug \ - ./output/kelemetry \ + $(RUN_PREFIX) ./output/kelemetry $(RUN_SUFFIX) \ --mq=local \ --audit-consumer-partition=$(PARTITIONS) \ --http-address=0.0.0.0 \ @@ -121,15 +121,15 @@ test: go test -v -race -coverpkg=./pkg/... -coverprofile=coverage.out $(INTEGRATION_ARG) $(BUILD_ARGS) ./pkg/... usage: output/kelemetry - ./output/kelemetry --usage=USAGE.txt + $(RUN_PREFIX) ./output/kelemetry $(RUN_SUFFIX) --usage=USAGE.txt dot: output/kelemetry - ./output/kelemetry --dot=depgraph.dot + $(RUN_PREFIX) ./output/kelemetry $(RUN_SUFFIX) --dot=depgraph.dot dot -Tpng depgraph.dot >depgraph.png dot -Tsvg depgraph.dot >depgraph.svg output/kelemetry: go.mod go.sum $(shell find -type f -name "*.go") - go build -v $(RACE_ARG) -ldflags=$(LDFLAGS) -o $@ $(BUILD_ARGS) . + go build -v $(RACE_ARG) -gcflags=$(GCFLAGS) -ldflags=$(LDFLAGS) -o $@ $(BUILD_ARGS) . 
kind: kind delete cluster --name tracetest diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index 35552233..4f2196ff 100644 --- a/hack/tfconfig.yaml +++ b/hack/tfconfig.yaml @@ -42,9 +42,28 @@ configs: batchName: final modifiers: + # Multiple active link-selector modifiers are additive (union) "01000000": - displayName: exclusive - modifierName: exclusive + displayName: ancestors + modifierName: link-selector + args: + modifierClass: parent + + includeSiblings: false + "02000000": + displayName: ancestors tree + modifierName: link-selector + args: + modifierClass: parent + includeSiblings: true + "04000000": + displayName: all owned objects + modifierName: link-selector + args: + modifierClass: children + ifAll: + - linkClass: children + fromChild: false # Uncomment to enable extension trace from apiserver # "00000001": diff --git a/pkg/aggregator/aggregator.go b/pkg/aggregator/aggregator.go index 93dc91cd..76f7c33e 100644 --- a/pkg/aggregator/aggregator.go +++ b/pkg/aggregator/aggregator.go @@ -102,6 +102,7 @@ type Aggregator interface { parent tracer.SpanContext, followsFrom tracer.SpanContext, extraTags map[string]string, + dedupId string, ) (span tracer.SpanContext, isNew bool, err error) } @@ -223,7 +224,7 @@ func (agg *aggregator) EnsureObjectSpan( object utilobject.Rich, eventTime time.Time, ) (tracer.SpanContext, error) { - span, isNew, err := agg.GetOrCreatePseudoSpan(ctx, object, zconstants.PseudoTypeObject, eventTime, nil, nil, nil) + span, isNew, err := agg.GetOrCreatePseudoSpan(ctx, object, zconstants.PseudoTypeObject, eventTime, nil, nil, nil, "object") if err != nil { return nil, err } @@ -282,6 +283,7 @@ func (agg *aggregator) GetOrCreatePseudoSpan( parent tracer.SpanContext, followsFrom tracer.SpanContext, extraTags map[string]string, + dedupId string, ) (_span tracer.SpanContext, _isNew bool, _err error) { lazySpanMetric := &lazySpanMetric{ Cluster: object.Cluster, @@ -289,7 +291,7 @@ func (agg *aggregator) GetOrCreatePseudoSpan( } defer 
agg.LazySpanMetric.DeferCount(agg.Clock.Now(), lazySpanMetric) - cacheKey := agg.expiringSpanCacheKey(object.Key, eventTime, pseudoType) + cacheKey := agg.expiringSpanCacheKey(object.Key, eventTime, dedupId) logger := agg.Logger. WithField("step", "GetOrCreatePseudoSpan"). @@ -404,10 +406,10 @@ func (agg *aggregator) CreatePseudoSpan( func (aggregator *aggregator) expiringSpanCacheKey( object utilobject.Key, timestamp time.Time, - pseudoType zconstants.PseudoTypeValue, + subObject string, ) string { expiringWindow := timestamp.Unix() / int64(aggregator.options.spanTtl.Seconds()) - return aggregator.spanCacheKey(object, fmt.Sprintf("field=%s,window=%d", pseudoType, expiringWindow)) + return aggregator.spanCacheKey(object, fmt.Sprintf("field=%s,window=%d", subObject, expiringWindow)) } func (aggregator *aggregator) spanCacheKey(object utilobject.Key, window string) string { diff --git a/pkg/aggregator/linker/job/worker/worker.go b/pkg/aggregator/linker/job/worker/worker.go index 75c9e8a8..948f3a1a 100644 --- a/pkg/aggregator/linker/job/worker/worker.go +++ b/pkg/aggregator/linker/job/worker/worker.go @@ -102,7 +102,11 @@ func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, li } forwardTags := map[string]string{} - zconstants.TagLinkedObject(forwardTags, link.Object.Key, link.Role, link.Class) + zconstants.TagLinkedObject(forwardTags, zconstants.LinkRef{ + Key: link.Object.Key, + Role: link.Role, + Class: link.Class, + }) _, _, err = worker.Aggregator.GetOrCreatePseudoSpan( ctx, job.Object, @@ -111,6 +115,7 @@ func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, li job.Span, nil, forwardTags, + link.DedupId, ) if err != nil { return metrics.LabelError( @@ -120,7 +125,11 @@ func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, li } backwardTags := map[string]string{} - zconstants.TagLinkedObject(backwardTags, job.Object.Key, zconstants.ReverseLinkRole(link.Role), link.Class) + 
zconstants.TagLinkedObject(backwardTags, zconstants.LinkRef{ + Key: job.Object.Key, + Role: zconstants.ReverseLinkRole(link.Role), + Class: link.Class, + }) _, _, err = worker.Aggregator.GetOrCreatePseudoSpan( ctx, link.Object, @@ -129,6 +138,7 @@ func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, li linkedSpan, nil, backwardTags, + fmt.Sprintf("%s@%s", link.DedupId, job.Object.String()), ) if err != nil { return metrics.LabelError( diff --git a/pkg/aggregator/linker/linker.go b/pkg/aggregator/linker/linker.go index d02d2fe9..76810566 100644 --- a/pkg/aggregator/linker/linker.go +++ b/pkg/aggregator/linker/linker.go @@ -26,7 +26,8 @@ type Linker interface { } type LinkerResult struct { - Object utilobject.Rich - Role zconstants.LinkRoleValue - Class string + Object utilobject.Rich + Role zconstants.LinkRoleValue + Class string + DedupId string } diff --git a/pkg/annotationlinker/linker.go b/pkg/annotationlinker/linker.go index e0e5b096..2dd616bd 100644 --- a/pkg/annotationlinker/linker.go +++ b/pkg/annotationlinker/linker.go @@ -97,8 +97,9 @@ func (ctrl *controller) Lookup(ctx context.Context, object utilobject.Rich) ([]l logger.WithFields(objectRef.AsFields("parent")).Debug("Resolved parent") return []linker.LinkerResult{{ - Object: objectRef, - Role: zconstants.LinkRoleParent, + Object: objectRef, + Role: zconstants.LinkRoleParent, + DedupId: "annotation", }}, nil } diff --git a/pkg/frontend/backend/interface.go b/pkg/frontend/backend/interface.go index 50906f7b..c447cc79 100644 --- a/pkg/frontend/backend/interface.go +++ b/pkg/frontend/backend/interface.go @@ -62,8 +62,9 @@ type TraceThumbnail struct { Spans *tftree.SpanTree } -func (tt *TraceThumbnail) GetSpans() *tftree.SpanTree { return tt.Spans } -func (tt *TraceThumbnail) GetMetadata() any { return tt.Identifier } +func (tt *TraceThumbnail) GetSpans() *tftree.SpanTree { return tt.Spans } +func (tt *TraceThumbnail) GetMetadata() any { return tt.Identifier } +func (tt 
*TraceThumbnail) FromThumbnail(src *TraceThumbnail) { *tt = *src } type mux struct { *manager.Mux diff --git a/pkg/frontend/backend/jaeger-storage/backend.go b/pkg/frontend/backend/jaeger-storage/backend.go index 64b67727..8b6d4aca 100644 --- a/pkg/frontend/backend/jaeger-storage/backend.go +++ b/pkg/frontend/backend/jaeger-storage/backend.go @@ -151,16 +151,6 @@ func (backend *Backend) List( ctx context.Context, params *spanstore.TraceQueryParameters, ) ([]*jaegerbackend.TraceThumbnail, error) { - filterTags := map[string]string{ - zconstants.NotPseudo: zconstants.NotPseudo, - } - for key, val := range params.Tags { - filterTags[key] = val - } - if len(params.OperationName) > 0 { - filterTags["cluster"] = params.OperationName - } - traceThumbnails := []*jaegerbackend.TraceThumbnail{} for _, traceSource := range zconstants.KnownTraceSources(false) { if len(traceThumbnails) >= params.NumTraces { @@ -169,7 +159,7 @@ func (backend *Backend) List( newParams := &spanstore.TraceQueryParameters{ ServiceName: traceSource, - Tags: filterTags, + Tags: params.Tags, StartTimeMin: params.StartTimeMin, StartTimeMax: params.StartTimeMax, DurationMin: params.DurationMin, diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go index 24882049..3275ff33 100644 --- a/pkg/frontend/reader/merge/merge.go +++ b/pkg/frontend/reader/merge/merge.go @@ -15,113 +15,248 @@ package merge import ( + "context" "fmt" "math/rand" + "sync/atomic" + "time" "github.com/jaegertracing/jaeger/model" + "github.com/jaegertracing/jaeger/storage/spanstore" "k8s.io/apimachinery/pkg/util/sets" + jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" + tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" utilobject "github.com/kubewharf/kelemetry/pkg/util/object" reflectutil "github.com/kubewharf/kelemetry/pkg/util/reflect" + "github.com/kubewharf/kelemetry/pkg/util/semaphore" 
"github.com/kubewharf/kelemetry/pkg/util/zconstants" ) -type Merger[R RawTrace[M], M any] struct{} +type objKey = utilobject.Key -type RawTrace[M any] interface { - GetSpans() *tftree.SpanTree - GetMetadata() M +type Merger[M any] struct { + objects map[objKey]*object[M] } -type objKey = utilobject.Key +type TraceWithMetadata[M any] struct { + Tree *tftree.SpanTree + Metadata M +} -func (merger Merger[R, M]) MergeTraces(rawTraces []R) ([]*MergeTree[M], error) { - objects, err := merger.groupByKey(rawTraces) - if err != nil { - return nil, err +type RawTree struct { + Tree *tftree.SpanTree +} + +func (tr RawTree) GetSpans() *tftree.SpanTree { return tr.Tree } +func (tr RawTree) GetMetadata() struct{} { return struct{}{} } +func (tr RawTree) FromThumbnail(self *RawTree, tt *jaegerbackend.TraceThumbnail) { + self.Tree = tt.Spans +} + +func (merger *Merger[M]) AddTraces(trees []TraceWithMetadata[M]) error { + if merger.objects == nil { + merger.objects = make(map[objKey]*object[M]) } - abLinks := abLinkMap{} + affected := sets.New[objKey]() + for _, trace := range trees { + key, _ := zconstants.ObjectKeyFromSpan(trace.Tree.Root) + affected.Insert(key) - for _, obj := range objects { - obj.identifyLinks() + if obj, hasPrev := merger.objects[key]; hasPrev { + if err := obj.merge(trace.Tree, trace.Metadata); err != nil { + return err + } + } else { + obj, err := newObject[M](key, trace.Tree, trace.Metadata) + if err != nil { + return err + } - for _, link := range obj.links { - abLink := abLinkFromTargetLink(obj.key, link) - abLinks.insert(abLink) + merger.objects[key] = obj } } - var mergeTrees []*MergeTree[M] - for _, keys := range merger.findConnectedComponents(objects, abLinks) { - var members []*object[R, M] - for _, key := range keys { - members = append(members, objects[key]) + for key := range affected { + merger.objects[key].identifyLinks() + } + + return nil +} + +type followLinkPool[M any] struct { + sem *semaphore.Semaphore + knownKeys sets.Set[objKey] + 
lister ListFunc[M] + startTime, endTime time.Time + merger *Merger[M] +} + +func (fl *followLinkPool[M]) scheduleKnown(obj *object[M], limit *atomic.Int32, linkSelector tfconfig.LinkSelector) { + for _, link := range obj.links { + if _, known := fl.knownKeys[link.Key]; known { + continue + } + if limit.Add(-1) < 0 { + continue } - mergeTree, err := newMergeTree(members, abLinks) - if err != nil { - return nil, err + parentKey, childKey, parentIsSource := obj.key, link.Key, true + if link.Role == zconstants.LinkRoleParent { + parentKey, childKey, parentIsSource = link.Key, obj.key, false } - mergeTrees = append(mergeTrees, mergeTree) + subSelector := linkSelector.Admit(parentKey, childKey, parentIsSource, link.Class) + if subSelector != nil { + fl.knownKeys.Insert(link.Key) + fl.schedule(link.Key, subSelector, int32(fl.endTime.Sub(fl.startTime)/(time.Minute*30))) + } } +} - return mergeTrees, nil +func (fl *followLinkPool[M]) schedule(key objKey, linkSelector tfconfig.LinkSelector, limit int32) { + fl.sem.Schedule(func(ctx context.Context) (semaphore.Publish, error) { + thumbnails, err := fl.lister(ctx, key, fl.startTime, fl.endTime, int(limit)) + if err != nil { + return nil, fmt.Errorf("fetching linked traces: %w", err) + } + + return func() error { + fl.merger.AddTraces(thumbnails) + + return nil + }, nil + }) } -func (merger Merger[R, M]) groupByKey(rawTraces []R) (map[objKey]*object[R, M], error) { - objects := map[objKey]*object[R, M]{} +type ListFunc[M any] func(ctx context.Context, key objKey, startTime time.Time, endTime time.Time, limit int) ([]TraceWithMetadata[M], error) - for _, trace := range rawTraces { - key, _ := zconstants.ObjectKeyFromSpan(trace.GetSpans().Root) +func ListWithBackend[M any](backend jaegerbackend.Backend, convertMetadata func(any) M) ListFunc[M] { + return func(ctx context.Context, key objKey, startTime time.Time, endTime time.Time, limit int) ([]TraceWithMetadata[M], error) { + tts, err := backend.List(ctx, 
&spanstore.TraceQueryParameters{ + Tags: zconstants.KeyToSpanTags(key), + StartTimeMin: startTime, + StartTimeMax: endTime, + NumTraces: int(limit), + }) + if err != nil { + return nil, err + } - if obj, hasPrev := objects[key]; hasPrev { - if err := obj.merge(trace); err != nil { - return nil, err + twmList := make([]TraceWithMetadata[M], len(tts)) + for i, tt := range tts { + twmList[i] = TraceWithMetadata[M]{ + Tree: tt.Spans, + Metadata: convertMetadata(tt.Identifier), } + } + + return twmList, nil + } +} + +func (merger *Merger[M]) FollowLinks( + ctx context.Context, + linkSelector tfconfig.LinkSelector, + startTime, endTime time.Time, + lister ListFunc[M], + concurrency int, + limit int32, + limitIsGlobal bool, +) error { + fl := &followLinkPool[M]{ + sem: semaphore.New(concurrency), + knownKeys: sets.New[objKey](), + lister: lister, + startTime: startTime, + endTime: endTime, + merger: merger, + } + + for _, obj := range merger.objects { + fl.knownKeys.Insert(obj.key) + } + + globalLimit := new(atomic.Int32) + globalLimit.Store(limit) + + for _, obj := range merger.objects { + var remainingLimit *atomic.Int32 + if limitIsGlobal { + remainingLimit = globalLimit } else { - obj, err := newObject[R, M](key, trace) - if err != nil { - return nil, err - } + remainingLimit = new(atomic.Int32) + remainingLimit.Store(limit) + } - objects[key] = obj + fl.scheduleKnown(obj, remainingLimit, linkSelector) + } + + if err := fl.sem.Run(ctx); err != nil { + return err + } + + return nil +} + +func (merger *Merger[M]) MergeTraces() ([]*MergeTree[M], error) { + abLinks := abLinkMap{} + + for _, obj := range merger.objects { + for _, link := range obj.links { + abLink := abLinkFromTargetLink(obj.key, link) + abLinks.insert(abLink) } } - return objects, nil + var mergeTrees []*MergeTree[M] + for _, keys := range merger.findConnectedComponents(merger.objects, abLinks) { + var members []*object[M] + for _, key := range keys { + members = append(members, merger.objects[key]) + } + 
+ mergeTree, err := newMergeTree(members, abLinks) + if err != nil { + return nil, err + } + + mergeTrees = append(mergeTrees, mergeTree) + } + + return mergeTrees, nil } -type object[R RawTrace[M], M any] struct { +type object[M any] struct { key objKey metadata []M tree *tftree.SpanTree - links []targetLink + links []TargetLink } -func newObject[R RawTrace[M], M any](key objKey, trace R) (*object[R, M], error) { - clonedTree, err := trace.GetSpans().Clone() +func newObject[M any](key objKey, trace *tftree.SpanTree, metadata M) (*object[M], error) { + clonedTree, err := trace.Clone() if err != nil { return nil, fmt.Errorf("clone spans: %w", err) } - obj := &object[R, M]{ + obj := &object[M]{ key: key, - metadata: []M{trace.GetMetadata()}, + metadata: []M{metadata}, tree: clonedTree, } return obj, nil } -func (obj *object[R, M]) merge(trace R) error { - obj.metadata = append(obj.metadata, trace.GetMetadata()) +func (obj *object[M]) merge(trace *tftree.SpanTree, metadata M) error { + obj.metadata = append(obj.metadata, metadata) - mergeRoot(obj.tree.Root, trace.GetSpans().Root) + mergeRoot(obj.tree.Root, trace.Root) copyVisitor := ©TreeVisitor{to: obj.tree} - trace.GetSpans().Visit(copyVisitor) + trace.Visit(copyVisitor) if copyVisitor.err != nil { return copyVisitor.err } @@ -168,7 +303,7 @@ func mergeRootTags(base *model.Span, tail *model.Span) { } } -func (obj *object[R, M]) identifyLinks() { +func (obj *object[M]) identifyLinks() { for spanId := range obj.tree.Children(obj.tree.Root.SpanID) { span := obj.tree.Span(spanId) pseudoType, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) @@ -193,18 +328,18 @@ func (obj *object[R, M]) identifyLinks() { linkClass = linkClassTag.VStr } - obj.links = append(obj.links, targetLink{ - key: target, - role: zconstants.LinkRoleValue(linkRole), - class: linkClass, + obj.links = append(obj.links, TargetLink{ + Key: target, + Role: zconstants.LinkRoleValue(linkRole), + Class: linkClass, }) } } -type 
targetLink struct { - key objKey - role zconstants.LinkRoleValue - class string +type TargetLink struct { + Key objKey + Role zconstants.LinkRoleValue + Class string } type copyTreeVisitor struct { @@ -243,20 +378,20 @@ func (link abLink) isParent(key objKey) bool { } } -func abLinkFromTargetLink(subject objKey, link targetLink) abLink { - if groupingKeyLess(subject, link.key) { +func abLinkFromTargetLink(subject objKey, link TargetLink) abLink { + if groupingKeyLess(subject, link.Key) { return abLink{ alpha: subject, - beta: link.key, - alphaIsParent: link.role == zconstants.LinkRoleChild, - class: link.class, + beta: link.Key, + alphaIsParent: link.Role == zconstants.LinkRoleChild, + class: link.Class, } } else { return abLink{ beta: subject, - alpha: link.key, - alphaIsParent: link.role != zconstants.LinkRoleChild, - class: link.class, + alpha: link.Key, + alphaIsParent: link.Role != zconstants.LinkRoleChild, + class: link.Class, } } } @@ -296,24 +431,29 @@ func (m abLinkMap) insertDirected(k1, k2 objKey, link abLink) { v1, hasK1 := m[k1] if !hasK1 { v1 = map[objKey]abLink{} + m[k1] = v1 } v1[k2] = link } -func (m abLinkMap) detectRoot(seed objKey) (_root objKey, _hasCycle bool) { +func (m abLinkMap) detectRoot(seed objKey, vertexFilter func(objKey) bool) (_root objKey, _hasCycle bool) { visited := sets.New[objKey]() - return m.dfsRoot(visited, seed) + return m.dfsRoot(visited, seed, vertexFilter) } -func (m abLinkMap) dfsRoot(visited sets.Set[objKey], key objKey) (_root objKey, _hasCycle bool) { +func (m abLinkMap) dfsRoot(visited sets.Set[objKey], key objKey, vertexFilter func(objKey) bool) (_root objKey, _hasCycle bool) { if visited.Has(key) { return key, true } visited.Insert(key) // avoid infinite recursion for peer, link := range m[key] { + if !vertexFilter(peer) { + continue + } + if link.isParent(peer) { - return m.dfsRoot(visited, peer) + return m.dfsRoot(visited, peer, vertexFilter) } } @@ -324,7 +464,7 @@ type componentTaint = int type 
connectedComponent = []objKey -func (Merger[R, M]) findConnectedComponents(objects map[objKey]*object[R, M], abLinks abLinkMap) []connectedComponent { +func (*Merger[M]) findConnectedComponents(objects map[objKey]*object[M], abLinks abLinkMap) []connectedComponent { objectKeys := make(sets.Set[objKey], len(objects)) for gk := range objects { objectKeys.Insert(gk) @@ -340,8 +480,8 @@ func (Merger[R, M]) findConnectedComponents(objects map[objKey]*object[R, M], ab break } - taintCounter += 1 dfsTaint(objectKeys, abLinks, taints, taintCounter, seed) + taintCounter += 1 } components := make([]connectedComponent, taintCounter) @@ -385,8 +525,8 @@ type MergeTree[M any] struct { Tree *tftree.SpanTree } -func newMergeTree[R RawTrace[M], M any]( - members []*object[R, M], +func newMergeTree[M any]( + members []*object[M], abLinks abLinkMap, ) (*MergeTree[M], error) { metadata := []M{} @@ -406,13 +546,16 @@ func newMergeTree[R RawTrace[M], M any]( }, nil } -func mergeLinkedTraces[R RawTrace[M], M any](objects []*object[R, M], abLinks abLinkMap) (*tftree.SpanTree, error) { - trees := make(map[objKey]*object[R, M], len(objects)) +func mergeLinkedTraces[M any](objects []*object[M], abLinks abLinkMap) (*tftree.SpanTree, error) { + trees := make(map[objKey]*object[M], len(objects)) for _, obj := range objects { trees[obj.key] = obj } - rootKey, _ := abLinks.detectRoot(objects[0].key) + rootKey, _ := abLinks.detectRoot(objects[0].key, func(key objKey) bool { + _, hasTree := trees[key] + return hasTree + }) tree := trees[rootKey].tree @@ -422,19 +565,24 @@ func mergeLinkedTraces[R RawTrace[M], M any](objects []*object[R, M], abLinks ab pendingObjects = pendingObjects[:len(pendingObjects)-1] for _, link := range trees[subj].links { - if link.role != zconstants.LinkRoleChild { + if link.Role != zconstants.LinkRoleChild { continue } parentSpan := trees[subj].tree.Root - if link.class != "" { - virtualSpan := createVirtualSpan(tree.Root.TraceID, parentSpan, link.class) + if link.Class 
!= "" { + virtualSpan := createVirtualSpan(tree.Root.TraceID, parentSpan, link.Class) tree.Add(virtualSpan, parentSpan.SpanID) parentSpan = virtualSpan } - tree.AddTree(trees[link.key].tree, parentSpan.SpanID) - pendingObjects = append(pendingObjects, link.key) + subtree, hasSubtree := trees[link.Key] + if !hasSubtree { + // this link was not fetched, e.g. because of fetch limit or link selector + continue + } + tree.AddTree(subtree.tree, parentSpan.SpanID) + pendingObjects = append(pendingObjects, link.Key) } } diff --git a/pkg/frontend/reader/merge/merge_test.go b/pkg/frontend/reader/merge/merge_test.go new file mode 100644 index 00000000..73cb6921 --- /dev/null +++ b/pkg/frontend/reader/merge/merge_test.go @@ -0,0 +1,122 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package merge_test + +import ( + "context" + "testing" + "time" + + "github.com/jaegertracing/jaeger/model" + "github.com/stretchr/testify/assert" + + "github.com/kubewharf/kelemetry/pkg/frontend/reader/merge" + tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" + tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" + utilobject "github.com/kubewharf/kelemetry/pkg/util/object" + "github.com/kubewharf/kelemetry/pkg/util/zconstants" +) + +func newTrace(id uint64, key utilobject.Key, startTime int64, endTime int64, links []merge.TargetLink) merge.TraceWithMetadata[uint64] { + traceId := model.NewTraceID(id, id) + objectSpan := &model.Span{ + TraceID: traceId, + SpanID: model.SpanID(1), + StartTime: time.Time{}.Add(time.Duration(startTime)), + Duration: time.Duration(endTime - startTime), + Tags: append( + mapToTags(zconstants.KeyToSpanTags(key)), + model.String(zconstants.TraceSource, zconstants.TraceSourceObject), + model.String(zconstants.PseudoType, string(zconstants.PseudoTypeObject)), + ), + } + spans := []*model.Span{objectSpan} + + for i, link := range links { + tags := zconstants.KeyToSpanTags(key) + zconstants.TagLinkedObject(tags, zconstants.LinkRef{ + Key: link.Key, + Role: link.Role, + Class: link.Class, + }) + + spans = append(spans, &model.Span{ + TraceID: traceId, + SpanID: model.SpanID(100 + i), + StartTime: objectSpan.StartTime, + Duration: objectSpan.Duration, + Tags: append( + mapToTags(tags), + model.String(zconstants.TraceSource, zconstants.TraceSourceObject), + model.String(zconstants.PseudoType, string(zconstants.PseudoTypeLink)), + ), + }) + } + + tree := tftree.NewSpanTree(spans) + + return merge.TraceWithMetadata[uint64]{ + Tree: tree, + Metadata: id, + } +} + +func mapToTags(m map[string]string) (out []model.KeyValue) { + for key, value := range m { + out = append(out, model.String(key, value)) + } + + return out +} + +// Assume traces[0] is the only result returned by the initial list. 
+func do( + t *testing.T, + traces []merge.TraceWithMetadata[uint64], + clipTimeStart, clipTimeEnd int64, +) { + assert := assert.New(t) + + merger := merge.Merger[uint64]{} + assert.NoError(merger.AddTraces(traces)) + + assert.NoError(merger.FollowLinks( + context.Background(), + tfconfig.ConstantLinkSelector(true), + time.Time{}.Add(time.Duration(clipTimeStart)), + time.Time{}.Add(time.Duration(clipTimeEnd)), + func(ctx context.Context, key utilobject.Key, startTime, endTime time.Time, limit int) (out []merge.TraceWithMetadata[uint64], _ error) { + for _, trace := range traces { + traceKey, hasKey := zconstants.ObjectKeyFromSpan(trace.Tree.Root) + assert.True(hasKey) + if key == traceKey { + out = append(out, trace) + } + } + + return out, nil + }, + len(traces), + int32(len(traces)), + false, + )) + + result, err := merger.MergeTraces() + assert.NoError(err) + t.Log(result) +} + +func TestFullTree(t *testing.T) { +} diff --git a/pkg/frontend/reader/reader.go b/pkg/frontend/reader/reader.go index fe2a4c0a..78deb925 100644 --- a/pkg/frontend/reader/reader.go +++ b/pkg/frontend/reader/reader.go @@ -26,7 +26,6 @@ import ( "github.com/jaegertracing/jaeger/storage/spanstore" "github.com/sirupsen/logrus" "github.com/spf13/pflag" - "k8s.io/apimachinery/pkg/util/sets" jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" "github.com/kubewharf/kelemetry/pkg/frontend/clusterlist" @@ -37,7 +36,7 @@ import ( "github.com/kubewharf/kelemetry/pkg/frontend/tracecache" "github.com/kubewharf/kelemetry/pkg/manager" utilobject "github.com/kubewharf/kelemetry/pkg/util/object" - "github.com/kubewharf/kelemetry/pkg/util/semaphore" + reflectutil "github.com/kubewharf/kelemetry/pkg/util/reflect" "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) @@ -52,6 +51,8 @@ type Interface interface { type options struct { cacheExtensions bool followLinkConcurrency int + followLinkLimit int32 + followLinksInList bool } func (options *options) Setup(fs *pflag.FlagSet) { @@ -64,9 
+65,21 @@ func (options *options) Setup(fs *pflag.FlagSet) { fs.IntVar( &options.followLinkConcurrency, "frontend-follow-link-concurrency", - 5, + 20, "number of concurrent trace per request to follow links", ) + fs.Int32Var( + &options.followLinkLimit, + "frontend-follow-link-limit", + 10, + "maximum number of linked objects to follow per search result", + ) + fs.BoolVar( + &options.followLinksInList, + "frontend-follow-links-in-list", + true, + "whether links should be recursed into when listing traces", + ) } func (options *options) EnableFlag() *bool { return nil } @@ -138,28 +151,56 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace } reader.Logger.WithField("query", query). - WithField("links", config.FollowLinks). WithField("config", config.Name). Debug("start trace list") - thumbnails, err := reader.Backend.List(ctx, query) + + if len(query.OperationName) > 0 { + if query.Tags == nil { + query.Tags = map[string]string{} + } + query.Tags["cluster"] = query.OperationName + } + + tts, err := reader.Backend.List(ctx, query) if err != nil { return nil, err } - if err := reader.followLinks(ctx, &thumbnails, config.FollowLinks, query); err != nil { - return nil, fmt.Errorf("resolving linked objects: %w", err) + twmList := make([]merge.TraceWithMetadata[any], len(tts)) + for i, tt := range tts { + twmList[i] = merge.TraceWithMetadata[any]{ + Tree: tt.Spans, + Metadata: tt.Identifier, + } } - var rootKey *utilobject.Key - if rootKeyValue, ok := utilobject.FromMap(query.Tags); ok { - rootKey = &rootKeyValue + merger := merge.Merger[any]{} + if err := merger.AddTraces(twmList); err != nil { + return nil, fmt.Errorf("group traces by object: %w", err) + } + + if reader.options.followLinksInList { + if err := merger.FollowLinks( + ctx, + config.LinkSelector, + query.StartTimeMin, query.StartTimeMax, + merge.ListWithBackend[any](reader.Backend, reflectutil.Identity[any]), + reader.options.followLinkConcurrency, 
reader.options.followLinkLimit, false, + ); err != nil { + return nil, fmt.Errorf("follow links: %w", err) + } } - mergeTrees, err := merge.Merger[*jaegerbackend.TraceThumbnail, any]{}.MergeTraces(thumbnails) + mergeTrees, err := merger.MergeTraces() if err != nil { return nil, fmt.Errorf("merging split and linked traces: %w", err) } + var rootKey *utilobject.Key + if rootKeyValue, ok := utilobject.FromMap(query.Tags); ok { + rootKey = &rootKeyValue + } + cacheEntries := []tracecache.Entry{} traces := []*model.Trace{} for _, mergeTree := range mergeTrees { @@ -191,64 +232,6 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace return traces, nil } -func (reader *spanReader) followLinks( - ctx context.Context, - thumbnailsPtr *[]*jaegerbackend.TraceThumbnail, - followLinks sets.Set[string], - query *spanstore.TraceQueryParameters, -) error { - knownKeys := sets.New[utilobject.Key]() - for _, thumbnail := range *thumbnailsPtr { - key, _ := zconstants.ObjectKeyFromSpan(thumbnail.Spans.Root) - knownKeys.Insert(key) - } - - sem := semaphore.New(reader.options.followLinkConcurrency) - - var scheduleFollow func(thumbnails []*jaegerbackend.TraceThumbnail) - scheduleFollow = func(thumbnails []*jaegerbackend.TraceThumbnail) { - for _, thumbnail := range thumbnails { - for _, span := range thumbnail.Spans.GetSpans() { - span := span - linkKey, hasLinkKey := zconstants.LinkedKeyFromSpan(span) - if hasLinkKey && !knownKeys.Has(linkKey) { - linkRole, hasLinkRole := model.KeyValues(span.Tags).FindByKey(zconstants.LinkRole) - if !hasLinkRole || !followLinks.Has(linkRole.VStr) { - continue - } - - sem.Schedule(func(ctx context.Context) (semaphore.Publish, error) { - newThumbnails, err := reader.Backend.List(ctx, &spanstore.TraceQueryParameters{ - ServiceName: zconstants.TraceSourceObject, - Tags: zconstants.KeyToSpanTags(linkKey), - StartTimeMin: query.StartTimeMin, - StartTimeMax: query.StartTimeMax, - NumTraces: query.NumTraces, - }) - if err != nil 
{ - return nil, fmt.Errorf("fetching linked traces: %w", err) - } - - scheduleFollow(newThumbnails) - return func() error { - *thumbnailsPtr = append(*thumbnailsPtr, newThumbnails...) - return nil - }, nil - }) - } - } - } - } - - scheduleFollow(*thumbnailsPtr) - - if err := sem.Run(ctx); err != nil { - return err - } - - return nil -} - func (reader *spanReader) prepareEntry( ctx context.Context, config *tfconfig.Config, @@ -334,12 +317,8 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( return nil, fmt.Errorf("trace %v not found", cacheId) } - aggTrace := &model.Trace{ - ProcessMap: []model.Trace_ProcessMapping{{ - ProcessID: "0", - Process: model.Process{}, - }}, - } + displayMode := extractDisplayMode(cacheId) + var traces []merge.TraceWithMetadata[struct{}] for _, identifier := range entry.Identifiers { trace, err := reader.Backend.Get(ctx, identifier, cacheId, entry.StartTime, entry.EndTime) @@ -348,7 +327,43 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( } clipped := filterTimeRange(trace.Spans, entry.StartTime, entry.EndTime) - aggTrace.Spans = append(aggTrace.Spans, clipped...) 
+ traces = append(traces, merge.TraceWithMetadata[struct{}]{Tree: tftree.NewSpanTree(clipped)}) + } + + merger := merge.Merger[struct{}]{} + if err := merger.AddTraces(traces); err != nil { + return nil, fmt.Errorf("grouping traces by object: %w", err) + } + + displayConfig := reader.TransformConfigs.GetById(displayMode) + if displayConfig == nil { + return nil, fmt.Errorf("display mode %x does not exist", displayMode) + } + + if err := merger.FollowLinks( + ctx, + displayConfig.LinkSelector, + entry.StartTime, entry.EndTime, + merge.ListWithBackend[struct{}](reader.Backend, func(any) struct{} { return struct{}{} }), + reader.options.followLinkConcurrency, reader.options.followLinkLimit, true, + ); err != nil { + return nil, fmt.Errorf("cannot follow links: %w", err) + } + mergedTrees, err := merger.MergeTraces() + if err != nil { + return nil, fmt.Errorf("merging linked trees: %w", err) + } + + // if spans were connected, they should continue to be connected since link spans cannot be deleted, so assume there is only one trace + if len(mergedTrees) != 1 { + return nil, fmt.Errorf("inconsistent linked trace count %d", len(mergedTrees)) + } + mergedTree := mergedTrees[0] + aggTrace := &model.Trace{ + ProcessMap: []model.Trace_ProcessMapping{{ + ProcessID: "0", + }}, + Spans: mergedTree.Tree.GetSpans(), } var extensions transform.ExtensionProcessor = &transform.FetchExtensionsAndStoreCache{} @@ -356,7 +371,6 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( extensions = &transform.LoadExtensionCache{Cache: entry.Extensions} } - displayMode := extractDisplayMode(cacheId) if err := reader.Transformer.Transform( ctx, aggTrace, entry.RootObject, displayMode, extensions, diff --git a/pkg/frontend/tf/config/config.go b/pkg/frontend/tf/config/config.go index 8c0bf49f..32b1b419 100644 --- a/pkg/frontend/tf/config/config.go +++ b/pkg/frontend/tf/config/config.go @@ -17,8 +17,6 @@ package tfconfig import ( "strconv" - 
"k8s.io/apimachinery/pkg/util/sets" - "github.com/kubewharf/kelemetry/pkg/frontend/extension" "github.com/kubewharf/kelemetry/pkg/manager" ) @@ -54,7 +52,7 @@ type Config struct { // The config name, used in search page display. Name string // Only links with roles in this set are followed. - FollowLinks sets.Set[string] + LinkSelector LinkSelector // The extension traces for this config. Extensions []extension.Provider // The steps to transform the tree @@ -69,11 +67,11 @@ func (config *Config) Clone() *Config { copy(extensions, config.Extensions) return &Config{ - Id: config.Id, - Name: config.Name, - FollowLinks: config.FollowLinks.Clone(), - Extensions: extensions, - Steps: steps, + Id: config.Id, + Name: config.Name, + LinkSelector: config.LinkSelector, // modifier changes LinkSelector by wrapping the previous value + Extensions: extensions, + Steps: steps, } } diff --git a/pkg/frontend/tf/config/file/file.go b/pkg/frontend/tf/config/file/file.go index d98e5d08..707fc633 100644 --- a/pkg/frontend/tf/config/file/file.go +++ b/pkg/frontend/tf/config/file/file.go @@ -115,10 +115,9 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { Modifiers map[tfconfig.Id]modifierConfig `json:"modifiers"` Batches []Batch `json:"batches"` Configs []struct { - Id tfconfig.Id `json:"id"` - Name string `json:"name"` - FollowLinks []string `json:"followLinks"` - Steps json.RawMessage `json:"steps"` + Id tfconfig.Id `json:"id"` + Name string `json:"name"` + Steps json.RawMessage `json:"steps"` } } if err := json.Unmarshal(jsonBytes, &file); err != nil { @@ -180,10 +179,10 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { } config := &tfconfig.Config{ - Id: raw.Id, - Name: raw.Name, - FollowLinks: sets.New(raw.FollowLinks...), - Steps: steps, + Id: raw.Id, + Name: raw.Name, + LinkSelector: tfconfig.ConstantLinkSelector(false), + Steps: steps, } p.register(registeredConfig{config: config, modifierClasses: sets.New[string]()}) diff --git 
a/pkg/frontend/tf/config/link_selector.go b/pkg/frontend/tf/config/link_selector.go new file mode 100644 index 00000000..d61b0277 --- /dev/null +++ b/pkg/frontend/tf/config/link_selector.go @@ -0,0 +1,87 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tfconfig + +import utilobject "github.com/kubewharf/kelemetry/pkg/util/object" + +type LinkSelector interface { + // Whether to follow the given link. + // + // If link should be followed, return a non-nil LinkSelector. + // The returned object will be used to recursively follow links in the linked object. 
+ Admit(parent utilobject.Key, child utilobject.Key, parentIsSource bool, linkClass string) LinkSelector +} + +type ConstantLinkSelector bool + +func (selector ConstantLinkSelector) Admit( + parent utilobject.Key, + child utilobject.Key, + parentIsSource bool, + linkClass string, +) LinkSelector { + if selector { + return selector + } + + return nil +} + +type IntersectLinkSelector []LinkSelector + +func (selector IntersectLinkSelector) Admit( + parentKey utilobject.Key, + childKey utilobject.Key, + parentIsSource bool, + linkClass string, +) LinkSelector { + newChildren := make([]LinkSelector, len(selector)) + + for i, child := range selector { + newChildren[i] = child.Admit(parentKey, childKey, parentIsSource, linkClass) + if newChildren[i] == nil { + return nil + } + } + + return IntersectLinkSelector(newChildren) +} + +type UnionLinkSelector []LinkSelector + +func (selector UnionLinkSelector) Admit( + parentKey utilobject.Key, + childKey utilobject.Key, + parentIsSource bool, + linkClass string, +) LinkSelector { + newChildren := make([]LinkSelector, len(selector)) + + ok := false + for i, child := range selector { + if child != nil { + newChildren[i] = child.Admit(parentKey, childKey, parentIsSource, linkClass) + if newChildren[i] != nil { + ok = true + } + } + } + + if ok { + return UnionLinkSelector(newChildren) + } + + return nil +} diff --git a/pkg/frontend/tf/defaults/modifier/exclusive.go b/pkg/frontend/tf/defaults/modifier/exclusive.go deleted file mode 100644 index 83559335..00000000 --- a/pkg/frontend/tf/defaults/modifier/exclusive.go +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2023 The Kelemetry Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tfmodifier - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/spf13/pflag" - - tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" - "github.com/kubewharf/kelemetry/pkg/manager" -) - -func init() { - manager.Global.ProvideListImpl( - "tf-modifier/exclusive", - manager.Ptr(&ExclusiveModifierFactory{}), - &manager.List[tfconfig.ModifierFactory]{}, - ) -} - -type ExclusiveModifierOptions struct { - enable bool -} - -func (options *ExclusiveModifierOptions) Setup(fs *pflag.FlagSet) { - fs.BoolVar(&options.enable, "jaeger-tf-exclusive-modifier-enable", true, "enable exclusive modifiers and list it in frontend") -} - -func (options *ExclusiveModifierOptions) EnableFlag() *bool { return &options.enable } - -type ExclusiveModifierFactory struct { - options ExclusiveModifierOptions -} - -var _ manager.Component = &ExclusiveModifierFactory{} - -func (m *ExclusiveModifierFactory) Options() manager.Options { return &m.options } -func (m *ExclusiveModifierFactory) Init() error { return nil } -func (m *ExclusiveModifierFactory) Start(ctx context.Context) error { return nil } -func (m *ExclusiveModifierFactory) Close(ctx context.Context) error { return nil } - -func (*ExclusiveModifierFactory) ListIndex() string { return "exclusive" } - -func (*ExclusiveModifierFactory) Build(jsonBuf []byte) (tfconfig.Modifier, error) { - modifier := &ExclusiveModifier{} - - if err := json.Unmarshal(jsonBuf, &modifier); err != nil { - return nil, fmt.Errorf("parse exclusive modifier config: %w", err) - } - - return &ExclusiveModifier{}, nil 
-} - -type ExclusiveModifier struct { - Class string `json:"modifierClass"` - AddRoles []string `json:"addRoles"` - RemoveRoles []string `json:"removeRoles"` -} - -func (modifier *ExclusiveModifier) ModifierClass() string { - return fmt.Sprintf("kelemetry.kubewharf.io/exclusive/%s", modifier.Class) -} - -func (modifier *ExclusiveModifier) Modify(config *tfconfig.Config) { - for _, role := range modifier.AddRoles { - config.FollowLinks.Insert(role) - } - - for _, role := range modifier.RemoveRoles { - config.FollowLinks.Delete(role) - } -} diff --git a/pkg/frontend/tf/defaults/modifier/link_selector.go b/pkg/frontend/tf/defaults/modifier/link_selector.go new file mode 100644 index 00000000..3627e0cd --- /dev/null +++ b/pkg/frontend/tf/defaults/modifier/link_selector.go @@ -0,0 +1,157 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tfmodifier + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/spf13/pflag" + + tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" + "github.com/kubewharf/kelemetry/pkg/manager" + utilmarshal "github.com/kubewharf/kelemetry/pkg/util/marshal" + utilobject "github.com/kubewharf/kelemetry/pkg/util/object" +) + +func init() { + manager.Global.ProvideListImpl( + "tf-modifier/link-selector", + manager.Ptr(&LinkSelectorModifierFactory{}), + &manager.List[tfconfig.ModifierFactory]{}, + ) +} + +type LinkSelectorModifierOptions struct { + enable bool +} + +func (options *LinkSelectorModifierOptions) Setup(fs *pflag.FlagSet) { + fs.BoolVar(&options.enable, "jaeger-tf-link-selector-modifier-enable", true, "enable link selector modifiers and list it in frontend") +} + +func (options *LinkSelectorModifierOptions) EnableFlag() *bool { return &options.enable } + +type LinkSelectorModifierFactory struct { + options LinkSelectorModifierOptions +} + +var _ manager.Component = &LinkSelectorModifierFactory{} + +func (m *LinkSelectorModifierFactory) Options() manager.Options { return &m.options } +func (m *LinkSelectorModifierFactory) Init() error { return nil } +func (m *LinkSelectorModifierFactory) Start(ctx context.Context) error { return nil } +func (m *LinkSelectorModifierFactory) Close(ctx context.Context) error { return nil } + +func (*LinkSelectorModifierFactory) ListIndex() string { return "link-selector" } + +func (*LinkSelectorModifierFactory) Build(jsonBuf []byte) (tfconfig.Modifier, error) { + modifier := &LinkSelectorModifier{} + + if err := json.Unmarshal(jsonBuf, &modifier); err != nil { + return nil, fmt.Errorf("parse link selector modifier config: %w", err) + } + + return modifier, nil +} + +type LinkSelectorModifier struct { + Class string `json:"modifierClass"` + IncludeSiblings bool `json:"includeSiblings"` + PatternFilters []LinkPattern `json:"ifAll"` +} + +type LinkPattern struct { + Parent utilmarshal.ObjectFilter 
`json:"parent"` + Child utilmarshal.ObjectFilter `json:"child"` + IncludeFromParent utilmarshal.Optional[bool] `json:"fromParent"` + IncludeFromChild utilmarshal.Optional[bool] `json:"fromChild"` + LinkClass utilmarshal.Optional[utilmarshal.StringFilter] `json:"linkClass"` +} + +func (pattern *LinkPattern) Matches(parent utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) bool { + if !pattern.Parent.Matches(parent) { + return false + } + + if !pattern.Child.Matches(child) { + return false + } + + if !pattern.IncludeFromParent.GetOr(true) && isFromParent { + return false + } + + if !pattern.IncludeFromChild.GetOr(true) && !isFromParent { + return false + } + + if pattern.LinkClass.IsSet && !pattern.LinkClass.Value.Matches(linkClass) { + return false + } + + return true +} + +func (modifier *LinkSelectorModifier) ModifierClass() string { + return fmt.Sprintf("kelemetry.kubewharf.io/link-selectors/%s", modifier.Class) +} + +func (modifier *LinkSelectorModifier) Modify(config *tfconfig.Config) { + var selector tfconfig.LinkSelector = patternLinkSelector{patterns: modifier.PatternFilters} + if !modifier.IncludeSiblings { + selector = tfconfig.IntersectLinkSelector{ + selector, + denySiblingsLinkSelector{}, + } + } + + config.LinkSelector = tfconfig.UnionLinkSelector{config.LinkSelector, selector} +} + +type denySiblingsLinkSelector struct { + hasFirst bool + firstIsFromParent bool +} + +func (s denySiblingsLinkSelector) Admit( + parent utilobject.Key, + child utilobject.Key, + isFromParent bool, + linkClass string, +) tfconfig.LinkSelector { + if !s.hasFirst { + return denySiblingsLinkSelector{hasFirst: true, firstIsFromParent: isFromParent} + } + if s.firstIsFromParent != isFromParent { + return nil + } + return s +} + +type patternLinkSelector struct { + patterns []LinkPattern +} + +func (s patternLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) tfconfig.LinkSelector { + for _, pattern := 
range s.patterns { + if !pattern.Matches(parent, child, isFromParent, linkClass) { + return nil + } + } + + return s +} diff --git a/pkg/frontend/tf/tree/tree.go b/pkg/frontend/tf/tree/tree.go index 8495dbee..b4653cff 100644 --- a/pkg/frontend/tf/tree/tree.go +++ b/pkg/frontend/tf/tree/tree.go @@ -70,7 +70,7 @@ func NewSpanTree(spans []*model.Span) *SpanTree { } func (tree *SpanTree) Clone() (*SpanTree, error) { - copiedSpans := make([]*model.Span, len(tree.spanMap)) + copiedSpans := make([]*model.Span, 0, len(tree.spanMap)) for _, span := range tree.spanMap { spanCopy, err := CopySpan(span) if err != nil { diff --git a/pkg/ownerlinker/linker.go b/pkg/ownerlinker/linker.go index 40531d23..136e569a 100644 --- a/pkg/ownerlinker/linker.go +++ b/pkg/ownerlinker/linker.go @@ -110,7 +110,7 @@ func (ctrl *Controller) Lookup(ctx context.Context, object utilobject.Rich) ([]l Key: utilobject.Key{ Cluster: object.Cluster, Group: gvr.Group, - Resource: gvr.Group, + Resource: gvr.Resource, Namespace: object.Namespace, Name: owner.Name, }, @@ -121,9 +121,10 @@ func (ctrl *Controller) Lookup(ctx context.Context, object utilobject.Rich) ([]l logger.WithField("owner", parentRef).Debug("Resolved owner") results = append(results, linker.LinkerResult{ - Object: parentRef, - Role: zconstants.LinkRoleParent, - Class: "children", + Object: parentRef, + Role: zconstants.LinkRoleParent, + Class: "children", + DedupId: "ownerReference", }) } } diff --git a/pkg/util/marshal/marshal.go b/pkg/util/marshal/marshal.go new file mode 100644 index 00000000..7fdd10a7 --- /dev/null +++ b/pkg/util/marshal/marshal.go @@ -0,0 +1,136 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utilities for unmarshaling in config files +package utilmarshal + +import ( + "encoding/json" + "fmt" + "regexp" + "strings" + + utilobject "github.com/kubewharf/kelemetry/pkg/util/object" +) + +type Optional[T any] struct { + IsSet bool + Value T +} + +func (opt Optional[T]) GetOr(defaultValue T) T { + if opt.IsSet { + return opt.Value + } else { + return defaultValue + } +} + +func IsSetTo[T comparable](opt Optional[T], t T) bool { + return opt.IsSet && opt.Value == t +} + +func (v *Optional[T]) UnmarshalJSON(buf []byte) error { + if err := json.Unmarshal(buf, &v.Value); err != nil { + return err + } + + v.IsSet = true + return nil +} + +type ObjectFilter struct { + Cluster Optional[StringFilter] `json:"cluster"` + Group Optional[StringFilter] `json:"group"` + Resource Optional[StringFilter] `json:"resource"` + Namespace Optional[StringFilter] `json:"namespace"` + Name Optional[StringFilter] `json:"name"` +} + +func (filter *ObjectFilter) Matches(key utilobject.Key) bool { + if filter.Cluster.IsSet { + if !filter.Cluster.Value.Matches(key.Cluster) { + return false + } + } + + if filter.Group.IsSet { + if !filter.Group.Value.Matches(key.Group) { + return false + } + } + + if filter.Resource.IsSet { + if !filter.Resource.Value.Matches(key.Resource) { + return false + } + } + + if filter.Namespace.IsSet { + if !filter.Namespace.Value.Matches(key.Namespace) { + return false + } + } + + if filter.Name.IsSet { + if !filter.Name.Value.Matches(key.Name) { + return false + } + } + + return true +} + +type StringFilter struct { + fn func(s string) 
bool +} + +func (f *StringFilter) UnmarshalJSON(buf []byte) error { + var pattern string + if err := json.Unmarshal(buf, &pattern); err == nil { + f.fn = func(s string) bool { return s == pattern } + return nil + } + + var value struct { + Exact Optional[string] `json:"exact"` + CaseFold Optional[string] `json:"caseInsensitive"` + Regex Optional[string] `json:"regex"` + } + + if err := json.Unmarshal(buf, &value); err != nil { + return err + } + + if value.Exact.IsSet { + f.fn = func(s string) bool { return s == value.Exact.Value } + } else if value.CaseFold.IsSet { + f.fn = func(s string) bool { return strings.EqualFold(value.CaseFold.Value, value.CaseFold.Value) } + } else if value.Regex.IsSet { + regex, err := regexp.Compile(value.Regex.Value) + if err != nil { + return fmt.Errorf("pattern contains invalid regex: %w", err) + } + + f.fn = regex.MatchString + } else { + return fmt.Errorf("no filter selected") + } + + return nil +} + +func (f *StringFilter) Matches(subject string) bool { + return f.fn(subject) +} diff --git a/pkg/util/reflect/reflect.go b/pkg/util/reflect/reflect.go index 5e3f60a1..3ce6179d 100644 --- a/pkg/util/reflect/reflect.go +++ b/pkg/util/reflect/reflect.go @@ -23,3 +23,5 @@ func TypeOf[T any]() reflect.Type { } func ZeroOf[T any]() (_ T) { return } + +func Identity[T any](t T) T { return t } diff --git a/pkg/util/semaphore/semaphore.go b/pkg/util/semaphore/semaphore.go index c04070c5..ecd82e59 100644 --- a/pkg/util/semaphore/semaphore.go +++ b/pkg/util/semaphore/semaphore.go @@ -110,6 +110,8 @@ func (sem *Semaphore) Schedule(task Task) { if publish != nil { select { case sem.publishCh <- publish: + // publishCh has zero capacity, so this case blocks until the main goroutine selects the publishCh case, + // so we can ensure that publishCh is received before calling sem.doneWg.Done() case <-sem.errNotifyCh: // no need to publish if the caller received error } diff --git a/pkg/util/zconstants/link.go b/pkg/util/zconstants/link.go new file mode 
100644 index 00000000..dea0d5ad --- /dev/null +++ b/pkg/util/zconstants/link.go @@ -0,0 +1,134 @@ +// Copyright 2023 The Kelemetry Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package zconstants + +import ( + "github.com/jaegertracing/jaeger/model" + + utilobject "github.com/kubewharf/kelemetry/pkg/util/object" +) + +type LinkRef struct { + Key utilobject.Key + Role LinkRoleValue + Class string +} + +// Tags for TraceSourceLink spans that indicate the linked object. +const ( + LinkedObjectCluster = "linkedCluster" + LinkedObjectGroup = "linkedGroup" + LinkedObjectResource = "linkedResource" + LinkedObjectNamespace = "linkedNamespace" + LinkedObjectName = "linkedName" + + // Indicates how the linked trace interacts with the current trace. + LinkRole = "linkRole" + + // If this tag is nonempty, a virtual span is inserted between the linked objects with the tag value as the name. 
+ LinkClass = "linkClass" +) + +func TagLinkedObject(tags map[string]string, ln LinkRef) { + tags[LinkedObjectCluster] = ln.Key.Cluster + tags[LinkedObjectGroup] = ln.Key.Group + tags[LinkedObjectResource] = ln.Key.Resource + tags[LinkedObjectNamespace] = ln.Key.Namespace + tags[LinkedObjectName] = ln.Key.Name + tags[LinkRole] = string(ln.Role) + tags[LinkClass] = ln.Class +} + +func ObjectKeyFromSpan(span *model.Span) (utilobject.Key, bool) { + tags := model.KeyValues(span.Tags) + traceSource, hasTraceSource := tags.FindByKey(TraceSource) + if !hasTraceSource || traceSource.VStr != TraceSourceObject { + return utilobject.Key{}, false + } + + cluster, _ := tags.FindByKey("cluster") + group, _ := tags.FindByKey("group") + resource, _ := tags.FindByKey("resource") + namespace, _ := tags.FindByKey("namespace") + name, _ := tags.FindByKey("name") + key := utilobject.Key{ + Cluster: cluster.VStr, + Group: group.VStr, + Resource: resource.VStr, + Namespace: namespace.VStr, + Name: name.VStr, + } + return key, true +} + +func LinkedKeyFromSpan(span *model.Span) (utilobject.Key, bool) { + tags := model.KeyValues(span.Tags) + pseudoType, isPseudo := tags.FindByKey(PseudoType) + if !isPseudo || pseudoType.VStr != string(PseudoTypeLink) { + return utilobject.Key{}, false + } + + cluster, _ := tags.FindByKey(LinkedObjectCluster) + group, _ := tags.FindByKey(LinkedObjectGroup) + resource, _ := tags.FindByKey(LinkedObjectResource) + namespace, _ := tags.FindByKey(LinkedObjectNamespace) + name, _ := tags.FindByKey(LinkedObjectName) + key := utilobject.Key{ + Cluster: cluster.VStr, + Group: group.VStr, + Resource: resource.VStr, + Namespace: namespace.VStr, + Name: name.VStr, + } + return key, true +} + +func KeyToSpanTags(key utilobject.Key) map[string]string { + return map[string]string{ + "cluster": key.Cluster, + "group": key.Group, + "resource": key.Resource, + "namespace": key.Namespace, + "name": key.Name, + } +} + +func VersionedKeyToSpanTags(key utilobject.VersionedKey) 
map[string]string { + m := KeyToSpanTags(key.Key) + m["version"] = key.Version + return m +} + +type LinkRoleValue string + +const ( + // The current trace is a child trace under the linked trace + LinkRoleParent LinkRoleValue = "parent" + + // The linked trace is a child trace under the current trace. + LinkRoleChild LinkRoleValue = "child" +) + +// Determines the role of the reverse link. +func ReverseLinkRole(role LinkRoleValue) LinkRoleValue { + switch role { + case LinkRoleParent: + return LinkRoleChild + case LinkRoleChild: + return LinkRoleParent + default: + return role + } +} diff --git a/pkg/util/zconstants/zconstants.go b/pkg/util/zconstants/zconstants.go index f5413484..02faeb5a 100644 --- a/pkg/util/zconstants/zconstants.go +++ b/pkg/util/zconstants/zconstants.go @@ -18,10 +18,6 @@ package zconstants import ( "time" - - "github.com/jaegertracing/jaeger/model" - - utilobject "github.com/kubewharf/kelemetry/pkg/util/object" ) // All tags with this prefix are not rendered. @@ -62,7 +58,7 @@ const ( ) func KnownTraceSources(withPseudo bool) []string { - numPseudoTraceSources := 2 + numPseudoTraceSources := 1 traceSources := []string{ // pseudo @@ -80,123 +76,6 @@ func KnownTraceSources(withPseudo bool) []string { return traceSources } -// Tags for TraceSourceLink spans that indicate the linked object. -const ( - LinkedObjectCluster = "linkedCluster" - LinkedObjectGroup = "linkedGroup" - LinkedObjectResource = "linkedResource" - LinkedObjectNamespace = "linkedNamespace" - LinkedObjectName = "linkedName" - - // Indicates how the linked trace interacts with the current trace. - LinkRole = "linkRole" - - // If this tag is nonempty, a virtual span is inserted between the linked objects with the tag value as the name. 
- LinkClass = "linkClass" -) - -func TagLinkedObject(tags map[string]string, object utilobject.Key, role LinkRoleValue, class string) { - tags[LinkedObjectCluster] = object.Cluster - tags[LinkedObjectGroup] = object.Group - tags[LinkedObjectResource] = object.Resource - tags[LinkedObjectNamespace] = object.Namespace - tags[LinkedObjectName] = object.Name - tags[LinkRole] = string(role) - tags[LinkClass] = class -} - -func ObjectKeyFromSpan(span *model.Span) (utilobject.Key, bool) { - tags := model.KeyValues(span.Tags) - traceSource, hasTraceSource := tags.FindByKey(TraceSource) - if !hasTraceSource || traceSource.VStr != TraceSourceObject { - return utilobject.Key{}, false - } - - cluster, _ := tags.FindByKey("cluster") - group, _ := tags.FindByKey("group") - resource, _ := tags.FindByKey("resource") - namespace, _ := tags.FindByKey("namespace") - name, _ := tags.FindByKey("name") - key := utilobject.Key{ - Cluster: cluster.VStr, - Group: group.VStr, - Resource: resource.VStr, - Namespace: namespace.VStr, - Name: name.VStr, - } - return key, true -} - -func LinkedKeyFromSpan(span *model.Span) (utilobject.Key, bool) { - tags := model.KeyValues(span.Tags) - pseudoType, isPseudo := tags.FindByKey(PseudoType) - if !isPseudo || pseudoType.VStr != string(PseudoTypeLink) { - return utilobject.Key{}, false - } - - cluster, _ := tags.FindByKey(LinkedObjectCluster) - group, _ := tags.FindByKey(LinkedObjectGroup) - resource, _ := tags.FindByKey(LinkedObjectResource) - namespace, _ := tags.FindByKey(LinkedObjectNamespace) - name, _ := tags.FindByKey(LinkedObjectName) - key := utilobject.Key{ - Cluster: cluster.VStr, - Group: group.VStr, - Resource: resource.VStr, - Namespace: namespace.VStr, - Name: name.VStr, - } - return key, true -} - -func KeyToSpanTags(key utilobject.Key) map[string]string { - return map[string]string{ - "cluster": key.Cluster, - "group": key.Group, - "resource": key.Resource, - "namespace": key.Namespace, - "name": key.Name, - } -} - -func 
VersionedKeyToSpanTags(key utilobject.VersionedKey) map[string]string { - m := KeyToSpanTags(key.Key) - m["version"] = key.Version - return m -} - -func KeyToSpanLinkedTags(key utilobject.Key) map[string]string { - return map[string]string{ - LinkedObjectCluster: key.Cluster, - LinkedObjectGroup: key.Group, - LinkedObjectResource: key.Resource, - LinkedObjectNamespace: key.Namespace, - LinkedObjectName: key.Name, - } -} - -type LinkRoleValue string - -const ( - // The current trace is a child trace under the linked trace - LinkRoleParent LinkRoleValue = "parent" - - // The linked trace is a child trace under the current trace. - LinkRoleChild LinkRoleValue = "child" -) - -// Determines the role of the reverse link. -func ReverseLinkRole(role LinkRoleValue) LinkRoleValue { - switch role { - case LinkRoleParent: - return LinkRoleChild - case LinkRoleChild: - return LinkRoleParent - default: - return role - } -} - // Classifies the type of a log line. // Logs without this attribute will not have special treatment. 
const LogTypeAttr = Prefix + "logType" From 49c47cc6395e6891a4ee54f5a2b82d3f5a00ae0e Mon Sep 17 00:00:00 2001 From: chankyin Date: Fri, 18 Aug 2023 16:51:19 +0800 Subject: [PATCH 05/16] fix(merge): add unit test and correct some corner cases --- pkg/frontend/reader/merge/merge.go | 79 +++++---- pkg/frontend/reader/merge/merge_test.go | 218 ++++++++++++++++++++++-- pkg/frontend/reader/reader.go | 47 +++-- pkg/frontend/tf/tree/tree.go | 2 + pkg/util/marshal/marshal.go | 2 +- pkg/util/semaphore/semaphore.go | 15 +- pkg/util/zconstants/link.go | 8 +- 7 files changed, 300 insertions(+), 71 deletions(-) diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go index 3275ff33..ede5e983 100644 --- a/pkg/frontend/reader/merge/merge.go +++ b/pkg/frontend/reader/merge/merge.go @@ -22,7 +22,6 @@ import ( "time" "github.com/jaegertracing/jaeger/model" - "github.com/jaegertracing/jaeger/storage/spanstore" "k8s.io/apimachinery/pkg/util/sets" jaegerbackend "github.com/kubewharf/kelemetry/pkg/frontend/backend" @@ -55,24 +54,24 @@ func (tr RawTree) FromThumbnail(self *RawTree, tt *jaegerbackend.TraceThumbnail) self.Tree = tt.Spans } -func (merger *Merger[M]) AddTraces(trees []TraceWithMetadata[M]) error { +func (merger *Merger[M]) AddTraces(trees []TraceWithMetadata[M]) (_affected sets.Set[objKey], _err error) { if merger.objects == nil { merger.objects = make(map[objKey]*object[M]) } affected := sets.New[objKey]() for _, trace := range trees { - key, _ := zconstants.ObjectKeyFromSpan(trace.Tree.Root) + key := zconstants.ObjectKeyFromSpan(trace.Tree.Root) affected.Insert(key) if obj, hasPrev := merger.objects[key]; hasPrev { if err := obj.merge(trace.Tree, trace.Metadata); err != nil { - return err + return nil, err } } else { obj, err := newObject[M](key, trace.Tree, trace.Metadata) if err != nil { - return err + return nil, err } merger.objects[key] = obj @@ -83,7 +82,7 @@ func (merger *Merger[M]) AddTraces(trees []TraceWithMetadata[M]) error { 
merger.objects[key].identifyLinks() } - return nil + return affected, nil } type followLinkPool[M any] struct { @@ -94,12 +93,15 @@ type followLinkPool[M any] struct { merger *Merger[M] } -func (fl *followLinkPool[M]) scheduleKnown(obj *object[M], limit *atomic.Int32, linkSelector tfconfig.LinkSelector) { +func (fl *followLinkPool[M]) scheduleFrom(obj *object[M], followLimit *atomic.Int32, linkSelector tfconfig.LinkSelector) { + admittedLinks := []TargetLink{} + for _, link := range obj.links { if _, known := fl.knownKeys[link.Key]; known { + admittedLinks = append(admittedLinks, link) continue } - if limit.Add(-1) < 0 { + if followLimit.Add(-1) < 0 { continue } @@ -110,21 +112,31 @@ func (fl *followLinkPool[M]) scheduleKnown(obj *object[M], limit *atomic.Int32, subSelector := linkSelector.Admit(parentKey, childKey, parentIsSource, link.Class) if subSelector != nil { + admittedLinks = append(admittedLinks, link) fl.knownKeys.Insert(link.Key) - fl.schedule(link.Key, subSelector, int32(fl.endTime.Sub(fl.startTime)/(time.Minute*30))) + fl.schedule(link.Key, subSelector, followLimit, int32(fl.endTime.Sub(fl.startTime)/(time.Minute*30))) } } + + obj.links = admittedLinks } -func (fl *followLinkPool[M]) schedule(key objKey, linkSelector tfconfig.LinkSelector, limit int32) { +func (fl *followLinkPool[M]) schedule(key objKey, linkSelector tfconfig.LinkSelector, followLimit *atomic.Int32, traceLimit int32) { fl.sem.Schedule(func(ctx context.Context) (semaphore.Publish, error) { - thumbnails, err := fl.lister(ctx, key, fl.startTime, fl.endTime, int(limit)) + thumbnails, err := fl.lister(ctx, key, fl.startTime, fl.endTime, int(traceLimit)) if err != nil { return nil, fmt.Errorf("fetching linked traces: %w", err) } return func() error { - fl.merger.AddTraces(thumbnails) + affected, err := fl.merger.AddTraces(thumbnails) + if err != nil { + return err + } + + for key := range affected { + fl.scheduleFrom(fl.merger.objects[key], followLimit, linkSelector) + } return nil }, nil 
@@ -133,30 +145,6 @@ func (fl *followLinkPool[M]) schedule(key objKey, linkSelector tfconfig.LinkSele type ListFunc[M any] func(ctx context.Context, key objKey, startTime time.Time, endTime time.Time, limit int) ([]TraceWithMetadata[M], error) -func ListWithBackend[M any](backend jaegerbackend.Backend, convertMetadata func(any) M) ListFunc[M] { - return func(ctx context.Context, key objKey, startTime time.Time, endTime time.Time, limit int) ([]TraceWithMetadata[M], error) { - tts, err := backend.List(ctx, &spanstore.TraceQueryParameters{ - Tags: zconstants.KeyToSpanTags(key), - StartTimeMin: startTime, - StartTimeMax: endTime, - NumTraces: int(limit), - }) - if err != nil { - return nil, err - } - - twmList := make([]TraceWithMetadata[M], len(tts)) - for i, tt := range tts { - twmList[i] = TraceWithMetadata[M]{ - Tree: tt.Spans, - Metadata: convertMetadata(tt.Identifier), - } - } - - return twmList, nil - } -} - func (merger *Merger[M]) FollowLinks( ctx context.Context, linkSelector tfconfig.LinkSelector, @@ -191,7 +179,7 @@ func (merger *Merger[M]) FollowLinks( remainingLimit.Store(limit) } - fl.scheduleKnown(obj, remainingLimit, linkSelector) + fl.scheduleFrom(obj, remainingLimit, linkSelector) } if err := fl.sem.Run(ctx); err != nil { @@ -558,6 +546,7 @@ func mergeLinkedTraces[M any](objects []*object[M], abLinks abLinkMap) (*tftree. }) tree := trees[rootKey].tree + treeObjects := sets.New(rootKey) pendingObjects := []objKey{rootKey} for len(pendingObjects) > 0 { @@ -571,17 +560,25 @@ func mergeLinkedTraces[M any](objects []*object[M], abLinks abLinkMap) (*tftree. 
parentSpan := trees[subj].tree.Root if link.Class != "" { - virtualSpan := createVirtualSpan(tree.Root.TraceID, parentSpan, link.Class) + virtualSpan := createVirtualSpan(tree.Root.TraceID, parentSpan, "", link.Class) tree.Add(virtualSpan, parentSpan.SpanID) parentSpan = virtualSpan } + if treeObjects.Has(link.Key) { + parentSpan.Warnings = append(parentSpan.Warnings, fmt.Sprintf("repeated object %v omitted", link.Key)) + // duplicate + continue + } + subtree, hasSubtree := trees[link.Key] if !hasSubtree { // this link was not fetched, e.g. because of fetch limit or link selector continue } + tree.AddTree(subtree.tree, parentSpan.SpanID) + treeObjects.Insert(link.Key) pendingObjects = append(pendingObjects, link.Key) } } @@ -589,13 +586,13 @@ func mergeLinkedTraces[M any](objects []*object[M], abLinks abLinkMap) (*tftree. return tree, nil } -func createVirtualSpan(traceId model.TraceID, span *model.Span, class string) *model.Span { +func createVirtualSpan(traceId model.TraceID, span *model.Span, opName string, svcName string) *model.Span { spanId := model.SpanID(rand.Uint64()) return &model.Span{ TraceID: traceId, SpanID: spanId, - OperationName: class, + OperationName: opName, Flags: 0, StartTime: span.StartTime, Duration: span.Duration, @@ -607,7 +604,7 @@ func createVirtualSpan(traceId model.TraceID, span *model.Span, class string) *m }, }, Process: &model.Process{ - ServiceName: class, + ServiceName: svcName, }, ProcessID: "1", } diff --git a/pkg/frontend/reader/merge/merge_test.go b/pkg/frontend/reader/merge/merge_test.go index 73cb6921..f66574b0 100644 --- a/pkg/frontend/reader/merge/merge_test.go +++ b/pkg/frontend/reader/merge/merge_test.go @@ -16,6 +16,7 @@ package merge_test import ( "context" + "sort" "testing" "time" @@ -33,7 +34,7 @@ func newTrace(id uint64, key utilobject.Key, startTime int64, endTime int64, lin traceId := model.NewTraceID(id, id) objectSpan := &model.Span{ TraceID: traceId, - SpanID: model.SpanID(1), + SpanID: model.SpanID(id | (1 << 
16)), StartTime: time.Time{}.Add(time.Duration(startTime)), Duration: time.Duration(endTime - startTime), Tags: append( @@ -54,7 +55,7 @@ func newTrace(id uint64, key utilobject.Key, startTime int64, endTime int64, lin spans = append(spans, &model.Span{ TraceID: traceId, - SpanID: model.SpanID(100 + i), + SpanID: model.SpanID(id | (3 << 16) | (uint64(i) << 20)), StartTime: objectSpan.StartTime, Duration: objectSpan.Duration, Tags: append( @@ -62,9 +63,23 @@ func newTrace(id uint64, key utilobject.Key, startTime int64, endTime int64, lin model.String(zconstants.TraceSource, zconstants.TraceSourceObject), model.String(zconstants.PseudoType, string(zconstants.PseudoTypeLink)), ), + References: []model.SpanRef{model.NewChildOfRef(traceId, objectSpan.SpanID)}, }) } + spans = append(spans, &model.Span{ + TraceID: traceId, + SpanID: model.SpanID(id | (2 << 16)), + StartTime: time.Time{}.Add(time.Duration(startTime+endTime) / 2), + Duration: time.Duration(endTime-startTime) / 4, + Tags: append( + mapToTags(zconstants.KeyToSpanTags(key)), + model.String(zconstants.TraceSource, zconstants.TraceSourceEvent), + model.String(zconstants.NotPseudo, zconstants.NotPseudo), + ), + References: []model.SpanRef{model.NewChildOfRef(traceId, objectSpan.SpanID)}, + }) + tree := tftree.NewSpanTree(spans) return merge.TraceWithMetadata[uint64]{ @@ -81,27 +96,170 @@ func mapToTags(m map[string]string) (out []model.KeyValue) { return out } -// Assume traces[0] is the only result returned by the initial list. 
+type traceList []merge.TraceWithMetadata[uint64] + +func (list traceList) append(idLow uint32, key utilobject.Key, links []merge.TargetLink) traceList { + for time := 0; time < 4; time++ { + id := uint64(idLow) | (uint64(time) << 8) + list = append(list, newTrace( + id, key, + int64(time*10), int64((time+1)*10), + links, + )) + } + return list +} + +// IDs: +// - rs = 0x10, 0x11 +// - dp = 0x20 +// - pod = 0x30 | rs | (replica*2) +// - node = 0x40 +func sampleTraces(withPods bool, withNode bool) traceList { + rsKeys := []utilobject.Key{ + { + Cluster: "test", + Group: "apps", + Resource: "replicasets", + Namespace: "default", + Name: "dp-spec1", + }, + { + Cluster: "test", + Group: "apps", + Resource: "replicasets", + Namespace: "default", + Name: "dp-spec2", + }, + } + dpKey := utilobject.Key{ + Cluster: "test", + Group: "apps", + Resource: "deployments", + Namespace: "default", + Name: "dp", + } + podKeys := [][]utilobject.Key{ + { + { + Cluster: "test", + Group: "", + Resource: "pods", + Namespace: "default", + Name: "dp-spec1-replica1", + }, + { + Cluster: "test", + Group: "", + Resource: "pods", + Namespace: "default", + Name: "dp-spec1-replica2", + }, + }, + { + { + Cluster: "test", + Group: "", + Resource: "pods", + Namespace: "default", + Name: "dp-spec2-replica1", + }, + { + Cluster: "test", + Group: "", + Resource: "pods", + Namespace: "default", + Name: "dp-spec2-replica2", + }, + }, + } + nodeKey := utilobject.Key{ + Cluster: "test", + Group: "", + Resource: "nodes", + Namespace: "", + Name: "node", + } + + list := traceList{} + for spec := uint32(0); spec < 2; spec++ { + rsLinks := []merge.TargetLink{ + {Key: dpKey, Role: zconstants.LinkRoleParent, Class: "children"}, + } + if withPods { + rsLinks = append(rsLinks, + merge.TargetLink{Key: podKeys[spec][0], Role: zconstants.LinkRoleChild, Class: "children"}, + merge.TargetLink{Key: podKeys[spec][1], Role: zconstants.LinkRoleChild, Class: "children"}, + ) + } + list = list.append(0x10|spec, 
rsKeys[spec], rsLinks) + } + list = list.append(0x20, dpKey, []merge.TargetLink{ + {Key: rsKeys[0], Role: zconstants.LinkRoleChild, Class: "children"}, + {Key: rsKeys[1], Role: zconstants.LinkRoleChild, Class: "children"}, + }) + + nodeLinks := []merge.TargetLink{} + if withPods { + for spec := uint32(0); spec < 2; spec++ { + for replica := uint32(0); replica < 2; replica++ { + podLinks := []merge.TargetLink{ + {Key: rsKeys[spec], Role: zconstants.LinkRoleParent, Class: "children"}, + } + if withNode { + podLinks = append(podLinks, merge.TargetLink{Key: nodeKey, Role: zconstants.LinkRoleChild, Class: "node"}) + } + list = list.append(0x30|spec|(replica<<1), podKeys[spec][replica], podLinks) + nodeLinks = append(nodeLinks, merge.TargetLink{Key: podKeys[spec][replica], Role: zconstants.LinkRoleParent, Class: "node"}) + } + } + } + if withNode { + list = list.append(0x40, nodeKey, nodeLinks) + } + return list +} + func do( t *testing.T, - traces []merge.TraceWithMetadata[uint64], clipTimeStart, clipTimeEnd int64, + traces traceList, + activePrefixLength int, + linkSelector tfconfig.LinkSelector, + expectGroupSizes []int, + expectObjectCounts []int, ) { + t.Helper() + assert := assert.New(t) + active := []merge.TraceWithMetadata[uint64]{} + for _, trace := range traces[:activePrefixLength] { + traceTime := int64(trace.Tree.Root.StartTime.Sub(time.Time{})) + if clipTimeStart <= traceTime && traceTime < clipTimeEnd { + active = append(active, trace) + } + } + merger := merge.Merger[uint64]{} - assert.NoError(merger.AddTraces(traces)) + _, err := merger.AddTraces(active) + assert.NoError(err) assert.NoError(merger.FollowLinks( context.Background(), - tfconfig.ConstantLinkSelector(true), + linkSelector, time.Time{}.Add(time.Duration(clipTimeStart)), time.Time{}.Add(time.Duration(clipTimeEnd)), - func(ctx context.Context, key utilobject.Key, startTime, endTime time.Time, limit int) (out []merge.TraceWithMetadata[uint64], _ error) { + func( + ctx context.Context, + key 
utilobject.Key, + startTime, endTime time.Time, + limit int, + ) (out []merge.TraceWithMetadata[uint64], _ error) { for _, trace := range traces { - traceKey, hasKey := zconstants.ObjectKeyFromSpan(trace.Tree.Root) - assert.True(hasKey) - if key == traceKey { + traceKey := zconstants.ObjectKeyFromSpan(trace.Tree.Root) + traceTime := int64(trace.Tree.Root.StartTime.Sub(time.Time{})) + if key == traceKey && clipTimeStart <= traceTime && traceTime < clipTimeEnd { out = append(out, trace) } } @@ -115,8 +273,46 @@ func do( result, err := merger.MergeTraces() assert.NoError(err) - t.Log(result) + assert.Len(result, len(expectGroupSizes)) + + sort.Ints(expectGroupSizes) + actualGroupSizes := make([]int, len(result)) + for i, group := range result { + actualGroupSizes[i] = len(group.Metadata) + } + sort.Ints(actualGroupSizes) + assert.Equal(expectGroupSizes, actualGroupSizes) + + actualObjectCounts := []int{} + for _, group := range result { + objectCount := 0 + for _, span := range group.Tree.GetSpans() { + pseudoTag, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType) + if isPseudo && pseudoTag.VStr == string(zconstants.PseudoTypeObject) { + objectCount += 1 + } + } + + actualObjectCounts = append(actualObjectCounts, objectCount) + } + sort.Ints(actualObjectCounts) + assert.Equal(expectObjectCounts, actualObjectCounts) } func TestFullTree(t *testing.T) { + do(t, 10, 30, sampleTraces(true, true), 4, tfconfig.ConstantLinkSelector(true), []int{2 * (1 + 2 + 4 + 1)}, []int{1 + 2 + 4 + 1}) +} + +func TestFilteredTree(t *testing.T) { + do(t, 10, 30, sampleTraces(true, true), 4, rsPodLinksOnly{}, []int{2 * (1 + 2)}, []int{1 + 2}) +} + +type rsPodLinksOnly struct{} + +func (rsPodLinksOnly) Admit(parent, child utilobject.Key, parentIsSource bool, class string) tfconfig.LinkSelector { + if parent.Resource == "replicasets" && child.Resource == "pods" { + return rsPodLinksOnly{} + } else { + return nil + } } diff --git a/pkg/frontend/reader/reader.go 
b/pkg/frontend/reader/reader.go index 78deb925..e7b10306 100644 --- a/pkg/frontend/reader/reader.go +++ b/pkg/frontend/reader/reader.go @@ -175,7 +175,7 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace } merger := merge.Merger[any]{} - if err := merger.AddTraces(twmList); err != nil { + if _, err := merger.AddTraces(twmList); err != nil { return nil, fmt.Errorf("group traces by object: %w", err) } @@ -184,7 +184,7 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace ctx, config.LinkSelector, query.StartTimeMin, query.StartTimeMax, - merge.ListWithBackend[any](reader.Backend, reflectutil.Identity[any]), + mergeListWithBackend[any](reader.Backend, reflectutil.Identity[any]), reader.options.followLinkConcurrency, reader.options.followLinkLimit, false, ); err != nil { return nil, fmt.Errorf("follow links: %w", err) @@ -206,14 +206,14 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace for _, mergeTree := range mergeTrees { cacheId := generateCacheId(config.Id) - trace, extensionCache, err := reader.prepareEntry(ctx, config, rootKey, query, mergeTree.Tree, cacheId) + trace, extensionCache, err := reader.prepareEntry(ctx, rootKey, query, mergeTree.Tree, cacheId) if err != nil { return nil, err } traces = append(traces, trace) - cacheEntry, err := reader.storeCache(ctx, rootKey, query, mergeTree.Metadata, cacheId, extensionCache) + cacheEntry, err := reader.prepareCache(rootKey, query, mergeTree.Metadata, cacheId, extensionCache) if err != nil { return nil, err } @@ -234,7 +234,6 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace func (reader *spanReader) prepareEntry( ctx context.Context, - config *tfconfig.Config, rootKey *utilobject.Key, query *spanstore.TraceQueryParameters, tree *tftree.SpanTree, @@ -274,8 +273,7 @@ func (reader *spanReader) prepareEntry( return trace, extensions.Cache, nil } -func (reader *spanReader) storeCache( - ctx 
context.Context, +func (reader *spanReader) prepareCache( rootKey *utilobject.Key, query *spanstore.TraceQueryParameters, identifiers []any, @@ -318,8 +316,8 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( } displayMode := extractDisplayMode(cacheId) - var traces []merge.TraceWithMetadata[struct{}] + traces := make([]merge.TraceWithMetadata[struct{}], 0, len(entry.Identifiers)) for _, identifier := range entry.Identifiers { trace, err := reader.Backend.Get(ctx, identifier, cacheId, entry.StartTime, entry.EndTime) if err != nil { @@ -331,7 +329,7 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( } merger := merge.Merger[struct{}]{} - if err := merger.AddTraces(traces); err != nil { + if _, err := merger.AddTraces(traces); err != nil { return nil, fmt.Errorf("grouping traces by object: %w", err) } @@ -344,7 +342,7 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( ctx, displayConfig.LinkSelector, entry.StartTime, entry.EndTime, - merge.ListWithBackend[struct{}](reader.Backend, func(any) struct{} { return struct{}{} }), + mergeListWithBackend[struct{}](reader.Backend, func(any) struct{} { return struct{}{} }), reader.options.followLinkConcurrency, reader.options.followLinkLimit, true, ); err != nil { return nil, fmt.Errorf("cannot follow links: %w", err) @@ -425,3 +423,32 @@ func filterTimeRange(spans []*model.Span, startTime, endTime time.Time) []*model return retained } + +func mergeListWithBackend[M any](backend jaegerbackend.Backend, convertMetadata func(any) M) merge.ListFunc[M] { + return func( + ctx context.Context, + key utilobject.Key, + startTime time.Time, endTime time.Time, + limit int, + ) ([]merge.TraceWithMetadata[M], error) { + tts, err := backend.List(ctx, &spanstore.TraceQueryParameters{ + Tags: zconstants.KeyToSpanTags(key), + StartTimeMin: startTime, + StartTimeMax: endTime, + NumTraces: limit, + }) + if err != nil { + return nil, err + } + + 
twmList := make([]merge.TraceWithMetadata[M], len(tts)) + for i, tt := range tts { + twmList[i] = merge.TraceWithMetadata[M]{ + Tree: tt.Spans, + Metadata: convertMetadata(tt.Identifier), + } + } + + return twmList, nil + } +} diff --git a/pkg/frontend/tf/tree/tree.go b/pkg/frontend/tf/tree/tree.go index b4653cff..dfd2ce61 100644 --- a/pkg/frontend/tf/tree/tree.go +++ b/pkg/frontend/tf/tree/tree.go @@ -318,6 +318,8 @@ func (tree *SpanTree) Delete(spanId model.SpanID) { } // Adds all spans in a tree as a subtree in this span. +// +// TODO FIXME: when the two trees have overlapping span IDs, this does not work correctly. func (tree *SpanTree) AddTree(childTree *SpanTree, parentId model.SpanID) { if tree == childTree { panic("cannot add tree to itself") diff --git a/pkg/util/marshal/marshal.go b/pkg/util/marshal/marshal.go index 7fdd10a7..b2332cec 100644 --- a/pkg/util/marshal/marshal.go +++ b/pkg/util/marshal/marshal.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Utilities for unmarshaling in config files +// Utilities for unmarshalling in config files package utilmarshal import ( diff --git a/pkg/util/semaphore/semaphore.go b/pkg/util/semaphore/semaphore.go index ecd82e59..da68928b 100644 --- a/pkg/util/semaphore/semaphore.go +++ b/pkg/util/semaphore/semaphore.go @@ -108,12 +108,23 @@ func (sem *Semaphore) Schedule(task Task) { } } else { if publish != nil { + sem.doneWg.Add(1) + wrappedPublish := func() error { + defer sem.doneWg.Done() + return publish() + } + select { - case sem.publishCh <- publish: + case sem.publishCh <- wrappedPublish: // publishCh has zero capacity, so this case blocks until the main goroutine selects the publishCh case, - // so we can ensure that publishCh is received before calling sem.doneWg.Done() + // so we can ensure that publishCh is received before calling sem.doneWg.Done(). 
+ // However we need to call doneWg again to ensure that + // schedules during publish are called before this function brings doneWg to zero. + + // the main goroutine will call `sem.doneWg.Done()` for us. case <-sem.errNotifyCh: // no need to publish if the caller received error + sem.doneWg.Done() } } } diff --git a/pkg/util/zconstants/link.go b/pkg/util/zconstants/link.go index dea0d5ad..dd17ffa5 100644 --- a/pkg/util/zconstants/link.go +++ b/pkg/util/zconstants/link.go @@ -51,12 +51,8 @@ func TagLinkedObject(tags map[string]string, ln LinkRef) { tags[LinkClass] = ln.Class } -func ObjectKeyFromSpan(span *model.Span) (utilobject.Key, bool) { +func ObjectKeyFromSpan(span *model.Span) utilobject.Key { tags := model.KeyValues(span.Tags) - traceSource, hasTraceSource := tags.FindByKey(TraceSource) - if !hasTraceSource || traceSource.VStr != TraceSourceObject { - return utilobject.Key{}, false - } cluster, _ := tags.FindByKey("cluster") group, _ := tags.FindByKey("group") @@ -70,7 +66,7 @@ func ObjectKeyFromSpan(span *model.Span) (utilobject.Key, bool) { Namespace: namespace.VStr, Name: name.VStr, } - return key, true + return key } func LinkedKeyFromSpan(span *model.Span) (utilobject.Key, bool) { From 5b9158b7d77af9c6733eb7554244473bf30f5b2e Mon Sep 17 00:00:00 2001 From: chankyin Date: Wed, 23 Aug 2023 11:38:07 +0800 Subject: [PATCH 06/16] fix(frontend/tf): remove the useless `children` pseudospan in trace mode --- hack/tfconfig.yaml | 9 ++- .../tf/defaults/step/collapse_nesting.go | 7 +- .../tf/defaults/step/extract_nesting.go | 5 +- .../tf/defaults/step/group_by_trace_source.go | 5 +- pkg/frontend/tf/defaults/step/util.go | 52 ------------ pkg/frontend/tf/transform.go | 3 +- pkg/util/marshal/marshal.go | 79 ++++++++++++++----- 7 files changed, 77 insertions(+), 83 deletions(-) delete mode 100644 pkg/frontend/tf/defaults/step/util.go diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index 4f2196ff..332f8b23 100644 --- a/hack/tfconfig.yaml +++ 
b/hack/tfconfig.yaml @@ -23,6 +23,10 @@ configs: batchName: initial - kind: Batch batchName: collapse + - kind: ExtractNestingVisitor + matchesPseudoType: + oneOf: ["linkClass"] + matchesName: - kind: CompactDurationVisitor - kind: Batch batchName: final @@ -36,7 +40,7 @@ configs: - kind: GroupByTraceSourceVisitor shouldBeGrouped: oneOf: ["event"] - negate: true + then: false - kind: CompactDurationVisitor - kind: Batch batchName: final @@ -131,9 +135,6 @@ batches: - name: collapse steps: - kind: CollapseNestingVisitor - shouldCollapse: - oneOf: [] - negate: true tagMappings: "audit": - fromSpanTag: "userAgent" diff --git a/pkg/frontend/tf/defaults/step/collapse_nesting.go b/pkg/frontend/tf/defaults/step/collapse_nesting.go index fac402a7..e1725793 100644 --- a/pkg/frontend/tf/defaults/step/collapse_nesting.go +++ b/pkg/frontend/tf/defaults/step/collapse_nesting.go @@ -24,6 +24,7 @@ import ( tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" "github.com/kubewharf/kelemetry/pkg/manager" + utilmarshal "github.com/kubewharf/kelemetry/pkg/util/marshal" "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) @@ -41,7 +42,7 @@ func init() { // // Must be followed by PruneTagsVisitor in the last step. 
type CollapseNestingVisitor struct { - ShouldCollapse StringFilter `json:"shouldCollapse"` // tests traceSource + ShouldCollapse utilmarshal.StringFilter `json:"shouldCollapse"` // tests traceSource TagMappings map[string][]TagMapping `json:"tagMappings"` // key = traceSource AuditDiffClasses AuditDiffClassification `json:"auditDiffClasses"` // key = prefix LogTypeMapping map[zconstants.LogType]string `json:"logTypeMapping"` // key = log type, value = log field @@ -104,7 +105,7 @@ func (classes *AuditDiffClassification) Get(prefix string) *AuditDiffClass { } func (visitor CollapseNestingVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { - if _, hasTag := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); !hasTag { + if _, isPseudo := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); !isPseudo { return visitor } @@ -128,7 +129,7 @@ func (visitor CollapseNestingVisitor) processChild(tree *tftree.SpanTree, span * return } traceSource := traceSourceKv.VStr - if !visitor.ShouldCollapse.Test(traceSource) { + if !visitor.ShouldCollapse.Matches(traceSource) { return } diff --git a/pkg/frontend/tf/defaults/step/extract_nesting.go b/pkg/frontend/tf/defaults/step/extract_nesting.go index 21e92974..0d4e3b69 100644 --- a/pkg/frontend/tf/defaults/step/extract_nesting.go +++ b/pkg/frontend/tf/defaults/step/extract_nesting.go @@ -20,6 +20,7 @@ import ( tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" "github.com/kubewharf/kelemetry/pkg/manager" + utilmarshal "github.com/kubewharf/kelemetry/pkg/util/marshal" "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) @@ -34,7 +35,7 @@ func init() { // Deletes spans matching MatchesPseudoType and brings their children one level up. type ExtractNestingVisitor struct { // Filters the trace sources to delete. 
- MatchesPseudoType StringFilter `json:"matchesPseudoType"` + MatchesPseudoType utilmarshal.StringFilter `json:"matchesPseudoType"` } func (ExtractNestingVisitor) Kind() string { return "ExtractNestingVisitor" } @@ -46,7 +47,7 @@ func (visitor ExtractNestingVisitor) Enter(tree *tftree.SpanTree, span *model.Sp } if pseudoType, ok := model.KeyValues(span.Tags).FindByKey(zconstants.PseudoType); ok { - if visitor.MatchesPseudoType.Test(pseudoType.AsString()) { + if visitor.MatchesPseudoType.Matches(pseudoType.AsString()) { childrenMap := tree.Children(span.SpanID) childrenCopy := make([]model.SpanID, 0, len(childrenMap)) for childId := range childrenMap { diff --git a/pkg/frontend/tf/defaults/step/group_by_trace_source.go b/pkg/frontend/tf/defaults/step/group_by_trace_source.go index 2bf84931..b341d3a8 100644 --- a/pkg/frontend/tf/defaults/step/group_by_trace_source.go +++ b/pkg/frontend/tf/defaults/step/group_by_trace_source.go @@ -22,6 +22,7 @@ import ( tfconfig "github.com/kubewharf/kelemetry/pkg/frontend/tf/config" tftree "github.com/kubewharf/kelemetry/pkg/frontend/tf/tree" "github.com/kubewharf/kelemetry/pkg/manager" + utilmarshal "github.com/kubewharf/kelemetry/pkg/util/marshal" "github.com/kubewharf/kelemetry/pkg/util/zconstants" ) @@ -37,7 +38,7 @@ const myPseudoType = "groupByTraceSource" // Splits span logs into pseudospans grouped by traceSource. 
type GroupByTraceSourceVisitor struct { - ShouldBeGrouped StringFilter `json:"shouldBeGrouped"` + ShouldBeGrouped utilmarshal.StringFilter `json:"shouldBeGrouped"` } func (GroupByTraceSourceVisitor) Kind() string { return "GroupByTraceSourceVisitor" } @@ -54,7 +55,7 @@ func (visitor GroupByTraceSourceVisitor) Enter(tree *tftree.SpanTree, span *mode index := map[string][]model.Log{} for _, log := range span.Logs { traceSource, hasTraceSource := model.KeyValues(log.Fields).FindByKey(zconstants.TraceSource) - if hasTraceSource && visitor.ShouldBeGrouped.Test(traceSource.AsString()) { + if hasTraceSource && visitor.ShouldBeGrouped.Matches(traceSource.AsString()) { index[traceSource.AsString()] = append(index[traceSource.AsString()], log) } else { remainingLogs = append(remainingLogs, log) diff --git a/pkg/frontend/tf/defaults/step/util.go b/pkg/frontend/tf/defaults/step/util.go deleted file mode 100644 index a3864870..00000000 --- a/pkg/frontend/tf/defaults/step/util.go +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2023 The Kelemetry Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tfstep - -import ( - "encoding/json" - "strings" - - "k8s.io/apimachinery/pkg/util/sets" -) - -type StringFilter struct { - OneOf []string `json:"oneOf"` - Negate bool `json:"negate"` - IgnoreCase bool `json:"ignoreCase"` -} - -// Test whether a string matches the filter. 
-func (f *StringFilter) Test(s string) bool { - for _, choice := range f.OneOf { - if (f.IgnoreCase && strings.EqualFold(s, choice)) || s == choice { - return !f.Negate - } - } - - return f.Negate -} - -type JsonStringSet struct { - set sets.Set[string] -} - -func (set *JsonStringSet) UnmarshalJSON(buf []byte) error { - strings := []string{} - if err := json.Unmarshal(buf, &strings); err != nil { - return err - } - set.set = sets.New[string](strings...) - return nil -} diff --git a/pkg/frontend/tf/transform.go b/pkg/frontend/tf/transform.go index 0b011342..4b1849ae 100644 --- a/pkg/frontend/tf/transform.go +++ b/pkg/frontend/tf/transform.go @@ -36,8 +36,7 @@ func init() { type TransformerOptions struct{} -func (options *TransformerOptions) Setup(fs *pflag.FlagSet) { -} +func (options *TransformerOptions) Setup(fs *pflag.FlagSet) {} func (options *TransformerOptions) EnableFlag() *bool { return nil } diff --git a/pkg/util/marshal/marshal.go b/pkg/util/marshal/marshal.go index b2332cec..3066a426 100644 --- a/pkg/util/marshal/marshal.go +++ b/pkg/util/marshal/marshal.go @@ -21,6 +21,8 @@ import ( "regexp" "strings" + "k8s.io/apimachinery/pkg/util/sets" + utilobject "github.com/kubewharf/kelemetry/pkg/util/object" ) @@ -92,45 +94,86 @@ func (filter *ObjectFilter) Matches(key utilobject.Key) bool { return true } +type stringPredicate = func(string) bool + type StringFilter struct { - fn func(s string) bool + fn stringPredicate } -func (f *StringFilter) UnmarshalJSON(buf []byte) error { - var pattern string - if err := json.Unmarshal(buf, &pattern); err == nil { - f.fn = func(s string) bool { return s == pattern } - return nil - } +type fields struct { + Exact Optional[string] `json:"exact"` + OneOf Optional[[]string] `json:"oneOf"` + CaseFold Optional[string] `json:"caseInsensitive"` + Regex Optional[string] `json:"regex"` + Then Optional[bool] `json:"then"` +} - var value struct { - Exact Optional[string] `json:"exact"` - CaseFold Optional[string] 
`json:"caseInsensitive"` - Regex Optional[string] `json:"regex"` +func (value fields) getBasePredicate() (stringPredicate, error) { + isSet := 0 + for _, b := range []bool{value.Exact.IsSet, value.OneOf.IsSet, value.CaseFold.IsSet, value.Regex.IsSet} { + if b { + isSet += 1 + } } - if err := json.Unmarshal(buf, &value); err != nil { - return err + if isSet > 1 { + return nil, fmt.Errorf("string filter must set exactly one of `exact`, `oneOf`, `caseInsensitive` or `regex`") } if value.Exact.IsSet { - f.fn = func(s string) bool { return s == value.Exact.Value } + return func(s string) bool { return s == value.Exact.Value }, nil + } else if value.OneOf.IsSet { + options := sets.New[string](value.OneOf.Value...) + return options.Has, nil } else if value.CaseFold.IsSet { - f.fn = func(s string) bool { return strings.EqualFold(value.CaseFold.Value, value.CaseFold.Value) } + return func(s string) bool { return strings.EqualFold(value.CaseFold.Value, value.CaseFold.Value) }, nil } else if value.Regex.IsSet { regex, err := regexp.Compile(value.Regex.Value) if err != nil { - return fmt.Errorf("pattern contains invalid regex: %w", err) + return nil, fmt.Errorf("pattern contains invalid regex: %w", err) } - f.fn = regex.MatchString + return regex.MatchString, nil } else { - return fmt.Errorf("no filter selected") + return func(string) bool { return true }, nil // caller will change value to `then` + } +} + +func (f *StringFilter) UnmarshalJSON(buf []byte) error { + var pattern string + if err := json.Unmarshal(buf, &pattern); err == nil { + f.fn = func(s string) bool { return s == pattern } + return nil + } + + var value fields + if err := json.Unmarshal(buf, &value); err != nil { + return err + } + + predicate, err := value.getBasePredicate() + if err != nil { + return err + } + + then := value.Then.GetOr(true) + f.fn = func(s string) bool { + base := predicate(s) + if base { + return then + } else { + return !then + } } return nil } func (f *StringFilter) Matches(subject 
string) bool { + if f.fn == nil { + // string filter not set, take default then = true + return true + } + return f.fn(subject) } From 85c0d9bab750f2562f995f5edb73a31c839b19ff Mon Sep 17 00:00:00 2001 From: chankyin Date: Wed, 23 Aug 2023 11:49:26 +0800 Subject: [PATCH 07/16] chore: fix lints --- pkg/frontend/reader/merge/merge.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go index ede5e983..d3b2f90f 100644 --- a/pkg/frontend/reader/merge/merge.go +++ b/pkg/frontend/reader/merge/merge.go @@ -143,7 +143,12 @@ func (fl *followLinkPool[M]) schedule(key objKey, linkSelector tfconfig.LinkSele }) } -type ListFunc[M any] func(ctx context.Context, key objKey, startTime time.Time, endTime time.Time, limit int) ([]TraceWithMetadata[M], error) +type ListFunc[M any] func( + ctx context.Context, + key objKey, + startTime, endTime time.Time, + limit int, +) ([]TraceWithMetadata[M], error) func (merger *Merger[M]) FollowLinks( ctx context.Context, @@ -199,8 +204,9 @@ func (merger *Merger[M]) MergeTraces() ([]*MergeTree[M], error) { } } - var mergeTrees []*MergeTree[M] - for _, keys := range merger.findConnectedComponents(merger.objects, abLinks) { + connectedComps := merger.findConnectedComponents(merger.objects, abLinks) + mergeTrees := make([]*MergeTree[M], 0, len(connectedComps)) + for _, keys := range connectedComps { var members []*object[M] for _, key := range keys { members = append(members, merger.objects[key]) From 7f9a85ba86f4361d418772b1e1b09c2a5548bffd Mon Sep 17 00:00:00 2001 From: chankyin Date: Wed, 23 Aug 2023 11:59:38 +0800 Subject: [PATCH 08/16] ci: check display mode ff26 by default --- .github/workflows/build.yml | 10 +++++----- hack/tfconfig.yaml | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31375626..f722e90d 100644 --- a/.github/workflows/build.yml +++ 
b/.github/workflows/build.yml @@ -136,7 +136,7 @@ jobs: TRACE_ID=$(grep "Location: /trace/" curl-output.http | cut -d/ -f3 | tr -d '\r') mkdir -p output/api/traces - for mode in ff{0,1,2,3}{0,1}00000{0,1}; do + for mode in ff{0,1,2,3}{0,1,5,6}000000; do mode_trace=${mode}${TRACE_ID:10} curl -o output/api/traces/$mode_trace http://localhost:16686/api/traces/$mode_trace done @@ -172,12 +172,12 @@ jobs: Redirecting to ${basename} - + EOF - mkdir -p output/trace/ff20000000$(cat $trace_dir/trace_id) - cp spa.html output/trace/ff20000000$(cat $trace_dir/trace_id)/index.html + mkdir -p output/trace/ff26000000$(cat $trace_dir/trace_id) + cp spa.html output/trace/ff26000000$(cat $trace_dir/trace_id)/index.html cp $trace_dir/api/traces/* output/api/traces/ done mv output/dot-usage/* output/ @@ -261,7 +261,7 @@ jobs: promises.push((async () => { const traceName = await fs.readFile(`pages/${trace}/trace_name`, {encoding: "utf8"}) const partialTraceId = await fs.readFile(`pages/${trace}/trace_id`, {encoding: "utf8"}) - const traceId = "ff20000000" + partialTraceId + const traceId = "ff26000000" + partialTraceId console.log(`Loading trace ${traceName}`) diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index 332f8b23..0820b8bb 100644 --- a/hack/tfconfig.yaml +++ b/hack/tfconfig.yaml @@ -52,7 +52,6 @@ modifiers: modifierName: link-selector args: modifierClass: parent - includeSiblings: false "02000000": displayName: ancestors tree From e808b2a1eae285d0fc3bc381187356afc0b99e21 Mon Sep 17 00:00:00 2001 From: chankyin Date: Thu, 24 Aug 2023 10:36:06 +0800 Subject: [PATCH 09/16] fix: sort modifier names in display mode for stable output --- pkg/frontend/tf/config/config.go | 20 ++++++++++++++++++++ pkg/frontend/tf/config/file/file.go | 5 ++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pkg/frontend/tf/config/config.go b/pkg/frontend/tf/config/config.go index 32b1b419..a82ba3a1 100644 --- a/pkg/frontend/tf/config/config.go +++ b/pkg/frontend/tf/config/config.go 
@@ -15,10 +15,14 @@ package tfconfig import ( + "fmt" + "sort" "strconv" + "strings" "github.com/kubewharf/kelemetry/pkg/frontend/extension" "github.com/kubewharf/kelemetry/pkg/manager" + "k8s.io/apimachinery/pkg/util/sets" ) func init() { @@ -51,6 +55,10 @@ type Config struct { Id Id // The config name, used in search page display. Name string + // Base config name without modifiers, used to help reconstruct the name. + BaseName string + // Names of modifiers, used to help reconstruct the name. + ModifierNames sets.Set[string] // Only links with roles in this set are followed. LinkSelector LinkSelector // The extension traces for this config. @@ -59,6 +67,16 @@ type Config struct { Steps []Step } +func (config *Config) RecomputeName() { + modifiers := config.ModifierNames.UnsortedList() + sort.Strings(modifiers) + if len(modifiers) > 0 { + config.Name = fmt.Sprintf("%s [%s]", config.BaseName, strings.Join(modifiers, "+")) + } else { + config.Name = config.BaseName + } +} + func (config *Config) Clone() *Config { steps := make([]Step, len(config.Steps)) copy(steps, config.Steps) // no need to deep clone each step @@ -69,6 +87,8 @@ func (config *Config) Clone() *Config { return &Config{ Id: config.Id, Name: config.Name, + BaseName: config.BaseName, + ModifierNames: config.ModifierNames.Clone(), LinkSelector: config.LinkSelector, // modifier changes LinkSelector by wrapping the previous value Extensions: extensions, Steps: steps, diff --git a/pkg/frontend/tf/config/file/file.go b/pkg/frontend/tf/config/file/file.go index 707fc633..949301ce 100644 --- a/pkg/frontend/tf/config/file/file.go +++ b/pkg/frontend/tf/config/file/file.go @@ -153,7 +153,8 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { priority: modifierConfig.Priority, fn: func(config *tfconfig.Config) { config.Id |= bitmask - config.Name += fmt.Sprintf(" [%s]", displayName) + config.ModifierNames.Insert(displayName) + config.RecomputeName() modifier.Modify(config) }, }) @@ -181,6 +182,8 @@ 
func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { config := &tfconfig.Config{ Id: raw.Id, Name: raw.Name, + BaseName: raw.Name, + ModifierNames: sets.New[string](), LinkSelector: tfconfig.ConstantLinkSelector(false), Steps: steps, } From 213b88e987f9f5ed620508c3310480593ca66f9a Mon Sep 17 00:00:00 2001 From: chankyin Date: Thu, 24 Aug 2023 10:56:46 +0800 Subject: [PATCH 10/16] feat: add depth limit for link selectors --- hack/tfconfig.yaml | 18 ++++++++++++------ .../tf/defaults/modifier/link_selector.go | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index 0820b8bb..e479889c 100644 --- a/hack/tfconfig.yaml +++ b/hack/tfconfig.yaml @@ -48,25 +48,31 @@ configs: modifiers: # Multiple active link-selector modifiers are additive (union) "01000000": - displayName: ancestors + # the entire tree under the deepest (up to 3 levels) ancestor + displayName: full tree modifierName: link-selector args: - modifierClass: parent + modifierClass: owner-ref includeSiblings: false + depth: 3 "02000000": - displayName: ancestors tree + # include all ancestors (up to 3) but not siblings of ancestors + displayName: ancestors modifierName: link-selector args: - modifierClass: parent + modifierClass: owner-ref includeSiblings: true + depth: 3 "04000000": - displayName: all owned objects + # the entire subtree under this object + displayName: owned objects modifierName: link-selector args: - modifierClass: children + modifierClass: owner-ref ifAll: - linkClass: children fromChild: false + depth: 3 # Uncomment to enable extension trace from apiserver # "00000001": diff --git a/pkg/frontend/tf/defaults/modifier/link_selector.go b/pkg/frontend/tf/defaults/modifier/link_selector.go index 3627e0cd..d55d5cce 100644 --- a/pkg/frontend/tf/defaults/modifier/link_selector.go +++ b/pkg/frontend/tf/defaults/modifier/link_selector.go @@ -72,6 +72,7 @@ type LinkSelectorModifier struct { Class string 
`json:"modifierClass"` IncludeSiblings bool `json:"includeSiblings"` PatternFilters []LinkPattern `json:"ifAll"` + Depth utilmarshal.Optional[uint32] `json:"depth"` } type LinkPattern struct { @@ -118,6 +119,12 @@ func (modifier *LinkSelectorModifier) Modify(config *tfconfig.Config) { denySiblingsLinkSelector{}, } } + if modifier.Depth.IsSet { + selector = tfconfig.IntersectLinkSelector{ + selector, + depthLinkSelector(modifier.Depth.Value), + } + } config.LinkSelector = tfconfig.UnionLinkSelector{config.LinkSelector, selector} } @@ -155,3 +162,12 @@ func (s patternLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, return s } + +type depthLinkSelector uint32 + +func (d depthLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) tfconfig.LinkSelector { + if d == 0 { + return nil + } + return d - 1 +} From aa814c46e7de2dc149a7ef5189451d5212a5ab29 Mon Sep 17 00:00:00 2001 From: chankyin Date: Thu, 24 Aug 2023 11:50:54 +0800 Subject: [PATCH 11/16] feat: link selector distance limit should be directed --- docs/USER_GUIDE.md | 8 ++- hack/tfconfig.yaml | 8 +-- pkg/frontend/http/trace/server.go | 2 +- .../tf/defaults/modifier/link_selector.go | 52 +++++++++++++------ 4 files changed, 47 insertions(+), 23 deletions(-) diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index ea1417fd..dc6b026b 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -20,8 +20,12 @@ The "Service" field selects one of the display modes: Additional information is available in event tags. - `timeline`: All events are displayed as children of the root object. -By default, the whole trace is displayed, including parent and sibling objects of the searched object. -Enabling the `exclusive` option limits the output to the subtree under the object matched in the search. +By default, only the trace for a single object is displayed. 
+More traces are available by configuration: + +- `full tree`: view the full tree from the deepest ancestor +- `ancestors`: include transitive owners +- `children`: include child objects ### Cluster diff --git a/hack/tfconfig.yaml b/hack/tfconfig.yaml index e479889c..2bbdfc0e 100644 --- a/hack/tfconfig.yaml +++ b/hack/tfconfig.yaml @@ -54,7 +54,8 @@ modifiers: args: modifierClass: owner-ref includeSiblings: false - depth: 3 + upwardDistance: 3 + downwardDistance: 3 "02000000": # include all ancestors (up to 3) but not siblings of ancestors displayName: ancestors @@ -62,7 +63,7 @@ modifiers: args: modifierClass: owner-ref includeSiblings: true - depth: 3 + upwardDistance: 3 "04000000": # the entire subtree under this object displayName: owned objects @@ -72,11 +73,10 @@ modifiers: ifAll: - linkClass: children fromChild: false - depth: 3 + downwardDistance: 3 # Uncomment to enable extension trace from apiserver # "00000001": -# # We want to run extension modifiers after exclusive modifier to avoid fetching unused traces # displayName: apiserver trace # modifierName: extension # args: diff --git a/pkg/frontend/http/trace/server.go b/pkg/frontend/http/trace/server.go index c7799f90..bd92be78 100644 --- a/pkg/frontend/http/trace/server.go +++ b/pkg/frontend/http/trace/server.go @@ -105,7 +105,7 @@ func (server *server) handleTrace(ctx *gin.Context, metric *requestMetric) (code return 400, fmt.Errorf("invalid param %w", err) } - trace, code, err := server.findTrace(metric, "tracing (exclusive)", query) + trace, code, err := server.findTrace(metric, "tracing", query) if err != nil { return code, err } diff --git a/pkg/frontend/tf/defaults/modifier/link_selector.go b/pkg/frontend/tf/defaults/modifier/link_selector.go index d55d5cce..80638b76 100644 --- a/pkg/frontend/tf/defaults/modifier/link_selector.go +++ b/pkg/frontend/tf/defaults/modifier/link_selector.go @@ -72,7 +72,8 @@ type LinkSelectorModifier struct { Class string `json:"modifierClass"` IncludeSiblings bool 
`json:"includeSiblings"` PatternFilters []LinkPattern `json:"ifAll"` - Depth utilmarshal.Optional[uint32] `json:"depth"` + UpwardDistance utilmarshal.Optional[uint32] `json:"upwardDistance"` + DownwardDistance utilmarshal.Optional[uint32] `json:"downwardDistance"` } type LinkPattern struct { @@ -112,21 +113,26 @@ func (modifier *LinkSelectorModifier) ModifierClass() string { } func (modifier *LinkSelectorModifier) Modify(config *tfconfig.Config) { - var selector tfconfig.LinkSelector = patternLinkSelector{patterns: modifier.PatternFilters} + intersectSelector := tfconfig.IntersectLinkSelector{ + patternLinkSelector{patterns: modifier.PatternFilters}, + } if !modifier.IncludeSiblings { - selector = tfconfig.IntersectLinkSelector{ - selector, - denySiblingsLinkSelector{}, - } + intersectSelector = append(intersectSelector, denySiblingsLinkSelector{}) } - if modifier.Depth.IsSet { - selector = tfconfig.IntersectLinkSelector{ - selector, - depthLinkSelector(modifier.Depth.Value), - } + if modifier.UpwardDistance.IsSet { + intersectSelector = append( + intersectSelector, + directedDistanceLinkSelector{distance: modifier.UpwardDistance.Value, direction: directionUpwards}, + ) + } + if modifier.DownwardDistance.IsSet { + intersectSelector = append( + intersectSelector, + directedDistanceLinkSelector{distance: modifier.DownwardDistance.Value, direction: directionDownwards}, + ) } - config.LinkSelector = tfconfig.UnionLinkSelector{config.LinkSelector, selector} + config.LinkSelector = tfconfig.UnionLinkSelector{config.LinkSelector, intersectSelector} } type denySiblingsLinkSelector struct { @@ -163,11 +169,25 @@ func (s patternLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, return s } -type depthLinkSelector uint32 +type direction bool +const ( + directionUpwards direction = true + directionDownwards direction = false +) +type directedDistanceLinkSelector struct { + direction direction + distance uint32 +} -func (d depthLinkSelector) Admit(parent 
utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) tfconfig.LinkSelector { - if d == 0 { +func (d directedDistanceLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) tfconfig.LinkSelector { + if isFromParent != (d.direction == directionDownwards) { + return d + } + if d.distance == 0 { return nil } - return d - 1 + return directedDistanceLinkSelector{ + direction: d.direction, + distance: d.distance - 1, + } } From 862b0c829212f42f2f653b6630c92e306287ffa0 Mon Sep 17 00:00:00 2001 From: chankyin Date: Thu, 24 Aug 2023 16:41:00 +0800 Subject: [PATCH 12/16] fix(frontend/reader): copyTreeVisitor should use the target tree span ID --- pkg/frontend/reader/merge/merge.go | 14 ++++++++--- pkg/frontend/tf/config/config.go | 15 +++++------ pkg/frontend/tf/config/file/file.go | 10 ++++---- .../tf/defaults/modifier/link_selector.go | 25 ++++++++++++------- 4 files changed, 39 insertions(+), 25 deletions(-) diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go index d3b2f90f..3a97a570 100644 --- a/pkg/frontend/reader/merge/merge.go +++ b/pkg/frontend/reader/merge/merge.go @@ -249,7 +249,7 @@ func (obj *object[M]) merge(trace *tftree.SpanTree, metadata M) error { mergeRoot(obj.tree.Root, trace.Root) - copyVisitor := ©TreeVisitor{to: obj.tree} + copyVisitor := ©TreeVisitor{to: obj.tree, toParent: obj.tree.Root.SpanID} trace.Visit(copyVisitor) if copyVisitor.err != nil { return copyVisitor.err @@ -337,8 +337,9 @@ type TargetLink struct { } type copyTreeVisitor struct { - to *tftree.SpanTree - err error + to *tftree.SpanTree + toParent model.SpanID + err error } func (visitor *copyTreeVisitor) Enter(tree *tftree.SpanTree, span *model.Span) tftree.TreeVisitor { @@ -349,7 +350,12 @@ func (visitor *copyTreeVisitor) Enter(tree *tftree.SpanTree, span *model.Span) t return nil } - visitor.to.Add(spanCopy, span.ParentSpanID()) + visitor.to.Add(spanCopy, visitor.toParent) + + return 
©TreeVisitor{ + to: visitor.to, + toParent: spanCopy.SpanID, + } } return visitor diff --git a/pkg/frontend/tf/config/config.go b/pkg/frontend/tf/config/config.go index a82ba3a1..57524228 100644 --- a/pkg/frontend/tf/config/config.go +++ b/pkg/frontend/tf/config/config.go @@ -20,9 +20,10 @@ import ( "strconv" "strings" + "k8s.io/apimachinery/pkg/util/sets" + "github.com/kubewharf/kelemetry/pkg/frontend/extension" "github.com/kubewharf/kelemetry/pkg/manager" - "k8s.io/apimachinery/pkg/util/sets" ) func init() { @@ -85,13 +86,13 @@ func (config *Config) Clone() *Config { copy(extensions, config.Extensions) return &Config{ - Id: config.Id, - Name: config.Name, - BaseName: config.BaseName, + Id: config.Id, + Name: config.Name, + BaseName: config.BaseName, ModifierNames: config.ModifierNames.Clone(), - LinkSelector: config.LinkSelector, // modifier changes LinkSelector by wrapping the previous value - Extensions: extensions, - Steps: steps, + LinkSelector: config.LinkSelector, // modifier changes LinkSelector by wrapping the previous value + Extensions: extensions, + Steps: steps, } } diff --git a/pkg/frontend/tf/config/file/file.go b/pkg/frontend/tf/config/file/file.go index 949301ce..e076d129 100644 --- a/pkg/frontend/tf/config/file/file.go +++ b/pkg/frontend/tf/config/file/file.go @@ -180,12 +180,12 @@ func (p *FileProvider) loadJsonBytes(jsonBytes []byte) error { } config := &tfconfig.Config{ - Id: raw.Id, - Name: raw.Name, - BaseName: raw.Name, + Id: raw.Id, + Name: raw.Name, + BaseName: raw.Name, ModifierNames: sets.New[string](), - LinkSelector: tfconfig.ConstantLinkSelector(false), - Steps: steps, + LinkSelector: tfconfig.ConstantLinkSelector(false), + Steps: steps, } p.register(registeredConfig{config: config, modifierClasses: sets.New[string]()}) diff --git a/pkg/frontend/tf/defaults/modifier/link_selector.go b/pkg/frontend/tf/defaults/modifier/link_selector.go index 80638b76..5d49f590 100644 --- a/pkg/frontend/tf/defaults/modifier/link_selector.go +++ 
b/pkg/frontend/tf/defaults/modifier/link_selector.go @@ -69,11 +69,11 @@ func (*LinkSelectorModifierFactory) Build(jsonBuf []byte) (tfconfig.Modifier, er } type LinkSelectorModifier struct { - Class string `json:"modifierClass"` - IncludeSiblings bool `json:"includeSiblings"` - PatternFilters []LinkPattern `json:"ifAll"` - UpwardDistance utilmarshal.Optional[uint32] `json:"upwardDistance"` - DownwardDistance utilmarshal.Optional[uint32] `json:"downwardDistance"` + Class string `json:"modifierClass"` + IncludeSiblings bool `json:"includeSiblings"` + PatternFilters []LinkPattern `json:"ifAll"` + UpwardDistance utilmarshal.Optional[uint32] `json:"upwardDistance"` + DownwardDistance utilmarshal.Optional[uint32] `json:"downwardDistance"` } type LinkPattern struct { @@ -170,16 +170,23 @@ func (s patternLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, } type direction bool + const ( - directionUpwards direction = true + directionUpwards direction = true directionDownwards direction = false ) + type directedDistanceLinkSelector struct { direction direction - distance uint32 + distance uint32 } -func (d directedDistanceLinkSelector) Admit(parent utilobject.Key, child utilobject.Key, isFromParent bool, linkClass string) tfconfig.LinkSelector { +func (d directedDistanceLinkSelector) Admit( + parent utilobject.Key, + child utilobject.Key, + isFromParent bool, + linkClass string, +) tfconfig.LinkSelector { if isFromParent != (d.direction == directionDownwards) { return d } @@ -188,6 +195,6 @@ func (d directedDistanceLinkSelector) Admit(parent utilobject.Key, child utilobj } return directedDistanceLinkSelector{ direction: d.direction, - distance: d.distance - 1, + distance: d.distance - 1, } } From e98e4a863b4202634c7d43625da08fbedfa9b398 Mon Sep 17 00:00:00 2001 From: chankyin Date: Thu, 24 Aug 2023 18:58:32 +0800 Subject: [PATCH 13/16] chore: set --linker-worker-count in chart and quickstart --- charts/kelemetry/templates/_helpers.yaml | 1 + 
charts/kelemetry/values.yaml | 3 +++ quickstart.docker-compose.yaml | 1 + 3 files changed, 5 insertions(+) diff --git a/charts/kelemetry/templates/_helpers.yaml b/charts/kelemetry/templates/_helpers.yaml index 2524c062..16ac93ee 100644 --- a/charts/kelemetry/templates/_helpers.yaml +++ b/charts/kelemetry/templates/_helpers.yaml @@ -111,6 +111,7 @@ span-cache-etcd-prefix: {{ .Values.aggregator.spanCache.etcd.prefix | toJson }} {{- end }} {{/* LINKERS */}} +linker-worker-count: {{ .Values.linkers.workerCount }} annotation-linker-enable: {{ .Values.linkers.annotation }} owner-linker-enable: {{ .Values.linkers.ownerReference }} diff --git a/charts/kelemetry/values.yaml b/charts/kelemetry/values.yaml index c29405f5..1c1654fd 100644 --- a/charts/kelemetry/values.yaml +++ b/charts/kelemetry/values.yaml @@ -323,6 +323,9 @@ aggregator: # Linkers associated objects together. linkers: + # Maximum number of concurrent link jobs. + # Each link job runs each of the linkers for a single object in series. + workerCount: 8 # Enable the owner linker, which links objects based on native owner references. ownerReference: true # Enable the annotation linker, which links objects based on the `kelemetry.kubewharf.io/parent-link` annotation. 
diff --git a/quickstart.docker-compose.yaml b/quickstart.docker-compose.yaml index dbaf8a37..8272a2fd 100644 --- a/quickstart.docker-compose.yaml +++ b/quickstart.docker-compose.yaml @@ -70,6 +70,7 @@ services: "--diff-cache-wrapper-enable", "--diff-controller-leader-election-enable=false", "--event-informer-leader-election-enable=false", + "--linker-worker-count=8", "--span-cache=etcd", "--span-cache-etcd-endpoints=etcd:2379", "--tracer-otel-endpoint=jaeger-collector:4317", From 842458c289462b280c9615f5006f9ae06b0e8dcc Mon Sep 17 00:00:00 2001 From: chankyin Date: Fri, 25 Aug 2023 11:31:47 +0800 Subject: [PATCH 14/16] fix(aggregator/linker/worker): add more metrics for job worker --- pkg/aggregator/aggregator.go | 11 ++++--- pkg/aggregator/linker/job/interface.go | 6 ++-- pkg/aggregator/linker/job/local/local.go | 13 ++++++-- pkg/aggregator/linker/job/worker/worker.go | 32 ++++++++++++++----- pkg/aggregator/linker/linker.go | 1 + pkg/annotationlinker/linker.go | 1 + .../backend/jaeger-storage/backend.go | 3 -- pkg/ownerlinker/linker.go | 1 + 8 files changed, 48 insertions(+), 20 deletions(-) diff --git a/pkg/aggregator/aggregator.go b/pkg/aggregator/aggregator.go index 76f7c33e..ae7d5f73 100644 --- a/pkg/aggregator/aggregator.go +++ b/pkg/aggregator/aggregator.go @@ -141,8 +141,9 @@ type sinceEventMetric struct { func (*sinceEventMetric) MetricName() string { return "aggregator_send_since_event" } type lazySpanMetric struct { - Cluster string - Result string + Cluster string + PseudoType zconstants.PseudoTypeValue + Result string } func (*lazySpanMetric) MetricName() string { return "aggregator_lazy_span" } @@ -286,8 +287,9 @@ func (agg *aggregator) GetOrCreatePseudoSpan( dedupId string, ) (_span tracer.SpanContext, _isNew bool, _err error) { lazySpanMetric := &lazySpanMetric{ - Cluster: object.Cluster, - Result: "error", + Cluster: object.Cluster, + PseudoType: pseudoType, + Result: "error", } defer agg.LazySpanMetric.DeferCount(agg.Clock.Now(), lazySpanMetric) 
@@ -295,6 +297,7 @@ func (agg *aggregator) GetOrCreatePseudoSpan( logger := agg.Logger. WithField("step", "GetOrCreatePseudoSpan"). + WithField("dedupId", dedupId). WithFields(object.AsFields("object")) defer func() { diff --git a/pkg/aggregator/linker/job/interface.go b/pkg/aggregator/linker/job/interface.go index 69f838ab..8c9979ca 100644 --- a/pkg/aggregator/linker/job/interface.go +++ b/pkg/aggregator/linker/job/interface.go @@ -28,7 +28,7 @@ type Publisher interface { } type Subscriber interface { - Subscribe(ctx context.Context) <-chan *LinkJob + Subscribe(ctx context.Context, name string) <-chan *LinkJob } func init() { @@ -52,8 +52,8 @@ type subscriberMux struct { *manager.Mux } -func (mux *subscriberMux) Subscribe(ctx context.Context) <-chan *LinkJob { - return mux.Impl().(Subscriber).Subscribe(ctx) +func (mux *subscriberMux) Subscribe(ctx context.Context, name string) <-chan *LinkJob { + return mux.Impl().(Subscriber).Subscribe(ctx, name) } type LinkJob struct { diff --git a/pkg/aggregator/linker/job/local/local.go b/pkg/aggregator/linker/job/local/local.go index 4b5296b7..47310d5d 100644 --- a/pkg/aggregator/linker/job/local/local.go +++ b/pkg/aggregator/linker/job/local/local.go @@ -22,6 +22,7 @@ import ( linkjob "github.com/kubewharf/kelemetry/pkg/aggregator/linker/job" "github.com/kubewharf/kelemetry/pkg/manager" + "github.com/kubewharf/kelemetry/pkg/metrics" "github.com/kubewharf/kelemetry/pkg/util/channel" ) @@ -68,17 +69,25 @@ func (publisher *publisher) Publish(job *linkjob.LinkJob) { } type subscriber struct { - Queue *queue + Queue *queue + Metrics metrics.Client manager.MuxImplBase } +type queueMetricTags struct { + Name string +} + +func (*queueMetricTags) MetricName() string { return "linker_local_worker_lag" } + func (subscriber *subscriber) Options() manager.Options { return &manager.NoOptions{} } func (subscriber *subscriber) Init() error { return nil } func (subscriber *subscriber) Start(ctx context.Context) error { return nil } func 
(subscriber *subscriber) Close(ctx context.Context) error { return nil } func (subscriber *subscriber) MuxImplName() (name string, isDefault bool) { return "local", true } -func (subscriber *subscriber) Subscribe(ctx context.Context) <-chan *linkjob.LinkJob { +func (subscriber *subscriber) Subscribe(ctx context.Context, name string) <-chan *linkjob.LinkJob { queue := channel.NewUnboundedQueue[*linkjob.LinkJob](16) + channel.InitMetricLoop(queue, subscriber.Metrics, &queueMetricTags{Name: name}) subscriber.Queue.subscribersMu.Lock() defer subscriber.Queue.subscribersMu.Unlock() diff --git a/pkg/aggregator/linker/job/worker/worker.go b/pkg/aggregator/linker/job/worker/worker.go index 948f3a1a..c5ee82d8 100644 --- a/pkg/aggregator/linker/job/worker/worker.go +++ b/pkg/aggregator/linker/job/worker/worker.go @@ -20,6 +20,7 @@ import ( "github.com/sirupsen/logrus" "github.com/spf13/pflag" + "k8s.io/utils/clock" "k8s.io/utils/pointer" "github.com/kubewharf/kelemetry/pkg/aggregator" @@ -45,18 +46,27 @@ func (options *workerOptions) Setup(fs *pflag.FlagSet) { func (options *workerOptions) EnableFlag() *bool { return pointer.Bool(options.WorkerCount > 0) } type worker struct { - options workerOptions - Logger logrus.FieldLogger - Linkers *manager.List[linker.Linker] - Subscriber linkjob.Subscriber - Aggregator aggregator.Aggregator + options workerOptions + Logger logrus.FieldLogger + Clock clock.Clock + Linkers *manager.List[linker.Linker] + Subscriber linkjob.Subscriber + Aggregator aggregator.Aggregator + ExecuteJobMetric *metrics.Metric[*executeJobMetric] ch <-chan *linkjob.LinkJob } +type executeJobMetric struct { + Linker string + Error metrics.LabeledError +} + +func (*executeJobMetric) MetricName() string { return "linker_job_exec" } + func (worker *worker) Options() manager.Options { return &worker.options } func (worker *worker) Init() error { - worker.ch = worker.Subscriber.Subscribe(context.Background()) // never unsubscribe + worker.ch = 
worker.Subscriber.Subscribe(context.Background(), "worker") // background context, never unsubscribe return nil } @@ -82,15 +92,21 @@ func (worker *worker) Close(ctx context.Context) error { return nil } func (worker *worker) executeJob(ctx context.Context, logger logrus.FieldLogger, job *linkjob.LinkJob) { for _, linker := range worker.Linkers.Impls { - linkerLogger := logger.WithField("linker", fmt.Sprintf("%T", linker)) + linkerLogger := logger.WithField("linker", linker.LinkerName()) if err := worker.execute(ctx, linkerLogger, linker, job); err != nil { - logger.WithError(err).Error("generating links") + linkerLogger.WithError(err).Error("generating links") } } } func (worker *worker) execute(ctx context.Context, logger logrus.FieldLogger, linker linker.Linker, job *linkjob.LinkJob) error { + logger.Debug("execute linker") + startTime := worker.Clock.Now() links, err := linker.Lookup(ctx, job.Object) + worker.ExecuteJobMetric.DeferCount(startTime, &executeJobMetric{ + Linker: linker.LinkerName(), + Error: err, + }) if err != nil { return metrics.LabelError(fmt.Errorf("calling linker: %w", err), "CallLinker") } diff --git a/pkg/aggregator/linker/linker.go b/pkg/aggregator/linker/linker.go index 76810566..2bf517ee 100644 --- a/pkg/aggregator/linker/linker.go +++ b/pkg/aggregator/linker/linker.go @@ -22,6 +22,7 @@ import ( ) type Linker interface { + LinkerName() string Lookup(ctx context.Context, object utilobject.Rich) ([]LinkerResult, error) } diff --git a/pkg/annotationlinker/linker.go b/pkg/annotationlinker/linker.go index 2dd616bd..33707d05 100644 --- a/pkg/annotationlinker/linker.go +++ b/pkg/annotationlinker/linker.go @@ -61,6 +61,7 @@ func (ctrl *controller) Init() error { return nil } func (ctrl *controller) Start(ctx context.Context) error { return nil } func (ctrl *controller) Close(ctx context.Context) error { return nil } +func (ctrl *controller) LinkerName() string { return "annotation-linker" } func (ctrl *controller) Lookup(ctx context.Context, 
object utilobject.Rich) ([]linker.LinkerResult, error) { raw := object.Raw diff --git a/pkg/frontend/backend/jaeger-storage/backend.go b/pkg/frontend/backend/jaeger-storage/backend.go index 8b6d4aca..dcad2262 100644 --- a/pkg/frontend/backend/jaeger-storage/backend.go +++ b/pkg/frontend/backend/jaeger-storage/backend.go @@ -188,9 +188,6 @@ func (backend *Backend) List( } traceThumbnails = append(traceThumbnails, thumbnail) - backend.Logger.WithField("ident", thumbnail.Identifier). - WithField("filteredSpans", len(thumbnail.Spans.GetSpans())). - Debug("matched trace") } } diff --git a/pkg/ownerlinker/linker.go b/pkg/ownerlinker/linker.go index 136e569a..9f055379 100644 --- a/pkg/ownerlinker/linker.go +++ b/pkg/ownerlinker/linker.go @@ -61,6 +61,7 @@ func (ctrl *Controller) Init() error { return nil } func (ctrl *Controller) Start(ctx context.Context) error { return nil } func (ctrl *Controller) Close(ctx context.Context) error { return nil } +func (ctrl *Controller) LinkerName() string { return "owner-linker" } func (ctrl *Controller) Lookup(ctx context.Context, object utilobject.Rich) ([]linker.LinkerResult, error) { raw := object.Raw From 601a60522e43d9e260a3d377f14710b297760a9a Mon Sep 17 00:00:00 2001 From: chankyin Date: Fri, 25 Aug 2023 13:34:57 +0800 Subject: [PATCH 15/16] fix(frontend/reader): expose original trace request to backend follower in context values --- dev.docker-compose.yaml | 1 + pkg/frontend/reader/reader.go | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/dev.docker-compose.yaml b/dev.docker-compose.yaml index 8f66a2b3..c951a95d 100644 --- a/dev.docker-compose.yaml +++ b/dev.docker-compose.yaml @@ -54,6 +54,7 @@ services: - 127.0.0.1:17272:17271 volumes: - badger:/mnt/badger + restart: always # Web frontend for raw trace database view. 
jaeger-query-raw: diff --git a/pkg/frontend/reader/reader.go b/pkg/frontend/reader/reader.go index e7b10306..2f611e40 100644 --- a/pkg/frontend/reader/reader.go +++ b/pkg/frontend/reader/reader.go @@ -184,7 +184,7 @@ func (reader *spanReader) FindTraces(ctx context.Context, query *spanstore.Trace ctx, config.LinkSelector, query.StartTimeMin, query.StartTimeMax, - mergeListWithBackend[any](reader.Backend, reflectutil.Identity[any]), + mergeListWithBackend[any](reader.Backend, reflectutil.Identity[any], OriginalTraceRequest{FindTraces: query}), reader.options.followLinkConcurrency, reader.options.followLinkLimit, false, ); err != nil { return nil, fmt.Errorf("follow links: %w", err) @@ -342,7 +342,7 @@ func (reader *spanReader) GetTrace(ctx context.Context, cacheId model.TraceID) ( ctx, displayConfig.LinkSelector, entry.StartTime, entry.EndTime, - mergeListWithBackend[struct{}](reader.Backend, func(any) struct{} { return struct{}{} }), + mergeListWithBackend[struct{}](reader.Backend, func(any) struct{} { return struct{}{} }, OriginalTraceRequest{GetTrace: &cacheId}), reader.options.followLinkConcurrency, reader.options.followLinkLimit, true, ); err != nil { return nil, fmt.Errorf("cannot follow links: %w", err) @@ -424,19 +424,32 @@ func filterTimeRange(spans []*model.Span, startTime, endTime time.Time) []*model return retained } -func mergeListWithBackend[M any](backend jaegerbackend.Backend, convertMetadata func(any) M) merge.ListFunc[M] { +type ( + OriginalTraceRequestKey struct{} + OriginalTraceRequest struct { + GetTrace *model.TraceID + FindTraces *spanstore.TraceQueryParameters + } +) + +func mergeListWithBackend[M any](backend jaegerbackend.Backend, convertMetadata func(any) M, otr OriginalTraceRequest) merge.ListFunc[M] { return func( ctx context.Context, key utilobject.Key, startTime time.Time, endTime time.Time, limit int, ) ([]merge.TraceWithMetadata[M], error) { - tts, err := backend.List(ctx, &spanstore.TraceQueryParameters{ - Tags: 
zconstants.KeyToSpanTags(key), - StartTimeMin: startTime, - StartTimeMax: endTime, - NumTraces: limit, - }) + tags := zconstants.KeyToSpanTags(key) + + tts, err := backend.List( + context.WithValue(ctx, OriginalTraceRequestKey{}, otr), + &spanstore.TraceQueryParameters{ + Tags: tags, + StartTimeMin: startTime, + StartTimeMax: endTime, + NumTraces: limit, + }, + ) if err != nil { return nil, err } From 64ccc73ed000eae420c0e87bd120e5fbd9899be9 Mon Sep 17 00:00:00 2001 From: chankyin Date: Tue, 19 Sep 2023 12:27:14 +0800 Subject: [PATCH 16/16] fix(frontend/merge): merge logs in root span --- pkg/frontend/reader/merge/merge.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/frontend/reader/merge/merge.go b/pkg/frontend/reader/merge/merge.go index 3a97a570..88bb6bdb 100644 --- a/pkg/frontend/reader/merge/merge.go +++ b/pkg/frontend/reader/merge/merge.go @@ -261,6 +261,7 @@ func (obj *object[M]) merge(trace *tftree.SpanTree, metadata M) error { func mergeRoot(base *model.Span, tail *model.Span) { mergeRootInterval(base, tail) mergeRootTags(base, tail) + mergeRootLogs(base, tail) } func mergeRootInterval(base *model.Span, tail *model.Span) { @@ -297,6 +298,10 @@ func mergeRootTags(base *model.Span, tail *model.Span) { } } +func mergeRootLogs(base *model.Span, tail *model.Span) { + base.Logs = append(base.Logs, tail.Logs...) +} + func (obj *object[M]) identifyLinks() { for spanId := range obj.tree.Children(obj.tree.Root.SpanID) { span := obj.tree.Span(spanId)