diff --git a/pkg/model/profile.go b/pkg/model/profile.go index 9f16ad2fd5..93327b9e0c 100644 --- a/pkg/model/profile.go +++ b/pkg/model/profile.go @@ -70,31 +70,22 @@ func NewSpanSelector(spans []string) (SpanSelector, error) { return m, nil } -func StacktracePartitionFromProfile(lbls []Labels, p *profilev1.Profile, otel bool) uint64 { - return xxhash.Sum64String(stacktracePartitionKeyFromProfile(lbls, p, otel)) +func SymbolsPartitionForProfile(labelValue string, p *profilev1.Profile, ignoreMapping bool) uint64 { + return xxhash.Sum64String(symbolsPartitionKeyForProfile(labelValue, p, ignoreMapping)) } -func stacktracePartitionKeyFromProfile(lbls []Labels, p *profilev1.Profile, otel bool) string { - // Take the first mapping (which is the main binary's file basename) - // OTEL (at least from ebpf profiler at the time of writing) mappings are unreliable and ordered unpredictably and - // have no VA addresses (only relative to the shared object base) - if len(p.Mapping) > 0 && !otel { +func symbolsPartitionKeyForProfile(labelValue string, p *profilev1.Profile, ignoreMapping bool) string { + if labelValue != "" { + return labelValue + } + if !ignoreMapping && len(p.Mapping) > 0 { + // Use the main binary's file basename as the partition key. if filenameID := p.Mapping[0].Filename; filenameID > 0 { if filename := extractMappingFilename(p.StringTable[filenameID]); filename != "" { return filename } } } - - // failing that look through the labels for the ServiceName - if len(lbls) > 0 { - for _, lbl := range lbls[0] { - if lbl.Name == LabelNameServiceName { - return lbl.Value - } - } - } - return "unknown" } @@ -104,7 +95,8 @@ func extractMappingFilename(filename string) string { if filename == "" || strings.HasPrefix(filename, "[") || strings.HasPrefix(filename, "linux-vdso") || - strings.HasPrefix(filename, "/dev/dri/") { + strings.HasPrefix(filename, "/dev/dri/") || + strings.HasPrefix(filename, "//anon") { return "" } // Like filepath.ToSlash but doesn't rely on OS. diff --git a/pkg/model/profile_test.go b/pkg/model/profile_test.go index c703e5e699..f1c7c88efa 100644 --- a/pkg/model/profile_test.go +++ b/pkg/model/profile_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/assert" ) -func Test_filename_extraction(t *testing.T) { +func Test_extractMappingFilename(t *testing.T) { assert.Equal(t, "app", extractMappingFilename(`app`)) assert.Equal(t, "app", extractMappingFilename(`./app`)) assert.Equal(t, "app", extractMappingFilename(`/usr/bin/app`)) @@ -32,5 +32,6 @@ func Test_filename_extraction(t *testing.T) { assert.Equal(t, "", extractMappingFilename("")) assert.Equal(t, "", extractMappingFilename(`[vdso]`)) assert.Equal(t, "", extractMappingFilename(`[vsyscall]`)) + assert.Equal(t, "", extractMappingFilename(`//anon`)) assert.Equal(t, "not a path actually", extractMappingFilename(`not a path actually`)) } diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 00908457e2..68c6d10dd9 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -64,6 +64,7 @@ type Head struct { metaLock sync.RWMutex meta *block.Meta + config Config parquetConfig *ParquetConfig symdb *symdb.SymDB profiles *profileStore @@ -93,6 +94,7 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea meta: block.NewMeta(), totalSamples: atomic.NewUint64(0), + config: cfg, parquetConfig: &parquetConfig, limiter: limiter, updatedAt: atomic.NewTime(time.Now()), @@ -194,25 +196,23 @@ func (h *Head) Ingest(ctx context.Context, p *profilev1.Profile, id uuid.UUID, e delta := phlaremodel.Labels(externalLabels).Get(phlaremodel.LabelNameDelta) != "false" externalLabels = phlaremodel.Labels(externalLabels).Delete(phlaremodel.LabelNameDelta) + otel := phlaremodel.Labels(externalLabels).Get(phlaremodel.LabelNameOTEL) == "true" externalLabels = phlaremodel.Labels(externalLabels).Delete(phlaremodel.LabelNameOTEL) enforceLabelOrder := phlaremodel.Labels(externalLabels).Get(phlaremodel.LabelNameOrder) == phlaremodel.LabelOrderEnforced externalLabels = phlaremodel.Labels(externalLabels).Delete(phlaremodel.LabelNameOrder) - lbls, seriesFingerprints := phlarelabels.CreateProfileLabels(enforceLabelOrder, p, externalLabels...) + partition := phlaremodel.SymbolsPartitionForProfile(phlaremodel.Labels(externalLabels).Get(h.config.SymbolsPartitionLabel), p, otel) + metricName := phlaremodel.Labels(externalLabels).Get(model.MetricNameLabel) + lbls, seriesFingerprints := phlarelabels.CreateProfileLabels(enforceLabelOrder, p, externalLabels...) for i, fp := range seriesFingerprints { if err := h.limiter.AllowProfile(fp, lbls[i], p.TimeNanos); err != nil { return err } } - // determine the stacktraces partition ID - partition := phlaremodel.StacktracePartitionFromProfile(lbls, p, otel) - - metricName := phlaremodel.Labels(externalLabels).Get(model.MetricNameLabel) - var profileIngested bool for idxType, profile := range h.symdb.WriteProfileSymbols(partition, p) { profile.ID = id diff --git a/pkg/phlaredb/phlaredb.go b/pkg/phlaredb/phlaredb.go index a204b4335b..62956ea504 100644 --- a/pkg/phlaredb/phlaredb.go +++ b/pkg/phlaredb/phlaredb.go @@ -48,7 +48,8 @@ type Config struct { MaxBlockDuration time.Duration `yaml:"max_block_duration,omitempty"` // TODO: docs - RowGroupTargetSize uint64 `yaml:"row_group_target_size"` + RowGroupTargetSize uint64 `yaml:"row_group_target_size"` + SymbolsPartitionLabel string `yaml:"symbols_partition_label"` // Those configs should not be exposed to the user, rather they should be determined by pyroscope itself. // Currently, they are solely used for test cases. @@ -71,6 +72,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.DataPath, "pyroscopedb.data-path", "./data", "Directory used for local storage.") f.DurationVar(&cfg.MaxBlockDuration, "pyroscopedb.max-block-duration", 1*time.Hour, "Upper limit to the duration of a Pyroscope block.") f.Uint64Var(&cfg.RowGroupTargetSize, "pyroscopedb.row-group-target-size", 10*128*1024*1024, "How big should a single row group be uncompressed") // This should roughly be 128MiB compressed + f.StringVar(&cfg.SymbolsPartitionLabel, "pyroscopedb.symbols-partition-label", "", "Specifies the dimension symbols are partitioned by. If empty, the partition is determined automatically.") f.Uint64Var(&cfg.MinFreeDisk, "pyroscopedb.retention-policy-min-free-disk-gb", DefaultMinFreeDisk, "How much available disk space to keep in GiB") f.Float64Var(&cfg.MinDiskAvailablePercentage, "pyroscopedb.retention-policy-min-disk-available-percentage", DefaultMinDiskAvailablePercentage, "Which percentage of free disk space to keep") f.DurationVar(&cfg.EnforcementInterval, "pyroscopedb.retention-policy-enforcement-interval", DefaultRetentionPolicyEnforcementInterval, "How often to enforce disk retention")