Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

poc(ddtrace/tracer): migrate tracer config to knobs #3030

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ddtrace/tracer/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/internal/log"
"gopkg.in/DataDog/dd-trace-go.v1/internal/osinfo"
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"

"github.com/darccio/knobs"
)

// startupInfo contains various information about the status of the tracer on startup.
Expand Down Expand Up @@ -141,8 +143,8 @@ func logStartup(t *tracer) {
AgentFeatures: t.config.agent,
Integrations: t.config.integrations,
AppSec: appsec.Enabled(),
PartialFlushEnabled: t.config.partialFlushEnabled,
PartialFlushMinSpans: t.config.partialFlushMinSpans,
PartialFlushEnabled: knobs.GetScope(t.config.Scope, partialFlushEnabled),
PartialFlushMinSpans: knobs.GetScope(t.config.Scope, partialFlushMinSpans),
Orchestrion: t.config.orchestrionCfg,
FeatureFlags: featureFlags,
PropagationStyleInject: injectorNames,
Expand Down
159 changes: 109 additions & 50 deletions ddtrace/tracer/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package tracer
import (
"context"
"encoding/json"
"errors"
"fmt"
"math"
"net"
Expand Down Expand Up @@ -37,6 +38,7 @@ import (
"gopkg.in/DataDog/dd-trace-go.v1/internal/version"

"github.com/DataDog/datadog-go/v5/statsd"
"github.com/darccio/knobs"
)

var contribIntegrations = map[string]struct {
Expand Down Expand Up @@ -115,6 +117,13 @@ var (

// config holds the tracer configuration.
type config struct {
*knobs.Scope

dynamic struct {
// enabled reports whether tracing is enabled.
enabled dynamicConfig[bool]
}

// debug, when true, writes details to logs.
debug bool

Expand Down Expand Up @@ -230,9 +239,6 @@ type config struct {
// profilerEndpoints specifies whether profiler endpoint filtering is enabled.
profilerEndpoints bool

// enabled reports whether tracing is enabled.
enabled dynamicConfig[bool]

// enableHostnameDetection specifies whether the tracer should enable hostname detection.
enableHostnameDetection bool

Expand All @@ -252,15 +258,6 @@ type config struct {
// misconfiguration
spanTimeout time.Duration

// partialFlushMinSpans is the number of finished spans in a single trace to trigger a
// partial flush, or 0 if partial flushing is disabled.
// Value from DD_TRACE_PARTIAL_FLUSH_MIN_SPANS, default 1000.
partialFlushMinSpans int

// partialFlushEnabled specifies whether the tracer should enable partial flushing. Value
// from DD_TRACE_PARTIAL_FLUSH_ENABLED, default false.
partialFlushEnabled bool

// statsComputationEnabled enables client-side stats computation (aka trace metrics).
statsComputationEnabled bool

Expand All @@ -284,9 +281,6 @@ type config struct {
// Value from DD_DYNAMIC_INSTRUMENTATION_ENABLED, default false.
dynamicInstrumentationEnabled bool

// globalSampleRate holds sample rate read from environment variables.
globalSampleRate float64

// ciVisibilityEnabled controls if the tracer is loaded with CI Visibility mode. default false
ciVisibilityEnabled bool

Expand All @@ -297,6 +291,98 @@ type config struct {
logDirectory string
}

// errOutOfRange is the sentinel error reported when a configuration value
// parses correctly but falls outside its accepted range.
var errOutOfRange = errors.New("value out of range")

var (
// enabled is the knob that reports whether tracing is enabled. It defaults to
// true and lists two environment variables: DD_TRACE_ENABLED, and
// OTEL_TRACES_EXPORTER with mapEnabled attached as its transform.
enabled = knobs.Register(&knobs.Definition[bool]{
	Default: true,
	EnvVars: []knobs.EnvVar{
		{Key: "DD_TRACE_ENABLED"},
		{Key: "OTEL_TRACES_EXPORTER", Transform: mapEnabled},
	},
	Parse: func(raw string) (bool, error) {
		// Only non-empty input goes through strconv.ParseBool; an empty
		// string would be a parse error there, so it is reported as true.
		if raw != "" {
			return strconv.ParseBool(raw)
		}
		return true, nil
	},
})

// globalSampleRate holds sample rate read from environment variables.
globalSampleRate = knobs.Register(&knobs.Definition[float64]{
Default: math.NaN(),

EnvVars: []knobs.EnvVar{
{
Key: "DD_TRACE_SAMPLE_RATE",
},
{
Key: "OTEL_TRACES_SAMPLER",
Transform: mapSampleRate,
},
},
Resolve: func(environ map[string]string, decision string) (string, error) {
Copy link
Member Author

@darccio darccio Dec 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mtoffl01 As you already saw, I took the liberty of creating a Resolve function that implements what we discussed offline. Resolve allows extra validation against the whole environment, and can also override which environment variable is used.

BTW, I'm open to better naming 😆

dd := "DD_TRACE_SAMPLE_RATE"
ot := "OTEL_TRACES_SAMPLER"
if _, ok := environ[dd]; ok {
if _, ok := environ[ot]; ok {
log.Warn("Both %s and %s are set, using %v=%v", dd, ot, dd, environ["DD_TRACE_SAMPLE_RATE"])
reportHidingCount(dd, ot)
}
}
// The chosen environment variable is fine.
return decision, nil
},
Parse: func(s string) (float64, error) {
if sampleRate, err := strconv.ParseFloat(s, 64); err != nil {
return 0.0, err
} else if sampleRate < 0.0 || sampleRate > 1.0 {
return 0.0, errOutOfRange
} else {
return sampleRate, nil
}
},
})

// partialFlushEnabled specifies whether the tracer should enable partial
// flushing. It is read from DD_TRACE_PARTIAL_FLUSH_ENABLED and defaults to false.
partialFlushEnabled = knobs.Register(&knobs.Definition[bool]{
	Parse:   knobs.ToBool,
	Default: false,
	EnvVars: []knobs.EnvVar{
		{Key: "DD_TRACE_PARTIAL_FLUSH_ENABLED"},
	},
})

// partialFlushMinSpans is the number of finished spans in a single trace to trigger a
// partial flush, or 0 if partial flushing is disabled.
// Value from DD_TRACE_PARTIAL_FLUSH_MIN_SPANS, default 1000.
partialFlushMinSpans = knobs.Register(&knobs.Definition[int]{
// TODO(partialFlush): consider logging a warning if DD_TRACE_PARTIAL_FLUSH_MIN_SPANS
// is set, but DD_TRACE_PARTIAL_FLUSH_ENABLED is not true. Or just assume it should be enabled
// if it's explicitly set, and don't require both variables to be configured.
Default: 1000,
EnvVars: []knobs.EnvVar{{Key: "DD_TRACE_PARTIAL_FLUSH_MIN_SPANS"}},
// NOTE(review): Requires presumably makes this knob depend on partialFlushEnabled;
// the exact semantics live in the knobs package — confirm there.
Requires: []any{partialFlushEnabled},
// Parse accepts only integers strictly between 0 and traceMaxSize; anything
// else is returned as an error together with a zero value.
Parse: func(v string) (int, error) {
// The Atoi error is discarded. On a syntax error Atoi yields 0, which the
// i <= 0 check rejects; on a range error it yields the largest-magnitude int
// of the matching sign, which one of the two checks below rejects as well.
// The underlying parse error itself is never surfaced to the caller.
i, _ := strconv.Atoi(v)
if i <= 0 {
return 0, knobs.ErrInvalidValue
} else if i >= traceMaxSize {
// At or above traceMaxSize the threshold could never be reached, so the
// value is refused with an explanatory error.
err := fmt.Errorf("value is above the max number of spans that can be kept in memory for a single trace (%d spans), so partial flushing will never trigger", traceMaxSize)
return 0, err
}
return i, nil
},
})
)

// orchestrionConfig contains Orchestrion configuration.
type orchestrionConfig struct {
// Enabled indicates whether this tracer was instantiated via Orchestrion.
Expand All @@ -318,27 +404,12 @@ type StartOption func(*config)
// maxPropagatedTagsLength limits the size of DD_TRACE_X_DATADOG_TAGS_MAX_LENGTH to prevent HTTP 413 responses.
const maxPropagatedTagsLength = 512

// partialFlushMinSpansDefault is the default number of spans for partial flushing, if enabled.
const partialFlushMinSpansDefault = 1000

// newConfig renders the tracer configuration based on defaults, environment variables
// and passed user opts.
func newConfig(opts ...StartOption) *config {
c := new(config)
c.Scope = knobs.NewScope()
c.sampler = NewAllSampler()
sampleRate := math.NaN()
if r := getDDorOtelConfig("sampleRate"); r != "" {
var err error
sampleRate, err = strconv.ParseFloat(r, 64)
if err != nil {
log.Warn("ignoring DD_TRACE_SAMPLE_RATE, error: %v", err)
sampleRate = math.NaN()
} else if sampleRate < 0.0 || sampleRate > 1.0 {
log.Warn("ignoring DD_TRACE_SAMPLE_RATE: out of range %f", sampleRate)
sampleRate = math.NaN()
}
}
c.globalSampleRate = sampleRate
c.httpClientTimeout = time.Second * 10 // 10 seconds

if v := os.Getenv("OTEL_LOGS_EXPORTER"); v != "" {
Expand Down Expand Up @@ -400,9 +471,10 @@ func newConfig(opts ...StartOption) *config {
c.runtimeMetricsV2 = internal.BoolEnv("DD_RUNTIME_METRICS_V2_ENABLED", false)
c.debug = internal.BoolVal(getDDorOtelConfig("debugMode"), false)
c.logDirectory = os.Getenv("DD_TRACE_LOG_DIRECTORY")
c.enabled = newDynamicConfig("tracing_enabled", internal.BoolVal(getDDorOtelConfig("enabled"), true), func(b bool) bool { return true }, equal[bool])
c.dynamic.enabled = newDynamicConfig("tracing_enabled", knobs.GetScope(c.Scope, enabled), func(b bool) bool { knobs.SetScope(c.Scope, enabled, knobs.Code, b); return true }, equal[bool])
if _, ok := os.LookupEnv("DD_TRACE_ENABLED"); ok {
c.enabled.cfgOrigin = telemetry.OriginEnvVar
// TODO: shouldn't we track this for the OTEL_TRACES_EXPORTER case?
c.dynamic.enabled.cfgOrigin = telemetry.OriginEnvVar
}
c.profilerEndpoints = internal.BoolEnv(traceprof.EndpointEnvVar, true)
c.profilerHotspots = internal.BoolEnv(traceprof.CodeHotspotsEnvVar, true)
Expand All @@ -419,19 +491,6 @@ func newConfig(opts ...StartOption) *config {
}
c.statsComputationEnabled = internal.BoolEnv("DD_TRACE_STATS_COMPUTATION_ENABLED", false)
c.dataStreamsMonitoringEnabled = internal.BoolEnv("DD_DATA_STREAMS_ENABLED", false)
c.partialFlushEnabled = internal.BoolEnv("DD_TRACE_PARTIAL_FLUSH_ENABLED", false)
c.partialFlushMinSpans = internal.IntEnv("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS", partialFlushMinSpansDefault)
if c.partialFlushMinSpans <= 0 {
log.Warn("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS=%d is not a valid value, setting to default %d", c.partialFlushMinSpans, partialFlushMinSpansDefault)
c.partialFlushMinSpans = partialFlushMinSpansDefault
} else if c.partialFlushMinSpans >= traceMaxSize {
log.Warn("DD_TRACE_PARTIAL_FLUSH_MIN_SPANS=%d is above the max number of spans that can be kept in memory for a single trace (%d spans), so partial flushing will never trigger, setting to default %d", c.partialFlushMinSpans, traceMaxSize, partialFlushMinSpansDefault)
c.partialFlushMinSpans = partialFlushMinSpansDefault
}
// TODO(partialFlush): consider logging a warning if DD_TRACE_PARTIAL_FLUSH_MIN_SPANS
// is set, but DD_TRACE_PARTIAL_FLUSH_ENABLED is not true. Or just assume it should be enabled
// if it's explicitly set, and don't require both variables to be configured.

c.dynamicInstrumentationEnabled = internal.BoolEnv("DD_DYNAMIC_INSTRUMENTATION_ENABLED", false)

schemaVersionStr := os.Getenv("DD_TRACE_SPAN_ATTRIBUTE_SCHEMA")
Expand Down Expand Up @@ -530,7 +589,7 @@ func newConfig(opts ...StartOption) *config {
log.SetLevel(log.LevelDebug)
}
// if using stdout or traces are disabled, agent is disabled
agentDisabled := c.logToStdout || !c.enabled.current
agentDisabled := c.logToStdout || !knobs.GetScope(c.Scope, enabled)
c.agent = loadAgentFeatures(agentDisabled, c.agentURL, c.httpClient)
info, ok := debug.ReadBuildInfo()
if !ok {
Expand Down Expand Up @@ -1132,7 +1191,7 @@ func WithHostname(name string) StartOption {
// WithTraceEnabled allows specifying whether tracing will be enabled
func WithTraceEnabled(enabled bool) StartOption {
return func(c *config) {
c.enabled = newDynamicConfig("tracing_enabled", enabled, func(b bool) bool { return true }, equal[bool])
c.dynamic.enabled.update(enabled, telemetry.OriginCode)
}
}

Expand Down Expand Up @@ -1192,8 +1251,8 @@ func WithDebugSpansMode(timeout time.Duration) StartOption {
// is disabled by default.
func WithPartialFlushing(numSpans int) StartOption {
return func(c *config) {
c.partialFlushEnabled = true
c.partialFlushMinSpans = numSpans
knobs.SetScope(c.Scope, partialFlushEnabled, knobs.Code, true)
knobs.SetScope(c.Scope, partialFlushMinSpans, knobs.Code, numSpans)
}
}

Expand Down
Loading
Loading