Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logger for CLI telemetry #2083

Open
wants to merge 5 commits into
base: refactor-bundle-init-squashed
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions integration/libs/telemetry/telemetry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package telemetry_test

import (
"context"
"net/http"
"reflect"
"testing"
"time"

"github.com/databricks/cli/integration/internal/acc"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/telemetry/events"
"github.com/databricks/databricks-sdk-go/client"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// apiClientWrapper wraps a real Databricks API client and remembers the
// response of the most recent Do call. The test needs this because the
// response is not directly accessible from the telemetry logger.
type apiClientWrapper struct {
// response is the response value passed to the last Do call; it stays nil
// until Do has been invoked.
response *telemetry.ResponseBody
// apiClient is the underlying client that every Do call is forwarded to.
apiClient *client.DatabricksClient
}

// Do forwards the request unchanged to the wrapped API client and then stores
// the response pointer on the wrapper so the test can inspect it afterwards.
// The error from the wrapped client is returned as-is.
func (w *apiClientWrapper) Do(ctx context.Context, method, path string,
	headers map[string]string, request, response any,
	visitors ...func(*http.Request) error,
) error {
	doErr := w.apiClient.Do(ctx, method, path, headers, request, response, visitors...)

	// The response is recorded even when the call failed. The assertion is
	// unchecked, so passing anything other than *telemetry.ResponseBody panics.
	w.response = response.(*telemetry.ResponseBody)

	return doErr
}

// TestTelemetryLogger logs one event for every field of DatabricksCliLog,
// flushes them through a wrapped API client, and asserts that the telemetry
// endpoint reports every event as successfully ingested with no errors.
func TestTelemetryLogger(t *testing.T) {
	// Named logs rather than events so that the imported events package is not
	// shadowed for the remainder of the function.
	logs := []telemetry.DatabricksCliLog{
		{
			CliTestEvent: &events.CliTestEvent{
				Name: events.DummyCliEnumValue1,
			},
		},
		{
			BundleInitEvent: &events.BundleInitEvent{
				Uuid:         uuid.New().String(),
				TemplateName: "abc",
				TemplateEnumArgs: []events.BundleInitTemplateEnumArg{
					{
						Key:   "a",
						Value: "b",
					},
					{
						Key:   "c",
						Value: "d",
					},
				},
			},
		},
	}

	// The struct's field count is the expected value; the number of events in
	// this test is what has to keep up with it.
	assert.Equal(t, reflect.TypeOf(telemetry.DatabricksCliLog{}).NumField(), len(logs),
		"Number of events should match the number of fields in DatabricksCliLog. Please add a new event to this test.")

	ctx, w := acc.WorkspaceTest(t)
	ctx = telemetry.WithDefaultLogger(ctx)

	// Extend the maximum wait time for the telemetry flush just for this test,
	// and restore the previous value when the test finishes.
	oldV := telemetry.MaxAdditionalWaitTime
	telemetry.MaxAdditionalWaitTime = 1 * time.Hour
	t.Cleanup(func() {
		telemetry.MaxAdditionalWaitTime = oldV
	})

	for _, event := range logs {
		telemetry.Log(ctx, event)
	}

	apiClient, err := client.New(w.W.Config)
	require.NoError(t, err)

	// Flush the events through the wrapper so the response can be captured.
	wrapper := &apiClientWrapper{
		apiClient: apiClient,
	}
	telemetry.Flush(ctx, wrapper)

	// The wrapper only records a response when Do is called. Fail with a clear
	// message instead of panicking on the dereference below if that never
	// happened.
	require.NotNil(t, wrapper.response, "telemetry.Flush did not call the API client")

	// Assert that the events were logged.
	assert.Equal(t, telemetry.ResponseBody{
		NumProtoSuccess: int64(len(logs)),
		Errors:          []telemetry.LogError{},
	}, *wrapper.response)
}
19 changes: 19 additions & 0 deletions libs/telemetry/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package telemetry

// RequestBody is the request body type bindings for the /telemetry-ext API endpoint.
type RequestBody struct {
// UploadTime is serialized as "uploadTime".
// NOTE(review): presumably a Unix timestamp; the unit is not visible here — confirm.
UploadTime int64 `json:"uploadTime"`
// Items is serialized as "items".
Items []string `json:"items"`
// ProtoLogs is serialized as "protoLogs".
// NOTE(review): presumably each entry is one serialized log event — confirm against the caller.
ProtoLogs []string `json:"protoLogs"`
}

// ResponseBody is the response body type bindings for the /telemetry-ext API endpoint.
type ResponseBody struct {
// Errors lists the errors reported by the endpoint, serialized as "errors".
Errors []LogError `json:"errors"`
// NumProtoSuccess is serialized as "numProtoSuccess". The integration test
// expects it to equal the number of events that were logged.
NumProtoSuccess int64 `json:"numProtoSuccess"`
}

// LogError is a single entry of ResponseBody.Errors.
type LogError struct {
// Message is serialized as "message".
Message string `json:"message"`
// ErrorType is serialized as "ErrorType".
// NOTE(review): this tag starts with an upper-case letter, unlike the other
// camelCase tags in this file — confirm it matches the field name the API returns.
ErrorType string `json:"ErrorType"`
}
62 changes: 62 additions & 0 deletions libs/telemetry/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package telemetry

import (
"context"
"fmt"
)

// telemetryLogger is a private type used for the context key under which the
// telemetry logger is stored. Being unexported, it keeps code outside this
// package from constructing the same key.
type telemetryLogger int

// telemetryLoggerKey is the context key under which the telemetry logger is
// stored and looked up.
var telemetryLoggerKey telemetryLogger

func WithDefaultLogger(ctx context.Context) context.Context {
v := ctx.Value(telemetryLoggerKey)

// If no logger is set in the context, set the default logger.
if v == nil {
nctx := context.WithValue(ctx, telemetryLoggerKey, &defaultLogger{})
return nctx
}

switch v.(type) {
case *defaultLogger:
panic(fmt.Errorf("default telemetry logger already set in the context: %T", v))
case *mockLogger:
// Do nothing. Unit and integration tests set the mock logger in the context
// to avoid making actual API calls. Thus WithDefaultLogger should silently
// ignore the mock logger.
default:
panic(fmt.Errorf("unexpected telemetry logger type: %T", v))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the purpose of this typecheck here? Why panic (as opposed to logging a message).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reaching here typically is a developer error. Panicking informs the developer there's a problem without having to return and propagate the error. For example, we also panic here: dbr.RunsOnRuntime(ctx)

We do the type check because we do not want to error or override if a mock logger is configured.

}

return ctx
}

// WithMockLogger returns a context that carries a mock telemetry logger.
// It never replaces an existing logger: if any value is already stored under
// the telemetry logger key, whether default or mock, it panics.
func WithMockLogger(ctx context.Context) context.Context {
	if existing := ctx.Value(telemetryLoggerKey); existing != nil {
		panic(fmt.Errorf("telemetry logger already set in the context: %T", existing))
	}

	return context.WithValue(ctx, telemetryLoggerKey, &mockLogger{})
}

// fromContext returns the telemetry logger stored in ctx. It panics when no
// logger is stored, or when the stored value is neither a default nor a mock
// logger.
func fromContext(ctx context.Context) Logger {
	switch logger := ctx.Value(telemetryLoggerKey).(type) {
	case nil:
		panic(fmt.Errorf("telemetry logger not found in the context"))
	case *defaultLogger:
		return logger
	case *mockLogger:
		return logger
	default:
		panic(fmt.Errorf("unexpected telemetry logger type: %T", logger))
	}
}
77 changes: 77 additions & 0 deletions libs/telemetry/context_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package telemetry

import (
"context"
"testing"

"github.com/stretchr/testify/assert"
)

// TestWithDefaultLogger exercises WithDefaultLogger against an empty context,
// a context that already has the default logger, a context with the mock
// logger, and a context holding an unrelated value under the logger key.
func TestWithDefaultLogger(t *testing.T) {
	base := context.Background()

	// Empty context: a fresh default logger is stored.
	withDefault := WithDefaultLogger(base)
	assert.Equal(t, &defaultLogger{}, withDefault.Value(telemetryLoggerKey))

	// Calling it again on that context must panic.
	assert.PanicsWithError(t, "default telemetry logger already set in the context: *telemetry.defaultLogger", func() {
		WithDefaultLogger(withDefault)
	})

	// A mock logger that is already present is tolerated.
	withMock := WithMockLogger(base)
	assert.NotPanics(t, func() {
		WithDefaultLogger(withMock)
	})

	// Any other type stored under the key must panic.
	type foobar struct{}
	withUnknown := context.WithValue(base, telemetryLoggerKey, &foobar{})
	assert.PanicsWithError(t, "unexpected telemetry logger type: *telemetry.foobar", func() {
		WithDefaultLogger(withUnknown)
	})
}

// TestWithMockLogger checks that WithMockLogger stores a mock logger in an
// empty context and panics whenever a logger of either kind is already set.
func TestWithMockLogger(t *testing.T) {
	base := context.Background()

	// Empty context: a mock logger is stored.
	withMock := WithMockLogger(base)
	assert.Equal(t, &mockLogger{}, withMock.Value(telemetryLoggerKey))

	// A mock logger is already present.
	assert.PanicsWithError(t, "telemetry logger already set in the context: *telemetry.mockLogger", func() {
		WithMockLogger(withMock)
	})

	// A default logger is already present.
	withDefault := WithDefaultLogger(base)
	assert.PanicsWithError(t, "telemetry logger already set in the context: *telemetry.defaultLogger", func() {
		WithMockLogger(withDefault)
	})
}

// TestFromContext checks that fromContext returns whichever logger is stored
// in the context, and panics when nothing or an unexpected type is stored.
func TestFromContext(t *testing.T) {
	base := context.Background()

	// Nothing stored under the key.
	assert.PanicsWithError(t, "telemetry logger not found in the context", func() {
		fromContext(base)
	})

	// Default logger stored.
	withDefault := WithDefaultLogger(base)
	assert.Equal(t, &defaultLogger{}, fromContext(withDefault))

	// Mock logger stored.
	withMock := WithMockLogger(base)
	assert.Equal(t, &mockLogger{}, fromContext(withMock))

	// Unrelated type stored under the key.
	type foobar struct{}
	withUnknown := context.WithValue(base, telemetryLoggerKey, &foobar{})
	assert.PanicsWithError(t, "unexpected telemetry logger type: *telemetry.foobar", func() {
		fromContext(withUnknown)
	})
}
39 changes: 39 additions & 0 deletions libs/telemetry/events/bundle_init.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package events

// BundleInitEvent corresponds to the `DatabricksCliBundleInitEvent` proto
// message in `databricks_cli_log.proto` as of 20 Dec 2024.
type BundleInitEvent struct {
// UUID associated with the DAB itself. This is serialized into the DAB
// when a user runs `databricks bundle init` and all subsequent deployments of
// that DAB can then be associated with this init event.
Uuid string `json:"bundle_uuid,omitempty"`

// Name of the template initialized when the user ran `databricks bundle init`.
// This is only populated when the template is a first party template like
// mlops-stacks or default-python.
TemplateName string `json:"template_name,omitempty"`

// Arguments used by the user to initialize the template. Only enum
// values will be set here by the Databricks CLI.
//
// We use a generic list of key/value pairs here because a bundle template's
// args are managed in the template itself, and maintaining a typed copy of
// that schema here would be untenable in the long term.
TemplateEnumArgs []BundleInitTemplateEnumArg `json:"template_enum_args,omitempty"`
}

// BundleInitTemplateEnumArg is a single key/value pair recorded in
// BundleInitEvent.TemplateEnumArgs.
type BundleInitTemplateEnumArg struct {
// Valid key values for the template. These correspond to the keys specified in
// the "properties" section of the `databricks_template_schema.json` file.
//
// Note: `databricks_template_schema.json` contains a JSON schema type specification
// for the arguments that the template accepts.
Key string `json:"key"`

// Value that the user set for the field. This is only populated for properties
// that have the "enum" field specified in the JSON schema type specification.
//
// The Databricks CLI ensures that the value here is one of the "enum" values from
// the template specification.
Value string `json:"value"`
}
16 changes: 16 additions & 0 deletions libs/telemetry/events/test_event.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package events

// CliTestEvent is a dummy event for testing the telemetry pipeline. It
// corresponds to the `DatabricksCliTestEvent` proto in
// `databricks_cli_log.proto` as of 20 Dec 2024.
type CliTestEvent struct {
// Name is serialized as "name" and omitted when it is the empty string.
Name DummyCliEnum `json:"name,omitempty"`
}

// DummyCliEnum is the string-backed enum used by CliTestEvent.Name.
type DummyCliEnum string

// Values of DummyCliEnum.
// NOTE(review): presumably these mirror the enum values declared in the
// proto — confirm against `databricks_cli_log.proto`.
const (
DummyCliEnumUnspecified DummyCliEnum = "DUMMY_CLI_ENUM_UNSPECIFIED"
DummyCliEnumValue1 DummyCliEnum = "VALUE1"
DummyCliEnumValue2 DummyCliEnum = "VALUE2"
DummyCliEnumValue3 DummyCliEnum = "VALUE3"
)
22 changes: 22 additions & 0 deletions libs/telemetry/frontend_log.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package telemetry

import "github.com/databricks/cli/libs/telemetry/events"

// FrontendLog corresponds to the FrontendLog lumberjack proto in universe.
// FrontendLog is the top-level struct for any client-side logs at Databricks
// regardless of whether they are generated from the CLI or the web UI.
type FrontendLog struct {
// A unique identifier for the log event generated from the CLI.
FrontendLogEventID string `json:"frontend_log_event_id,omitempty"`

// Entry holds the payload of the log event.
// NOTE(review): Entry is a struct value, and encoding/json's omitempty does
// not treat struct values as empty, so the option appears to have no effect
// here — confirm whether a pointer was intended.
Entry FrontendLogEntry `json:"entry,omitempty"`
}

// FrontendLogEntry is the entry of a FrontendLog. The only field it declares
// here is the CLI-specific log.
type FrontendLogEntry struct {
// DatabricksCliLog is serialized as "databricks_cli_log".
// NOTE(review): this is a struct value, so encoding/json's omitempty appears
// to have no effect on it — confirm whether a pointer was intended.
DatabricksCliLog DatabricksCliLog `json:"databricks_cli_log,omitempty"`
}

// DatabricksCliLog holds one optional pointer field per kind of CLI telemetry
// event. Fields that are nil are omitted from the JSON encoding. The telemetry
// integration test asserts that it logs as many events as this struct has
// fields, so adding a field here requires adding an event to that test.
type DatabricksCliLog struct {
// CliTestEvent is the dummy event used to test the telemetry pipeline.
CliTestEvent *events.CliTestEvent `json:"cli_test_event,omitempty"`
// BundleInitEvent is the event describing a `databricks bundle init` run.
BundleInitEvent *events.BundleInitEvent `json:"bundle_init_event,omitempty"`
}
Loading
Loading