Skip to content

Commit

Permalink
ZED-13 replace Jaeger with Grafana & co.
Browse files Browse the repository at this point in the history
  • Loading branch information
zbigniewzolnierowicz committed Sep 20, 2024
1 parent 6a40014 commit 7789572
Show file tree
Hide file tree
Showing 14 changed files with 286 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ target/
.env
.env.production
*.log
tempo-data
3 changes: 3 additions & 0 deletions .mise.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@ task = "latest"
"cargo:cargo-udeps" = "latest"
"cargo:cargo-llvm-cov" = "latest"
"cargo:mprocs" = "latest"
httpie-go = "latest"
hurl = "latest"
caddy = "latest"
23 changes: 23 additions & 0 deletions Caddyfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
log default {
output stdout
include http.log.access admin.api
}
}

:4318 {
@options {
method OPTIONS
}

header Access-Control-Allow-Origin "http://localhost:5173"
header Access-Control-Allow-Credentials "true"
header Access-Control-Allow-Methods "GET, OPTIONS"
header Access-Control-Allow-Headers "Priority,User-Agent,Content-Type"
respond @options 204

reverse_proxy localhost:14318 {
header_down -Access-Control-Allow-Origin
header_down -Access-Control-Allow-Credentials
}
}
3 changes: 3 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ tasks:
cmds:
- task: backend:lint:fix
- task: frontend:lint:fix
proxy:
desc: "Run proxy for Tempo"
cmd: "caddy run"
78 changes: 56 additions & 22 deletions compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,62 @@ services:
POSTGRES_DB: ${POSTGRES_DB}
ports:
- ${POSTGRES_PORT}:5432
redis:
image: redis:latest
restart: always

# Tempo runs as user 10001, and docker compose creates the volume as root.
# As such, we need to chown the volume in order for Tempo to start correctly.

init:
image: &tempoImage grafana/tempo:latest
user: root
entrypoint:
- "chown"
- "10001:10001"
- "/var/tempo"
volumes:
- ./tempo-data:/var/tempo

tempo:
image: *tempoImage
command: [ "-config.file=/etc/tempo.yaml" ]
volumes:
- ./tempo.yaml:/etc/tempo.yaml
- ./tempo-data:/var/tempo
ports:
- "6379:6379"
environment:
- REDIS_PASSWORD=${REDIS_PASSWORD}
- REDIS_PORT=6379
# TODO: Replace with grafana + otel_collector
jaeger:
image: jaegertracing/all-in-one:1.56
restart: always
- "14268" # jaeger ingest
- "3200:3200" # tempo
- "4317:4317" # otlp grpc
- "14318:4318" # otlp http
- "9411" # zipkin
depends_on:
- init

# And put them in an OTEL collector pipeline...
otel-collector:
image: otel/opentelemetry-collector:0.86.0
command: [ "--config=/etc/otel-collector.yaml" ]
volumes:
- ./otel-collector.yaml:/etc/otel-collector.yaml

prometheus:
image: prom/prometheus:latest
command:
- --config.file=/etc/prometheus.yaml
- --web.enable-remote-write-receiver
- --enable-feature=exemplar-storage
- --enable-feature=native-histograms
volumes:
- ./prometheus.yaml:/etc/prometheus.yaml
ports:
- "9090:9090"

grafana:
image: grafana/grafana:11.0.0
volumes:
- ./grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml
environment:
- COLLECTOR_ZIPKIN_HOST_PORT=:9411
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
- GF_AUTH_DISABLE_LOGIN_FORM=true
- GF_FEATURE_TOGGLES_ENABLE=traceqlEditor
ports:
- 6831:6831/udp
- 6832:6832/udp
- 5778:5778
- 16686:16686
- 4317:4317
- 4318:4318
- 14250:14250
- 14268:14268
- 14269:14269
- 9411:9411
- "3000:3000"
25 changes: 21 additions & 4 deletions frontend/app/telemetry.client.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,32 @@
import {
ConsoleSpanExporter,
SimpleSpanProcessor,
BatchSpanProcessor,
} from '@opentelemetry/sdk-trace-base';
import { WebTracerProvider } from '@opentelemetry/sdk-trace-web';
import { ZoneContextManager } from '@opentelemetry/context-zone';
import { registerInstrumentations } from '@opentelemetry/instrumentation';
import { B3Propagator } from '@opentelemetry/propagator-b3';
import { getWebAutoInstrumentations } from '@opentelemetry/auto-instrumentations-web';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto';
import { diag, DiagConsoleLogger, DiagLogLevel } from '@opentelemetry/api';
import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
import { Resource } from '@opentelemetry/resources';

const provider = new WebTracerProvider();
provider.addSpanProcessor(new SimpleSpanProcessor(new ConsoleSpanExporter()));
diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG);

const provider = new WebTracerProvider({
resource: new Resource({
[ATTR_SERVICE_NAME]: 'deepdi.sh-frontend-web',
}),
});

const traceExporter = new OTLPTraceExporter({
url: 'http://localhost:4318/v1/traces',
headers: {
'Content-Type': 'application/json',
},
});

provider.addSpanProcessor(new BatchSpanProcessor(traceExporter));

provider.register({
// Changing default contextManager to use ZoneContextManager - supports asynchronous operations - optional
Expand Down
20 changes: 15 additions & 5 deletions frontend/app/telemetry.server.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
import opentelemetry from '@opentelemetry/sdk-node';
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import { ConsoleSpanExporter } from '@opentelemetry/sdk-trace-base';
// import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import { Resource } from '@opentelemetry/resources';
import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
import { RemixInstrumentation } from 'opentelemetry-instrumentation-remix';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto';

// configure the SDK to export telemetry data to the OTLP HTTP endpoint
// auto-instrumentations from the meta package are currently disabled (see the commented-out block below)
const traceExporter = new ConsoleSpanExporter();
const traceExporter = new OTLPTraceExporter({
url: 'http://localhost:4318/v1/traces',
});

const sdk = new opentelemetry.NodeSDK({
resource: new Resource({
[ATTR_SERVICE_NAME]: 'my-service',
[ATTR_SERVICE_NAME]: 'deepdi.sh-frontend-server',
}),
traceExporter,
instrumentations: [getNodeAutoInstrumentations(), new RemixInstrumentation()],
instrumentations: [
/* getNodeAutoInstrumentations({
'@opentelemetry/instrumentation-fs': {
enabled: false,
},
}), */
new RemixInstrumentation(),
],
});

sdk.start();
2 changes: 2 additions & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
"@opentelemetry/auto-instrumentations-node": "^0.50.0",
"@opentelemetry/auto-instrumentations-web": "^0.41.0",
"@opentelemetry/context-zone": "^1.26.0",
"@opentelemetry/exporter-metrics-otlp-proto": "^0.53.0",
"@opentelemetry/exporter-trace-otlp-proto": "^0.53.0",
"@opentelemetry/instrumentation": "^0.53.0",
"@opentelemetry/instrumentation-document-load": "^0.40.0",
"@opentelemetry/propagator-b3": "^1.26.0",
Expand Down
37 changes: 37 additions & 0 deletions frontend/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions grafana-datasources.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
apiVersion: 1

datasources:
- name: Prometheus
type: prometheus
uid: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
basicAuth: false
isDefault: false
version: 1
editable: false
jsonData:
httpMethod: GET
- name: Tempo
type: tempo
access: proxy
orgId: 1
url: http://tempo:3200
basicAuth: false
isDefault: true
version: 1
editable: false
apiVersion: 1
uid: tempo
jsonData:
httpMethod: GET
serviceMap:
datasourceUid: prometheus
3 changes: 3 additions & 0 deletions mprocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ procs:
frontend:
cmd: ["task", "fe:dev"]
stop: "SIGKILL"
caddy:
cmd: ["task", "proxy"]
stop: "SIGKILL"
14 changes: 14 additions & 0 deletions otel-collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
receivers:
otlp:
protocols:
grpc:
exporters:
otlp:
endpoint: tempo:4317
tls:
insecure: true
service:
pipelines:
traces:
receivers: [otlp]
exporters: [otlp]
11 changes: 11 additions & 0 deletions prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
global:
scrape_interval: 15s
evaluation_interval: 15s

scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: [ 'localhost:9090' ]
- job_name: 'tempo'
static_configs:
- targets: [ 'tempo:3200' ]
67 changes: 67 additions & 0 deletions tempo.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
stream_over_http_enabled: true
server:
http_listen_port: 3200
log_level: info

query_frontend:
search:
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
trace_by_id:
duration_slo: 5s

distributor:
receivers: # this configuration will listen on all ports and protocols that tempo is capable of.
jaeger: # the receivers all come from the OpenTelemetry collector. more configuration information can
protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
thrift_http: #
grpc: # for a production deployment you should only enable the receivers you need!
thrift_binary:
thrift_compact:
zipkin:
otlp:
protocols:
http:
include_metadata: true
cors:
allowed_origins:
- http://localhost:5173
allowed_headers:
- Example-Header
max_age: 7200
grpc:
opencensus:

ingester:
max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally

compactor:
compaction:
block_retention: 1h # overall Tempo trace retention. set for demo purposes

metrics_generator:
registry:
external_labels:
source: tempo
cluster: docker-compose
storage:
path: /var/tempo/generator/wal
remote_write:
- url: http://prometheus:9090/api/v1/write
send_exemplars: true
traces_storage:
path: /var/tempo/generator/traces

storage:
trace:
backend: local # backend configuration to use
wal:
path: /var/tempo/wal # where to store the wal locally
local:
path: /var/tempo/blocks

overrides:
defaults:
metrics_generator:
processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
generate_native_histograms: both

0 comments on commit 7789572

Please sign in to comment.