diff --git a/package.json b/package.json index d24b3de..2a7c03e 100644 --- a/package.json +++ b/package.json @@ -81,10 +81,10 @@ "@bull-board/api": "^5.9.1", "@bull-board/express": "^5.9.1", "@opentelemetry/api": "^1.7.0", - "@opentelemetry/auto-instrumentations-node": "^0.44.0", "@opentelemetry/exporter-jaeger": "^1.19.0", "@opentelemetry/exporter-metrics-otlp-proto": "^0.50.0", "@opentelemetry/exporter-trace-otlp-proto": "^0.50.0", + "@opentelemetry/instrumentation-http": "^0.53.0", "@opentelemetry/resources": "^1.18.1", "@opentelemetry/sdk-metrics": "^1.18.1", "@opentelemetry/sdk-node": "^0.50.0", diff --git a/src/controllers/opentelemetry.ts b/src/controllers/opentelemetry.ts index e05793a..77b81e3 100644 --- a/src/controllers/opentelemetry.ts +++ b/src/controllers/opentelemetry.ts @@ -1,5 +1,6 @@ import { NodeSDK } from "@opentelemetry/sdk-node"; -import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node"; +import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-node"; +import { HttpInstrumentation } from "@opentelemetry/instrumentation-http"; import { Resource } from "@opentelemetry/resources"; import Debug from "debug"; import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-proto'; @@ -10,17 +11,28 @@ const jaegerHost = process.env.JAEGER_HOST ?? 'jaeger-otel-collector.sri'; const jaegerPort = process.env.JAEGER_PORT ?? 4318; const jaegerResName = process.env.JAEGER_RES_NAME ?? '/v1/traces'; +const traceExporter = new OTLPTraceExporter({ + url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` +}); +const spanProcessor = new BatchSpanProcessor(traceExporter); + debug("Initializing Opentelemetry instrumentation..."); const sdk = new NodeSDK({ // metrics, if needed, shall be exported on a different endpoint - traceExporter: new OTLPTraceExporter({ - url: `http://${jaegerHost}:${jaegerPort}${jaegerResName}` - }), - instrumentations: [getNodeAutoInstrumentations()], + // trace a subset of instrumentations to avoid performance overhead + instrumentations: [new HttpInstrumentation()], resource: new Resource({ [ATTR_SERVICE_NAME]: "biothings-explorer", }), + // @ts-ignore because MetinSeylan/Nestjs-OpenTelemetry#63 + spanProcessors: [spanProcessor], }); debug(`OTel URL http://${jaegerHost}:${jaegerPort}${jaegerResName}`); sdk.start(); debug("Opentelemetry instrumentation initialized."); + +export async function flushRemainingSpans(): Promise { + // avoid losing any spans in the buffer when taskHandler exits + debug("Flushing remaining spans..."); + await spanProcessor.forceFlush(); +} diff --git a/src/controllers/threading/taskHandler.ts b/src/controllers/threading/taskHandler.ts index 78b2629..b22ae41 100644 --- a/src/controllers/threading/taskHandler.ts +++ b/src/controllers/threading/taskHandler.ts @@ -14,9 +14,10 @@ import { tasks } from "../../routes/index"; import { getQueryQueue } from "../async/asyncquery_queue"; import * as Sentry from "@sentry/node"; import { ProfilingIntegration } from "@sentry/profiling-node"; -import { Span, trace, context, propagation, Context } from "@opentelemetry/api"; +import { Span, trace, context, propagation, Context, Tracer } from "@opentelemetry/api"; import { Telemetry } from "@biothings-explorer/utils"; import { InnerTaskData } from "@biothings-explorer/types"; +import { flushRemainingSpans } from "../opentelemetry"; // use SENTRY_DSN environment variable try { @@ -91,14 +92,13 @@ async function runTask({ }); let activeContext: Context = propagation.extract(context.active(), { traceparent, tracestate }); - debug(`OTel task ${traceparent} and ${tracestate}`); - let tracer = trace.getTracer("biothings-explorer-thread") + debug(`OTel task context: ${traceparent} and ${tracestate}`); + let tracer: Tracer = trace.getTracer("biothings-explorer-thread") span = tracer.startSpan( routeNames[route], {kind: 1}, // specifies internal span activeContext, ); - debug(`OTel span created ${span}`); span.setAttribute("bte.requestData", JSON.stringify(req.data.queryGraph)); Telemetry.setOtelSpan(span); @@ -114,6 +114,7 @@ async function runTask({ transaction.finish(); span.end(); Telemetry.removeOtelSpan(); + await flushRemainingSpans(); } catch (error) { debug("Sentry/OpenTelemetry transaction finish error. This does not affect execution."); debug(error); diff --git a/src/controllers/threading/threadHandler.ts b/src/controllers/threading/threadHandler.ts index e9a595c..c23e69c 100644 --- a/src/controllers/threading/threadHandler.ts +++ b/src/controllers/threading/threadHandler.ts @@ -1,6 +1,6 @@ import { MessageChannel, threadId } from "worker_threads"; import Debug from "debug"; -import { context, propagation } from "@opentelemetry/api"; +import { context, propagation, trace, Context, Span } from "@opentelemetry/api"; const debug = Debug("bte:biothings-explorer-trapi:threading"); import path from "path"; import { redisClient } from "@biothings-explorer/utils"; @@ -221,6 +221,11 @@ export async function runTask(req: Request, res: Response, route: string, useBul propagation.inject(context.active(), otelData); const { traceparent, tracestate } = otelData; + // add req dest to root span name as HTTP instrumentation doesn't do it automatically + const activeContext: Context = context.active(); + const rootSpan: Span = trace.getSpan(activeContext); + if (rootSpan != undefined) rootSpan.updateName(`${req.method} ${req.originalUrl}`); + const taskInfo: TaskInfo = { data: { route,