From a8f7fb4ee34056ca93b2aa23a3cc0c5d8a0a90b5 Mon Sep 17 00:00:00 2001
From: Dinmukhamed Mailibay <47117969+dinmukhamedm@users.noreply.github.com>
Date: Thu, 23 Jan 2025 12:33:33 +0500
Subject: [PATCH] fix(anthropic): add response id attribute (#2543)

Co-authored-by: Nir Gazit
---
 .../instrumentation/anthropic/__init__.py     |  2 ++
 .../instrumentation/anthropic/streaming.py    | 10 +++++++---
 .../tests/test_completion.py                  | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
index 1566399cc..52459d9c3 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
@@ -28,6 +28,7 @@
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import (
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
     LLMRequestTypeValues,
@@ -438,6 +439,7 @@ def _set_response_attributes(span, response):
     if not isinstance(response, dict):
         response = response.__dict__
     set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))
 
     if response.get("usage"):
         prompt_tokens = response.get("usage").input_tokens
diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
index 011d722b8..3c164bf9e 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
@@ -11,6 +11,7 @@
     should_send_prompts,
 )
 from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.trace.status import Status, StatusCode
 
@@ -22,6 +23,7 @@ def _process_response_item(item, complete_response):
     if item.type == "message_start":
         complete_response["model"] = item.message.model
         complete_response["usage"] = dict(item.message.usage)
+        complete_response["id"] = item.message.id
     elif item.type == "content_block_start":
         index = item.index
         if len(complete_response.get("events")) <= index:
@@ -131,7 +133,7 @@ def build_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     for item in response:
         try:
             yield item
@@ -143,7 +145,7 @@ def build_from_streaming_response(
         _process_response_item(item, complete_response)
 
     metric_attributes = shared_metrics_attributes(complete_response)
-
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
     if duration_histogram:
         duration = time.time() - start_time
         duration_histogram.record(
@@ -206,7 +208,7 @@ async def abuild_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     async for item in response:
         try:
             yield item
@@ -217,6 +219,8 @@ async def abuild_from_streaming_response(
             raise e
         _process_response_item(item, complete_response)
 
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+
     metric_attributes = shared_metrics_attributes(complete_response)
 
     if duration_histogram:
diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
index 6049295f7..bdcaa0291 100644
--- a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
+++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
@@ -97,6 +97,7 @@ def test_anthropic_completion(exporter, reader):
         == f"{HUMAN_PROMPT}\nHello world\n{AI_PROMPT}"
     )
     assert anthropic_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content")
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "compl_01EjfrPvPEsRDRUKD6VoBxtK"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -143,6 +144,7 @@ def test_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01TPXhkPo8jy6yQMrMhjpiAE"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -205,6 +207,7 @@ def test_anthropic_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01B37ySLPzYj8KY6uZmiPoxd"
 
 
 @pytest.mark.vcr
@@ -264,6 +267,7 @@ async def test_anthropic_async_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"
 
 
 @pytest.mark.vcr
@@ -312,6 +316,7 @@ def test_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01MXWxhWoPSgrYhjTuMDM6F1"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -360,6 +365,7 @@ async def test_async_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01UFDDjsFn5BPQnfNwmsMnAY"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -414,6 +420,7 @@ async def test_async_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
    )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_016o6A7zDmgjucf5mWv1rrPD"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -554,6 +561,7 @@ def test_anthropic_tools(exporter, reader):
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]) == response.content[2].name
     response_input = json.dumps(response.content[2].input)
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"] == response_input)
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01RBkXFe9TmDNNWThMz2HmGt"
 
     # verify metrics
     metrics_data = reader.get_metrics_data()
@@ -621,6 +629,8 @@ def test_anthropic_prompt_caching(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01BQdyey3QZM7KW1nM7xXdjR"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_013eHbX6hmAnH7kp6NNp2qyL"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -707,6 +717,8 @@ async def test_anthropic_prompt_caching_async(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_018V6UftrxrM1Z5Rt8govzD1"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01MsbM1E6LPe7pz1NrkZEGev"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -796,6 +808,8 @@ def test_anthropic_prompt_caching_stream(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_015AgAJmgFQdYXgvvgbuKqPY"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01HSJ2wYvBf4DbFbwCaiGreG"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -877,6 +891,8 @@ async def test_anthropic_prompt_caching_async_stream(exporter, reader):
     assert system_message == cache_creation_span.attributes["gen_ai.prompt.0.content"]
     assert cache_read_span.attributes["gen_ai.prompt.0.role"] == "system"
     assert system_message == cache_read_span.attributes["gen_ai.prompt.0.content"]
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01VBrTFKAYvujMd593dtpRHF"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01BqqtrPfxepxW2xPuZz1m6h"
 
     assert cache_creation_span.attributes["gen_ai.prompt.1.role"] == "user"
     assert text == cache_creation_span.attributes["gen_ai.prompt.1.content"]
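
Note (illustrative, not part of the patch): a minimal sketch of how the new
gen_ai.response.id attribute surfaces once this change is applied, assuming a
TracerProvider wired to an InMemorySpanExporter the way this package's test
fixtures are; the model name and client setup below are placeholders.

    import anthropic
    from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )

    # Route finished spans to an in-memory exporter so they can be inspected
    # directly (mirrors the `exporter` fixture used by the tests above).
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    AnthropicInstrumentor().instrument(tracer_provider=provider)

    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",  # placeholder model name
        max_tokens=16,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # With this patch, the span carries the provider-assigned response id
    # alongside the existing model and usage attributes.
    (span,) = exporter.get_finished_spans()
    assert span.attributes["gen_ai.response.id"] == response.id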