From a8f7fb4ee34056ca93b2aa23a3cc0c5d8a0a90b5 Mon Sep 17 00:00:00 2001
From: Dinmukhamed Mailibay <47117969+dinmukhamedm@users.noreply.github.com>
Date: Thu, 23 Jan 2025 12:33:33 +0500
Subject: [PATCH] fix(anthropic): add response id attribute (#2543)

Co-authored-by: Nir Gazit
---
 .../instrumentation/anthropic/__init__.py     |  2 ++
 .../instrumentation/anthropic/streaming.py    | 10 +++++++---
 .../tests/test_completion.py                  | 16 ++++++++++++++++
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
index 1566399cc..52459d9c3 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/__init__.py
@@ -28,6 +28,7 @@
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import (
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
     LLMRequestTypeValues,
@@ -438,6 +439,7 @@ def _set_response_attributes(span, response):
     if not isinstance(response, dict):
         response = response.__dict__
     set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))
 
     if response.get("usage"):
         prompt_tokens = response.get("usage").input_tokens
diff --git a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
index 011d722b8..3c164bf9e 100644
--- a/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
+++ b/packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/streaming.py
@@ -11,6 +11,7 @@
     should_send_prompts,
 )
 from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.trace.status import Status, StatusCode
 
@@ -22,6 +23,7 @@ def _process_response_item(item, complete_response):
     if item.type == "message_start":
         complete_response["model"] = item.message.model
         complete_response["usage"] = dict(item.message.usage)
+        complete_response["id"] = item.message.id
     elif item.type == "content_block_start":
         index = item.index
         if len(complete_response.get("events")) <= index:
@@ -131,7 +133,7 @@ def build_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     for item in response:
         try:
             yield item
@@ -143,7 +145,7 @@ def build_from_streaming_response(
         _process_response_item(item, complete_response)
 
     metric_attributes = shared_metrics_attributes(complete_response)
-
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
     if duration_histogram:
         duration = time.time() - start_time
         duration_histogram.record(
@@ -206,7 +208,7 @@ async def abuild_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     async for item in response:
         try:
             yield item
@@ -217,6 +219,8 @@ async def abuild_from_streaming_response(
             raise e
         _process_response_item(item, complete_response)
 
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+
     metric_attributes = shared_metrics_attributes(complete_response)
 
     if duration_histogram:
diff --git a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
index 6049295f7..bdcaa0291 100644
--- a/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
+++ b/packages/opentelemetry-instrumentation-anthropic/tests/test_completion.py
@@ -97,6 +97,7 @@ def test_anthropic_completion(exporter, reader):
         == f"{HUMAN_PROMPT}\nHello world\n{AI_PROMPT}"
     )
     assert anthropic_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content")
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "compl_01EjfrPvPEsRDRUKD6VoBxtK"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -143,6 +144,7 @@ def test_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01TPXhkPo8jy6yQMrMhjpiAE"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -205,6 +207,7 @@ def test_anthropic_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01B37ySLPzYj8KY6uZmiPoxd"
 
 
 @pytest.mark.vcr
@@ -264,6 +267,7 @@ async def test_anthropic_async_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"
 
 
 @pytest.mark.vcr
@@ -312,6 +316,7 @@ def test_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01MXWxhWoPSgrYhjTuMDM6F1"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -360,6 +365,7 @@ async def test_async_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01UFDDjsFn5BPQnfNwmsMnAY"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -414,6 +420,7 @@ async def test_async_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
    )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_016o6A7zDmgjucf5mWv1rrPD"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -554,6 +561,7 @@ def test_anthropic_tools(exporter, reader):
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]) == response.content[2].name
     response_input = json.dumps(response.content[2].input)
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"] == response_input)
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01RBkXFe9TmDNNWThMz2HmGt"
 
     # verify metrics
     metrics_data = reader.get_metrics_data()
@@ -621,6 +629,8 @@ def test_anthropic_prompt_caching(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01BQdyey3QZM7KW1nM7xXdjR"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_013eHbX6hmAnH7kp6NNp2qyL"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -707,6 +717,8 @@ async def test_anthropic_prompt_caching_async(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_018V6UftrxrM1Z5Rt8govzD1"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01MsbM1E6LPe7pz1NrkZEGev"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -796,6 +808,8 @@ def test_anthropic_prompt_caching_stream(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_015AgAJmgFQdYXgvvgbuKqPY"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01HSJ2wYvBf4DbFbwCaiGreG"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -877,6 +891,8 @@ async def test_anthropic_prompt_caching_async_stream(exporter, reader):
     assert system_message == cache_creation_span.attributes["gen_ai.prompt.0.content"]
     assert cache_read_span.attributes["gen_ai.prompt.0.role"] == "system"
     assert system_message == cache_read_span.attributes["gen_ai.prompt.0.content"]
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01VBrTFKAYvujMd593dtpRHF"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01BqqtrPfxepxW2xPuZz1m6h"
 
     assert cache_creation_span.attributes["gen_ai.prompt.1.role"] == "user"
     assert text == cache_creation_span.attributes["gen_ai.prompt.1.content"]
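
Note (illustrative, not part of the patch): a minimal sketch of how the new
gen_ai.response.id attribute surfaces once this change is applied, assuming a
TracerProvider wired to an InMemorySpanExporter the way this package's test
fixtures are; the model name and client setup below are placeholders.

    import anthropic
    from opentelemetry.instrumentation.anthropic import AnthropicInstrumentor
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import SimpleSpanProcessor
    from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
        InMemorySpanExporter,
    )

    # Route finished spans to an in-memory exporter so they can be inspected
    # directly (mirrors the `exporter` fixture used by the tests above).
    exporter = InMemorySpanExporter()
    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(exporter))
    AnthropicInstrumentor().instrument(tracer_provider=provider)

    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",  # placeholder model name
        max_tokens=16,
        messages=[{"role": "user", "content": "Hello"}],
    )

    # With this patch, the span carries the provider-assigned response id
    # alongside the existing model and usage attributes.
    (span,) = exporter.get_finished_spans()
    assert span.attributes["gen_ai.response.id"] == response.id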