fix(anthropic): add response id attribute (#2543)
Co-authored-by: Nir Gazit <[email protected]>
dinmukhamedm and nirga authored Jan 23, 2025
1 parent 2cfce0a commit a8f7fb4
Showing 3 changed files with 25 additions and 3 deletions.
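
For context: the commit records the Anthropic response id (e.g. a "msg_..." or "compl_..." value) on each span under the incubating GenAI semantic-convention attribute. A minimal sketch, not part of the diff, showing what the imported constant resolves to (assuming the incubating opentelemetry-semantic-conventions package is installed):

# The imported constant is a plain string key, so consumers can also
# assert on the literal "gen_ai.response.id", as the tests below do.
from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import (
    GEN_AI_RESPONSE_ID,
)

assert GEN_AI_RESPONSE_ID == "gen_ai.response.id"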
File 1 of 3

@@ -28,6 +28,7 @@
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY, unwrap
 from opentelemetry.metrics import Counter, Histogram, Meter, get_meter
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import (
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
     LLMRequestTypeValues,
@@ -438,6 +439,7 @@ def _set_response_attributes(span, response):
     if not isinstance(response, dict):
         response = response.__dict__
     set_span_attribute(span, SpanAttributes.LLM_RESPONSE_MODEL, response.get("model"))
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, response.get("id"))
 
     if response.get("usage"):
         prompt_tokens = response.get("usage").input_tokens
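
The non-streaming path above normalizes the SDK response to a dict (via __dict__) and copies its "id" field onto the span. A hypothetical illustration of that flow; StubSpan, the sample id, and the empty-value guard inside set_span_attribute are stand-ins, not the package's exact code:

# Mirrors how _set_response_attributes picks up the "id" field.
class StubSpan:
    def __init__(self):
        self.attributes = {}

    def set_attribute(self, key, value):
        self.attributes[key] = value


def set_span_attribute(span, name, value):
    # Assumed guard: skip empty values rather than writing None/"" onto
    # the span, the usual pattern in these instrumentations.
    if value is not None and value != "":
        span.set_attribute(name, value)


response = {"model": "claude-3-5-sonnet-20241022", "id": "msg_01AbCdEfGh"}
span = StubSpan()
set_span_attribute(span, "gen_ai.response.id", response.get("id"))
assert span.attributes["gen_ai.response.id"] == "msg_01AbCdEfGh"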
File 2 of 3

@@ -11,6 +11,7 @@
     should_send_prompts,
 )
 from opentelemetry.metrics import Counter, Histogram
+from opentelemetry.semconv._incubating.attributes.gen_ai_attributes import GEN_AI_RESPONSE_ID
 from opentelemetry.semconv_ai import SpanAttributes
 from opentelemetry.trace.status import Status, StatusCode
 
@@ -22,6 +23,7 @@ def _process_response_item(item, complete_response):
     if item.type == "message_start":
         complete_response["model"] = item.message.model
         complete_response["usage"] = dict(item.message.usage)
+        complete_response["id"] = item.message.id
     elif item.type == "content_block_start":
         index = item.index
         if len(complete_response.get("events")) <= index:
@@ -131,7 +133,7 @@ def build_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     for item in response:
         try:
             yield item
@@ -143,7 +145,7 @@
         _process_response_item(item, complete_response)
 
     metric_attributes = shared_metrics_attributes(complete_response)
-
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
     if duration_histogram:
         duration = time.time() - start_time
         duration_histogram.record(
@@ -206,7 +208,7 @@ async def abuild_from_streaming_response(
     exception_counter: Counter = None,
     kwargs: dict = {},
 ):
-    complete_response = {"events": [], "model": "", "usage": {}}
+    complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
     async for item in response:
         try:
             yield item
@@ -217,6 +219,8 @@
             raise e
         _process_response_item(item, complete_response)
 
+    set_span_attribute(span, GEN_AI_RESPONSE_ID, complete_response.get("id"))
+
    metric_attributes = shared_metrics_attributes(complete_response)
 
     if duration_histogram:
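
In the streaming builders above, the id is only present on the "message_start" event, so complete_response seeds an empty "id" and _process_response_item fills it while the generator drains; the span attribute is written once after the loop. A simplified sketch of that accumulation, where the event shapes are stand-ins for the Anthropic SDK's stream events:

# The response id arrives exactly once, on "message_start".
from types import SimpleNamespace

def process_item(item, complete_response):
    if item.type == "message_start":
        complete_response["model"] = item.message.model
        complete_response["id"] = item.message.id

complete_response = {"events": [], "model": "", "usage": {}, "id": ""}
events = [
    SimpleNamespace(
        type="message_start",
        message=SimpleNamespace(model="claude-3-5-sonnet-20241022", id="msg_01XyZ"),
    ),
    SimpleNamespace(type="content_block_delta"),
]
for item in events:
    process_item(item, complete_response)

assert complete_response["id"] == "msg_01XyZ"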
File 3 of 3

@@ -97,6 +97,7 @@ def test_anthropic_completion(exporter, reader):
         == f"{HUMAN_PROMPT}\nHello world\n{AI_PROMPT}"
     )
     assert anthropic_span.attributes.get(f"{SpanAttributes.LLM_COMPLETIONS}.0.content")
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "compl_01EjfrPvPEsRDRUKD6VoBxtK"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -143,6 +144,7 @@ def test_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01TPXhkPo8jy6yQMrMhjpiAE"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -205,6 +207,7 @@ def test_anthropic_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01B37ySLPzYj8KY6uZmiPoxd"
 
 
 @pytest.mark.vcr
@@ -264,6 +267,7 @@ async def test_anthropic_async_multi_modal(exporter):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01DWnmUo9hWk4Fk7V7Ddfa2w"
 
 
 @pytest.mark.vcr
@@ -312,6 +316,7 @@ def test_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01MXWxhWoPSgrYhjTuMDM6F1"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -360,6 +365,7 @@ async def test_async_anthropic_message_create(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01UFDDjsFn5BPQnfNwmsMnAY"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -414,6 +420,7 @@ async def test_async_anthropic_message_streaming(exporter, reader):
         + anthropic_span.attributes[SpanAttributes.LLM_USAGE_PROMPT_TOKENS]
         == anthropic_span.attributes[SpanAttributes.LLM_USAGE_TOTAL_TOKENS]
     )
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_016o6A7zDmgjucf5mWv1rrPD"
 
     metrics_data = reader.get_metrics_data()
     resource_metrics = metrics_data.resource_metrics
@@ -554,6 +561,7 @@ def test_anthropic_tools(exporter, reader):
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.name"]) == response.content[2].name
     response_input = json.dumps(response.content[2].input)
     assert (anthropic_span.attributes["gen_ai.completion.0.tool_calls.1.arguments"] == response_input)
+    assert anthropic_span.attributes.get("gen_ai.response.id") == "msg_01RBkXFe9TmDNNWThMz2HmGt"
 
     # verify metrics
     metrics_data = reader.get_metrics_data()
@@ -621,6 +629,8 @@ def test_anthropic_prompt_caching(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01BQdyey3QZM7KW1nM7xXdjR"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_013eHbX6hmAnH7kp6NNp2qyL"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -707,6 +717,8 @@ async def test_anthropic_prompt_caching_async(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_018V6UftrxrM1Z5Rt8govzD1"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01MsbM1E6LPe7pz1NrkZEGev"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -796,6 +808,8 @@ def test_anthropic_prompt_caching_stream(exporter, reader):
         cache_creation_span.attributes["gen_ai.usage.cache_creation_input_tokens"]
         == cache_read_span.attributes["gen_ai.usage.cache_read_input_tokens"]
     )
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_015AgAJmgFQdYXgvvgbuKqPY"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01HSJ2wYvBf4DbFbwCaiGreG"
 
     # first check that cache_creation_span only wrote to cache, but not read from it,
     assert cache_creation_span.attributes["gen_ai.usage.cache_read_input_tokens"] == 0
@@ -877,6 +891,8 @@ async def test_anthropic_prompt_caching_async_stream(exporter, reader):
     assert system_message == cache_creation_span.attributes["gen_ai.prompt.0.content"]
     assert cache_read_span.attributes["gen_ai.prompt.0.role"] == "system"
     assert system_message == cache_read_span.attributes["gen_ai.prompt.0.content"]
+    assert cache_creation_span.attributes.get("gen_ai.response.id") == "msg_01VBrTFKAYvujMd593dtpRHF"
+    assert cache_read_span.attributes.get("gen_ai.response.id") == "msg_01BqqtrPfxepxW2xPuZz1m6h"
 
     assert cache_creation_span.attributes["gen_ai.prompt.1.role"] == "user"
     assert text == cache_creation_span.attributes["gen_ai.prompt.1.content"]
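
Downstream, the new attribute gives a stable join key between a captured span and the request Anthropic saw server-side. A hedged usage sketch against an in-memory exporter like the tests' exporter fixture; the span name here is an assumption for illustration, not confirmed by this diff:

# Match on whatever span name your instrumentation version emits.
spans = exporter.get_finished_spans()
chat_span = next(s for s in spans if s.name == "anthropic.chat")
print(chat_span.attributes.get("gen_ai.response.id"))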
