From 5ff89eff0fce934da20f6d46ba5623726282d2b0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 10:23:04 -0800 Subject: [PATCH 01/44] feat: openai instrumentator --- .github/workflows/python-CI.yaml | 40 ++ .gitignore | 1 + .../LICENSE | 201 ++++++++++ .../README.rst | 14 + .../examples/chat_completion_async_stream.py | 40 ++ .../integration_tests/completions.py | 209 ++++++++++ .../integration_tests/completions_async.py | 207 ++++++++++ .../integration_tests/embeddings.py | 142 +++++++ .../functions_and_tool_calls.py | 195 ++++++++++ .../pyproject.toml | 52 +++ .../instrumentation/openai/__init__.py | 57 +++ .../openai/_extra_attributes_from_request.py | 143 +++++++ .../openai/_extra_attributes_from_response.py | 204 ++++++++++ .../instrumentation/openai/_request.py | 360 ++++++++++++++++++ .../openai/_response_accumulator.py | 248 ++++++++++++ .../instrumentation/openai/_stream.py | 149 ++++++++ .../instrumentation/openai/_utils.py | 134 +++++++ .../instrumentation/openai/_with_span.py | 82 ++++ .../instrumentation/openai/package.py | 2 + .../instrumentation/openai/py.typed | 0 .../instrumentation/openai/version.py | 1 + .../openai/test_response_accumulator.py | 154 ++++++++ python/mypy.ini | 5 + .../pyproject.toml | 2 +- .../openinference/semconv/trace/__init__.py | 144 ++++--- .../src/openinference/semconv/version.py | 2 +- python/ruff.toml | 12 + python/tox.ini | 29 ++ 28 files changed, 2775 insertions(+), 54 deletions(-) create mode 100644 .github/workflows/python-CI.yaml create mode 100644 python/instrumentation/openinference-instrumentation-openai/LICENSE create mode 100644 python/instrumentation/openinference-instrumentation-openai/README.rst create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/pyproject.toml create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py create mode 100644 
python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py create mode 100644 python/mypy.ini create mode 100644 python/ruff.toml create mode 100644 python/tox.ini diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml new file mode 100644 index 000000000..935f74c45 --- /dev/null +++ b/.github/workflows/python-CI.yaml @@ -0,0 +1,40 @@ +name: Python CI + +on: + push: + branches: [main] + pull_request: + paths: + - "python/**" + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + group: test-python-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + ci: + name: CI Python + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Install tox + run: pip install tox==4.11.4 + - name: Check style + run: | + tox run -e ruff + git diff --exit-code + - name: Run mypy + run: | + tox run -e mypy-openai + - name: Run tests + run: | + tox run -e test-openai + + diff --git a/.gitignore b/.gitignore index 99baffced..82c639258 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Generated dirs .vscode .idea +.tox node_modules dist sdist diff --git a/python/instrumentation/openinference-instrumentation-openai/LICENSE b/python/instrumentation/openinference-instrumentation-openai/LICENSE new file mode 100644 index 000000000..0223315cc --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright The OpenTelemetry Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/python/instrumentation/openinference-instrumentation-openai/README.rst b/python/instrumentation/openinference-instrumentation-openai/README.rst new file mode 100644 index 000000000..35f92883b --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/README.rst @@ -0,0 +1,14 @@ +OpenInference OpenAI Instrumentation +============================================= + +|pypi| + +.. 
|pypi| image:: https://badge.fury.io/py/openinference-instrumentation-openai.svg + :target: https://pypi.org/project/openinference-instrumentation-openai/ + +Installation +------------ + +:: + + pip install openinference-instrumentation-openai diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py new file mode 100644 index 000000000..4c81584bc --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py @@ -0,0 +1,40 @@ +""" +Phoenix collector should be running in the background. +""" +import asyncio + +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +async def chat_completions(**kwargs): + client = openai.AsyncOpenAI() + async for chunk in await client.chat.completions.create(**kwargs): + if content := chunk.choices[0].delta.content: + print(content, end="") + print() + + +if __name__ == "__main__": + asyncio.run( + chat_completions( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + stream=True, + ), + ) diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py new file mode 100644 index 000000000..8830ff8b3 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -0,0 +1,209 @@ +""" +Phoenix collector should be running in the background. +""" +import contextvars +import inspect +import logging +import threading +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. 
+# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. +if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.OpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +HAIKU = "Write a haiku." +HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a resume." +RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +CHAT_KWARGS = { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": HAIKU}], + "max_tokens": 20, + "temperature": 2, + **( + { + "logprobs": True, + "top_logprobs": 5, + } + if _OPENAI_VERSION >= (1, 5, 0) + else {} + ), +} +COMP_KWARGS = { + "model": "gpt-3.5-turbo-instruct", + "prompt": HAIKU, + "max_tokens": 20, + "temperature": 2, + "logprobs": 5, +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 +_LOCK = threading.Lock() + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + with _LOCK: + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response: + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response: + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.chat.completions.with_raw_response.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response.parse(): + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.completions.with_raw_response.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response.parse(): + sleep(0.005) + except Exception: + 
logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def tasks(n, task, **kwargs): + for i in range(n): # i = 0 results in intentional BadRequestError + ctx = contextvars.copy_context() + yield threading.Thread( + target=ctx.run, + args=(task,), + kwargs={"n": i, **kwargs}, + ) + + +if __name__ == "__main__": + threads = list( + chain( + tasks(N, completions), + tasks(N, completions_with_raw_response), + tasks(N, completions, stream=True), + tasks(N, completions_with_raw_response, stream=True), + tasks(N, completions, prompt=[HAIKU, RESUME]), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), + tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions, prompt=HAIKU_TOKENS), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), + tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), + tasks( + N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True + ), + tasks(N, chat_completions), + tasks(N, chat_completions_with_raw_response), + tasks(N, chat_completions, stream=True), + tasks(N, chat_completions_with_raw_response, stream=True), + ) + ) + [thread.start() for thread in threads] + [thread.join() for thread in threads] + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py new file mode 100644 index 000000000..6263956bf --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -0,0 +1,207 @@ +""" +Phoenix collector should be running in the background. 
+""" +import asyncio +import inspect +import logging +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. +if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +HAIKU = "Write a haiku." +HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a resume." 
+RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +CHAT_KWARGS = { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": HAIKU}], + "max_tokens": 20, + "temperature": 2, + **( + { + "logprobs": True, + "top_logprobs": 5, + } + if _OPENAI_VERSION >= (1, 5, 0) + else {} + ), +} +COMP_KWARGS = { + "model": "gpt-3.5-turbo-instruct", + "prompt": HAIKU, + "max_tokens": 20, + "temperature": 2, + "logprobs": 5, +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +async def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.with_raw_response.create( + **{**CHAT_KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.completions.with_raw_response.create( + **{**COMP_KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def tasks(n, task, **kwargs): + return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError + + +async def main(*tasks): + await asyncio.gather(*chain.from_iterable(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + tasks(N, completions), + tasks(N, completions_with_raw_response), + tasks(N, completions, stream=True), + tasks(N, completions_with_raw_response, stream=True), + tasks(N, completions, prompt=[HAIKU, RESUME]), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), + tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions, prompt=HAIKU_TOKENS), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), + 
tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), + tasks( + N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True + ), + tasks(N, chat_completions), + tasks(N, chat_completions_with_raw_response), + tasks(N, chat_completions, stream=True), + tasks(N, chat_completions_with_raw_response, stream=True), + ) + ) + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py new file mode 100644 index 000000000..ee91e2898 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -0,0 +1,142 @@ +""" +Phoenix collector should be running in the background. +""" +import asyncio +import inspect +import logging +from contextlib import suppress +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. 
+if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +KWARGS = { + "model": "text-embedding-ada-002", +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +async def embeddings(**kwargs): + try: + with suppress(openai.BadRequestError): + await CLIENT.embeddings.create(**{**KWARGS, **kwargs}) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def embeddings_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def main(*tasks): + await asyncio.gather(*chain(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + embeddings(input="hello world"), + embeddings(input="hello world", encoding_format="float"), + embeddings(input="hello world", encoding_format="base64"), + embeddings(input=["hello", "world"]), + embeddings(input=["hello", "world"], encoding_format="float"), + embeddings(input=["hello", "world"], encoding_format="base64"), + embeddings(input=[15339, 1917]), + embeddings(input=[15339, 1917], encoding_format="float"), + embeddings(input=[15339, 1917], encoding_format="base64"), + embeddings(input=[[15339], [14957]]), + embeddings(input=[[15339], [14957]], encoding_format="float"), + embeddings(input=[[15339], [14957]], encoding_format="base64"), + embeddings_with_raw_response(input="hello world"), + embeddings_with_raw_response(input="hello world", encoding_format="float"), + embeddings_with_raw_response(input="hello world", encoding_format="base64"), + embeddings_with_raw_response(input=["hello", "world"]), + embeddings_with_raw_response(input=["hello", "world"], encoding_format="float"), + embeddings_with_raw_response(input=["hello", "world"], encoding_format="base64"), + embeddings_with_raw_response(input=[15339, 1917]), + embeddings_with_raw_response(input=[15339, 1917], encoding_format="float"), + embeddings_with_raw_response(input=[15339, 1917], encoding_format="base64"), + embeddings_with_raw_response(input=[[15339], [14957]]), + embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="float"), + embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="base64"), + ) + ) + spans = 
in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py new file mode 100644 index 000000000..a281938e5 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py @@ -0,0 +1,195 @@ +""" +Phoenix collector should be running in the background. + +tools requires openai>=1.1.0 +""" +import asyncio +import inspect +import logging +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. 
+if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +KWARGS = { + "model": "gpt-4", + "messages": [ + { + "role": "user", + "content": "What's the current time and weather in San Francisco, CA?", + } + ], + "max_tokens": 20, + "temperature": 0, +} +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA", + }, + }, + "required": ["location"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_current_time", + "description": "Get the current time in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA", + }, + }, + "required": ["location"], + }, + }, + }, +] +FUNCTIONS = [tool["function"] for tool in TOOLS] + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}") + + +async def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.create(**{**KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count() + + +async def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.with_raw_response.create( + **{**KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count() + + +def tasks(n, task, **kwargs): + return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError + + +async def main(*tasks): + await asyncio.gather(*chain.from_iterable(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + tasks(N, chat_completions, functions=FUNCTIONS), + tasks(N, chat_completions, functions=FUNCTIONS, stream=True), + tasks(N, chat_completions_with_raw_response, functions=FUNCTIONS), + tasks(N, 
chat_completions_with_raw_response, functions=FUNCTIONS, stream=True), + *( + [ + tasks(N, chat_completions, tools=TOOLS), + tasks(N, chat_completions, tools=TOOLS, stream=True), + tasks(N, chat_completions_with_raw_response, tools=TOOLS), + tasks(N, chat_completions_with_raw_response, tools=TOOLS, stream=True), + ] + if _OPENAI_VERSION >= (1, 1, 0) + else () + ), + ) + ) + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml new file mode 100644 index 000000000..a22f88ece --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "openinference-instrumentation-openai" +dynamic = ["version"] +description = "OpenInference OpenAI instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.8, <3.12" +authors = [ + { name = "OpenInference Authors", email = "oss@arize.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "opentelemetry-api", + "opentelemetry-instrumentation", + "opentelemetry-semantic-conventions", + "openinference-semantic-conventions", + "wrapt", +] + +[project.optional-dependencies] +test = [ + "openai == 1.0.0", +] + +[project.urls] +Homepage = "https://github.com/Arize-ai/openinference/tree/main/python/instrumentation/openinference-instrumentation-openai" + +[tool.hatch.version] +path = "src/openinference/instrumentation/openai/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/openinference"] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py new file mode 100644 index 000000000..3217d6457 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -0,0 +1,57 @@ +import logging +from typing import Any, Collection + +from openinference.instrumentation.openai._request import ( + _AsyncRequest, + _Request, +) +from openinference.instrumentation.openai.package import _instruments +from openinference.instrumentation.openai.version import __version__ +from opentelemetry import trace as trace_api +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor # type: ignore +from wrapt import wrap_function_wrapper + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_MODULE = "openai" + + +class 
OpenAIInstrumentor(BaseInstrumentor): # type: ignore + """An instrumentor for openai.OpenAI.request and + openai.AsyncOpenAI.request + """ + + __slots__ = ( + "_original_request", + "_original_async_request", + ) + + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs: Any) -> None: + if (include_extra_attributes := kwargs.get("include_extra_attributes")) is None: + include_extra_attributes = True + if not (tracer_provider := kwargs.get("tracer_provider")): + tracer_provider = trace_api.get_tracer_provider() + tracer = trace_api.get_tracer(__name__, __version__, tracer_provider) + wrap_function_wrapper( + module=_MODULE, + name="OpenAI.request", + wrapper=_Request( + tracer=tracer, + include_extra_attributes=include_extra_attributes, + ), + ) + wrap_function_wrapper( + module=_MODULE, + name="AsyncOpenAI.request", + wrapper=_AsyncRequest( + tracer=tracer, + include_extra_attributes=include_extra_attributes, + ), + ) + + def _uninstrument(self, **kwargs: Any) -> None: + pass diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py new file mode 100644 index 000000000..47f444ee8 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py @@ -0,0 +1,143 @@ +import json +import logging +from enum import Enum +from typing import ( + Any, + Iterable, + Iterator, + List, + Mapping, + Tuple, +) + +from openinference.instrumentation.openai._utils import _OPENAI_VERSION +from openinference.semconv.trace import MessageAttributes, SpanAttributes, ToolCallAttributes +from opentelemetry.util.types import AttributeValue + +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ("_get_extra_attributes_from_request",) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +def _get_extra_attributes_from_request( + cast_to: type, + request_options: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + if not isinstance(request_options, Mapping): + return + if cast_to is ChatCompletion: + yield from _get_attributes_from_chat_completion_create_param(request_options) + elif cast_to is CreateEmbeddingResponse: + yield from _get_attributes_from_embedding_create_param(request_options) + elif cast_to is Completion: + yield from _get_attributes_from_completion_create_param(request_options) + else: + try: + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_options) + except Exception: + logger.exception("Failed to serialize request options") + + +def _get_attributes_from_chat_completion_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.completion_create_params.CompletionCreateParamsBase + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/completion_create_params.py#L28 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("messages", None) + invocation_params.pop("functions", None) + invocation_params.pop("tools", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) + if (input_messages := 
params.get("messages")) and isinstance(input_messages, Iterable): + for index, input_message in enumerate(input_messages): + for key, value in _get_attributes_from_message_param(input_message): + yield f"{SpanAttributes.LLM_INPUT_MESSAGES}.{index}.{key}", value + + +def _get_attributes_from_message_param( + message: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.ChatCompletionMessageParam + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_param.py#L15 # noqa: E501 + if not hasattr(message, "get"): + return + if role := message.get("role"): + yield ( + MessageAttributes.MESSAGE_ROLE, + role.value if isinstance(role, Enum) else role, + ) + if content := message.get("content"): + if isinstance(content, str): + yield MessageAttributes.MESSAGE_CONTENT, content + elif isinstance(content, List): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_user_message_param.py#L14 # noqa: E501 + try: + json_string = json.dumps(content) + except Exception: + logger.exception("Failed to serialize message content") + else: + yield MessageAttributes.MESSAGE_CONTENT, json_string + if name := message.get("name"): + yield MessageAttributes.MESSAGE_NAME, name + if (function_call := message.get("function_call")) and hasattr(function_call, "get"): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_assistant_message_param.py#L13 # noqa: E501 + if function_name := function_call.get("name"): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_NAME, function_name + if function_arguments := function_call.get("arguments"): + yield ( + MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, + function_arguments, + ) + if ( + _OPENAI_VERSION >= (1, 1, 0) + and (tool_calls := message.get("tool_calls"),) + and isinstance(tool_calls, Iterable) + ): + for index, tool_call in enumerate(tool_calls): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call_param.py#L23 # noqa: E501 + if not hasattr(tool_call, "get"): + continue + if (function := tool_call.get("function")) and hasattr(function, "get"): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call_param.py#L10 # noqa: E501 + if name := function.get("name"): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." + f"{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}", + name, + ) + if arguments := function.get("arguments"): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." 
+ f"{ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}", + arguments, + ) + + +def _get_attributes_from_completion_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.completion_create_params.CompletionCreateParamsBase + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L11 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("prompt", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) + + +def _get_attributes_from_embedding_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.EmbeddingCreateParams + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L11 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("input", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py new file mode 100644 index 000000000..05d0d0a1d --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -0,0 +1,204 @@ +import base64 +import logging +from functools import singledispatch +from importlib import import_module +from types import MappingProxyType, ModuleType +from typing import ( + Any, + Iterable, + Iterator, + Mapping, + Optional, + Sequence, + Tuple, +) + +from openinference.instrumentation.openai._utils import ( + _OPENAI_VERSION, + _get_texts, +) +from openinference.semconv.trace import ( + EmbeddingAttributes, + MessageAttributes, + SpanAttributes, + ToolCallAttributes, +) +from opentelemetry.util.types import AttributeValue + +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ("_get_extra_attributes_from_response",) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +try: + _NUMPY: Optional[ModuleType] = import_module("numpy") +except ImportError: + _NUMPY = None + + +@singledispatch +def _get_extra_attributes_from_response( + response: Any, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # this is a fallback (for singledispatch) + yield from () + + +@_get_extra_attributes_from_response.register +def _( + completion: ChatCompletion, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 + if model := getattr(completion, "model", None): + yield SpanAttributes.LLM_MODEL_NAME, model + if usage := getattr(completion, "usage", None): + yield from _get_attributes_from_completion_usage(usage) + if (choices := getattr(completion, "choices", None)) and isinstance(choices, Iterable): + for choice in choices: + if (index := getattr(choice, "index", None)) is None: + continue + if message := getattr(choice, 
"message", None): + for key, value in _get_attributes_from_chat_completion_message(message): + yield f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{index}.{key}", value + + +@_get_extra_attributes_from_response.register +def _( + completion: Completion, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 + if model := getattr(completion, "model", None): + yield SpanAttributes.LLM_MODEL_NAME, model + if usage := getattr(completion, "usage", None): + yield from _get_attributes_from_completion_usage(usage) + if model_prompt := request_options.get("prompt"): + # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 + # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model + # names are not reliable (across OpenAI and Azure). + if prompts := list(_get_texts(model_prompt, model)): + yield SpanAttributes.LLM_PROMPTS, prompts + + +@_get_extra_attributes_from_response.register +def _( + response: CreateEmbeddingResponse, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 + if usage := getattr(response, "usage", None): + yield from _get_attributes_from_embedding_usage(usage) + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L23 # noqa: E501 + if model := getattr(response, "model"): + yield f"{EmbeddingAttributes.EMBEDDING_MODEL_NAME}", model + if (data := getattr(response, "data", None)) and isinstance(data, Iterable): + for embedding in data: + if (index := getattr(embedding, "index", None)) is None: + continue + for key, value in _get_attributes_from_embedding(embedding): + yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value + embedding_input = request_options.get("input") + for index, text in enumerate(_get_texts(embedding_input, model)): + # input: Required[Union[str, List[str], List[int], List[List[int]]]] + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 + # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model + # names are not reliable (across OpenAI and Azure). + yield ( + ( + f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}." 
+ f"{EmbeddingAttributes.EMBEDDING_TEXT}" + ), + text, + ) + + +def _get_attributes_from_embedding( + embedding: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.Embedding + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding.py#L11 # noqa: E501 + if not (_vector := getattr(embedding, "embedding", None)): + return + if isinstance(_vector, Sequence) and len(_vector) and isinstance(_vector[0], float): + vector = list(_vector) + yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector + elif isinstance(_vector, str) and _vector and _NUMPY: + try: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501 + vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist() + except Exception: + logger.exception("Failed to decode embedding") + pass + else: + yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector + + +def _get_attributes_from_chat_completion_message( + message: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.ChatCompletionMessage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message.py#L25 # noqa: E501 + if role := getattr(message, "role", None): + yield MessageAttributes.MESSAGE_ROLE, role + if content := getattr(message, "content", None): + yield MessageAttributes.MESSAGE_CONTENT, content + if function_call := getattr(message, "function_call", None): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message.py#L12 # noqa: E501 + if name := getattr(function_call, "name", None): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_NAME, name + if arguments := getattr(function_call, "arguments", None): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, arguments + if ( + _OPENAI_VERSION >= (1, 1, 0) + and (tool_calls := getattr(message, "tool_calls", None)) + and isinstance(tool_calls, Iterable) + ): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call.py#L23 # noqa: E501 + for index, tool_call in enumerate(tool_calls): + if function := getattr(tool_call, "function", None): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call.py#L10 # noqa: E501 + if name := getattr(function, "name", None): + yield ( + ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." + f"{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}" + ), + name, + ) + if arguments := getattr(function, "arguments", None): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." 
+ f"{ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}", + arguments, + ) + + +def _get_attributes_from_completion_usage( + usage: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.CompletionUsage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_usage.py#L8 # noqa: E501 + if (total_tokens := getattr(usage, "total_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_TOTAL, total_tokens + if (prompt_tokens := getattr(usage, "prompt_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_PROMPT, prompt_tokens + if (completion_tokens := getattr(usage, "completion_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _get_attributes_from_embedding_usage( + usage: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.create_embedding_response.Usage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L12 # noqa: E501 + if (total_tokens := getattr(usage, "total_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_TOTAL, total_tokens + if (prompt_tokens := getattr(usage, "prompt_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_PROMPT, prompt_tokens diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py new file mode 100644 index 000000000..0f1d0790c --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -0,0 +1,360 @@ +import logging +from abc import ABC +from contextlib import contextmanager +from types import MappingProxyType +from typing import ( + Any, + Awaitable, + Callable, + Dict, + Hashable, + Iterator, + Mapping, + Optional, + Tuple, +) + +from openinference.instrumentation.openai._extra_attributes_from_request import ( + _get_extra_attributes_from_request, +) +from openinference.instrumentation.openai._extra_attributes_from_response import ( + _get_extra_attributes_from_response, +) +from openinference.instrumentation.openai._response_accumulator import ( + _ChatCompletionAccumulator, + _CompletionAccumulator, +) +from openinference.instrumentation.openai._stream import ( + _ResponseAccumulator, + _Stream, +) +from openinference.instrumentation.openai._utils import ( + _as_input_attributes, + _as_output_attributes, + _finish_tracing, + _io_value_and_type, +) +from openinference.instrumentation.openai._with_span import _WithSpan +from openinference.semconv.trace import SpanAttributes +from opentelemetry import context as context_api +from opentelemetry import trace as trace_api +from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.trace import INVALID_SPAN +from opentelemetry.util.types import AttributeValue + +from openai import AsyncStream, Stream +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ( + "_Request", + "_AsyncRequest", +) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_LLM_SPAN_KIND = "LLM" +_EMBEDDING_SPAN_KIND = "EMBEDDING" + + +class _WithTracer(ABC): + __slots__ = ( + "_tracer", + "_include_extra_attributes", + ) + + def __init__( + self, + tracer: 
trace_api.Tracer, + include_extra_attributes: bool = True, + ) -> None: + self._tracer = tracer + self._include_extra_attributes = include_extra_attributes + + @contextmanager + def _start_as_current_span( + self, + span_name: str, + cast_to: type, + request_options: Mapping[str, Any], + ) -> Iterator[_WithSpan]: + span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND + attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} + try: + input_attributes = _as_input_attributes(_io_value_and_type(request_options)) + except Exception: + logger.exception( + f"Failed to get input attributes from request options of " + f"type {type(request_options)}" + ) + else: + attributes.update(input_attributes) + # Secondary attributes should be added after input and output to ensure + # that input and output are not dropped if there are too many attributes. + try: + extra_attributes = ( + dict(_get_extra_attributes_from_request(cast_to, request_options)) + if self._include_extra_attributes + else {} + ) + except Exception: + logger.exception( + f"Failed to get extra attributes from request options of " + f"type {type(request_options)}" + ) + extra_attributes = {} + try: + span = self._tracer.start_span(span_name, attributes=attributes) + except Exception: + logger.exception("Failed to start span") + span = INVALID_SPAN + with trace_api.use_span( + span, + end_on_exit=False, + record_exception=False, + set_status_on_exception=False, + ) as span: + yield _WithSpan(span, extra_attributes) + + +class _Request(_WithTracer): + def __call__( + self, + wrapped: Callable[..., Any], + instance: Any, + args: Tuple[type, Any], + kwargs: Mapping[str, Any], + ) -> Any: + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return wrapped(*args, **kwargs) + try: + cast_to, request_options = _parse_request_args(args) + # E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" + span_name: str = cast_to.__name__.split(".")[-1] + except Exception: + logger.exception("Failed to parse request args") + return wrapped(*args, **kwargs) + with self._start_as_current_span( + span_name=span_name, + cast_to=cast_to, + request_options=request_options, + ) as with_span: + try: + response = wrapped(*args, **kwargs) + except Exception as exception: + status_code = trace_api.StatusCode.ERROR + with_span.record_exception(exception) + with_span.finish_tracing(status_code=status_code) + raise + try: + response = _finalize_response( + response=response, + with_span=with_span, + cast_to=cast_to, + request_options=request_options, + include_extra_attributes=self._include_extra_attributes, + ) + except Exception: + logger.exception(f"Failed to finalize response of type {type(response)}") + with_span.finish_tracing(status_code=None) + return response + + +class _AsyncRequest(_WithTracer): + async def __call__( + self, + wrapped: Callable[..., Awaitable[Any]], + instance: Any, + args: Tuple[type, Any], + kwargs: Mapping[str, Any], + ) -> Any: + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return await wrapped(*args, **kwargs) + try: + cast_to, request_options = _parse_request_args(args) + # E.g. 
cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
+                span_name: str = cast_to.__name__.split(".")[-1]
+        except Exception:
+            logger.exception("Failed to parse request args")
+            return await wrapped(*args, **kwargs)
+        with self._start_as_current_span(
+            span_name=span_name,
+            cast_to=cast_to,
+            request_options=request_options,
+        ) as with_span:
+            try:
+                response = await wrapped(*args, **kwargs)
+            except Exception as exception:
+                status_code = trace_api.StatusCode.ERROR
+                with_span.record_exception(exception)
+                with_span.finish_tracing(status_code=status_code)
+                raise
+            try:
+                response = _finalize_response(
+                    response=response,
+                    with_span=with_span,
+                    cast_to=cast_to,
+                    request_options=request_options,
+                    include_extra_attributes=self._include_extra_attributes,
+                )
+            except Exception:
+                logger.exception(f"Failed to finalize response of type {type(response)}")
+                with_span.finish_tracing(status_code=None)
+        return response
+
+
+def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any]]:
+    # We don't use `signature(request).bind()` because `request` could have been monkey-patched
+    # (incorrectly) by others and the signature at runtime may not match the original.
+    # The targeted signature of `request` is here:
+    # https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L846-L847 # noqa: E501
+    cast_to: type = args[0]
+    options: Mapping[str, Any] = (
+        json_data
+        # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_models.py#L427 # noqa: E501
+        if hasattr(args[1], "json_data") and isinstance(json_data := args[1].json_data, Mapping)
+        else {}
+    )
+    # FIXME: Because request options is just a Mapping, it can contain any value as long as it
+    # serializes correctly in an HTTP request body. For example, Enum values may be present if a
+    # third-party library puts them there. Enums can turn into their intended string values via
+    # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we
+    # try to extract attributes. However, this round-trip seems expensive, so we opted to treat
+    # only the Enums that we know about: e.g. the message role can sometimes be an Enum, so we
+    # will convert it only when it's encountered.
+    # try:
+    #     options = json.loads(json.dumps(options))
+    # except Exception:
+    #     pass
+    return cast_to, options
+
+
+def _finalize_response(
+    response: Any,
+    with_span: _WithSpan,
+    cast_to: type,
+    request_options: Mapping[str, Any],
+    include_extra_attributes: bool = True,
+) -> Any:
+    """Monkey-patch the response object to trace the stream, or finish tracing if the response is
+    not a stream.
+    """
+    if hasattr(response, "parse") and callable(response.parse):
+        # `.request()` may be called under `.with_raw_response` and it's necessary to call
+        # `.parse()` to get back the usual response types.
+        # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501
+        try:
+            response.parse()
+        except Exception:
+            logger.exception(f"Failed to parse response of type {type(response)}")
+    if (
+        isinstance(response, (Stream, AsyncStream))
+        or hasattr(
+            # FIXME: Ideally we should not rely on a private attribute (but it may be impossible).
+            # The assumption here is that calling `.parse()` stores the stream object in `._parsed`
+            # and calling `.parse()` again will not overwrite the monkey-patched version.
+ # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 + response, + "_parsed", + ) + # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. + and isinstance(response._parsed, (Stream, AsyncStream)) + ): + # For streaming, we need (optional) accumulators to process each chunk iteration. + response_accumulator = _ResponseAccumulators.find(cast_to) + if hasattr(response, "_parsed") and isinstance( + parsed := response._parsed, (Stream, AsyncStream) + ): + # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. + response._parsed = _Stream( + stream=parsed, + with_span=with_span, + response_accumulator=response_accumulator, + include_extra_attributes=include_extra_attributes, + ) + return response + return _Stream( + stream=response, + with_span=with_span, + response_accumulator=response_accumulator, + include_extra_attributes=include_extra_attributes, + ) + _finish_tracing( + status_code=trace_api.StatusCode.OK, + with_span=with_span, + has_attributes=_ResponseAttributes( + response=response, + request_options=request_options, + include_extra_attributes=include_extra_attributes, + ), + ) + return response + + +class _ResponseAttributes: + __slots__ = ( + "_request_options", + "_response", + "_include_extra_attributes", + ) + + def __init__( + self, + response: Any, + request_options: Mapping[str, Any], + include_extra_attributes: bool = True, + ) -> None: + if hasattr(response, "parse") and callable(response.parse): + # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 + try: + response = response.parse() + except Exception: + logger.exception(f"Failed to parse response of type {type(response)}") + self._request_options = request_options + self._response = response + self._include_extra_attributes = include_extra_attributes + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + yield from _as_output_attributes(_io_value_and_type(self._response)) + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._include_extra_attributes: + yield from _get_extra_attributes_from_response( + self._response, + self._request_options, + ) + + +class _Accumulators(ABC): + _mapping: Mapping[type, type] + + def __init_subclass__(cls, mapping: Mapping[type, type], **kwargs: Any) -> None: + super().__init_subclass__(**kwargs) + cls._mapping = mapping + + @classmethod + def find(cls, cast_to: type) -> Optional[_ResponseAccumulator]: + if not isinstance(cast_to, Hashable): + # `cast_to` may not be hashable + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L172 # noqa: E501 + return None + try: + factory = cls._mapping.get(cast_to) + except Exception: + logger.exception(f"Failed to get factory for {cast_to}") + return None + return factory() if factory else None + + +class _ResponseAccumulators( + _Accumulators, + ABC, + mapping=MappingProxyType( + { + ChatCompletion: _ChatCompletionAccumulator, + Completion: _CompletionAccumulator, + } + ), +): + ... 
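The request wrappers above only produce spans once the instrumentor is registered against a tracer provider. A minimal usage sketch of that wiring, assuming the `OpenAIInstrumentor().instrument()` entry point from this package's `__init__.py`; the console exporter and model name below are illustrative choices, not part of this patch:

from openai import OpenAI
from openinference.instrumentation.openai import OpenAIInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Set up a tracer provider and register the instrumentor before making client calls.
tracer_provider = trace_sdk.TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace_api.set_tracer_provider(tracer_provider)
OpenAIInstrumentor().instrument()

client = OpenAI()  # reads OPENAI_API_KEY from the environment
# Streaming responses are wrapped by _Stream, so the span ends when iteration finishes.
for chunk in client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a haiku."}],
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")

Because `_finalize_response` wraps streaming responses in `_Stream`, the span for the streaming call in this sketch is finished only after the last chunk has been consumed.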
diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py new file mode 100644 index 000000000..5ae1340b8 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -0,0 +1,248 @@ +import warnings +from collections import defaultdict +from copy import deepcopy +from types import MappingProxyType +from typing import ( + Any, + Callable, + DefaultDict, + Dict, + Iterator, + List, + Mapping, + Optional, + Tuple, +) + +from openinference.instrumentation.openai._extra_attributes_from_response import ( + _get_extra_attributes_from_response, +) +from openinference.instrumentation.openai._utils import ( + _as_output_attributes, + _MimeType, + _ValueAndType, +) +from opentelemetry.util.types import AttributeValue +from typing_extensions import TypeAlias + +from openai.types import Completion +from openai.types.chat import ( + ChatCompletion, + ChatCompletionChunk, +) + +__all__ = ( + "_CompletionAccumulator", + "_ChatCompletionAccumulator", +) + +_ChoiceIndex: TypeAlias = int + + +class _ChatCompletionAccumulator: + __slots__ = ( + "_is_null", + "_values", + "_cached", + ) + + def __init__(self) -> None: + self._is_null = True + self._cached: Optional[ChatCompletion] = None + self._values = _ValuesAccumulator( + choices=_IndexedAccumulator( + lambda: _ValuesAccumulator( + message=_ValuesAccumulator( + content=_StringAccumulator(), + function_call=_ValuesAccumulator(arguments=_StringAccumulator()), + tool_calls=_IndexedAccumulator( + lambda: _ValuesAccumulator( + function=_ValuesAccumulator(arguments=_StringAccumulator()), + ) + ), + ), + ), + ), + ) + + def process_chunk(self, chunk: ChatCompletionChunk) -> None: + if not isinstance(chunk, ChatCompletionChunk): + return + self._is_null = False + self._cached = None + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump()` is only supported in Pydantic v2 + values = chunk.model_dump(exclude_unset=True) + for choice in values.get("choices", ()): + if delta := choice.pop("delta", None): + choice["message"] = delta + self._values += values + + def _construct(self) -> Optional[ChatCompletion]: + if self._is_null: + return None + if not self._cached: + self._cached = ChatCompletion.construct(**dict(self._values)) + return self._cached + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if not (chat_completion := self._construct()): + return + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + json_string = chat_completion.model_dump_json(exclude_unset=True) + yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + + def get_extra_attributes( + self, + request_options: Mapping[str, Any] = MappingProxyType({}), + ) -> Iterator[Tuple[str, AttributeValue]]: + if not (chat_completion := self._construct()): + return + yield from _get_extra_attributes_from_response( + chat_completion.model_copy(), + request_options, + ) + + +class _CompletionAccumulator: + __slots__ = ( + "_is_null", + "_values", + "_cached", + ) + + def __init__(self) -> None: + self._is_null = True + self._cached: Optional[Completion] = None + self._values = _ValuesAccumulator( + 
choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), + ) + + def process_chunk(self, chunk: Completion) -> None: + if not isinstance(chunk, Completion): + return + self._is_null = False + self._cached = None + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump()` is only supported in Pydantic v2 + values = chunk.model_dump(exclude_unset=True) + self._values += values + + def _construct(self) -> Optional[Completion]: + if self._is_null: + return None + if not self._cached: + self._cached = Completion.construct(**dict(self._values)) + return self._cached + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if not (completion := self._construct()): + return + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + json_string = completion.model_dump_json(exclude_unset=True) + yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + + def get_extra_attributes( + self, + request_options: Mapping[str, Any] = MappingProxyType({}), + ) -> Iterator[Tuple[str, AttributeValue]]: + if not (completion := self._construct()): + return + yield from _get_extra_attributes_from_response( + completion.model_copy(), + request_options, + ) + + +class _ValuesAccumulator: + __slots__ = ("_values",) + + def __init__(self, **values: Any) -> None: + self._values: Dict[str, Any] = values + + def __iter__(self) -> Iterator[Tuple[str, Any]]: + for key, value in self._values.items(): + if value is None: + continue + if isinstance(value, _ValuesAccumulator): + if dict_value := dict(value): + yield key, dict_value + elif isinstance(value, _IndexedAccumulator): + if list_value := list(value): + yield key, list_value + elif isinstance(value, _StringAccumulator): + if str_value := str(value): + yield key, str_value + else: + yield key, value + + def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_ValuesAccumulator": + if not values: + return self + for key in self._values.keys(): + if (value := values.get(key)) is None: + continue + self_value = self._values[key] + if isinstance(self_value, _ValuesAccumulator): + if isinstance(value, Mapping): + self_value += value + elif isinstance(self_value, _StringAccumulator): + if isinstance(value, str): + self_value += value + elif isinstance(self_value, _IndexedAccumulator): + if isinstance(value, List): + for v in value: + self_value += v + else: + self_value += value + elif isinstance(self_value, List) and isinstance(value, List): + self_value.extend(value) + else: + self._values[key] = value # replacement + for key in values.keys(): + if key in self._values or (value := values[key]) is None: + continue + value = deepcopy(value) + if isinstance(value, Mapping): + value = _ValuesAccumulator(**value) + self._values[key] = value # new entry + return self + + +class _StringAccumulator: + __slots__ = ("_fragments",) + + def __init__(self) -> None: + self._fragments: List[str] = [] + + def __str__(self) -> str: + return "".join(self._fragments) + + def __iadd__(self, value: Optional[str]) -> "_StringAccumulator": + if not value: + return self + self._fragments.append(value) + return self + + +class _IndexedAccumulator: + __slots__ = ("_indexed",) + + def __init__(self, factory: Callable[[], _ValuesAccumulator]) -> None: + self._indexed: DefaultDict[int, _ValuesAccumulator] = defaultdict(factory) + + def __iter__(self) -> Iterator[Dict[str, Any]]: + for _, 
values in sorted(self._indexed.items()): + yield dict(values) + + def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_IndexedAccumulator": + if not values or not hasattr(values, "get") or (index := values.get("index")) is None: + return self + self._indexed[index] += values + return self diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py new file mode 100644 index 000000000..3bba04c83 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -0,0 +1,149 @@ +import logging +from typing import ( + Any, + AsyncIterator, + Iterator, + Optional, + Protocol, + Tuple, + Union, +) + +from openinference.instrumentation.openai._utils import ( + _finish_tracing, +) +from openinference.instrumentation.openai._with_span import _WithSpan +from opentelemetry import trace as trace_api +from opentelemetry.util.types import AttributeValue +from wrapt import ObjectProxy + +from openai import AsyncStream, Stream + +__all__ = ( + "_Stream", + "_ResponseAccumulator", +) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class _ResponseAccumulator(Protocol): + def process_chunk(self, chunk: Any) -> None: + ... + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + +class _Stream(ObjectProxy): # type: ignore + __slots__ = ( + "_self_with_span", + "_self_iteration_count", + "_self_is_finished", + "_self_include_extra_attributes", + "_self_response_accumulator", + ) + + def __init__( + self, + stream: Union[Stream[Any], AsyncStream[Any]], + with_span: _WithSpan, + response_accumulator: Optional[_ResponseAccumulator] = None, + include_extra_attributes: bool = True, + ) -> None: + super().__init__(stream) + self._self_with_span = with_span + self._self_iteration_count = 0 + self._self_is_finished = with_span.is_finished + self._self_include_extra_attributes = include_extra_attributes + self._self_response_accumulator = response_accumulator + + def __iter__(self) -> Iterator[Any]: + return self + + def __next__(self) -> Any: + # pass through mistaken calls + if not hasattr(self.__wrapped__, "__next__"): + self.__wrapped__.__next__() + iteration_is_finished = False + status_code: Optional[trace_api.StatusCode] = None + try: + chunk: Any = self.__wrapped__.__next__() + except Exception as exception: + iteration_is_finished = True + if isinstance(exception, StopIteration): + status_code = trace_api.StatusCode.OK + else: + status_code = trace_api.StatusCode.ERROR + self._self_with_span.record_exception(exception) + raise + else: + self._process_chunk(chunk) + status_code = trace_api.StatusCode.OK + return chunk + finally: + if iteration_is_finished and not self._self_is_finished: + self._finish_tracing(status_code=status_code) + + def __aiter__(self) -> AsyncIterator[Any]: + return self + + async def __anext__(self) -> Any: + # pass through mistaken calls + if not hasattr(self.__wrapped__, "__anext__"): + self.__wrapped__.__anext__() + iteration_is_finished = False + status_code: Optional[trace_api.StatusCode] = None + try: + chunk: Any = await self.__wrapped__.__anext__() + except Exception as exception: + iteration_is_finished = True + if isinstance(exception, StopAsyncIteration): + status_code = 
trace_api.StatusCode.OK + else: + status_code = trace_api.StatusCode.ERROR + self._self_with_span.record_exception(exception) + raise + else: + self._process_chunk(chunk) + status_code = trace_api.StatusCode.OK + return chunk + finally: + if iteration_is_finished and not self._self_is_finished: + self._finish_tracing(status_code=status_code) + + def _process_chunk(self, chunk: Any) -> None: + if not self._self_iteration_count: + try: + self._self_with_span.add_event("First Token Stream Event") + except Exception: + logger.exception("Failed to add event to span") + self._self_iteration_count += 1 + if self._self_response_accumulator is not None: + try: + self._self_response_accumulator.process_chunk(chunk) + except Exception: + logger.exception("Failed to accumulate response") + + def _finish_tracing( + self, + status_code: Optional[trace_api.StatusCode] = None, + ) -> None: + _finish_tracing( + status_code=status_code, + with_span=self._self_with_span, + has_attributes=self, + ) + self._self_is_finished = True + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._self_response_accumulator is not None: + yield from self._self_response_accumulator.get_attributes() + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._self_include_extra_attributes and self._self_response_accumulator is not None: + yield from self._self_response_accumulator.get_extra_attributes() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py new file mode 100644 index 000000000..b277236d0 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -0,0 +1,134 @@ +import json +import logging +import warnings +from enum import Enum +from importlib.metadata import version +from typing import ( + Any, + Iterator, + List, + Mapping, + NamedTuple, + Optional, + Protocol, + Sequence, + Tuple, + Union, + cast, +) + +from openinference.instrumentation.openai._with_span import _WithSpan +from openinference.semconv.trace import SpanAttributes +from opentelemetry import trace as trace_api +from opentelemetry.util.types import Attributes, AttributeValue + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + + +class _MimeType(Enum): + text_plain = "text/plain" + application_json = "application/json" + + +class _ValueAndType(NamedTuple): + value: str + type: _MimeType + + +def _io_value_and_type(obj: Any) -> _ValueAndType: + if hasattr(obj, "model_dump_json") and callable(obj.model_dump_json): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + value = obj.model_dump_json(exclude_unset=True) + assert isinstance(value, str) + except Exception: + logger.exception("Failed to get model dump json") + else: + return _ValueAndType(value, _MimeType.application_json) + if not isinstance(obj, str) and isinstance(obj, (Sequence, Mapping)): + try: + value = json.dumps(obj) + except Exception: + logger.exception("Failed to dump json") + else: + return _ValueAndType(value, _MimeType.application_json) + return _ValueAndType(str(obj), _MimeType.text_plain) + + +def _as_input_attributes( + value_and_type: Optional[_ValueAndType], +) -> 
Iterator[Tuple[str, AttributeValue]]: + if not value_and_type: + return + yield SpanAttributes.INPUT_VALUE, value_and_type.value + yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value + + +def _as_output_attributes( + value_and_type: Optional[_ValueAndType], +) -> Iterator[Tuple[str, AttributeValue]]: + if not value_and_type: + return + yield SpanAttributes.OUTPUT_VALUE, value_and_type.value + yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value + + +class _HasAttributes(Protocol): + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + +def _finish_tracing( + with_span: _WithSpan, + has_attributes: _HasAttributes, + status_code: Optional[trace_api.StatusCode] = None, +) -> None: + try: + attributes: Attributes = dict(has_attributes.get_attributes()) + except Exception: + logger.exception("Failed to get output value") + attributes = None + try: + extra_attributes: Attributes = dict(has_attributes.get_extra_attributes()) + except Exception: + logger.exception("Failed to get extra attributes") + extra_attributes = None + try: + with_span.finish_tracing( + status_code=status_code, + attributes=attributes, + extra_attributes=extra_attributes, + ) + except Exception: + logger.exception("Failed to finish tracing") + + +def _get_texts( + model_input: Optional[Union[str, List[str], List[int], List[List[int]]]], + model: Optional[str], +) -> Iterator[str]: + if not model_input: + return + if isinstance(model_input, str): + text = model_input + yield text + return + if not isinstance(model_input, Sequence): + return + if any(not isinstance(item, str) for item in model_input): + # FIXME: We can't decode tokens (List[int]) reliably because the model name is not reliable, + # e.g. for text-embedding-ada-002 (cl100k_base), OpenAI returns "text-embedding-ada-002-v2", + # and Azure returns "ada", which refers to a different model (r50k_base). We could use the + # request model name instead, but that doesn't work for Azure because Azure uses the + # deployment name (which differs from the model name). 
+ return + for text in cast(List[str], model_input): + yield text diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py new file mode 100644 index 000000000..e09e18830 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py @@ -0,0 +1,82 @@ +import logging +from typing import Optional + +from opentelemetry import trace as trace_api +from opentelemetry.util.types import Attributes + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class _WithSpan: + __slots__ = ( + "_span", + "_extra_attributes", + "_is_finished", + ) + + def __init__( + self, + span: trace_api.Span, + extra_attributes: Attributes = None, + ) -> None: + self._span = span + self._extra_attributes = extra_attributes + try: + self._is_finished = not self._span.is_recording() + except Exception: + logger.exception("Failed to check if span is recording") + self._is_finished = True + + @property + def is_finished(self) -> bool: + return self._is_finished + + def record_exception(self, exception: Exception) -> None: + if self._is_finished: + return + try: + self._span.record_exception(exception) + except Exception: + logger.exception("Failed to record exception on span") + + def add_event(self, name: str) -> None: + if self._is_finished: + return + try: + self._span.add_event(name) + except Exception: + logger.exception("Failed to add event to span") + + def finish_tracing( + self, + status_code: Optional[trace_api.StatusCode] = None, + attributes: Attributes = None, + extra_attributes: Attributes = None, + ) -> None: + if self._is_finished: + return + for mapping in ( + attributes, + self._extra_attributes, + extra_attributes, + ): + if not mapping: + continue + for key, value in mapping.items(): + if value is None: + continue + try: + self._span.set_attribute(key, value) + except Exception: + logger.exception("Failed to set attribute on span") + if status_code is not None: + try: + self._span.set_status(status_code) + except Exception: + logger.exception("Failed to set status code on span") + try: + self._span.end() + except Exception: + logger.exception("Failed to end span") + self._is_finished = True diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py new file mode 100644 index 000000000..1ac5bcfee --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py @@ -0,0 +1,2 @@ +_instruments = ("openai >= 1.0.0",) +_supports_metrics = False diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py new file mode 100644 index 000000000..0d7875094 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -0,0 +1,154 @@ +import pytest +from openai.types.chat import ChatCompletion, ChatCompletionChunk +from openinference.instrumentation.openai._response_accumulator import ( + _ChatCompletionAccumulator, +) + + +def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion): + accumulator = _ChatCompletionAccumulator() + for chunk in chat_completion_chunks: + accumulator.process_chunk(chunk) + assert accumulator._construct().model_dump( + exclude_unset=True + ) == desired_chat_completion.model_dump(exclude_unset=True) + + +@pytest.fixture +def desired_chat_completion(): + return ChatCompletion.construct( + **{ + "id": "xyz", + "choices": [ + { + "index": 0, + "finish_reason": "length", + "message": {"content": "A1", "role": "assistant"}, + }, + { + "index": 1, + "finish_reason": "stop", + "message": {"content": "B2", "role": "assistant"}, + }, + { + "index": 2, + }, + { + "index": 3, + "message": { + "tool_calls": [ + {"index": 0, "function": {"arguments": "C3"}}, + {"index": 1, "function": {"arguments": "D4"}}, + ] + }, + }, + ], + "created": 123, + "model": "ultra-turbo", + } + ) + + +@pytest.fixture +def chat_completion_chunks(): + chunks = [ + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"content": "A"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "D"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"content": "B"}, "finish_reason": None}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "C"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"role": "assistant"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"content": "2"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"content": "1"}, "finish_reason": None}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"role": "assistant"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"finish_reason": "length", "index": 0}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "4"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"finish_reason": "stop", "index": 1}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "3"}}]}, + }, + 
], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 2, "delta": {"content": ""}}], + "created": 123, + "model": "ultra-turbo", + }, + ] + return [ChatCompletionChunk.construct(**chunk) for chunk in chunks] diff --git a/python/mypy.ini b/python/mypy.ini new file mode 100644 index 000000000..533084ef6 --- /dev/null +++ b/python/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +strict = true + +[mypy-wrapt] +ignore_missing_imports = True diff --git a/python/openinference-semantic-conventions/pyproject.toml b/python/openinference-semantic-conventions/pyproject.toml index 3581a2dc2..0128aec0f 100644 --- a/python/openinference-semantic-conventions/pyproject.toml +++ b/python/openinference-semantic-conventions/pyproject.toml @@ -41,4 +41,4 @@ include = [ ] [tool.hatch.build.targets.wheel] -packages = ["src/openinference"] \ No newline at end of file +packages = ["src/openinference"] diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py index 391e36c82..196b4674d 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py @@ -1,4 +1,3 @@ - class SpanAttributes: OUTPUT_VALUE = "output.value" OUTPUT_MIME_TYPE = "output.mime_type" @@ -17,47 +16,11 @@ class SpanAttributes: """ A list of objects containing embedding data, including the vector and represented piece of text. """ - EMBEDDING_MODEL_NAME = "embedding.model_name" - """ - The name of the embedding model. - """ - EMBEDDING_TEXT = "embedding.text" - """ - The text represented by the embedding. - """ - EMBEDDING_VECTOR = "embedding.vector" - """ - The embedding vector. - """ - TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" - """ - The name of function that is being called during a tool call. - """ - TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" - """ - The JSON string representing the arguments passed to the function - during a tool call. - """ - MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name" - """ - The function name that is a part of the message list. - This is populated for role 'function' or 'agent' as a mechanism to identify - the function that was called during the execution of a tool - """ - MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json" - """ - The JSON string representing the arguments passed to the function - during a function call - """ - MESSAGE_CONTENT = "message.content" - """ - The content of the message to the llm - """ LLM_FUNCTION_CALL = "llm.function_call" """ - For models and APIs that support function calling. Records attributes such as the function name and - arguments to the called function. + For models and APIs that support function calling. Records attributes such as the function + name and arguments to the called function. """ LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters" """ @@ -118,12 +81,71 @@ class SpanAttributes: """ RETRIEVAL_DOCUMENTS = "retrieval.documents" + + OPENINFERENCE_SPAN_KIND = "openinference.span.kind" + + +class MessageAttributes: + """ + Attributes for a message generated by a LLM + """ + + MESSAGE_ROLE = "message.role" + """ + The role of the message, such as "user", "agent", "function". 
+ """ + MESSAGE_CONTENT = "message.content" + """ + The content of the message to the llm + """ + MESSAGE_NAME = "message.name" + """ + The name of the message, often used to identify the function + that was used to generate the message. + """ + MESSAGE_TOOL_CALLS = "message.tool_calls" + """ + The tool calls generated by the model, such as function calls. + """ + MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name" + """ + The function name that is a part of the message list. + This is populated for role 'function' or 'agent' as a mechanism to identify + the function that was called during the execution of a tool + """ + MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json" + """ + The JSON string representing the arguments passed to the function + during a function call + """ + + +class DocumentAttributes: + """ + Attributes for a document + """ + DOCUMENT_ID = "document.id" + """ + The id of the document + """ DOCUMENT_SCORE = "document.score" + """ + The score of the document + """ DOCUMENT_CONTENT = "document.content" + """ + The content of the document + """ DOCUMENT_METADATA = "document.metadata" """ - Document metadata as a string representing a JSON object + The metadata of the document + """ + + +class RerankerAttributes: + """ + Attributes for a reranker """ RERANKER_INPUT_DOCUMENTS = "reranker.input_documents" @@ -147,23 +169,41 @@ class SpanAttributes: Top K parameter of the reranker """ - OPENINFERENCE_SPAN_KIND = "openinference.span.kind" -class MessageAttributes: +class EmbeddingAttributes: """ - Attributes for a message generated by a LLM + Attributes for an embedding """ - - MESSAGE_ROLE = "message.role" + + EMBEDDING_EMBEDDINGS = "embedding.embeddings" """ - The role of the message, such as "user", "agent", "function". + A list of objects containing embedding data, including the vector and represented piece of text. """ - MESSAGE_NAME = "message.name" + EMBEDDING_MODEL_NAME = "embedding.model_name" """ - The name of the message, often used to identify the function - that was used to generate the message. + The name of the embedding model. """ - MESSAGE_TOOL_CALLS = "message.tool_calls" + EMBEDDING_TEXT = "embedding.text" + """ + The text represented by the embedding. + """ + EMBEDDING_VECTOR = "embedding.vector" + """ + The embedding vector. + """ + + +class ToolCallAttributes: + """ + Attributes for a tool call + """ + + TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" + """ + The name of function that is being called during a tool call. + """ + TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" + """ + The JSON string representing the arguments passed to the function + during a tool call. """ - The tool calls generated by the model, such as function calls. 
- """ \ No newline at end of file diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/version.py b/python/openinference-semantic-conventions/src/openinference/semconv/version.py index b3c06d488..f102a9cad 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/version.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/version.py @@ -1 +1 @@ -__version__ = "0.0.1" \ No newline at end of file +__version__ = "0.0.1" diff --git a/python/ruff.toml b/python/ruff.toml new file mode 100644 index 000000000..afeb5281c --- /dev/null +++ b/python/ruff.toml @@ -0,0 +1,12 @@ +line-length = 100 +exclude = [ + ".git", + ".tox", + "dist", +] +ignore-init-module-imports = true +select = ["E", "F", "W", "I"] +target-version = "py38" + +[lint.isort] +force-single-line = false diff --git a/python/tox.ini b/python/tox.ini new file mode 100644 index 000000000..4819f6f3e --- /dev/null +++ b/python/tox.ini @@ -0,0 +1,29 @@ +[tox] +isolated_build = True +skipsdist = True +skip_missing_interpreters = True +envlist = + ruff + mypy-openai + py3{8,11}-test-openai + + +[testenv] +package = wheel +wheel_build_env = .pkg +deps = + test: pytest == 7.4.4 + ruff: ruff == 0.1.11 + mypy: mypy == 1.8.0 +changedir = + openai: instrumentation/openinference-instrumentation-openai/ +commands_pre = + py3{8,11}: python -m pip install -U pip setuptools wheel +; FIXME: installation below for semantic conventions can be removed once the package is stable and published + openai: pip install {toxinidir}/openinference-semantic-conventions + openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] +commands = + test: pytest {posargs} + ruff: ruff format . + ruff: ruff --fix . + mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From dc39934308f0a078b9e4aac2c8876eff7d6f70d4 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 10:28:34 -0800 Subject: [PATCH 02/44] ci default working directory --- .github/workflows/python-CI.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 935f74c45..9de6cc0bb 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -13,6 +13,10 @@ concurrency: group: test-python-${{ github.head_ref }} cancel-in-progress: true +defaults: + run: + working-directory: ./python + jobs: ci: name: CI Python From 0ffecc79664328ef361e92980267b4a6e0c18e3f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:22:23 -0800 Subject: [PATCH 03/44] clean up --- .github/workflows/python-CI.yaml | 4 +--- .../integration_tests/completions.py | 5 ++--- .../integration_tests/completions_async.py | 6 ++---- .../integration_tests/embeddings.py | 2 -- .../integration_tests/functions_and_tool_calls.py | 2 -- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 9de6cc0bb..973b50550 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -16,7 +16,7 @@ concurrency: defaults: run: working-directory: ./python - + jobs: ci: name: CI Python @@ -40,5 +40,3 @@ jobs: - name: Run tests run: | tox run -e test-openai - - diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py index 8830ff8b3..efc031125 100644 --- 
a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -53,8 +53,8 @@ def default_tracer_provider() -> trace_sdk.TracerProvider: N = 3 # iteration i = 0 results in intentional BadRequestError HAIKU = "Write a haiku." HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a resume." -RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a résumé." +RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] CHAT_KWARGS = { "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": HAIKU}], @@ -206,4 +206,3 @@ def tasks(n, task, **kwargs): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py index 6263956bf..2fddf54bd 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -7,7 +7,6 @@ from contextlib import suppress from importlib.metadata import version from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -52,8 +51,8 @@ def default_tracer_provider() -> trace_sdk.TracerProvider: N = 3 # iteration i = 0 results in intentional BadRequestError HAIKU = "Write a haiku." HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a resume." -RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a résumé." 
+RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] CHAT_KWARGS = { "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": HAIKU}], @@ -204,4 +203,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index ee91e2898..2b0a40ef9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -6,7 +6,6 @@ import logging from contextlib import suppress from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -139,4 +138,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py index a281938e5..5166a6da3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py @@ -9,7 +9,6 @@ from contextlib import suppress from importlib.metadata import version from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -192,4 +191,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces From b9e06548c52a1475f7e1bce8d4460ea5a27a8f47 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:49:01 -0800 Subject: [PATCH 04/44] clean up --- .../instrumentation/openai/__init__.py | 9 +-- .../openai/_response_accumulator.py | 56 +++++++-------- .../instrumentation/openai/_stream.py | 4 +- .../openai/test_response_accumulator.py | 68 +++++++++---------- 4 files changed, 61 insertions(+), 76 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 3217d6457..29547e0ef 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -18,14 +18,9 @@ class OpenAIInstrumentor(BaseInstrumentor): # type: ignore - """An instrumentor for openai.OpenAI.request and - openai.AsyncOpenAI.request """ - - __slots__ = ( - "_original_request", - 
"_original_async_request", - ) + An instrumentor for openai.OpenAI.request and openai.AsyncOpenAI.request + """ def instrumentation_dependencies(self) -> Collection[str]: return _instruments diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 5ae1340b8..01c3ecb5b 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -1,3 +1,4 @@ +import json import warnings from collections import defaultdict from copy import deepcopy @@ -7,6 +8,7 @@ Callable, DefaultDict, Dict, + Iterable, Iterator, List, Mapping, @@ -43,12 +45,12 @@ class _ChatCompletionAccumulator: __slots__ = ( "_is_null", "_values", - "_cached", + "_cached_result", ) def __init__(self) -> None: self._is_null = True - self._cached: Optional[ChatCompletion] = None + self._cached_result: Optional[Mapping[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator( lambda: _ValuesAccumulator( @@ -69,7 +71,7 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: if not isinstance(chunk, ChatCompletionChunk): return self._is_null = False - self._cached = None + self._cached_result = None with warnings.catch_warnings(): warnings.simplefilter("ignore") # `warnings=False` in `model_dump()` is only supported in Pydantic v2 @@ -79,30 +81,27 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: choice["message"] = delta self._values += values - def _construct(self) -> Optional[ChatCompletion]: + def _result(self) -> Optional[Mapping[str, Any]]: if self._is_null: return None - if not self._cached: - self._cached = ChatCompletion.construct(**dict(self._values)) - return self._cached + if not self._cached_result: + self._cached_result = MappingProxyType(dict(self._values)) + return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if not (chat_completion := self._construct()): + if not (result := self._result()): return - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 - json_string = chat_completion.model_dump_json(exclude_unset=True) + json_string = json.dumps(result) yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) def get_extra_attributes( self, request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: - if not (chat_completion := self._construct()): + if not (result := self._result()): return yield from _get_extra_attributes_from_response( - chat_completion.model_copy(), + ChatCompletion.construct(**result), request_options, ) @@ -111,12 +110,12 @@ class _CompletionAccumulator: __slots__ = ( "_is_null", "_values", - "_cached", + "_cached_result", ) def __init__(self) -> None: self._is_null = True - self._cached: Optional[Completion] = None + self._cached_result: Optional[Mapping[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) @@ -125,37 +124,34 @@ def process_chunk(self, chunk: Completion) -> None: if not isinstance(chunk, Completion): return self._is_null = False - 
self._cached = None + self._cached_result = None with warnings.catch_warnings(): warnings.simplefilter("ignore") # `warnings=False` in `model_dump()` is only supported in Pydantic v2 values = chunk.model_dump(exclude_unset=True) self._values += values - def _construct(self) -> Optional[Completion]: + def _result(self) -> Optional[Mapping[str, Any]]: if self._is_null: return None - if not self._cached: - self._cached = Completion.construct(**dict(self._values)) - return self._cached + if not self._cached_result: + self._cached_result = MappingProxyType(dict(self._values)) + return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if not (completion := self._construct()): + if not (result := self._result()): return - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 - json_string = completion.model_dump_json(exclude_unset=True) + json_string = json.dumps(result) yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) def get_extra_attributes( self, request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: - if not (completion := self._construct()): + if not (result := self._result()): return yield from _get_extra_attributes_from_response( - completion.model_copy(), + Completion.construct(**result), request_options, ) @@ -196,12 +192,12 @@ def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_ValuesAccumulator": if isinstance(value, str): self_value += value elif isinstance(self_value, _IndexedAccumulator): - if isinstance(value, List): + if isinstance(value, Iterable): for v in value: self_value += v else: self_value += value - elif isinstance(self_value, List) and isinstance(value, List): + elif isinstance(self_value, List) and isinstance(value, Iterable): self_value.extend(value) else: self._values[key] = value # replacement diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 3bba04c83..5ac8e4f57 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -9,9 +9,7 @@ Union, ) -from openinference.instrumentation.openai._utils import ( - _finish_tracing, -) +from openinference.instrumentation.openai._utils import _finish_tracing from openinference.instrumentation.openai._with_span import _WithSpan from opentelemetry import trace as trace_api from opentelemetry.util.types import AttributeValue diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 0d7875094..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,52 +1,48 @@ import pytest -from openai.types.chat import ChatCompletion, ChatCompletionChunk +from openai.types.chat import 
ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( _ChatCompletionAccumulator, ) -def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion): +def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) - assert accumulator._construct().model_dump( - exclude_unset=True - ) == desired_chat_completion.model_dump(exclude_unset=True) + assert accumulator._result() == desired_chat_completion_result @pytest.fixture -def desired_chat_completion(): - return ChatCompletion.construct( - **{ - "id": "xyz", - "choices": [ - { - "index": 0, - "finish_reason": "length", - "message": {"content": "A1", "role": "assistant"}, - }, - { - "index": 1, - "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, - }, - { - "index": 2, +def desired_chat_completion_result(): + return { + "id": "xyz", + "choices": [ + { + "index": 0, + "finish_reason": "length", + "message": {"content": "A1", "role": "assistant"}, + }, + { + "index": 1, + "finish_reason": "stop", + "message": {"content": "B2", "role": "assistant"}, + }, + { + "index": 2, + }, + { + "index": 3, + "message": { + "tool_calls": [ + {"index": 0, "function": {"arguments": "C3"}}, + {"index": 1, "function": {"arguments": "D4"}}, + ] }, - { - "index": 3, - "message": { - "tool_calls": [ - {"index": 0, "function": {"arguments": "C3"}}, - {"index": 1, "function": {"arguments": "D4"}}, - ] - }, - }, - ], - "created": 123, - "model": "ultra-turbo", - } - ) + }, + ], + "created": 123, + "model": "ultra-turbo", + } @pytest.fixture From 31d3d1a0bc1782edfd46a02c14ec43f58841c179 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:51:12 -0800 Subject: [PATCH 05/44] clean up --- .../instrumentation/openai/_response_accumulator.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 01c3ecb5b..658e7d4bf 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -50,7 +50,7 @@ class _ChatCompletionAccumulator: def __init__(self) -> None: self._is_null = True - self._cached_result: Optional[Mapping[str, Any]] = None + self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator( lambda: _ValuesAccumulator( @@ -81,11 +81,11 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: choice["message"] = delta self._values += values - def _result(self) -> Optional[Mapping[str, Any]]: + def _result(self) -> Optional[Dict[str, Any]]: if self._is_null: return None if not self._cached_result: - self._cached_result = MappingProxyType(dict(self._values)) + self._cached_result = dict(self._values) return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: @@ -115,7 +115,7 @@ class _CompletionAccumulator: def __init__(self) -> None: self._is_null = True - self._cached_result: Optional[Mapping[str, Any]] = None + self._cached_result: Optional[Dict[str, 
Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) @@ -131,11 +131,11 @@ def process_chunk(self, chunk: Completion) -> None: values = chunk.model_dump(exclude_unset=True) self._values += values - def _result(self) -> Optional[Mapping[str, Any]]: + def _result(self) -> Optional[Dict[str, Any]]: if self._is_null: return None if not self._cached_result: - self._cached_result = MappingProxyType(dict(self._values)) + self._cached_result = dict(self._values) return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: From 83d56eb397f14e63b1e179476d1240142acf00c0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:57:32 -0800 Subject: [PATCH 06/44] clean up --- .../instrumentation/openai/_response_accumulator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 658e7d4bf..4e2abb9a5 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -25,7 +25,6 @@ _ValueAndType, ) from opentelemetry.util.types import AttributeValue -from typing_extensions import TypeAlias from openai.types import Completion from openai.types.chat import ( @@ -38,8 +37,6 @@ "_ChatCompletionAccumulator", ) -_ChoiceIndex: TypeAlias = int - class _ChatCompletionAccumulator: __slots__ = ( From 189f673dff9a71d2583f106576641acfa868fac6 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 09:50:27 -0800 Subject: [PATCH 07/44] clean up --- .../openinference/instrumentation/openai/_request.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 0f1d0790c..e99cf8dd9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -82,14 +82,21 @@ def _start_as_current_span( span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: +<<<<<<< Updated upstream input_attributes = _as_input_attributes(_io_value_and_type(request_options)) +======= + attributes.update(_as_input_attributes(_io_value_and_type(request_options))) +>>>>>>> Stashed changes except Exception: logger.exception( f"Failed to get input attributes from request options of " f"type {type(request_options)}" ) +<<<<<<< Updated upstream else: attributes.update(input_attributes) +======= +>>>>>>> Stashed changes # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: @@ -220,7 +227,11 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # serializes correctly in an HTTP request body. 
For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we +<<<<<<< Updated upstream # try to extract attributes. However, this round-trip seems expensive, so wa opted to treat +======= + # try to extract attributes. However, this round-trip seems expensive, so we opted to treat +>>>>>>> Stashed changes # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. # try: From 05a49fbfc094cbe6e7d565cbd724dfec9b5927bd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 09:50:38 -0800 Subject: [PATCH 08/44] update README --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3a1679af8..30f181e39 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,10 @@ OpenInference provides a set of instrumentations for popular machine learning SD ## Python -| Package | Description | -| --------------------------------------------------------------------------------------------- | --------------------------------------------- | -| [`openinference-semantic-conventions`](./python/openinference-semantic-conventions/README.md) | Semantic conventions for tracing of LLM Apps. | +| Package | Description | +|--------------------------------------------------------------------------------------------------------------------|-----------------------------------------------| +| [`openinference-semantic-conventions`](./python/openinference-semantic-conventions/README.md) | Semantic conventions for tracing of LLM Apps. | +| [`openinference-instrumentation-openai`](./python/instrumentation/openinference-instrumentation-openai/README.rst) | OpenInference Instrumentation for OpenAI SDK. | ## JavaScript From 525e65c007365a2dd1d80e62bd8aa7389724e83e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 10:04:14 -0800 Subject: [PATCH 09/44] clean up --- .../instrumentation/openai/_request.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index e99cf8dd9..de17d3077 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -82,21 +82,12 @@ def _start_as_current_span( span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: -<<<<<<< Updated upstream - input_attributes = _as_input_attributes(_io_value_and_type(request_options)) -======= attributes.update(_as_input_attributes(_io_value_and_type(request_options))) ->>>>>>> Stashed changes except Exception: logger.exception( f"Failed to get input attributes from request options of " f"type {type(request_options)}" ) -<<<<<<< Updated upstream - else: - attributes.update(input_attributes) -======= ->>>>>>> Stashed changes # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. 
try: @@ -227,11 +218,7 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # serializes correctly in an HTTP request body. For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we -<<<<<<< Updated upstream - # try to extract attributes. However, this round-trip seems expensive, so wa opted to treat -======= # try to extract attributes. However, this round-trip seems expensive, so we opted to treat ->>>>>>> Stashed changes # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. # try: From f395f585f8cf012479f752ea502c99de8422570b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 8 Jan 2024 16:08:39 -0800 Subject: [PATCH 10/44] fix indent --- .../openinference-instrumentation-openai/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 516092183..846687ab9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.rst" license = "Apache-2.0" requires-python = ">=3.8, <3.12" authors = [ - { name = "OpenInference Authors", email = "oss@arize.com" }, + { name = "OpenInference Authors", email = "oss@arize.com" }, ] classifiers = [ "Development Status :: 4 - Beta", From dbe7bc39cef9034064712c0123dde10d0284dc0a Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 8 Jan 2024 17:47:11 -0800 Subject: [PATCH 11/44] add OpenInferenceSpanKindValues --- .../src/openinference/semconv/trace/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py index 196b4674d..81331131d 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py @@ -1,3 +1,6 @@ +from enum import Enum + + class SpanAttributes: OUTPUT_VALUE = "output.value" OUTPUT_MIME_TYPE = "output.mime_type" @@ -207,3 +210,14 @@ class ToolCallAttributes: The JSON string representing the arguments passed to the function during a tool call. 
""" + + +class OpenInferenceSpanKindValues(Enum): + TOOL = "TOOL" + CHAIN = "CHAIN" + LLM = "LLM" + RETRIEVER = "RETRIEVER" + EMBEDDING = "EMBEDDING" + AGENT = "AGENT" + RERANKER = "RERANKER" + UNKNOWN = "UNKNOWN" From 4cd42cdb4053f9f28fdd650d37cd2d5a983ca18a Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 09:52:02 -0800 Subject: [PATCH 12/44] use OpenInferenceSpanKindValues --- .../src/openinference/instrumentation/openai/_request.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index de17d3077..f93e5cbdf 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -35,7 +35,7 @@ _io_value_and_type, ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes +from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY @@ -54,10 +54,6 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -_LLM_SPAN_KIND = "LLM" -_EMBEDDING_SPAN_KIND = "EMBEDDING" - - class _WithTracer(ABC): __slots__ = ( "_tracer", @@ -79,7 +75,7 @@ def _start_as_current_span( cast_to: type, request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: - span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND + span_kind = OpenInferenceSpanKindValues.EMBEDDING if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: attributes.update(_as_input_attributes(_io_value_and_type(request_options))) From bc277810855e018dcb7ff71decc00d4c94f8fbaf Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:48:32 -0800 Subject: [PATCH 13/44] tox testing --- .github/workflows/python-CI.yaml | 12 ++---------- .../instrumentation/openai/_request.py | 9 +++++++-- python/tox.ini | 14 +++++++------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 973b50550..a195b7fd4 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -30,13 +30,5 @@ jobs: python-version: "3.8" - name: Install tox run: pip install tox==4.11.4 - - name: Check style - run: | - tox run -e ruff - git diff --exit-code - - name: Run mypy - run: | - tox run -e mypy-openai - - name: Run tests - run: | - tox run -e test-openai + - name: Run tox + run: tox run-parallel diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index f93e5cbdf..086025cce 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -35,7 
+35,7 @@ _io_value_and_type, ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues +from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY @@ -54,6 +54,7 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) + class _WithTracer(ABC): __slots__ = ( "_tracer", @@ -75,7 +76,11 @@ def _start_as_current_span( cast_to: type, request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: - span_kind = OpenInferenceSpanKindValues.EMBEDDING if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM + span_kind = ( + OpenInferenceSpanKindValues.EMBEDDING + if cast_to is CreateEmbeddingResponse + else OpenInferenceSpanKindValues.LLM + ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: attributes.update(_as_input_attributes(_io_value_and_type(request_options))) diff --git a/python/tox.ini b/python/tox.ini index 4819f6f3e..4b72f9dc1 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,18 +3,17 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - ruff - mypy-openai py3{8,11}-test-openai [testenv] package = wheel wheel_build_env = .pkg +allowlist_externals = git deps = test: pytest == 7.4.4 - ruff: ruff == 0.1.11 - mypy: mypy == 1.8.0 + test: ruff == 0.1.11 + test: mypy == 1.8.0 changedir = openai: instrumentation/openinference-instrumentation-openai/ commands_pre = @@ -23,7 +22,8 @@ commands_pre = openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = + test: ruff format . + test: ruff --fix . + test: git diff --exit-code + test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini test: pytest {posargs} - ruff: ruff format . - ruff: ruff --fix . 
- mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From 9c2096ec58e2cd604b62a4ec13c621a5467cd275 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:52:57 -0800 Subject: [PATCH 14/44] tox testing --- .../src/openinference/instrumentation/openai/_request.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 086025cce..4ae893308 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -78,8 +78,7 @@ def _start_as_current_span( ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING - if cast_to is CreateEmbeddingResponse - else OpenInferenceSpanKindValues.LLM + if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: From 1fb1038e5af7ef342056158d373ff6d976d2c30b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:56:06 -0800 Subject: [PATCH 15/44] tox testing --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 4b72f9dc1..f38fef824 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -5,7 +5,6 @@ skip_missing_interpreters = True envlist = py3{8,11}-test-openai - [testenv] package = wheel wheel_build_env = .pkg @@ -23,6 +22,7 @@ commands_pre = openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = test: ruff format . + test: git diff --exit-code test: ruff --fix . 
test: git diff --exit-code test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From 0d142ad2e2b59fed7ba7d66138a50f0f89d783e3 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:11:47 -0800 Subject: [PATCH 16/44] wip --- .../src/openinference/instrumentation/openai/_request.py | 3 ++- python/tox.ini | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 4ae893308..086025cce 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -78,7 +78,8 @@ def _start_as_current_span( ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING - if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM + if cast_to is CreateEmbeddingResponse + else OpenInferenceSpanKindValues.LLM ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: diff --git a/python/tox.ini b/python/tox.ini index f38fef824..66d5a3da9 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,7 +3,7 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - py3{8,11}-test-openai + py3{8,11}-test-{semconv,openai} [testenv] package = wheel @@ -15,6 +15,7 @@ deps = test: mypy == 1.8.0 changedir = openai: instrumentation/openinference-instrumentation-openai/ + semconv: openinference-semantic-conventions commands_pre = py3{8,11}: python -m pip install -U pip setuptools wheel ; FIXME: installation below for semantic conventions can be removed once the package is stable and published From 775347287c71a243e9d76dbdcad8b2d1333086eb Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:17:59 -0800 Subject: [PATCH 17/44] tox testing --- python/dev-requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/dev-requirements.txt diff --git a/python/dev-requirements.txt b/python/dev-requirements.txt new file mode 100644 index 000000000..e69de29bb From 4ed83fd1e63d722fcb035a0b54297b275820c56e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:18:35 -0800 Subject: [PATCH 18/44] tox testing --- python/dev-requirements.txt | 3 +++ python/tox.ini | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/python/dev-requirements.txt b/python/dev-requirements.txt index e69de29bb..4abb5b680 100644 --- a/python/dev-requirements.txt +++ b/python/dev-requirements.txt @@ -0,0 +1,3 @@ +pytest == 7.4.4 +ruff == 0.1.11 +mypy == 1.8.0 diff --git a/python/tox.ini b/python/tox.ini index 66d5a3da9..e395a4e4b 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,28 +3,31 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - py3{8,11}-test-{semconv,openai} + py3{8,11}-ci-{semconv,openai} [testenv] package = wheel wheel_build_env = .pkg allowlist_externals = git deps = - test: pytest == 7.4.4 - test: ruff == 0.1.11 - test: mypy == 1.8.0 + -r dev-requirements.txt changedir = + semconv: openinference-semantic-conventions/ openai: instrumentation/openinference-instrumentation-openai/ - semconv: openinference-semantic-conventions commands_pre = py3{8,11}: python -m pip 
install -U pip setuptools wheel + semconv: pip install {toxinidir}/openinference-semantic-conventions ; FIXME: installation below for semantic conventions can be removed once the package is stable and published openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = - test: ruff format . - test: git diff --exit-code - test: ruff --fix . - test: git diff --exit-code - test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ruff: ruff format . + ruff: ruff --fix . + mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini test: pytest {posargs} + ci: ruff format . + ci: git diff --exit-code + ci: ruff --fix . + ci: git diff --exit-code + ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ci: pytest {posargs} From 84390433280c6df6a5f4c94d210fb7dd89dde366 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:21:54 -0800 Subject: [PATCH 19/44] tox testing --- .../src/openinference/instrumentation/openai/_request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 086025cce..65e324015 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -77,9 +77,9 @@ def _start_as_current_span( request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: span_kind = ( - OpenInferenceSpanKindValues.EMBEDDING + OpenInferenceSpanKindValues.EMBEDDING.value if cast_to is CreateEmbeddingResponse - else OpenInferenceSpanKindValues.LLM + else OpenInferenceSpanKindValues.LLM.value ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: From ec12409861f956bf4f7bb8bb786da811f0adb9a0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:33:11 -0800 Subject: [PATCH 20/44] tox testing --- .github/workflows/python-CI.yaml | 12 ++---------- .../openinference/semconv/trace/test_version.py | 8 ++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index a195b7fd4..131fe0898 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -6,12 +6,6 @@ on: pull_request: paths: - "python/**" - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -concurrency: - group: test-python-${{ github.head_ref }} - cancel-in-progress: true defaults: run: @@ -22,10 +16,8 @@ jobs: name: CI Python runs-on: ubuntu-latest steps: - - name: Checkout Repository - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install tox diff --git a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py new file mode 100644 index 000000000..1c7e13aed --- /dev/null +++ 
b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py @@ -0,0 +1,8 @@ +""" +This is a dummy test to ensure that every package has one test. +""" +from openinference.semconv.version import __version__ as semconv_version + + +def test_version(): + print(semconv_version) From 24570446b79d146823b17b7907310fbb23859c92 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:59:21 -0800 Subject: [PATCH 21/44] wip --- .../tests/openinference/semconv/trace/test_version.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py diff --git a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py deleted file mode 100644 index 1c7e13aed..000000000 --- a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -This is a dummy test to ensure that every package has one test. -""" -from openinference.semconv.version import __version__ as semconv_version - - -def test_version(): - print(semconv_version) From b965721c25b181269f427b50a7b418bdc663bf70 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:09:24 -0800 Subject: [PATCH 22/44] wip --- .github/workflows/python-CI.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 131fe0898..04e797fad 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -20,7 +20,5 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.8" - - name: Install tox - run: pip install tox==4.11.4 - - name: Run tox - run: tox run-parallel + - run: pip install tox==4.11.4 + - run: tox run-parallel From f17680b9339af0544e554d63ec00c9723e2d63d7 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:11:23 -0800 Subject: [PATCH 23/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fc922dc96..fed07e6b0 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -25,7 +25,7 @@ def desired_chat_completion_result(): { "index": 1, "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, + "message": {"content": "B22", "role": "assistant"}, }, { "index": 2, From 479b2d6ec39024b61c4703a596630761abd71384 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:15:28 -0800 Subject: [PATCH 24/44] wip --- .github/workflows/python-CI.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 04e797fad..f26fceb3e 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: 
"3.8" + python-version: | + - 3.8 + - 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From 09fb90104d7b4d38740054c258e3d9f9c3e45952 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:18:05 -0800 Subject: [PATCH 25/44] wip --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index e395a4e4b..4c076d871 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -30,4 +30,4 @@ commands = ci: ruff --fix . ci: git diff --exit-code ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini - ci: pytest {posargs} + ci: pytest From f3211dd515fd5b7040ed5b4f464d8ba5ded0efce Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:19:15 -0800 Subject: [PATCH 26/44] wip --- .github/workflows/python-CI.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index f26fceb3e..c3791de04 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -20,7 +20,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: | - - 3.8 + - "3.8" - 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From c533f9f8a5012fe3347017521a434e8e4e1004f7 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:21:15 -0800 Subject: [PATCH 27/44] wip --- .github/workflows/python-CI.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index c3791de04..1cb868986 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,8 +19,6 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: | - - "3.8" - - 3.11 + python-version: "3.x" - run: pip install tox==4.11.4 - run: tox run-parallel From 7ed318b13ea10ad4b98416c0dab5fe44c252d76c Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:21:53 -0800 Subject: [PATCH 28/44] wip --- .github/workflows/python-CI.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 1cb868986..0f2077d92 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: | + 3.8 + 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From 3c08576f2a9942164417ee811704bd8c4538ef54 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:24:04 -0800 Subject: [PATCH 29/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fed07e6b0..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -25,7 +25,7 @@ def desired_chat_completion_result(): { "index": 1, "finish_reason": "stop", - "message": {"content": "B22", "role": "assistant"}, + "message": {"content": "B2", "role": 
"assistant"}, }, { "index": 2, From ff84687f49084ed212a80c09686388f56f784bbd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:27:50 -0800 Subject: [PATCH 30/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fc922dc96..730cedca9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,3 +1,5 @@ +import sys + import pytest from openai.types.chat import ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( @@ -7,6 +9,7 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() + assert sys.version_info[1] == 8 for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result From 9acb9c2d89572505813c190afb764e6a0f401147 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:29:31 -0800 Subject: [PATCH 31/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 730cedca9..715b7f517 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -9,7 +9,6 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() - assert sys.version_info[1] == 8 for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result From 164ca951f80f6f8811d5ba043bf350a77947bcdc Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:31:00 -0800 Subject: [PATCH 32/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 715b7f517..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,5 +1,3 @@ -import sys - import pytest from openai.types.chat import ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( From 
6d759550d298b3097d67722dd5715ab09d917e20 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:48:55 -0800 Subject: [PATCH 33/44] wip --- python/tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 4c076d871..aa8d4098e 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -1,7 +1,6 @@ [tox] isolated_build = True skipsdist = True -skip_missing_interpreters = True envlist = py3{8,11}-ci-{semconv,openai} From b70a56d939fc110075eb6c9e5b40bddbc40d5b06 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:59:25 -0800 Subject: [PATCH 34/44] wip --- python/tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index aa8d4098e..09453bc39 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -22,11 +22,11 @@ commands_pre = commands = ruff: ruff format . ruff: ruff --fix . - mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} test: pytest {posargs} ci: ruff format . ci: git diff --exit-code ci: ruff --fix . ci: git diff --exit-code - ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest From e3c538df4c0d786b47304f07d59eeec51a8d5b76 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:02:24 -0800 Subject: [PATCH 35/44] wip --- python/tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 09453bc39..00c795ce2 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -2,7 +2,8 @@ isolated_build = True skipsdist = True envlist = - py3{8,11}-ci-{semconv,openai} + py3{8,11}-ci-semconv + py3{8,11}-ci-openai [testenv] package = wheel From f344c328022041ffae553b9eeb1fa250acce657b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:03:12 -0800 Subject: [PATCH 36/44] wip --- python/tox.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index 00c795ce2..3d9091fa7 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -17,8 +17,6 @@ changedir = commands_pre = py3{8,11}: python -m pip install -U pip setuptools wheel semconv: pip install {toxinidir}/openinference-semantic-conventions -; FIXME: installation below for semantic conventions can be removed once the package is stable and published - openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = ruff: ruff format . From 1593c6dc5225720676cc60c7f2ee72ab7e934590 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:21:23 -0800 Subject: [PATCH 37/44] wip --- python/tox.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index 3d9091fa7..e8241d1af 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -19,13 +19,13 @@ commands_pre = semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = - ruff: ruff format . - ruff: ruff --fix . + ruff: ruff format . --config {toxinidir}/ruff.toml + ruff: ruff . --fix --config {toxinidir}/ruff.toml mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} test: pytest {posargs} - ci: ruff format . + ci: ruff format . 
--config {toxinidir}/ruff.toml ci: git diff --exit-code - ci: ruff --fix . + ci: ruff . --fix --config {toxinidir}/ruff.toml ci: git diff --exit-code ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest From 444906be8cf9aac0f9690aacd27c6517e6d440c1 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:25:22 -0800 Subject: [PATCH 38/44] wip --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index e8241d1af..1bc7672e3 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -28,4 +28,4 @@ commands = ci: ruff . --fix --config {toxinidir}/ruff.toml ci: git diff --exit-code ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src - ci: pytest + ci: pytest tests From 5fd32ccc69acc67d7881e29b7da07e942d966d2f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:27:17 -0800 Subject: [PATCH 39/44] wip --- python/tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 1bc7672e3..a38bd176f 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -15,7 +15,6 @@ changedir = semconv: openinference-semantic-conventions/ openai: instrumentation/openinference-instrumentation-openai/ commands_pre = - py3{8,11}: python -m pip install -U pip setuptools wheel semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = From 9029fa8f3a2c57b0f4b478c95a1494beaf99352d Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 16:06:19 -0800 Subject: [PATCH 40/44] wip --- .../examples/chat_completion_async_stream.py | 3 - .../integration_tests/completions.py | 3 - .../integration_tests/completions_async.py | 3 - .../integration_tests/embeddings.py | 3 - .../pyproject.toml | 5 + .../instrumentation/openai/__init__.py | 27 +- .../openai/_extra_attributes_from_request.py | 16 +- .../openai/_extra_attributes_from_response.py | 16 +- .../instrumentation/openai/_request.py | 135 ++--- .../openai/_response_accumulator.py | 29 +- .../instrumentation/openai/_stream.py | 5 +- .../instrumentation/openai/_utils.py | 24 +- .../openai/test_instrumentor.py | 543 ++++++++++++++++++ .../openai/test_response_accumulator.py | 2 +- python/tox.ini | 14 +- 15 files changed, 662 insertions(+), 166 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py index 4c81584bc..1216e9ff3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. 
-""" import asyncio import openai diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py index efc031125..dc4226409 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import contextvars import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py index 2fddf54bd..2c94b0d78 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import asyncio import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index 2b0a40ef9..10b1c11ef 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import asyncio import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 846687ab9..833c613d3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -32,8 +32,13 @@ dependencies = [ ] [project.optional-dependencies] +instruments = [ + "openai >= 1.0.0", +] test = [ "openai == 1.0.0", + "opentelemetry-sdk", + "respx", ] [project.urls] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 29547e0ef..1cec04144 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -1,4 +1,5 @@ import logging +from importlib import import_module from typing import Any, Collection from openinference.instrumentation.openai._request import ( @@ -19,34 +20,36 @@ class OpenAIInstrumentor(BaseInstrumentor): # type: ignore """ - An instrumentor for openai.OpenAI.request and openai.AsyncOpenAI.request + An instrumentor for openai """ + __slots__ = ( + "_original_request", + "_original_async_request", + ) + def instrumentation_dependencies(self) -> Collection[str]: return _instruments def _instrument(self, **kwargs: Any) -> None: - if (include_extra_attributes := kwargs.get("include_extra_attributes")) is None: - include_extra_attributes = True if not (tracer_provider := kwargs.get("tracer_provider")): tracer_provider = 
trace_api.get_tracer_provider() tracer = trace_api.get_tracer(__name__, __version__, tracer_provider) + openai = import_module(_MODULE) + self._original_request = openai.OpenAI.request + self._original_async_request = openai.AsyncOpenAI.request wrap_function_wrapper( module=_MODULE, name="OpenAI.request", - wrapper=_Request( - tracer=tracer, - include_extra_attributes=include_extra_attributes, - ), + wrapper=_Request(tracer=tracer), ) wrap_function_wrapper( module=_MODULE, name="AsyncOpenAI.request", - wrapper=_AsyncRequest( - tracer=tracer, - include_extra_attributes=include_extra_attributes, - ), + wrapper=_AsyncRequest(tracer=tracer), ) def _uninstrument(self, **kwargs: Any) -> None: - pass + openai = import_module(_MODULE) + openai.OpenAI.request = self._original_request + openai.AsyncOpenAI.request = self._original_async_request diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py index 47f444ee8..625e30cc1 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py @@ -25,19 +25,19 @@ def _get_extra_attributes_from_request( cast_to: type, - request_options: Mapping[str, Any], + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: - if not isinstance(request_options, Mapping): + if not isinstance(request_parameters, Mapping): return if cast_to is ChatCompletion: - yield from _get_attributes_from_chat_completion_create_param(request_options) + yield from _get_attributes_from_chat_completion_create_param(request_parameters) elif cast_to is CreateEmbeddingResponse: - yield from _get_attributes_from_embedding_create_param(request_options) + yield from _get_attributes_from_embedding_create_param(request_parameters) elif cast_to is Completion: - yield from _get_attributes_from_completion_create_param(request_options) + yield from _get_attributes_from_completion_create_param(request_parameters) else: try: - yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_options) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) except Exception: logger.exception("Failed to serialize request options") @@ -55,7 +55,9 @@ def _get_attributes_from_chat_completion_create_param( invocation_params.pop("tools", None) yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) if (input_messages := params.get("messages")) and isinstance(input_messages, Iterable): - for index, input_message in enumerate(input_messages): + # Use reversed() to get the last message first. This is because OTEL has a default limit of + # 128 attributes per span, and flattening increases the number of attributes very quickly. 
+ for index, input_message in reversed(list(enumerate(input_messages))): for key, value in _get_attributes_from_message_param(input_message): yield f"{SpanAttributes.LLM_INPUT_MESSAGES}.{index}.{key}", value diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py index 05d0d0a1d..ebab74ea9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -2,7 +2,7 @@ import logging from functools import singledispatch from importlib import import_module -from types import MappingProxyType, ModuleType +from types import ModuleType from typing import ( Any, Iterable, @@ -43,16 +43,16 @@ @singledispatch def _get_extra_attributes_from_response( response: Any, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: - # this is a fallback (for singledispatch) + # this is a fallback for @singledispatch yield from () @_get_extra_attributes_from_response.register def _( completion: ChatCompletion, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 if model := getattr(completion, "model", None): @@ -71,14 +71,14 @@ def _( @_get_extra_attributes_from_response.register def _( completion: Completion, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 if model := getattr(completion, "model", None): yield SpanAttributes.LLM_MODEL_NAME, model if usage := getattr(completion, "usage", None): yield from _get_attributes_from_completion_usage(usage) - if model_prompt := request_options.get("prompt"): + if model_prompt := request_parameters.get("prompt"): # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -90,7 +90,7 @@ def _( @_get_extra_attributes_from_response.register def _( response: CreateEmbeddingResponse, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 if usage := getattr(response, "usage", None): @@ -104,7 +104,7 @@ def _( continue for key, value in _get_attributes_from_embedding(embedding): yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value - embedding_input = request_options.get("input") + embedding_input = request_parameters.get("input") for index, text in 
enumerate(_get_texts(embedding_input, model)): # input: Required[Union[str, List[str], List[int], List[List[int]]]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 65e324015..1009e91ed 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -7,10 +7,8 @@ Awaitable, Callable, Dict, - Hashable, Iterator, Mapping, - Optional, Tuple, ) @@ -25,7 +23,6 @@ _CompletionAccumulator, ) from openinference.instrumentation.openai._stream import ( - _ResponseAccumulator, _Stream, ) from openinference.instrumentation.openai._utils import ( @@ -56,25 +53,17 @@ class _WithTracer(ABC): - __slots__ = ( - "_tracer", - "_include_extra_attributes", - ) + __slots__ = ("_tracer",) - def __init__( - self, - tracer: trace_api.Tracer, - include_extra_attributes: bool = True, - ) -> None: + def __init__(self, tracer: trace_api.Tracer) -> None: self._tracer = tracer - self._include_extra_attributes = include_extra_attributes @contextmanager def _start_as_current_span( self, span_name: str, cast_to: type, - request_options: Mapping[str, Any], + request_parameters: Mapping[str, Any], ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING.value @@ -83,24 +72,20 @@ def _start_as_current_span( ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: - attributes.update(_as_input_attributes(_io_value_and_type(request_options))) + attributes.update(_as_input_attributes(_io_value_and_type(request_parameters))) except Exception: logger.exception( - f"Failed to get input attributes from request options of " - f"type {type(request_options)}" + f"Failed to get input attributes from request parameters of " + f"type {type(request_parameters)}" ) # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: - extra_attributes = ( - dict(_get_extra_attributes_from_request(cast_to, request_options)) - if self._include_extra_attributes - else {} - ) + extra_attributes = dict(_get_extra_attributes_from_request(cast_to, request_parameters)) except Exception: logger.exception( f"Failed to get extra attributes from request options of " - f"type {type(request_options)}" + f"type {type(request_parameters)}" ) extra_attributes = {} try: @@ -128,7 +113,7 @@ def __call__( if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): return wrapped(*args, **kwargs) try: - cast_to, request_options = _parse_request_args(args) + cast_to, request_parameters = _parse_request_args(args) # E.g. 
cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" span_name: str = cast_to.__name__.split(".")[-1] except Exception: @@ -137,7 +122,7 @@ def __call__( with self._start_as_current_span( span_name=span_name, cast_to=cast_to, - request_options=request_options, + request_parameters=request_parameters, ) as with_span: try: response = wrapped(*args, **kwargs) @@ -151,8 +136,7 @@ def __call__( response=response, with_span=with_span, cast_to=cast_to, - request_options=request_options, - include_extra_attributes=self._include_extra_attributes, + request_parameters=request_parameters, ) except Exception: logger.exception(f"Failed to finalize response of type {type(response)}") @@ -171,7 +155,7 @@ async def __call__( if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): return await wrapped(*args, **kwargs) try: - cast_to, request_options = _parse_request_args(args) + cast_to, request_parameters = _parse_request_args(args) # E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" span_name: str = cast_to.__name__.split(".")[-1] except Exception: @@ -180,7 +164,7 @@ async def __call__( with self._start_as_current_span( span_name=span_name, cast_to=cast_to, - request_options=request_options, + request_parameters=request_parameters, ) as with_span: try: response = await wrapped(*args, **kwargs) @@ -194,8 +178,7 @@ async def __call__( response=response, with_span=with_span, cast_to=cast_to, - request_options=request_options, - include_extra_attributes=self._include_extra_attributes, + request_parameters=request_parameters, ) except Exception: logger.exception(f"Failed to finalize response of type {type(response)}") @@ -209,13 +192,13 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # The targeted signature of `request` is here: # https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L846-L847 # noqa: E501 cast_to: type = args[0] - options: Mapping[str, Any] = ( + request_parameters: Mapping[str, Any] = ( json_data # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_models.py#L427 # noqa: E501 if hasattr(args[1], "json_data") and isinstance(json_data := args[1].json_data, Mapping) else {} ) - # FIXME: Because request options is just a Mapping, it can contain any value as long as it + # FIXME: Because request parameters is just a Mapping, it can contain any value as long as it # serializes correctly in an HTTP request body. For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we @@ -223,18 +206,25 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. 
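    # For example, a message role supplied as an Enum member serializes cleanly in the final
    # HTTP request body, but is not a plain string when read here, so the role value is
    # converted to a string only at the point where it is extracted.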
# try: - # options = json.loads(json.dumps(options)) + # request_parameters = json.loads(json.dumps(request_parameters)) # except Exception: # pass - return cast_to, options + return cast_to, request_parameters + + +_RESPONSE_ACCUMULATOR_FACTORIES: Mapping[type, type] = MappingProxyType( + { + ChatCompletion: _ChatCompletionAccumulator, + Completion: _CompletionAccumulator, + } +) def _finalize_response( response: Any, with_span: _WithSpan, cast_to: type, - request_options: Mapping[str, Any], - include_extra_attributes: bool = True, + request_parameters: Mapping[str, Any], ) -> Any: """Monkey-patch the response object to trace the stream, or finish tracing if the response is not a stream. @@ -260,8 +250,18 @@ def _finalize_response( # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. and isinstance(response._parsed, (Stream, AsyncStream)) ): - # For streaming, we need (optional) accumulators to process each chunk iteration. - response_accumulator = _ResponseAccumulators.find(cast_to) + # For streaming, we need an (optional) accumulator to process each chunk iteration. + try: + response_accumulator_factory = _RESPONSE_ACCUMULATOR_FACTORIES.get(cast_to) + response_accumulator = ( + response_accumulator_factory(request_parameters) + if response_accumulator_factory + else None + ) + except Exception: + # E.g. cast_to may not be hashable + logger.exception(f"Failed to get response accumulator for {cast_to}") + response_accumulator = None if hasattr(response, "_parsed") and isinstance( parsed := response._parsed, (Stream, AsyncStream) ): @@ -270,22 +270,19 @@ def _finalize_response( stream=parsed, with_span=with_span, response_accumulator=response_accumulator, - include_extra_attributes=include_extra_attributes, ) return response return _Stream( stream=response, with_span=with_span, response_accumulator=response_accumulator, - include_extra_attributes=include_extra_attributes, ) _finish_tracing( status_code=trace_api.StatusCode.OK, with_span=with_span, has_attributes=_ResponseAttributes( + request_parameters=request_parameters, response=response, - request_options=request_options, - include_extra_attributes=include_extra_attributes, ), ) return response @@ -293,16 +290,14 @@ def _finalize_response( class _ResponseAttributes: __slots__ = ( - "_request_options", "_response", - "_include_extra_attributes", + "_request_parameters", ) def __init__( self, response: Any, - request_options: Mapping[str, Any], - include_extra_attributes: bool = True, + request_parameters: Mapping[str, Any], ) -> None: if hasattr(response, "parse") and callable(response.parse): # E.g. 
see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 @@ -310,50 +305,14 @@ def __init__( response = response.parse() except Exception: logger.exception(f"Failed to parse response of type {type(response)}") - self._request_options = request_options + self._request_parameters = request_parameters self._response = response - self._include_extra_attributes = include_extra_attributes def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from _as_output_attributes(_io_value_and_type(self._response)) def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if self._include_extra_attributes: - yield from _get_extra_attributes_from_response( - self._response, - self._request_options, - ) - - -class _Accumulators(ABC): - _mapping: Mapping[type, type] - - def __init_subclass__(cls, mapping: Mapping[type, type], **kwargs: Any) -> None: - super().__init_subclass__(**kwargs) - cls._mapping = mapping - - @classmethod - def find(cls, cast_to: type) -> Optional[_ResponseAccumulator]: - if not isinstance(cast_to, Hashable): - # `cast_to` may not be hashable - # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L172 # noqa: E501 - return None - try: - factory = cls._mapping.get(cast_to) - except Exception: - logger.exception(f"Failed to get factory for {cast_to}") - return None - return factory() if factory else None - - -class _ResponseAccumulators( - _Accumulators, - ABC, - mapping=MappingProxyType( - { - ChatCompletion: _ChatCompletionAccumulator, - Completion: _CompletionAccumulator, - } - ), -): - ... + yield from _get_extra_attributes_from_response( + self._response, + request_parameters=self._request_parameters, + ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 4e2abb9a5..e2b8f6807 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -2,7 +2,6 @@ import warnings from collections import defaultdict from copy import deepcopy -from types import MappingProxyType from typing import ( Any, Callable, @@ -21,9 +20,9 @@ ) from openinference.instrumentation.openai._utils import ( _as_output_attributes, - _MimeType, _ValueAndType, ) +from openinference.semconv.trace import OpenInferenceMimeTypeValues from opentelemetry.util.types import AttributeValue from openai.types import Completion @@ -43,9 +42,11 @@ class _ChatCompletionAccumulator: "_is_null", "_values", "_cached_result", + "_request_parameters", ) - def __init__(self) -> None: + def __init__(self, request_parameters: Mapping[str, Any]) -> None: + self._request_parameters = request_parameters self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -89,17 +90,18 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return json_string = json.dumps(result) - yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + yield from _as_output_attributes( + _ValueAndType(json_string, 
OpenInferenceMimeTypeValues.JSON) + ) def get_extra_attributes( self, - request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return yield from _get_extra_attributes_from_response( ChatCompletion.construct(**result), - request_options, + self._request_parameters, ) @@ -108,9 +110,11 @@ class _CompletionAccumulator: "_is_null", "_values", "_cached_result", + "_request_parameters", ) - def __init__(self) -> None: + def __init__(self, request_parameters: Mapping[str, Any]) -> None: + self._request_parameters = request_parameters self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -139,17 +143,16 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return json_string = json.dumps(result) - yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + yield from _as_output_attributes( + _ValueAndType(json_string, OpenInferenceMimeTypeValues.JSON) + ) - def get_extra_attributes( - self, - request_options: Mapping[str, Any] = MappingProxyType({}), - ) -> Iterator[Tuple[str, AttributeValue]]: + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return yield from _get_extra_attributes_from_response( Completion.construct(**result), - request_options, + self._request_parameters, ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 5ac8e4f57..6c3d36b9e 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -42,7 +42,6 @@ class _Stream(ObjectProxy): # type: ignore "_self_with_span", "_self_iteration_count", "_self_is_finished", - "_self_include_extra_attributes", "_self_response_accumulator", ) @@ -51,13 +50,11 @@ def __init__( stream: Union[Stream[Any], AsyncStream[Any]], with_span: _WithSpan, response_accumulator: Optional[_ResponseAccumulator] = None, - include_extra_attributes: bool = True, ) -> None: super().__init__(stream) self._self_with_span = with_span self._self_iteration_count = 0 self._self_is_finished = with_span.is_finished - self._self_include_extra_attributes = include_extra_attributes self._self_response_accumulator = response_accumulator def __iter__(self) -> Iterator[Any]: @@ -143,5 +140,5 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from self._self_response_accumulator.get_attributes() def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if self._self_include_extra_attributes and self._self_response_accumulator is not None: + if self._self_response_accumulator is not None: yield from self._self_response_accumulator.get_extra_attributes() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py index b277236d0..a8b3f3ca9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py +++ 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -1,7 +1,6 @@ import json import logging import warnings -from enum import Enum from importlib.metadata import version from typing import ( Any, @@ -18,7 +17,7 @@ ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes +from openinference.semconv.trace import OpenInferenceMimeTypeValues, SpanAttributes from opentelemetry import trace as trace_api from opentelemetry.util.types import Attributes, AttributeValue @@ -28,14 +27,9 @@ _OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) -class _MimeType(Enum): - text_plain = "text/plain" - application_json = "application/json" - - class _ValueAndType(NamedTuple): value: str - type: _MimeType + type: OpenInferenceMimeTypeValues def _io_value_and_type(obj: Any) -> _ValueAndType: @@ -49,15 +43,15 @@ def _io_value_and_type(obj: Any) -> _ValueAndType: except Exception: logger.exception("Failed to get model dump json") else: - return _ValueAndType(value, _MimeType.application_json) + return _ValueAndType(value, OpenInferenceMimeTypeValues.JSON) if not isinstance(obj, str) and isinstance(obj, (Sequence, Mapping)): try: value = json.dumps(obj) except Exception: logger.exception("Failed to dump json") else: - return _ValueAndType(value, _MimeType.application_json) - return _ValueAndType(str(obj), _MimeType.text_plain) + return _ValueAndType(value, OpenInferenceMimeTypeValues.JSON) + return _ValueAndType(str(obj), OpenInferenceMimeTypeValues.TEXT) def _as_input_attributes( @@ -66,7 +60,9 @@ def _as_input_attributes( if not value_and_type: return yield SpanAttributes.INPUT_VALUE, value_and_type.value - yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value + # it's TEXT by default, so we can skip to save one attribute + if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: + yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value def _as_output_attributes( @@ -75,7 +71,9 @@ def _as_output_attributes( if not value_and_type: return yield SpanAttributes.OUTPUT_VALUE, value_and_type.value - yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value + # it's TEXT by default, so we can skip to save one attribute + if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: + yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value class _HasAttributes(Protocol): diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py new file mode 100644 index 000000000..96ba01b91 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -0,0 +1,543 @@ +import asyncio +import json +import random +from contextlib import suppress +from importlib.metadata import version +from itertools import count +from typing import ( + Any, + AsyncIterator, + Dict, + Generator, + Iterable, + Iterator, + List, + Mapping, + Sequence, + Tuple, + cast, +) + +import openai +import pytest +from httpx import AsyncByteStream, Response +from openinference.instrumentation.openai import OpenAIInstrumentor +from openinference.semconv.trace import ( + MessageAttributes, + OpenInferenceMimeTypeValues, + OpenInferenceSpanKindValues, + SpanAttributes, + ToolCallAttributes, +) 
+from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.util.types import AttributeValue +from respx import MockRouter + + +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("is_stream", [False, True]) +@pytest.mark.parametrize("status_code", [200, 400]) +def test_chat_completions( + is_async: bool, + is_raw: bool, + is_stream, + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + completion_usage: Dict[str, Any], + model_name: str, + tool_calls_mock_stream, +) -> None: + input_messages: List[Dict[str, Any]] = get_messages() + output_messages: List[Dict[str, Any]] = ( + tool_calls_mock_stream[1] if is_stream else get_messages() + ) + invocation_parameters = { + "stream": is_stream, + "model": randstr(), + "temperature": random.random(), + "n": len(output_messages), + } + url = "https://api.openai.com/v1/chat/completions" + respx_kwargs = { + **( + {"stream": MockAsyncByteStream(tool_calls_mock_stream[0])} + if is_stream + else { + "json": { + "choices": [ + {"index": i, "message": message, "finish_reason": "stop"} + for i, message in enumerate(output_messages) + ], + "model": model_name, + "usage": completion_usage, + } + } + ), + } + respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) + create_kwargs = {"messages": input_messages, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").chat.completions + if is_async + else openai.OpenAI(api_key="sk-").chat.completions + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + async for _ in response: + pass + + asyncio.run(task()) + else: + response = create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + for _ in response: + pass + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.LLM.value + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert ( + OpenInferenceMimeTypeValues(attributes.pop(INPUT_MIME_TYPE, None)) + == OpenInferenceMimeTypeValues.JSON + ) + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + for prefix, messages in ( + (LLM_INPUT_MESSAGES, input_messages), + *(((LLM_OUTPUT_MESSAGES, output_messages),) if status_code == 200 else ()), + ): + for i, message in enumerate(messages): + assert attributes.pop(message_role(prefix, i), None) == message.get("role") + assert attributes.pop(message_content(prefix, i), None) == message.get("content") + if function_call := message.get("function_call"): + assert 
attributes.pop( + message_function_call_name(prefix, i), None + ) == function_call.get("name") + assert attributes.pop( + message_function_call_arguments(prefix, i), None + ) == function_call.get("arguments") + if _openai_version() >= (1, 1, 0) and (tool_calls := message.get("tool_calls")): + for j, tool_call in enumerate(tool_calls): + if function := tool_call.get("function"): + assert attributes.pop( + tool_call_function_name(prefix, i, j), None + ) == function.get("name") + assert attributes.pop( + tool_call_function_arguments(prefix, i, j), None + ) == function.get("arguments") + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert ( + OpenInferenceMimeTypeValues(attributes.pop(OUTPUT_MIME_TYPE, None)) + == OpenInferenceMimeTypeValues.JSON + ) + if not is_stream: + # Usage is not available for streaming in general. + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == completion_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == completion_usage["prompt_tokens"] + assert ( + attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) + == completion_usage["completion_tokens"] + ) + # We left out model_name from our mock stream. + assert attributes.pop(LLM_MODEL_NAME, None) == model_name + assert attributes == {} # this test accounts for all the attributes after popping them + + +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("is_stream", [True]) +@pytest.mark.parametrize("status_code", [200, 400]) +def test_completions( + is_async: bool, + is_raw: bool, + is_stream: bool, + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + completion_usage: Dict[str, Any], + model_name: str, + completions_mock_stream, +) -> None: + prompt: List[str] = get_texts() + output_texts: List[str] = completions_mock_stream[1] if is_stream else get_texts() + invocation_parameters = { + "stream": is_stream, + "model": randstr(), + "temperature": random.random(), + "n": len(output_texts), + } + url = "https://api.openai.com/v1/completions" + respx_kwargs = { + **( + {"stream": MockAsyncByteStream(completions_mock_stream[0])} + if is_stream + else { + "json": { + "choices": [ + {"index": i, "text": text, "finish_reason": "stop"} + for i, text in enumerate(output_texts) + ], + "model": model_name, + "usage": completion_usage, + } + } + ), + } + respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) + create_kwargs = {"prompt": prompt, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").completions + if is_async + else openai.OpenAI(api_key="sk-").completions + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + async for _ in response: + pass + + asyncio.run(task()) + else: + response = create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + for _ in response: + pass + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + 
attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.LLM.value + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert isinstance(attributes.pop(INPUT_MIME_TYPE, None), str) + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert isinstance(attributes.pop(OUTPUT_MIME_TYPE, None), str) + assert list(cast(Sequence[str], attributes.pop(LLM_PROMPTS, None))) == prompt + if not is_stream: + # Usage is not available for streaming in general. + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == completion_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == completion_usage["prompt_tokens"] + assert ( + attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) + == completion_usage["completion_tokens"] + ) + # We left out model_name from our mock stream. + assert attributes.pop(LLM_MODEL_NAME, None) == model_name + assert attributes == {} # this test accounts for all the attributes after popping them + + +@pytest.fixture(scope="function") +def in_memory_span_exporter() -> InMemorySpanExporter: + return InMemorySpanExporter() + + +@pytest.fixture(autouse=True) +def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, None, None]: + """ + Instruments OpenAI before each test to ensure that the patch is applied + before any tests are run. + """ + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_processor = SimpleSpanProcessor(span_exporter=in_memory_span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + yield + OpenAIInstrumentor().uninstrument() + + +@pytest.fixture +def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: + return ( + [ + b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_amGrubFmr2FSPHeC5OPgwcNs", "function": {"arguments": "", "name": "get_current_weather"}, "type": "function"}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": ""}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "{\\"lo"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "{\\"lo"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "catio"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "catio"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "n\\": \\"B"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n\\": \\"B"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "osto"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "osto"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "n, MA"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n, MA"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "\\", 
\\"un"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\", \\"un"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "it\\":"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "it\\":"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": " \\"fah"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": " \\"fah"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "renhei"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "renhei"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "t\\"}"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "t\\"}"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "id": "call_6QTP4mLSYYzZwt3ZWj77vfZf", "function": {"arguments": "", "name": "get_current_weather"}, "type": "function"}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "{\\"lo"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "{\\"lo"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "catio"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "catio"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "n\\": \\"S"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n\\": \\"S"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "an F"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "an F"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "ranci"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "ranci"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "sco, C"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "sco, C"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "A\\", "}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "A\\", "}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "\\"unit"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\"unit"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "\\": \\"fa"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\": \\"fa"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "hren"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "hren"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "heit\\""}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": 
{"content": "heit\\""}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "}"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "}"}, "index": 1}]}\n\n', + b'data: {"choices": [{"finish_reason": "tool_calls", "index": 0}]}\n\n', # noqa: E501 + b"data: [DONE]\n", + ], + [ + { + "role": "assistant", + "content": '{"location": "Boston, MA", "unit": "fahrenheit"}', + "tool_calls": [ + { + "id": "call_amGrubFmr2FSPHeC5OPgwcNs", + "function": { + "arguments": '{"location": "Boston, MA", "unit": "fahrenheit"}', + "name": "get_current_weather", + }, + "type": "function", + }, + { + "id": "call_6QTP4mLSYYzZwt3ZWj77vfZf", + "function": { + "arguments": '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + "name": "get_current_weather", + }, + "type": "function", + }, + ], + }, + { + "role": "assistant", + "content": '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + }, + ], + ) + + +@pytest.fixture +def completions_mock_stream() -> Tuple[List[bytes], List[str]]: + return ( + [ + b'data: {"choices": [{"text": "", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "{\\"lo", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "{\\"lo", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "catio", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "catio", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "n\\": \\"S", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "n\\": \\"B", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "an F", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "osto", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "ranci", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "n, MA", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "sco, C", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "\\", \\"un", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "A\\", ", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "it\\":", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "\\"unit", "index": 1}]}\n\n', + b'data: {"choices": [{"text": " \\"fah", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "\\": \\"fa", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "renhei", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "hren", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "t\\"}", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "heit\\"", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "}", "index": 1}]}\n\n', + b"data: [DONE]\n", + ], + [ + '{"location": "Boston, MA", "unit": "fahrenheit"}', + '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + ], + ) + + +@pytest.fixture +def completion_usage() -> Dict[str, Any]: + prompt_tokens = random.randint(1, 1000) + completion_tokens = random.randint(1, 1000) + return { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } + + +@pytest.fixture +def model_name() -> str: + return randstr() + + +@pytest.fixture +def input_messages() -> List[Dict[str, Any]]: + return [{"role": randstr(), "content": randstr()} for _ in range(2)] + + +@pytest.fixture(scope="module") +def seed() -> Iterator[int]: + """ + Use rolling seeds to make debugging easier, because the rolling pseudo-random + values allow conditional breakpoints to be hit precisely (and repeatably). 
+ """ + return count() + + +@pytest.fixture(autouse=True) +def set_seed(seed: Iterator[int]) -> None: + random.seed(next(seed)) + yield + + +def randstr() -> str: + return str(random.random()) + + +def get_texts() -> List[str]: + return [randstr() for _ in range(2)] + + +def get_messages() -> List[Dict[str, Any]]: + messages: List[Dict[str, Any]] = [ + *[{"role": randstr(), "content": randstr()} for _ in range(2)], + *[ + {"role": randstr(), "function_call": {"arguments": randstr(), "name": randstr()}} + for _ in range(2) + ], + *( + [ + { + "role": randstr(), + "tool_calls": [ + {"function": {"arguments": randstr(), "name": randstr()}} for _ in range(2) + ], + } + for _ in range(2) + ] + if _openai_version() >= (1, 1, 0) + else [] + ), + ] + random.shuffle(messages) + return messages + + +def _openai_version() -> Tuple[int, int, int]: + return cast(Tuple[int, int, int], tuple(map(int, version("openai").split(".")[:3]))) + + +def message_role(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_ROLE}" + + +def message_content(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_CONTENT}" + + +def message_function_call_name(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_FUNCTION_CALL_NAME}" + + +def message_function_call_arguments(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON}" + + +def tool_call_function_name(prefix: str, i: int, j: int) -> str: + return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_NAME}" + + +def tool_call_function_arguments(prefix: str, i: int, j: int) -> str: + return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}" + + +class MockAsyncByteStream(AsyncByteStream): + def __init__(self, byte_stream: Iterable[bytes]): + self._byte_stream = byte_stream + + def __iter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string + + async def __aiter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string + + +OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND +INPUT_VALUE = SpanAttributes.INPUT_VALUE +INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE +OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE +OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE +LLM_INVOCATION_PARAMETERS = SpanAttributes.LLM_INVOCATION_PARAMETERS +LLM_MODEL_NAME = SpanAttributes.LLM_MODEL_NAME +LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL +LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT +LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION +LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES +LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES +LLM_PROMPTS = SpanAttributes.LLM_PROMPTS +MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE +MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT +MESSAGE_FUNCTION_CALL_NAME = MessageAttributes.MESSAGE_FUNCTION_CALL_NAME +MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON +MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS +TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME +TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 
fc922dc96..c7e5d4ff0 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -6,7 +6,7 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): - accumulator = _ChatCompletionAccumulator() + accumulator = _ChatCompletionAccumulator({}) for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result diff --git a/python/tox.ini b/python/tox.ini index a38bd176f..cb9edf390 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,12 +3,11 @@ isolated_build = True skipsdist = True envlist = py3{8,11}-ci-semconv - py3{8,11}-ci-openai + py3{8,11}-ci-{openai,openai-latest} [testenv] package = wheel wheel_build_env = .pkg -allowlist_externals = git deps = -r dev-requirements.txt changedir = @@ -17,14 +16,13 @@ changedir = commands_pre = semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] + openai-latest: pip install -U openai commands = ruff: ruff format . --config {toxinidir}/ruff.toml - ruff: ruff . --fix --config {toxinidir}/ruff.toml + ruff: ruff check . --fix --config {toxinidir}/ruff.toml mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} - test: pytest {posargs} - ci: ruff format . --config {toxinidir}/ruff.toml - ci: git diff --exit-code - ci: ruff . --fix --config {toxinidir}/ruff.toml - ci: git diff --exit-code + test: pytest {posargs:tests} + ci: ruff format . --diff --config {toxinidir}/ruff.toml + ci: ruff check . 
--diff --config {toxinidir}/ruff.toml ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest tests From e5ccc6d8e66d2dc6a4bebff306fda399808105b5 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:22:42 -0800 Subject: [PATCH 41/44] wip --- .../integration_tests/embeddings.py | 27 +-- .../pyproject.toml | 1 + .../openai/_extra_attributes_from_response.py | 2 +- .../openai/test_instrumentor.py | 192 +++++++++++++----- 4 files changed, 162 insertions(+), 60 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index 10b1c11ef..61725aa1b 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -85,7 +85,8 @@ async def embeddings(**kwargs): async def embeddings_with_raw_response(**kwargs): try: with suppress(openai.BadRequestError): - await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + response = await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + response except Exception: logger.exception(f"{inspect.stack()[0][3]}({kwargs})") finally: @@ -99,18 +100,18 @@ async def main(*tasks): if __name__ == "__main__": asyncio.run( main( - embeddings(input="hello world"), - embeddings(input="hello world", encoding_format="float"), - embeddings(input="hello world", encoding_format="base64"), - embeddings(input=["hello", "world"]), - embeddings(input=["hello", "world"], encoding_format="float"), - embeddings(input=["hello", "world"], encoding_format="base64"), - embeddings(input=[15339, 1917]), - embeddings(input=[15339, 1917], encoding_format="float"), - embeddings(input=[15339, 1917], encoding_format="base64"), - embeddings(input=[[15339], [14957]]), - embeddings(input=[[15339], [14957]], encoding_format="float"), - embeddings(input=[[15339], [14957]], encoding_format="base64"), + # embeddings(input="hello world"), + # embeddings(input="hello world", encoding_format="float"), + # embeddings(input="hello world", encoding_format="base64"), + # embeddings(input=["hello", "world"]), + # embeddings(input=["hello", "world"], encoding_format="float"), + # embeddings(input=["hello", "world"], encoding_format="base64"), + # embeddings(input=[15339, 1917]), + # embeddings(input=[15339, 1917], encoding_format="float"), + # embeddings(input=[15339, 1917], encoding_format="base64"), + # embeddings(input=[[15339], [14957]]), + # embeddings(input=[[15339], [14957]], encoding_format="float"), + # embeddings(input=[[15339], [14957]], encoding_format="base64"), embeddings_with_raw_response(input="hello world"), embeddings_with_raw_response(input="hello world", encoding_format="float"), embeddings_with_raw_response(input="hello world", encoding_format="base64"), diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 833c613d3..1fbe6ea67 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -39,6 +39,7 @@ test = [ "openai == 1.0.0", "opentelemetry-sdk", "respx", + "numpy", ] [project.urls] diff --git 
a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py index ebab74ea9..e7f7ad63d 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -97,7 +97,7 @@ def _( yield from _get_attributes_from_embedding_usage(usage) # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L23 # noqa: E501 if model := getattr(response, "model"): - yield f"{EmbeddingAttributes.EMBEDDING_MODEL_NAME}", model + yield f"{SpanAttributes.EMBEDDING_MODEL_NAME}", model if (data := getattr(response, "data", None)) and isinstance(data, Iterable): for embedding in data: if (index := getattr(embedding, "index", None)) is None: diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 96ba01b91..8bf9aad6e 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -15,6 +15,7 @@ Mapping, Sequence, Tuple, + Union, cast, ) @@ -23,6 +24,7 @@ from httpx import AsyncByteStream, Response from openinference.instrumentation.openai import OpenAIInstrumentor from openinference.semconv.trace import ( + EmbeddingAttributes, MessageAttributes, OpenInferenceMimeTypeValues, OpenInferenceSpanKindValues, @@ -170,7 +172,7 @@ async def task() -> None: @pytest.mark.parametrize("is_async", [False, True]) @pytest.mark.parametrize("is_raw", [False, True]) -@pytest.mark.parametrize("is_stream", [True]) +@pytest.mark.parametrize("is_stream", [False, True]) @pytest.mark.parametrize("status_code", [200, 400]) def test_completions( is_async: bool, @@ -268,6 +270,100 @@ async def task() -> None: assert attributes == {} # this test accounts for all the attributes after popping them +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("status_code", [200, 400]) +@pytest.mark.parametrize("encoding_format", ["float", "base64"]) +@pytest.mark.parametrize("input_text", ["hello", ["hello", "world"]]) +def test_embeddings( + is_async: bool, + is_raw: bool, + encoding_format: str, + input_text: Union[str, List[str]], + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + model_name: str, +) -> None: + invocation_parameters = { + "model": randstr(), + "encoding_format": encoding_format, + } + embedding_model_name = randstr() + embedding_usage = { + "prompt_tokens": random.randint(10, 100), + "total_tokens": random.randint(10, 100), + } + output_embeddings = [("AACAPwAAAEA=", (1.0, 2.0)), ((2.0, 3.0), (2.0, 3.0))] + url = "https://api.openai.com/v1/embeddings" + respx_mock.post(url).mock( + return_value=Response( + status_code=status_code, + json={ + "object": "list", + "data": [ + {"object": 
"embedding", "index": i, "embedding": embedding[0]} + for i, embedding in enumerate(output_embeddings) + ], + "model": embedding_model_name, + "usage": embedding_usage, + }, + ) + ) + create_kwargs = {"input": input_text, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").embeddings + if is_async + else openai.OpenAI(api_key="sk-").embeddings + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + _ = response.parse() if is_raw else response + + asyncio.run(task()) + else: + response = create(**create_kwargs) + _ = response.parse() if is_raw else response + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert ( + attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.EMBEDDING.value + ) + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert isinstance(attributes.pop(INPUT_MIME_TYPE, None), str) + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert isinstance(attributes.pop(OUTPUT_MIME_TYPE, None), str) + assert attributes.pop(EMBEDDING_MODEL_NAME, None) == embedding_model_name + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == embedding_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == embedding_usage["prompt_tokens"] + for i, text in enumerate(input_text if isinstance(input_text, list) else [input_text]): + assert attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_TEXT}", None) == text + for i, embedding in enumerate(output_embeddings): + assert ( + attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) + == embedding[1] + ) + assert attributes == {} # this test accounts for all the attributes after popping them + + @pytest.fixture(scope="function") def in_memory_span_exporter() -> InMemorySpanExporter: return InMemorySpanExporter() @@ -288,6 +384,42 @@ def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, OpenAIInstrumentor().uninstrument() +@pytest.fixture(scope="module") +def seed() -> Iterator[int]: + """ + Use rolling seeds to make debugging easier, because the rolling pseudo-random + values allow conditional breakpoints to be hit precisely (and repeatably). 
+ """ + return count() + + +@pytest.fixture(autouse=True) +def set_seed(seed: Iterator[int]) -> None: + random.seed(next(seed)) + yield + + +@pytest.fixture +def completion_usage() -> Dict[str, Any]: + prompt_tokens = random.randint(1, 1000) + completion_tokens = random.randint(1, 1000) + return { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } + + +@pytest.fixture +def model_name() -> str: + return randstr() + + +@pytest.fixture +def input_messages() -> List[Dict[str, Any]]: + return [{"role": randstr(), "content": randstr()} for _ in range(2)] + + @pytest.fixture def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: return ( @@ -411,40 +543,17 @@ def completions_mock_stream() -> Tuple[List[bytes], List[str]]: ) -@pytest.fixture -def completion_usage() -> Dict[str, Any]: - prompt_tokens = random.randint(1, 1000) - completion_tokens = random.randint(1, 1000) - return { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } - - -@pytest.fixture -def model_name() -> str: - return randstr() - - -@pytest.fixture -def input_messages() -> List[Dict[str, Any]]: - return [{"role": randstr(), "content": randstr()} for _ in range(2)] - - -@pytest.fixture(scope="module") -def seed() -> Iterator[int]: - """ - Use rolling seeds to make debugging easier, because the rolling pseudo-random - values allow conditional breakpoints to be hit precisely (and repeatably). - """ - return count() +class MockAsyncByteStream(AsyncByteStream): + def __init__(self, byte_stream: Iterable[bytes]): + self._byte_stream = byte_stream + def __iter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string -@pytest.fixture(autouse=True) -def set_seed(seed: Iterator[int]) -> None: - random.seed(next(seed)) - yield + async def __aiter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string def randstr() -> str: @@ -508,19 +617,6 @@ def tool_call_function_arguments(prefix: str, i: int, j: int) -> str: return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}" -class MockAsyncByteStream(AsyncByteStream): - def __init__(self, byte_stream: Iterable[bytes]): - self._byte_stream = byte_stream - - def __iter__(self) -> AsyncIterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string - - async def __aiter__(self) -> AsyncIterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string - - OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND INPUT_VALUE = SpanAttributes.INPUT_VALUE INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE @@ -541,3 +637,7 @@ async def __aiter__(self) -> AsyncIterator[bytes]: MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON +EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS +EMBEDDING_MODEL_NAME = SpanAttributes.EMBEDDING_MODEL_NAME +EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR +EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT From c20f488d0e81e5c391caccfb9d8dd206e7e8aac1 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:23:14 -0800 Subject: [PATCH 42/44] wip --- .../instrumentation/openai/test_instrumentor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 8bf9aad6e..6480cbf22 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -167,7 +167,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.mark.parametrize("is_async", [False, True]) @@ -267,7 +267,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.mark.parametrize("is_async", [False, True]) @@ -361,7 +361,7 @@ async def task() -> None: attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) == embedding[1] ) - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.fixture(scope="function") From 6bd830ce5cf6deb623a77fbde53cda6ebbd0e692 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:26:30 -0800 Subject: [PATCH 43/44] wip --- .../integration_tests/completions.py | 205 ------------------ .../integration_tests/completions_async.py | 202 ----------------- .../integration_tests/embeddings.py | 138 ------------ .../functions_and_tool_calls.py | 193 ----------------- 4 files changed, 738 deletions(-) delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py deleted file mode 100644 index dc4226409..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ /dev/null @@ -1,205 +0,0 @@ -import contextvars -import inspect -import logging -import threading -from contextlib import suppress -from importlib.metadata import version -from itertools import chain -from time import sleep - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from 
opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.OpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -HAIKU = "Write a haiku." -HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a résumé." -RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] -CHAT_KWARGS = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": HAIKU}], - "max_tokens": 20, - "temperature": 2, - **( - { - "logprobs": True, - "top_logprobs": 5, - } - if _OPENAI_VERSION >= (1, 5, 0) - else {} - ), -} -COMP_KWARGS = { - "model": "gpt-3.5-turbo-instruct", - "prompt": HAIKU, - "max_tokens": 20, - "temperature": 2, - "logprobs": 5, -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 -_LOCK = threading.Lock() - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - with _LOCK: - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response: - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response: - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - 
response = CLIENT.chat.completions.with_raw_response.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response.parse(): - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.completions.with_raw_response.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response.parse(): - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def tasks(n, task, **kwargs): - for i in range(n): # i = 0 results in intentional BadRequestError - ctx = contextvars.copy_context() - yield threading.Thread( - target=ctx.run, - args=(task,), - kwargs={"n": i, **kwargs}, - ) - - -if __name__ == "__main__": - threads = list( - chain( - tasks(N, completions), - tasks(N, completions_with_raw_response), - tasks(N, completions, stream=True), - tasks(N, completions_with_raw_response, stream=True), - tasks(N, completions, prompt=[HAIKU, RESUME]), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), - tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions, prompt=HAIKU_TOKENS), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), - tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), - tasks( - N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True - ), - tasks(N, chat_completions), - tasks(N, chat_completions_with_raw_response), - tasks(N, chat_completions, stream=True), - tasks(N, chat_completions_with_raw_response, stream=True), - ) - ) - [thread.start() for thread in threads] - [thread.join() for thread in threads] - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py deleted file mode 100644 index 2c94b0d78..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ /dev/null @@ -1,202 +0,0 @@ -import asyncio -import inspect -import logging -from contextlib import suppress -from importlib.metadata import version -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import 
InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -HAIKU = "Write a haiku." -HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a résumé." -RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] -CHAT_KWARGS = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": HAIKU}], - "max_tokens": 20, - "temperature": 2, - **( - { - "logprobs": True, - "top_logprobs": 5, - } - if _OPENAI_VERSION >= (1, 5, 0) - else {} - ), -} -COMP_KWARGS = { - "model": "gpt-3.5-turbo-instruct", - "prompt": HAIKU, - "max_tokens": 20, - "temperature": 2, - "logprobs": 5, -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -async def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await 
CLIENT.chat.completions.with_raw_response.create( - **{**CHAT_KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.completions.with_raw_response.create( - **{**COMP_KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def tasks(n, task, **kwargs): - return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError - - -async def main(*tasks): - await asyncio.gather(*chain.from_iterable(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - tasks(N, completions), - tasks(N, completions_with_raw_response), - tasks(N, completions, stream=True), - tasks(N, completions_with_raw_response, stream=True), - tasks(N, completions, prompt=[HAIKU, RESUME]), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), - tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions, prompt=HAIKU_TOKENS), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), - tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), - tasks( - N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True - ), - tasks(N, chat_completions), - tasks(N, chat_completions_with_raw_response), - tasks(N, chat_completions, stream=True), - tasks(N, chat_completions_with_raw_response, stream=True), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py deleted file mode 100644 index 61725aa1b..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ /dev/null @@ -1,138 +0,0 @@ -import asyncio -import inspect -import logging -from contextlib import suppress -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = 
Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -KWARGS = { - "model": "text-embedding-ada-002", -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -async def embeddings(**kwargs): - try: - with suppress(openai.BadRequestError): - await CLIENT.embeddings.create(**{**KWARGS, **kwargs}) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def embeddings_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) - response - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def main(*tasks): - await asyncio.gather(*chain(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - # embeddings(input="hello world"), - # embeddings(input="hello world", encoding_format="float"), - # embeddings(input="hello world", encoding_format="base64"), - # embeddings(input=["hello", "world"]), - # embeddings(input=["hello", "world"], encoding_format="float"), - # embeddings(input=["hello", "world"], encoding_format="base64"), - # embeddings(input=[15339, 1917]), - # embeddings(input=[15339, 1917], encoding_format="float"), - # embeddings(input=[15339, 1917], encoding_format="base64"), - # embeddings(input=[[15339], [14957]]), - # embeddings(input=[[15339], [14957]], encoding_format="float"), - # embeddings(input=[[15339], [14957]], encoding_format="base64"), - embeddings_with_raw_response(input="hello world"), - embeddings_with_raw_response(input="hello world", encoding_format="float"), - embeddings_with_raw_response(input="hello world", 
encoding_format="base64"), - embeddings_with_raw_response(input=["hello", "world"]), - embeddings_with_raw_response(input=["hello", "world"], encoding_format="float"), - embeddings_with_raw_response(input=["hello", "world"], encoding_format="base64"), - embeddings_with_raw_response(input=[15339, 1917]), - embeddings_with_raw_response(input=[15339, 1917], encoding_format="float"), - embeddings_with_raw_response(input=[15339, 1917], encoding_format="base64"), - embeddings_with_raw_response(input=[[15339], [14957]]), - embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="float"), - embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="base64"), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py deleted file mode 100644 index 5166a6da3..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -Phoenix collector should be running in the background. - -tools requires openai>=1.1.0 -""" -import asyncio -import inspect -import logging -from contextlib import suppress -from importlib.metadata import version -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. 
-if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -KWARGS = { - "model": "gpt-4", - "messages": [ - { - "role": "user", - "content": "What's the current time and weather in San Francisco, CA?", - } - ], - "max_tokens": 20, - "temperature": 0, -} -TOOLS = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g., San Francisco, CA", - }, - }, - "required": ["location"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "get_current_time", - "description": "Get the current time in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g., San Francisco, CA", - }, - }, - "required": ["location"], - }, - }, - }, -] -FUNCTIONS = [tool["function"] for tool in TOOLS] - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}") - - -async def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.create(**{**KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count() - - -async def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.with_raw_response.create( - **{**KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count() - - -def tasks(n, task, **kwargs): - return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError - - -async def main(*tasks): - await asyncio.gather(*chain.from_iterable(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - tasks(N, chat_completions, functions=FUNCTIONS), - tasks(N, chat_completions, functions=FUNCTIONS, stream=True), - tasks(N, chat_completions_with_raw_response, functions=FUNCTIONS), - tasks(N, 
chat_completions_with_raw_response, functions=FUNCTIONS, stream=True), - *( - [ - tasks(N, chat_completions, tools=TOOLS), - tasks(N, chat_completions, tools=TOOLS, stream=True), - tasks(N, chat_completions_with_raw_response, tools=TOOLS), - tasks(N, chat_completions_with_raw_response, tools=TOOLS, stream=True), - ] - if _OPENAI_VERSION >= (1, 1, 0) - else () - ), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual From 50be6c8130a5e06fe27cd1ca338a30260a3aa176 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 11 Jan 2024 08:36:49 -0800 Subject: [PATCH 44/44] wip --- .../examples/chat_completions.py | 25 ++ ...am.py => chat_completions_async_stream.py} | 0 .../examples/embeddings.py | 24 ++ .../examples/with_httpx_instrumentor.py | 29 ++ .../pyproject.toml | 1 + .../instrumentation/openai/__init__.py | 4 +- .../instrumentation/openai/_request.py | 301 +++++++++++------- ...st.py => _request_attributes_extractor.py} | 61 ++-- .../openai/_response_accumulator.py | 75 +++-- ...e.py => _response_attributes_extractor.py} | 73 +++-- .../instrumentation/openai/_stream.py | 6 +- .../instrumentation/openai/_utils.py | 6 +- .../openai/test_instrumentor.py | 82 +++-- .../openai/test_response_accumulator.py | 150 --------- 14 files changed, 461 insertions(+), 376 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py rename python/instrumentation/openinference-instrumentation-openai/examples/{chat_completion_async_stream.py => chat_completions_async_stream.py} (100%) create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py rename python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/{_extra_attributes_from_request.py => _request_attributes_extractor.py} (77%) rename python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/{_extra_attributes_from_response.py => _response_attributes_extractor.py} (80%) delete mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py new file mode 100644 index 000000000..39c845f70 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py @@ -0,0 +1,25 @@ +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor 
= SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + response = openai.OpenAI().chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + ) + print(response.choices[0].message.content) diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions_async_stream.py similarity index 100% rename from python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py rename to python/instrumentation/openinference-instrumentation-openai/examples/chat_completions_async_stream.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py new file mode 100644 index 000000000..812d721e0 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py @@ -0,0 +1,24 @@ +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + response = openai.OpenAI().embeddings.create( + model="text-embedding-ada-002", + input="hello world", + ) + print(response.data[0].embedding) diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py new file mode 100644 index 000000000..9ba20b9fd --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py @@ -0,0 +1,29 @@ +from importlib import import_module + +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +HTTPXClientInstrumentor().instrument() 
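+# Note: httpx is instrumented before `openai` is imported (hence the deferred
+# `import_module("openai")` below), so that the underlying HTTP request can show
+# up as a child span of the OpenAI span.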
+OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + openai = import_module("openai") + response = openai.OpenAI().chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + ) + print(response.choices[0].message.content) diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 1fbe6ea67..33add988d 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -38,6 +38,7 @@ instruments = [ test = [ "openai == 1.0.0", "opentelemetry-sdk", + "opentelemetry-instrumentation-httpx", "respx", "numpy", ] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 1cec04144..e3b4faf8c 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -41,12 +41,12 @@ def _instrument(self, **kwargs: Any) -> None: wrap_function_wrapper( module=_MODULE, name="OpenAI.request", - wrapper=_Request(tracer=tracer), + wrapper=_Request(tracer=tracer, openai=openai), ) wrap_function_wrapper( module=_MODULE, name="AsyncOpenAI.request", - wrapper=_AsyncRequest(tracer=tracer), + wrapper=_AsyncRequest(tracer=tracer, openai=openai), ) def _uninstrument(self, **kwargs: Any) -> None: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 1009e91ed..7e16a6ae1 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -1,28 +1,29 @@ import logging from abc import ABC from contextlib import contextmanager -from types import MappingProxyType +from types import ModuleType from typing import ( Any, Awaitable, Callable, - Dict, + Iterable, Iterator, Mapping, Tuple, ) -from openinference.instrumentation.openai._extra_attributes_from_request import ( - _get_extra_attributes_from_request, -) -from openinference.instrumentation.openai._extra_attributes_from_response import ( - _get_extra_attributes_from_response, +from openinference.instrumentation.openai._request_attributes_extractor import ( + _RequestAttributesExtractor, ) from openinference.instrumentation.openai._response_accumulator import ( _ChatCompletionAccumulator, _CompletionAccumulator, ) +from openinference.instrumentation.openai._response_attributes_extractor import ( + _ResponseAttributesExtractor, +) from openinference.instrumentation.openai._stream import ( + _ResponseAccumulator, _Stream, ) from openinference.instrumentation.openai._utils import ( @@ -38,10 +39,7 @@ from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY from opentelemetry.trace import INVALID_SPAN from opentelemetry.util.types import AttributeValue - -from openai import AsyncStream, Stream -from openai.types import Completion, 
CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +from typing_extensions import TypeAlias __all__ = ( "_Request", @@ -53,56 +51,180 @@ class _WithTracer(ABC): - __slots__ = ("_tracer",) - - def __init__(self, tracer: trace_api.Tracer) -> None: + def __init__(self, tracer: trace_api.Tracer, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) self._tracer = tracer @contextmanager def _start_as_current_span( self, span_name: str, - cast_to: type, - request_parameters: Mapping[str, Any], + attributes: Iterable[Tuple[str, AttributeValue]], + extra_attributes: Iterable[Tuple[str, AttributeValue]], ) -> Iterator[_WithSpan]: - span_kind = ( + # Because OTEL has a default limit of 128 attributes, we split our attributes into + # two tiers, where the addition of "extra_attributes" is deferred until the end + # and only after the "attributes" are added. + try: + span = self._tracer.start_span(name=span_name, attributes=dict(attributes)) + except Exception: + logger.exception("Failed to start span") + span = INVALID_SPAN + with trace_api.use_span( + span, + end_on_exit=False, + record_exception=False, + set_status_on_exception=False, + ) as span: + yield _WithSpan(span=span, extra_attributes=dict(extra_attributes)) + + +_RequestParameters: TypeAlias = Mapping[str, Any] + + +class _WithOpenAI(ABC): + __slots__ = ( + "_openai", + "_stream_types", + "_request_attributes_extractor", + "_response_attributes_extractor", + "_response_accumulator_factories", + ) + + def __init__(self, openai: ModuleType, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._openai = openai + self._stream_types = (openai.Stream, openai.AsyncStream) + self._request_attributes_extractor = _RequestAttributesExtractor(openai=openai) + self._response_attributes_extractor = _ResponseAttributesExtractor(openai=openai) + self._response_accumulator_factories: Mapping[ + type, Callable[[_RequestParameters], _ResponseAccumulator] + ] = { + openai.types.Completion: lambda request_parameters: _CompletionAccumulator( + request_parameters=request_parameters, + completion_type=openai.types.Completion, + response_attributes_extractor=self._response_attributes_extractor, + ), + openai.types.chat.ChatCompletion: lambda request_parameters: _ChatCompletionAccumulator( + request_parameters=request_parameters, + chat_completion_type=openai.types.chat.ChatCompletion, + response_attributes_extractor=self._response_attributes_extractor, + ), + } + + def _get_span_kind(self, cast_to: type) -> str: + return ( OpenInferenceSpanKindValues.EMBEDDING.value - if cast_to is CreateEmbeddingResponse + if cast_to is self._openai.types.CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM.value ) - attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} + + def _get_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + yield SpanAttributes.OPENINFERENCE_SPAN_KIND, self._get_span_kind(cast_to=cast_to) try: - attributes.update(_as_input_attributes(_io_value_and_type(request_parameters))) + yield from _as_input_attributes(_io_value_and_type(request_parameters)) except Exception: logger.exception( f"Failed to get input attributes from request parameters of " f"type {type(request_parameters)}" ) + + def _get_extra_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: # Secondary attributes should 
be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: - extra_attributes = dict(_get_extra_attributes_from_request(cast_to, request_parameters)) + yield from self._request_attributes_extractor.get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ) except Exception: logger.exception( f"Failed to get extra attributes from request options of " f"type {type(request_parameters)}" ) - extra_attributes = {} - try: - span = self._tracer.start_span(span_name, attributes=attributes) - except Exception: - logger.exception("Failed to start span") - span = INVALID_SPAN - with trace_api.use_span( - span, - end_on_exit=False, - record_exception=False, - set_status_on_exception=False, - ) as span: - yield _WithSpan(span, extra_attributes) + def _is_streaming(self, response: Any) -> bool: + return isinstance(response, self._stream_types) -class _Request(_WithTracer): + def _finalize_response( + self, + response: Any, + with_span: _WithSpan, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Any: + """ + Monkey-patch the response object to trace the stream, or finish tracing if the response is + not a stream. + """ + + if hasattr(response, "parse") and callable(response.parse): + # `.request()` may be called under `.with_raw_response` and it's necessary to call + # `.parse()` to get back the usual response types. + # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 + try: + response.parse() + except Exception: + logger.exception(f"Failed to parse response of type {type(response)}") + if ( + self._is_streaming(response) + or hasattr( + # FIXME: Ideally we should not rely on a private attribute (but it may be impossible). + # The assumption here is that calling `.parse()` stores the stream object in `._parsed` + # and calling `.parse()` again will not overwrite the monkey-patched version. + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 + response, + "_parsed", + ) + # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. + and self._is_streaming(response._parsed) + ): + # For streaming, we need an (optional) accumulator to process each chunk iteration. + try: + response_accumulator_factory = self._response_accumulator_factories.get(cast_to) + response_accumulator = ( + response_accumulator_factory(request_parameters) + if response_accumulator_factory + else None + ) + except Exception: + # Note that cast_to may not be hashable. + logger.exception(f"Failed to get response accumulator for {cast_to}") + response_accumulator = None + if hasattr(response, "_parsed") and self._is_streaming(parsed := response._parsed): + # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. 
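+                # If that assumption holds, a later `.parse()` call returns the traced
+                # stream, and the wrapper can end the span once the stream is exhausted.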
+ response._parsed = _Stream( + stream=parsed, + with_span=with_span, + response_accumulator=response_accumulator, + ) + return response + return _Stream( + stream=response, + with_span=with_span, + response_accumulator=response_accumulator, + ) + _finish_tracing( + status_code=trace_api.StatusCode.OK, + with_span=with_span, + has_attributes=_ResponseAttributes( + request_parameters=request_parameters, + response=response, + response_attributes_extractor=self._response_attributes_extractor, + ), + ) + return response + + +class _Request(_WithTracer, _WithOpenAI): def __call__( self, wrapped: Callable[..., Any], @@ -121,8 +243,14 @@ def __call__( return wrapped(*args, **kwargs) with self._start_as_current_span( span_name=span_name, - cast_to=cast_to, - request_parameters=request_parameters, + attributes=self._get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), + extra_attributes=self._get_extra_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), ) as with_span: try: response = wrapped(*args, **kwargs) @@ -132,7 +260,7 @@ def __call__( with_span.finish_tracing(status_code=status_code) raise try: - response = _finalize_response( + response = self._finalize_response( response=response, with_span=with_span, cast_to=cast_to, @@ -144,7 +272,7 @@ def __call__( return response -class _AsyncRequest(_WithTracer): +class _AsyncRequest(_WithTracer, _WithOpenAI): async def __call__( self, wrapped: Callable[..., Awaitable[Any]], @@ -163,8 +291,14 @@ async def __call__( return await wrapped(*args, **kwargs) with self._start_as_current_span( span_name=span_name, - cast_to=cast_to, - request_parameters=request_parameters, + attributes=self._get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), + extra_attributes=self._get_extra_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), ) as with_span: try: response = await wrapped(*args, **kwargs) @@ -174,7 +308,7 @@ async def __call__( with_span.finish_tracing(status_code=status_code) raise try: - response = _finalize_response( + response = self._finalize_response( response=response, with_span=with_span, cast_to=cast_to, @@ -212,92 +346,18 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] return cast_to, request_parameters -_RESPONSE_ACCUMULATOR_FACTORIES: Mapping[type, type] = MappingProxyType( - { - ChatCompletion: _ChatCompletionAccumulator, - Completion: _CompletionAccumulator, - } -) - - -def _finalize_response( - response: Any, - with_span: _WithSpan, - cast_to: type, - request_parameters: Mapping[str, Any], -) -> Any: - """Monkey-patch the response object to trace the stream, or finish tracing if the response is - not a stream. - """ - if hasattr(response, "parse") and callable(response.parse): - # `.request()` may be called under `.with_raw_response` and it's necessary to call - # `.parse()` to get back the usual response types. - # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 - try: - response.parse() - except Exception: - logger.exception(f"Failed to parse response of type {type(response)}") - if ( - isinstance(response, (Stream, AsyncStream)) - or hasattr( - # FIXME: Ideally we should not rely on a private attribute (but it may be impossible). 
- # The assumption here is that calling `.parse()` stores the stream object in `._parsed` - # and calling `.parse()` again will not overwrite the monkey-patched version. - # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 - response, - "_parsed", - ) - # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. - and isinstance(response._parsed, (Stream, AsyncStream)) - ): - # For streaming, we need an (optional) accumulator to process each chunk iteration. - try: - response_accumulator_factory = _RESPONSE_ACCUMULATOR_FACTORIES.get(cast_to) - response_accumulator = ( - response_accumulator_factory(request_parameters) - if response_accumulator_factory - else None - ) - except Exception: - # E.g. cast_to may not be hashable - logger.exception(f"Failed to get response accumulator for {cast_to}") - response_accumulator = None - if hasattr(response, "_parsed") and isinstance( - parsed := response._parsed, (Stream, AsyncStream) - ): - # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. - response._parsed = _Stream( - stream=parsed, - with_span=with_span, - response_accumulator=response_accumulator, - ) - return response - return _Stream( - stream=response, - with_span=with_span, - response_accumulator=response_accumulator, - ) - _finish_tracing( - status_code=trace_api.StatusCode.OK, - with_span=with_span, - has_attributes=_ResponseAttributes( - request_parameters=request_parameters, - response=response, - ), - ) - return response - - class _ResponseAttributes: __slots__ = ( "_response", "_request_parameters", + "_response_attributes_extractor", ) def __init__( self, response: Any, request_parameters: Mapping[str, Any], + response_attributes_extractor: _ResponseAttributesExtractor, ) -> None: if hasattr(response, "parse") and callable(response.parse): # E.g. 
see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 @@ -307,12 +367,13 @@ def __init__( logger.exception(f"Failed to parse response of type {type(response)}") self._request_parameters = request_parameters self._response = response + self._response_attributes_extractor = response_attributes_extractor def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from _as_output_attributes(_io_value_and_type(self._response)) def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - yield from _get_extra_attributes_from_response( - self._response, + yield from self._response_attributes_extractor.get_attributes_from_response( + response=self._response, request_parameters=self._request_parameters, ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py similarity index 77% rename from python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py rename to python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py index 625e30cc1..324346160 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py @@ -1,45 +1,66 @@ import json import logging from enum import Enum +from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Iterable, Iterator, List, Mapping, Tuple, + Type, ) from openinference.instrumentation.openai._utils import _OPENAI_VERSION from openinference.semconv.trace import MessageAttributes, SpanAttributes, ToolCallAttributes from opentelemetry.util.types import AttributeValue -from openai.types import Completion, CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +if TYPE_CHECKING: + from openai.types import Completion, CreateEmbeddingResponse + from openai.types.chat import ChatCompletion -__all__ = ("_get_extra_attributes_from_request",) +__all__ = ("_RequestAttributesExtractor",) logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -def _get_extra_attributes_from_request( - cast_to: type, - request_parameters: Mapping[str, Any], -) -> Iterator[Tuple[str, AttributeValue]]: - if not isinstance(request_parameters, Mapping): - return - if cast_to is ChatCompletion: - yield from _get_attributes_from_chat_completion_create_param(request_parameters) - elif cast_to is CreateEmbeddingResponse: - yield from _get_attributes_from_embedding_create_param(request_parameters) - elif cast_to is Completion: - yield from _get_attributes_from_completion_create_param(request_parameters) - else: - try: - yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) - except Exception: - logger.exception("Failed to serialize request options") +class _RequestAttributesExtractor: + __slots__ = ( + "_openai", + "_chat_completion_type", + "_completion_type", + "_create_embedding_response_type", + ) + + def __init__(self, openai: ModuleType) -> None: + self._openai = openai + self._chat_completion_type: Type["ChatCompletion"] = 
openai.types.chat.ChatCompletion + self._completion_type: Type["Completion"] = openai.types.Completion + self._create_embedding_response_type: Type[ + "CreateEmbeddingResponse" + ] = openai.types.CreateEmbeddingResponse + + def get_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + if not isinstance(request_parameters, Mapping): + return + if cast_to is self._chat_completion_type: + yield from _get_attributes_from_chat_completion_create_param(request_parameters) + elif cast_to is self._create_embedding_response_type: + yield from _get_attributes_from_embedding_create_param(request_parameters) + elif cast_to is self._completion_type: + yield from _get_attributes_from_completion_create_param(request_parameters) + else: + try: + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) + except Exception: + logger.exception("Failed to serialize request options") def _get_attributes_from_chat_completion_create_param( diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index e2b8f6807..04e43f740 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -3,6 +3,7 @@ from collections import defaultdict from copy import deepcopy from typing import ( + TYPE_CHECKING, Any, Callable, DefaultDict, @@ -12,12 +13,11 @@ List, Mapping, Optional, + Protocol, Tuple, + Type, ) -from openinference.instrumentation.openai._extra_attributes_from_response import ( - _get_extra_attributes_from_response, -) from openinference.instrumentation.openai._utils import ( _as_output_attributes, _ValueAndType, @@ -25,11 +25,9 @@ from openinference.semconv.trace import OpenInferenceMimeTypeValues from opentelemetry.util.types import AttributeValue -from openai.types import Completion -from openai.types.chat import ( - ChatCompletion, - ChatCompletionChunk, -) +if TYPE_CHECKING: + from openai.types import Completion + from openai.types.chat import ChatCompletion, ChatCompletionChunk __all__ = ( "_CompletionAccumulator", @@ -37,16 +35,34 @@ ) +class _CanGetAttributesFromResponse(Protocol): + def get_attributes_from_response( + self, + response: Any, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + ... 
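+# The Protocol above is the structural interface the injected extractor must satisfy:
+# any object exposing a matching `get_attributes_from_response` method can be passed
+# to the accumulators below, without importing the concrete extractor class here.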
+ + class _ChatCompletionAccumulator: __slots__ = ( "_is_null", "_values", "_cached_result", "_request_parameters", + "_response_attributes_extractor", + "_chat_completion_type", ) - def __init__(self, request_parameters: Mapping[str, Any]) -> None: + def __init__( + self, + request_parameters: Mapping[str, Any], + chat_completion_type: Type["ChatCompletion"], + response_attributes_extractor: Optional[_CanGetAttributesFromResponse] = None, + ) -> None: + self._chat_completion_type = chat_completion_type self._request_parameters = request_parameters + self._response_attributes_extractor = response_attributes_extractor self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -65,9 +81,7 @@ def __init__(self, request_parameters: Mapping[str, Any]) -> None: ), ) - def process_chunk(self, chunk: ChatCompletionChunk) -> None: - if not isinstance(chunk, ChatCompletionChunk): - return + def process_chunk(self, chunk: "ChatCompletionChunk") -> None: self._is_null = False self._cached_result = None with warnings.catch_warnings(): @@ -94,15 +108,14 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: _ValueAndType(json_string, OpenInferenceMimeTypeValues.JSON) ) - def get_extra_attributes( - self, - ) -> Iterator[Tuple[str, AttributeValue]]: + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return - yield from _get_extra_attributes_from_response( - ChatCompletion.construct(**result), - self._request_parameters, - ) + if self._response_attributes_extractor: + yield from self._response_attributes_extractor.get_attributes_from_response( + self._chat_completion_type.construct(**result), + self._request_parameters, + ) class _CompletionAccumulator: @@ -111,19 +124,26 @@ class _CompletionAccumulator: "_values", "_cached_result", "_request_parameters", + "_response_attributes_extractor", + "_completion_type", ) - def __init__(self, request_parameters: Mapping[str, Any]) -> None: + def __init__( + self, + request_parameters: Mapping[str, Any], + completion_type: Type["Completion"], + response_attributes_extractor: Optional[_CanGetAttributesFromResponse] = None, + ) -> None: + self._completion_type = completion_type self._request_parameters = request_parameters + self._response_attributes_extractor = response_attributes_extractor self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) - def process_chunk(self, chunk: Completion) -> None: - if not isinstance(chunk, Completion): - return + def process_chunk(self, chunk: "Completion") -> None: self._is_null = False self._cached_result = None with warnings.catch_warnings(): @@ -150,10 +170,11 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return - yield from _get_extra_attributes_from_response( - Completion.construct(**result), - self._request_parameters, - ) + if self._response_attributes_extractor: + yield from self._response_attributes_extractor.get_attributes_from_response( + self._completion_type.construct(**result), + self._request_parameters, + ) class _ValuesAccumulator: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py similarity index 80% rename from python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py rename to python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py index e7f7ad63d..7d07f558a 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py @@ -1,9 +1,9 @@ import base64 import logging -from functools import singledispatch from importlib import import_module from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Iterable, Iterator, @@ -11,6 +11,7 @@ Optional, Sequence, Tuple, + Type, ) from openinference.instrumentation.openai._utils import ( @@ -25,10 +26,11 @@ ) from opentelemetry.util.types import AttributeValue -from openai.types import Completion, CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +if TYPE_CHECKING: + from openai.types import Completion, CreateEmbeddingResponse + from openai.types.chat import ChatCompletion -__all__ = ("_get_extra_attributes_from_response",) +__all__ = ("_ResponseAttributesExtractor",) logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -40,18 +42,48 @@ _NUMPY = None -@singledispatch -def _get_extra_attributes_from_response( - response: Any, - request_parameters: Mapping[str, Any], -) -> Iterator[Tuple[str, AttributeValue]]: - # this is a fallback for @singledispatch - yield from () +class _ResponseAttributesExtractor: + __slots__ = ( + "_openai", + "_chat_completion_type", + "_completion_type", + "_create_embedding_response_type", + ) + + def __init__(self, openai: ModuleType) -> None: + self._openai = openai + self._chat_completion_type: Type["ChatCompletion"] = openai.types.chat.ChatCompletion + self._completion_type: Type["Completion"] = openai.types.Completion + self._create_embedding_response_type: Type[ + "CreateEmbeddingResponse" + ] = openai.types.CreateEmbeddingResponse + + def get_attributes_from_response( + self, + response: Any, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + if isinstance(response, self._chat_completion_type): + yield from _get_attributes_from_chat_completion( + completion=response, + request_parameters=request_parameters, + ) + elif isinstance(response, self._create_embedding_response_type): + yield from _get_attributes_from_create_embedding_response( + response=response, + request_parameters=request_parameters, + ) + elif isinstance(response, self._completion_type): + yield from _get_attributes_from_completion( + completion=response, + request_parameters=request_parameters, + ) + else: + yield from () -@_get_extra_attributes_from_response.register -def _( - completion: ChatCompletion, +def _get_attributes_from_chat_completion( + completion: "ChatCompletion", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 @@ -68,9 +100,8 @@ def _( yield f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{index}.{key}", value 
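# --- Editor's note (not part of the patch): the rename above also swaps
# functools.singledispatch for plain isinstance dispatch against types resolved
# from the injected openai module, so the extractor module no longer needs a
# top-level `import openai`. A hedged sketch of that pattern, with illustrative
# names only:
from importlib import import_module
from types import ModuleType
from typing import Any, Iterator, Tuple


class _LazyDispatcher:
    def __init__(self, openai: ModuleType) -> None:
        # Resolve the response classes once from the already-imported module.
        self._chat_completion_type = openai.types.chat.ChatCompletion
        self._completion_type = openai.types.Completion

    def describe(self, response: Any) -> Iterator[Tuple[str, str]]:
        if isinstance(response, self._chat_completion_type):
            yield "kind", "chat.completion"
        elif isinstance(response, self._completion_type):
            yield "kind", "text_completion"
        # Anything else yields nothing, mirroring the extractor's fallback above.


# Usage (assumes the openai package is installed):
# dispatcher = _LazyDispatcher(import_module("openai"))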
-@_get_extra_attributes_from_response.register -def _( - completion: Completion, +def _get_attributes_from_completion( + completion: "Completion", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 @@ -79,6 +110,7 @@ def _( if usage := getattr(completion, "usage", None): yield from _get_attributes_from_completion_usage(usage) if model_prompt := request_parameters.get("prompt"): + # FIXME: this step should move to request attributes extractor if decoding is not necessary. # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -87,9 +119,8 @@ def _( yield SpanAttributes.LLM_PROMPTS, prompts -@_get_extra_attributes_from_response.register -def _( - response: CreateEmbeddingResponse, +def _get_attributes_from_create_embedding_response( + response: "CreateEmbeddingResponse", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 @@ -106,6 +137,7 @@ def _( yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value embedding_input = request_parameters.get("input") for index, text in enumerate(_get_texts(embedding_input, model)): + # FIXME: this step should move to request attributes extractor if decoding is not necessary. # input: Required[Union[str, List[str], List[int], List[List[int]]]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -130,6 +162,7 @@ def _get_attributes_from_embedding( vector = list(_vector) yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector elif isinstance(_vector, str) and _vector and _NUMPY: + # FIXME: this step should be removed if decoding is not necessary. 
try: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501 vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 6c3d36b9e..34576a9c9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -1,5 +1,6 @@ import logging from typing import ( + TYPE_CHECKING, Any, AsyncIterator, Iterator, @@ -15,7 +16,8 @@ from opentelemetry.util.types import AttributeValue from wrapt import ObjectProxy -from openai import AsyncStream, Stream +if TYPE_CHECKING: + from openai import AsyncStream, Stream __all__ = ( "_Stream", @@ -47,7 +49,7 @@ class _Stream(ObjectProxy): # type: ignore def __init__( self, - stream: Union[Stream[Any], AsyncStream[Any]], + stream: Union["Stream[Any]", "AsyncStream[Any]"], with_span: _WithSpan, response_accumulator: Optional[_ResponseAccumulator] = None, ) -> None: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py index a8b3f3ca9..ab8ddb803 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -60,7 +60,7 @@ def _as_input_attributes( if not value_and_type: return yield SpanAttributes.INPUT_VALUE, value_and_type.value - # it's TEXT by default, so we can skip to save one attribute + # It's assumed to be TEXT by default, so we can skip to save one attribute. if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value @@ -71,7 +71,7 @@ def _as_output_attributes( if not value_and_type: return yield SpanAttributes.OUTPUT_VALUE, value_and_type.value - # it's TEXT by default, so we can skip to save one attribute + # It's assumed to be TEXT by default, so we can skip to save one attribute. 
if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value @@ -92,7 +92,7 @@ def _finish_tracing( try: attributes: Attributes = dict(has_attributes.get_attributes()) except Exception: - logger.exception("Failed to get output value") + logger.exception("Failed to get attributes") attributes = None try: extra_attributes: Attributes = dict(has_attributes.get_extra_attributes()) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 6480cbf22..5d55e44fe 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -1,7 +1,9 @@ import asyncio import json +import logging import random from contextlib import suppress +from importlib import import_module from importlib.metadata import version from itertools import count from typing import ( @@ -19,7 +21,6 @@ cast, ) -import openai import pytest from httpx import AsyncByteStream, Response from openinference.instrumentation.openai import OpenAIInstrumentor @@ -31,6 +32,8 @@ SpanAttributes, ToolCallAttributes, ) +from opentelemetry import trace as trace_api +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor from opentelemetry.sdk import trace as trace_sdk from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import ReadableSpan @@ -39,6 +42,12 @@ from opentelemetry.util.types import AttributeValue from respx import MockRouter +for name, logger in logging.root.manager.loggerDict.items(): + if name.startswith("openinference.") and isinstance(logger, logging.Logger): + logger.setLevel(logging.DEBUG) + logger.handlers.clear() + logger.addHandler(logging.StreamHandler()) + @pytest.mark.parametrize("is_async", [False, True]) @pytest.mark.parametrize("is_raw", [False, True]) @@ -47,17 +56,17 @@ def test_chat_completions( is_async: bool, is_raw: bool, - is_stream, + is_stream: bool, status_code: int, respx_mock: MockRouter, in_memory_span_exporter: InMemorySpanExporter, completion_usage: Dict[str, Any], model_name: str, - tool_calls_mock_stream, + chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]], ) -> None: input_messages: List[Dict[str, Any]] = get_messages() output_messages: List[Dict[str, Any]] = ( - tool_calls_mock_stream[1] if is_stream else get_messages() + chat_completion_mock_stream[1] if is_stream else get_messages() ) invocation_parameters = { "stream": is_stream, @@ -66,9 +75,9 @@ def test_chat_completions( "n": len(output_messages), } url = "https://api.openai.com/v1/chat/completions" - respx_kwargs = { + respx_kwargs: Dict[str, Any] = { **( - {"stream": MockAsyncByteStream(tool_calls_mock_stream[0])} + {"stream": MockAsyncByteStream(chat_completion_mock_stream[0])} if is_stream else { "json": { @@ -84,6 +93,7 @@ def test_chat_completions( } respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) create_kwargs = {"messages": input_messages, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").chat.completions if is_async @@ -108,8 +118,8 @@ async def task() -> None: for _ in response: pass spans = 
in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -167,7 +177,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes @pytest.mark.parametrize("is_async", [False, True]) @@ -183,10 +193,10 @@ def test_completions( in_memory_span_exporter: InMemorySpanExporter, completion_usage: Dict[str, Any], model_name: str, - completions_mock_stream, + completion_mock_stream: Tuple[List[bytes], List[str]], ) -> None: prompt: List[str] = get_texts() - output_texts: List[str] = completions_mock_stream[1] if is_stream else get_texts() + output_texts: List[str] = completion_mock_stream[1] if is_stream else get_texts() invocation_parameters = { "stream": is_stream, "model": randstr(), @@ -194,9 +204,9 @@ def test_completions( "n": len(output_texts), } url = "https://api.openai.com/v1/completions" - respx_kwargs = { + respx_kwargs: Dict[str, Any] = { **( - {"stream": MockAsyncByteStream(completions_mock_stream[0])} + {"stream": MockAsyncByteStream(completion_mock_stream[0])} if is_stream else { "json": { @@ -212,6 +222,7 @@ def test_completions( } respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) create_kwargs = {"prompt": prompt, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").completions if is_async @@ -236,8 +247,8 @@ async def task() -> None: for _ in response: pass spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -267,7 +278,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. 
assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes @pytest.mark.parametrize("is_async", [False, True]) @@ -311,6 +322,7 @@ def test_embeddings( ) ) create_kwargs = {"input": input_text, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").embeddings if is_async @@ -329,8 +341,8 @@ async def task() -> None: response = create(**create_kwargs) _ = response.parse() if is_raw else response spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -361,40 +373,46 @@ async def task() -> None: attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) == embedding[1] ) - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def in_memory_span_exporter() -> InMemorySpanExporter: return InMemorySpanExporter() -@pytest.fixture(autouse=True) -def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, None, None]: - """ - Instruments OpenAI before each test to ensure that the patch is applied - before any tests are run. - """ +@pytest.fixture(scope="module") +def tracer_provider(in_memory_span_exporter: InMemorySpanExporter) -> trace_api.TracerProvider: resource = Resource(attributes={}) tracer_provider = trace_sdk.TracerProvider(resource=resource) span_processor = SimpleSpanProcessor(span_exporter=in_memory_span_exporter) tracer_provider.add_span_processor(span_processor=span_processor) + HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider) + return tracer_provider + + +@pytest.fixture(autouse=True) +def instrument( + tracer_provider: trace_api.TracerProvider, + in_memory_span_exporter: InMemorySpanExporter, +) -> Generator[None, None, None]: OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) yield OpenAIInstrumentor().uninstrument() + in_memory_span_exporter.clear() @pytest.fixture(scope="module") def seed() -> Iterator[int]: """ - Use rolling seeds to make debugging easier, because the rolling pseudo-random - values allow conditional breakpoints to be hit precisely (and repeatably). + Use rolling seeds to help debugging, because the rolling pseudo-random values + allow conditional breakpoints to be hit precisely (and repeatably). 
""" return count() @pytest.fixture(autouse=True) -def set_seed(seed: Iterator[int]) -> None: +def set_seed(seed: Iterator[int]) -> Iterator[None]: random.seed(next(seed)) yield @@ -421,7 +439,7 @@ def input_messages() -> List[Dict[str, Any]]: @pytest.fixture -def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: +def chat_completion_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: return ( [ b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n', @@ -508,7 +526,7 @@ def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: @pytest.fixture -def completions_mock_stream() -> Tuple[List[bytes], List[str]]: +def completion_mock_stream() -> Tuple[List[bytes], List[str]]: return ( [ b'data: {"choices": [{"text": "", "index": 0}]}\n\n', @@ -547,7 +565,7 @@ class MockAsyncByteStream(AsyncByteStream): def __init__(self, byte_stream: Iterable[bytes]): self._byte_stream = byte_stream - def __iter__(self) -> AsyncIterator[bytes]: + def __iter__(self) -> Iterator[bytes]: for byte_string in self._byte_stream: yield byte_string diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py deleted file mode 100644 index c7e5d4ff0..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ /dev/null @@ -1,150 +0,0 @@ -import pytest -from openai.types.chat import ChatCompletionChunk -from openinference.instrumentation.openai._response_accumulator import ( - _ChatCompletionAccumulator, -) - - -def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): - accumulator = _ChatCompletionAccumulator({}) - for chunk in chat_completion_chunks: - accumulator.process_chunk(chunk) - assert accumulator._result() == desired_chat_completion_result - - -@pytest.fixture -def desired_chat_completion_result(): - return { - "id": "xyz", - "choices": [ - { - "index": 0, - "finish_reason": "length", - "message": {"content": "A1", "role": "assistant"}, - }, - { - "index": 1, - "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, - }, - { - "index": 2, - }, - { - "index": 3, - "message": { - "tool_calls": [ - {"index": 0, "function": {"arguments": "C3"}}, - {"index": 1, "function": {"arguments": "D4"}}, - ] - }, - }, - ], - "created": 123, - "model": "ultra-turbo", - } - - -@pytest.fixture -def chat_completion_chunks(): - chunks = [ - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"content": "A"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "D"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"content": "B"}, "finish_reason": None}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "C"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"role": "assistant"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"content": "2"}}], - "created": 123, - "model": "ultra-turbo", 
- }, - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"content": "1"}, "finish_reason": None}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"role": "assistant"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"finish_reason": "length", "index": 0}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "4"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"finish_reason": "stop", "index": 1}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "3"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 2, "delta": {"content": ""}}], - "created": 123, - "model": "ultra-turbo", - }, - ] - return [ChatCompletionChunk.construct(**chunk) for chunk in chunks]
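# --- Editor's note (not part of the patch): the deleted test above constructed
# _ChatCompletionAccumulator({}), which no longer matches the new constructor.
# A hedged sketch of how an equivalent test could be reinstated against the new
# signature (chat_completion_type is now passed explicitly); the expected merge
# behavior is inferred from the deleted fixtures and may need adjustment.
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openinference.instrumentation.openai._response_accumulator import (
    _ChatCompletionAccumulator,
)


def test_chat_completion_accumulator_merges_chunks() -> None:
    chunks = [
        ChatCompletionChunk.construct(
            id="xyz",
            choices=[{"index": 0, "delta": {"role": "assistant"}}],
            created=123,
            model="ultra-turbo",
        ),
        ChatCompletionChunk.construct(
            id="xyz",
            choices=[{"index": 0, "delta": {"content": "A1"}, "finish_reason": "stop"}],
            created=123,
            model="ultra-turbo",
        ),
    ]
    accumulator = _ChatCompletionAccumulator(
        request_parameters={},
        chat_completion_type=ChatCompletion,
    )
    for chunk in chunks:
        accumulator.process_chunk(chunk)
    result = accumulator._result()
    assert result is not None
    # The deltas for choice index 0 should be merged into a single message.
    assert result["choices"][0]["message"]["content"] == "A1"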