From 5ff89eff0fce934da20f6d46ba5623726282d2b0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 10:23:04 -0800 Subject: [PATCH 01/44] feat: openai instrumentator --- .github/workflows/python-CI.yaml | 40 ++ .gitignore | 1 + .../LICENSE | 201 ++++++++++ .../README.rst | 14 + .../examples/chat_completion_async_stream.py | 40 ++ .../integration_tests/completions.py | 209 ++++++++++ .../integration_tests/completions_async.py | 207 ++++++++++ .../integration_tests/embeddings.py | 142 +++++++ .../functions_and_tool_calls.py | 195 ++++++++++ .../pyproject.toml | 52 +++ .../instrumentation/openai/__init__.py | 57 +++ .../openai/_extra_attributes_from_request.py | 143 +++++++ .../openai/_extra_attributes_from_response.py | 204 ++++++++++ .../instrumentation/openai/_request.py | 360 ++++++++++++++++++ .../openai/_response_accumulator.py | 248 ++++++++++++ .../instrumentation/openai/_stream.py | 149 ++++++++ .../instrumentation/openai/_utils.py | 134 +++++++ .../instrumentation/openai/_with_span.py | 82 ++++ .../instrumentation/openai/package.py | 2 + .../instrumentation/openai/py.typed | 0 .../instrumentation/openai/version.py | 1 + .../openai/test_response_accumulator.py | 154 ++++++++ python/mypy.ini | 5 + .../pyproject.toml | 2 +- .../openinference/semconv/trace/__init__.py | 144 ++++--- .../src/openinference/semconv/version.py | 2 +- python/ruff.toml | 12 + python/tox.ini | 29 ++ 28 files changed, 2775 insertions(+), 54 deletions(-) create mode 100644 .github/workflows/python-CI.yaml create mode 100644 python/instrumentation/openinference-instrumentation-openai/LICENSE create mode 100644 python/instrumentation/openinference-instrumentation-openai/README.rst create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/pyproject.toml create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py create mode 100644 
python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed create mode 100644 python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py create mode 100644 python/mypy.ini create mode 100644 python/ruff.toml create mode 100644 python/tox.ini diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml new file mode 100644 index 000000000..935f74c45 --- /dev/null +++ b/.github/workflows/python-CI.yaml @@ -0,0 +1,40 @@ +name: Python CI + +on: + push: + branches: [main] + pull_request: + paths: + - "python/**" + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + group: test-python-${{ github.head_ref }} + cancel-in-progress: true + +jobs: + ci: + name: CI Python + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: "3.8" + - name: Install tox + run: pip install tox==4.11.4 + - name: Check style + run: | + tox run -e ruff + git diff --exit-code + - name: Run mypy + run: | + tox run -e mypy-openai + - name: Run tests + run: | + tox run -e test-openai + + diff --git a/.gitignore b/.gitignore index 99baffced..82c639258 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Generated dirs .vscode .idea +.tox node_modules dist sdist diff --git a/python/instrumentation/openinference-instrumentation-openai/LICENSE b/python/instrumentation/openinference-instrumentation-openai/LICENSE new file mode 100644 index 000000000..0223315cc --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright The OpenTelemetry Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/python/instrumentation/openinference-instrumentation-openai/README.rst b/python/instrumentation/openinference-instrumentation-openai/README.rst new file mode 100644 index 000000000..35f92883b --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/README.rst @@ -0,0 +1,14 @@ +OpenInference OpenAI Instrumentation +============================================= + +|pypi| + +.. 
|pypi| image:: https://badge.fury.io/py/openinference-instrumentation-openai.svg + :target: https://pypi.org/project/openinference-instrumentation-openai/ + +Installation +------------ + +:: + + pip install openinference-instrumentation-openai diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py new file mode 100644 index 000000000..4c81584bc --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py @@ -0,0 +1,40 @@ +""" +Phoenix collector should be running in the background. +""" +import asyncio + +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +async def chat_completions(**kwargs): + client = openai.AsyncOpenAI() + async for chunk in await client.chat.completions.create(**kwargs): + if content := chunk.choices[0].delta.content: + print(content, end="") + print() + + +if __name__ == "__main__": + asyncio.run( + chat_completions( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + stream=True, + ), + ) diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py new file mode 100644 index 000000000..8830ff8b3 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -0,0 +1,209 @@ +""" +Phoenix collector should be running in the background. +""" +import contextvars +import inspect +import logging +import threading +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. 
+# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. +if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.OpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +HAIKU = "Write a haiku." +HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a resume." +RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +CHAT_KWARGS = { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": HAIKU}], + "max_tokens": 20, + "temperature": 2, + **( + { + "logprobs": True, + "top_logprobs": 5, + } + if _OPENAI_VERSION >= (1, 5, 0) + else {} + ), +} +COMP_KWARGS = { + "model": "gpt-3.5-turbo-instruct", + "prompt": HAIKU, + "max_tokens": 20, + "temperature": 2, + "logprobs": 5, +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 +_LOCK = threading.Lock() + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + with _LOCK: + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response: + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response: + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.chat.completions.with_raw_response.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response.parse(): + sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = CLIENT.completions.with_raw_response.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + for _ in response.parse(): + sleep(0.005) + except Exception: + 
logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def tasks(n, task, **kwargs): + for i in range(n): # i = 0 results in intentional BadRequestError + ctx = contextvars.copy_context() + yield threading.Thread( + target=ctx.run, + args=(task,), + kwargs={"n": i, **kwargs}, + ) + + +if __name__ == "__main__": + threads = list( + chain( + tasks(N, completions), + tasks(N, completions_with_raw_response), + tasks(N, completions, stream=True), + tasks(N, completions_with_raw_response, stream=True), + tasks(N, completions, prompt=[HAIKU, RESUME]), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), + tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions, prompt=HAIKU_TOKENS), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), + tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), + tasks( + N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True + ), + tasks(N, chat_completions), + tasks(N, chat_completions_with_raw_response), + tasks(N, chat_completions, stream=True), + tasks(N, chat_completions_with_raw_response, stream=True), + ) + ) + [thread.start() for thread in threads] + [thread.join() for thread in threads] + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py new file mode 100644 index 000000000..6263956bf --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -0,0 +1,207 @@ +""" +Phoenix collector should be running in the background. 
+""" +import asyncio +import inspect +import logging +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. +if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +HAIKU = "Write a haiku." +HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a resume." 
+RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +CHAT_KWARGS = { + "model": "gpt-3.5-turbo", + "messages": [{"role": "user", "content": HAIKU}], + "max_tokens": 20, + "temperature": 2, + **( + { + "logprobs": True, + "top_logprobs": 5, + } + if _OPENAI_VERSION >= (1, 5, 0) + else {} + ), +} +COMP_KWARGS = { + "model": "gpt-3.5-turbo-instruct", + "prompt": HAIKU, + "max_tokens": 20, + "temperature": 2, + "logprobs": 5, +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +async def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.with_raw_response.create( + **{**CHAT_KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.completions.with_raw_response.create( + **{**COMP_KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +def tasks(n, task, **kwargs): + return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError + + +async def main(*tasks): + await asyncio.gather(*chain.from_iterable(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + tasks(N, completions), + tasks(N, completions_with_raw_response), + tasks(N, completions, stream=True), + tasks(N, completions_with_raw_response, stream=True), + tasks(N, completions, prompt=[HAIKU, RESUME]), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), + tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), + tasks(N, completions, prompt=HAIKU_TOKENS), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), + 
tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), + tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), + tasks( + N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True + ), + tasks(N, chat_completions), + tasks(N, chat_completions_with_raw_response), + tasks(N, chat_completions, stream=True), + tasks(N, chat_completions_with_raw_response, stream=True), + ) + ) + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py new file mode 100644 index 000000000..ee91e2898 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -0,0 +1,142 @@ +""" +Phoenix collector should be running in the background. +""" +import asyncio +import inspect +import logging +from contextlib import suppress +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. 
+if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +KWARGS = { + "model": "text-embedding-ada-002", +} + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(kwargs): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") + + +async def embeddings(**kwargs): + try: + with suppress(openai.BadRequestError): + await CLIENT.embeddings.create(**{**KWARGS, **kwargs}) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def embeddings_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count(kwargs) + + +async def main(*tasks): + await asyncio.gather(*chain(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + embeddings(input="hello world"), + embeddings(input="hello world", encoding_format="float"), + embeddings(input="hello world", encoding_format="base64"), + embeddings(input=["hello", "world"]), + embeddings(input=["hello", "world"], encoding_format="float"), + embeddings(input=["hello", "world"], encoding_format="base64"), + embeddings(input=[15339, 1917]), + embeddings(input=[15339, 1917], encoding_format="float"), + embeddings(input=[15339, 1917], encoding_format="base64"), + embeddings(input=[[15339], [14957]]), + embeddings(input=[[15339], [14957]], encoding_format="float"), + embeddings(input=[[15339], [14957]], encoding_format="base64"), + embeddings_with_raw_response(input="hello world"), + embeddings_with_raw_response(input="hello world", encoding_format="float"), + embeddings_with_raw_response(input="hello world", encoding_format="base64"), + embeddings_with_raw_response(input=["hello", "world"]), + embeddings_with_raw_response(input=["hello", "world"], encoding_format="float"), + embeddings_with_raw_response(input=["hello", "world"], encoding_format="base64"), + embeddings_with_raw_response(input=[15339, 1917]), + embeddings_with_raw_response(input=[15339, 1917], encoding_format="float"), + embeddings_with_raw_response(input=[15339, 1917], encoding_format="base64"), + embeddings_with_raw_response(input=[[15339], [14957]]), + embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="float"), + embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="base64"), + ) + ) + spans = 
in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py new file mode 100644 index 000000000..a281938e5 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py @@ -0,0 +1,195 @@ +""" +Phoenix collector should be running in the background. + +tools requires openai>=1.1.0 +""" +import asyncio +import inspect +import logging +from contextlib import suppress +from importlib.metadata import version +from itertools import chain +from time import sleep + +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter + + +def default_tracer_provider() -> trace_sdk.TracerProvider: + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") + span_processor = SimpleSpanProcessor(span_exporter=span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + return tracer_provider + + +# Instrument httpx to show that it can show up as a child span. +# Note that it must be instrumented before it's imported by openai. +HTTPXClientInstrumentor().instrument() + +# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it +# like this to prevent the imports from being re-formatted to the top of file. 
+if True: + import openai + from openinference.instrumentation.openai import OpenAIInstrumentor + from openinference.semconv.trace import SpanAttributes + +CLIENT = openai.AsyncOpenAI() + +tracer_provider = default_tracer_provider() +in_memory_span_exporter = InMemorySpanExporter() +tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + +N = 3 # iteration i = 0 results in intentional BadRequestError +KWARGS = { + "model": "gpt-4", + "messages": [ + { + "role": "user", + "content": "What's the current time and weather in San Francisco, CA?", + } + ], + "max_tokens": 20, + "temperature": 0, +} +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA", + }, + }, + "required": ["location"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_current_time", + "description": "Get the current time in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g., San Francisco, CA", + }, + }, + "required": ["location"], + }, + }, + }, +] +FUNCTIONS = [tool["function"] for tool in TOOLS] + +for k, v in logging.root.manager.loggerDict.items(): + if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): + v.setLevel(logging.DEBUG) + v.handlers.clear() + v.addHandler(logging.StreamHandler()) + +logger = logging.getLogger(__name__) + +_EXPECTED_SPAN_COUNT = 0 + + +def _print_span_count(): + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + global _EXPECTED_SPAN_COUNT + _EXPECTED_SPAN_COUNT += 1 + mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" + name = inspect.stack()[1][3] + print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}") + + +async def chat_completions(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.create(**{**KWARGS, **kwargs}) + if kwargs.get("stream"): + async for _ in response: + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count() + + +async def chat_completions_with_raw_response(**kwargs): + try: + with suppress(openai.BadRequestError): + response = await CLIENT.chat.completions.with_raw_response.create( + **{**KWARGS, **kwargs} + ) + if kwargs.get("stream"): + async for _ in response.parse(): + await asyncio.sleep(0.005) + except Exception: + logger.exception(f"{inspect.stack()[0][3]}({kwargs})") + finally: + _print_span_count() + + +def tasks(n, task, **kwargs): + return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError + + +async def main(*tasks): + await asyncio.gather(*chain.from_iterable(tasks)) + + +if __name__ == "__main__": + asyncio.run( + main( + tasks(N, chat_completions, functions=FUNCTIONS), + tasks(N, chat_completions, functions=FUNCTIONS, stream=True), + tasks(N, chat_completions_with_raw_response, functions=FUNCTIONS), + tasks(N, 
chat_completions_with_raw_response, functions=FUNCTIONS, stream=True), + *( + [ + tasks(N, chat_completions, tools=TOOLS), + tasks(N, chat_completions, tools=TOOLS, stream=True), + tasks(N, chat_completions_with_raw_response, tools=TOOLS), + tasks(N, chat_completions_with_raw_response, tools=TOOLS, stream=True), + ] + if _OPENAI_VERSION >= (1, 1, 0) + else () + ), + ) + ) + spans = in_memory_span_exporter.get_finished_spans() + llm_spans = [ + span + for span in spans + if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" + ] + actual = len(llm_spans) + mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" + print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") + assert _EXPECTED_SPAN_COUNT == actual + sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml new file mode 100644 index 000000000..a22f88ece --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "openinference-instrumentation-openai" +dynamic = ["version"] +description = "OpenInference OpenAI instrumentation" +readme = "README.rst" +license = "Apache-2.0" +requires-python = ">=3.8, <3.12" +authors = [ + { name = "OpenInference Authors", email = "oss@arize.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dependencies = [ + "opentelemetry-api", + "opentelemetry-instrumentation", + "opentelemetry-semantic-conventions", + "openinference-semantic-conventions", + "wrapt", +] + +[project.optional-dependencies] +test = [ + "openai == 1.0.0", +] + +[project.urls] +Homepage = "https://github.com/Arize-ai/openinference/tree/main/python/instrumentation/openinference-instrumentation-openai" + +[tool.hatch.version] +path = "src/openinference/instrumentation/openai/version.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/src", + "/tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/openinference"] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py new file mode 100644 index 000000000..3217d6457 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -0,0 +1,57 @@ +import logging +from typing import Any, Collection + +from openinference.instrumentation.openai._request import ( + _AsyncRequest, + _Request, +) +from openinference.instrumentation.openai.package import _instruments +from openinference.instrumentation.openai.version import __version__ +from opentelemetry import trace as trace_api +from opentelemetry.instrumentation.instrumentor import BaseInstrumentor # type: ignore +from wrapt import wrap_function_wrapper + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_MODULE = "openai" + + +class 
OpenAIInstrumentor(BaseInstrumentor): # type: ignore + """An instrumentor for openai.OpenAI.request and + openai.AsyncOpenAI.request + """ + + __slots__ = ( + "_original_request", + "_original_async_request", + ) + + def instrumentation_dependencies(self) -> Collection[str]: + return _instruments + + def _instrument(self, **kwargs: Any) -> None: + if (include_extra_attributes := kwargs.get("include_extra_attributes")) is None: + include_extra_attributes = True + if not (tracer_provider := kwargs.get("tracer_provider")): + tracer_provider = trace_api.get_tracer_provider() + tracer = trace_api.get_tracer(__name__, __version__, tracer_provider) + wrap_function_wrapper( + module=_MODULE, + name="OpenAI.request", + wrapper=_Request( + tracer=tracer, + include_extra_attributes=include_extra_attributes, + ), + ) + wrap_function_wrapper( + module=_MODULE, + name="AsyncOpenAI.request", + wrapper=_AsyncRequest( + tracer=tracer, + include_extra_attributes=include_extra_attributes, + ), + ) + + def _uninstrument(self, **kwargs: Any) -> None: + pass diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py new file mode 100644 index 000000000..47f444ee8 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py @@ -0,0 +1,143 @@ +import json +import logging +from enum import Enum +from typing import ( + Any, + Iterable, + Iterator, + List, + Mapping, + Tuple, +) + +from openinference.instrumentation.openai._utils import _OPENAI_VERSION +from openinference.semconv.trace import MessageAttributes, SpanAttributes, ToolCallAttributes +from opentelemetry.util.types import AttributeValue + +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ("_get_extra_attributes_from_request",) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +def _get_extra_attributes_from_request( + cast_to: type, + request_options: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + if not isinstance(request_options, Mapping): + return + if cast_to is ChatCompletion: + yield from _get_attributes_from_chat_completion_create_param(request_options) + elif cast_to is CreateEmbeddingResponse: + yield from _get_attributes_from_embedding_create_param(request_options) + elif cast_to is Completion: + yield from _get_attributes_from_completion_create_param(request_options) + else: + try: + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_options) + except Exception: + logger.exception("Failed to serialize request options") + + +def _get_attributes_from_chat_completion_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.completion_create_params.CompletionCreateParamsBase + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/completion_create_params.py#L28 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("messages", None) + invocation_params.pop("functions", None) + invocation_params.pop("tools", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) + if (input_messages := 
params.get("messages")) and isinstance(input_messages, Iterable): + for index, input_message in enumerate(input_messages): + for key, value in _get_attributes_from_message_param(input_message): + yield f"{SpanAttributes.LLM_INPUT_MESSAGES}.{index}.{key}", value + + +def _get_attributes_from_message_param( + message: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.ChatCompletionMessageParam + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_param.py#L15 # noqa: E501 + if not hasattr(message, "get"): + return + if role := message.get("role"): + yield ( + MessageAttributes.MESSAGE_ROLE, + role.value if isinstance(role, Enum) else role, + ) + if content := message.get("content"): + if isinstance(content, str): + yield MessageAttributes.MESSAGE_CONTENT, content + elif isinstance(content, List): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_user_message_param.py#L14 # noqa: E501 + try: + json_string = json.dumps(content) + except Exception: + logger.exception("Failed to serialize message content") + else: + yield MessageAttributes.MESSAGE_CONTENT, json_string + if name := message.get("name"): + yield MessageAttributes.MESSAGE_NAME, name + if (function_call := message.get("function_call")) and hasattr(function_call, "get"): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_assistant_message_param.py#L13 # noqa: E501 + if function_name := function_call.get("name"): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_NAME, function_name + if function_arguments := function_call.get("arguments"): + yield ( + MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, + function_arguments, + ) + if ( + _OPENAI_VERSION >= (1, 1, 0) + and (tool_calls := message.get("tool_calls"),) + and isinstance(tool_calls, Iterable) + ): + for index, tool_call in enumerate(tool_calls): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call_param.py#L23 # noqa: E501 + if not hasattr(tool_call, "get"): + continue + if (function := tool_call.get("function")) and hasattr(function, "get"): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call_param.py#L10 # noqa: E501 + if name := function.get("name"): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." + f"{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}", + name, + ) + if arguments := function.get("arguments"): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." 
+ f"{ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}", + arguments, + ) + + +def _get_attributes_from_completion_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.completion_create_params.CompletionCreateParamsBase + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L11 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("prompt", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) + + +def _get_attributes_from_embedding_create_param( + params: Mapping[str, Any], +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.EmbeddingCreateParams + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L11 # noqa: E501 + if not isinstance(params, Mapping): + return + invocation_params = dict(params) + invocation_params.pop("input", None) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py new file mode 100644 index 000000000..05d0d0a1d --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -0,0 +1,204 @@ +import base64 +import logging +from functools import singledispatch +from importlib import import_module +from types import MappingProxyType, ModuleType +from typing import ( + Any, + Iterable, + Iterator, + Mapping, + Optional, + Sequence, + Tuple, +) + +from openinference.instrumentation.openai._utils import ( + _OPENAI_VERSION, + _get_texts, +) +from openinference.semconv.trace import ( + EmbeddingAttributes, + MessageAttributes, + SpanAttributes, + ToolCallAttributes, +) +from opentelemetry.util.types import AttributeValue + +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ("_get_extra_attributes_from_response",) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +try: + _NUMPY: Optional[ModuleType] = import_module("numpy") +except ImportError: + _NUMPY = None + + +@singledispatch +def _get_extra_attributes_from_response( + response: Any, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # this is a fallback (for singledispatch) + yield from () + + +@_get_extra_attributes_from_response.register +def _( + completion: ChatCompletion, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 + if model := getattr(completion, "model", None): + yield SpanAttributes.LLM_MODEL_NAME, model + if usage := getattr(completion, "usage", None): + yield from _get_attributes_from_completion_usage(usage) + if (choices := getattr(completion, "choices", None)) and isinstance(choices, Iterable): + for choice in choices: + if (index := getattr(choice, "index", None)) is None: + continue + if message := getattr(choice, 
"message", None): + for key, value in _get_attributes_from_chat_completion_message(message): + yield f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{index}.{key}", value + + +@_get_extra_attributes_from_response.register +def _( + completion: Completion, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 + if model := getattr(completion, "model", None): + yield SpanAttributes.LLM_MODEL_NAME, model + if usage := getattr(completion, "usage", None): + yield from _get_attributes_from_completion_usage(usage) + if model_prompt := request_options.get("prompt"): + # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 + # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model + # names are not reliable (across OpenAI and Azure). + if prompts := list(_get_texts(model_prompt, model)): + yield SpanAttributes.LLM_PROMPTS, prompts + + +@_get_extra_attributes_from_response.register +def _( + response: CreateEmbeddingResponse, + request_options: Mapping[str, Any] = MappingProxyType({}), +) -> Iterator[Tuple[str, AttributeValue]]: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 + if usage := getattr(response, "usage", None): + yield from _get_attributes_from_embedding_usage(usage) + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L23 # noqa: E501 + if model := getattr(response, "model"): + yield f"{EmbeddingAttributes.EMBEDDING_MODEL_NAME}", model + if (data := getattr(response, "data", None)) and isinstance(data, Iterable): + for embedding in data: + if (index := getattr(embedding, "index", None)) is None: + continue + for key, value in _get_attributes_from_embedding(embedding): + yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value + embedding_input = request_options.get("input") + for index, text in enumerate(_get_texts(embedding_input, model)): + # input: Required[Union[str, List[str], List[int], List[List[int]]]] + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 + # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model + # names are not reliable (across OpenAI and Azure). + yield ( + ( + f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}." 
+ f"{EmbeddingAttributes.EMBEDDING_TEXT}" + ), + text, + ) + + +def _get_attributes_from_embedding( + embedding: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.Embedding + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding.py#L11 # noqa: E501 + if not (_vector := getattr(embedding, "embedding", None)): + return + if isinstance(_vector, Sequence) and len(_vector) and isinstance(_vector[0], float): + vector = list(_vector) + yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector + elif isinstance(_vector, str) and _vector and _NUMPY: + try: + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501 + vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist() + except Exception: + logger.exception("Failed to decode embedding") + pass + else: + yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector + + +def _get_attributes_from_chat_completion_message( + message: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.chat.ChatCompletionMessage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message.py#L25 # noqa: E501 + if role := getattr(message, "role", None): + yield MessageAttributes.MESSAGE_ROLE, role + if content := getattr(message, "content", None): + yield MessageAttributes.MESSAGE_CONTENT, content + if function_call := getattr(message, "function_call", None): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message.py#L12 # noqa: E501 + if name := getattr(function_call, "name", None): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_NAME, name + if arguments := getattr(function_call, "arguments", None): + yield MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON, arguments + if ( + _OPENAI_VERSION >= (1, 1, 0) + and (tool_calls := getattr(message, "tool_calls", None)) + and isinstance(tool_calls, Iterable) + ): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call.py#L23 # noqa: E501 + for index, tool_call in enumerate(tool_calls): + if function := getattr(tool_call, "function", None): + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion_message_tool_call.py#L10 # noqa: E501 + if name := getattr(function, "name", None): + yield ( + ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." + f"{ToolCallAttributes.TOOL_CALL_FUNCTION_NAME}" + ), + name, + ) + if arguments := getattr(function, "arguments", None): + yield ( + f"{MessageAttributes.MESSAGE_TOOL_CALLS}.{index}." 
+ f"{ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}", + arguments, + ) + + +def _get_attributes_from_completion_usage( + usage: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.CompletionUsage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_usage.py#L8 # noqa: E501 + if (total_tokens := getattr(usage, "total_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_TOTAL, total_tokens + if (prompt_tokens := getattr(usage, "prompt_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_PROMPT, prompt_tokens + if (completion_tokens := getattr(usage, "completion_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_COMPLETION, completion_tokens + + +def _get_attributes_from_embedding_usage( + usage: object, +) -> Iterator[Tuple[str, AttributeValue]]: + # openai.types.create_embedding_response.Usage + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L12 # noqa: E501 + if (total_tokens := getattr(usage, "total_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_TOTAL, total_tokens + if (prompt_tokens := getattr(usage, "prompt_tokens", None)) is not None: + yield SpanAttributes.LLM_TOKEN_COUNT_PROMPT, prompt_tokens diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py new file mode 100644 index 000000000..0f1d0790c --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -0,0 +1,360 @@ +import logging +from abc import ABC +from contextlib import contextmanager +from types import MappingProxyType +from typing import ( + Any, + Awaitable, + Callable, + Dict, + Hashable, + Iterator, + Mapping, + Optional, + Tuple, +) + +from openinference.instrumentation.openai._extra_attributes_from_request import ( + _get_extra_attributes_from_request, +) +from openinference.instrumentation.openai._extra_attributes_from_response import ( + _get_extra_attributes_from_response, +) +from openinference.instrumentation.openai._response_accumulator import ( + _ChatCompletionAccumulator, + _CompletionAccumulator, +) +from openinference.instrumentation.openai._stream import ( + _ResponseAccumulator, + _Stream, +) +from openinference.instrumentation.openai._utils import ( + _as_input_attributes, + _as_output_attributes, + _finish_tracing, + _io_value_and_type, +) +from openinference.instrumentation.openai._with_span import _WithSpan +from openinference.semconv.trace import SpanAttributes +from opentelemetry import context as context_api +from opentelemetry import trace as trace_api +from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.trace import INVALID_SPAN +from opentelemetry.util.types import AttributeValue + +from openai import AsyncStream, Stream +from openai.types import Completion, CreateEmbeddingResponse +from openai.types.chat import ChatCompletion + +__all__ = ( + "_Request", + "_AsyncRequest", +) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_LLM_SPAN_KIND = "LLM" +_EMBEDDING_SPAN_KIND = "EMBEDDING" + + +class _WithTracer(ABC): + __slots__ = ( + "_tracer", + "_include_extra_attributes", + ) + + def __init__( + self, + tracer: 
trace_api.Tracer, + include_extra_attributes: bool = True, + ) -> None: + self._tracer = tracer + self._include_extra_attributes = include_extra_attributes + + @contextmanager + def _start_as_current_span( + self, + span_name: str, + cast_to: type, + request_options: Mapping[str, Any], + ) -> Iterator[_WithSpan]: + span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND + attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} + try: + input_attributes = _as_input_attributes(_io_value_and_type(request_options)) + except Exception: + logger.exception( + f"Failed to get input attributes from request options of " + f"type {type(request_options)}" + ) + else: + attributes.update(input_attributes) + # Secondary attributes should be added after input and output to ensure + # that input and output are not dropped if there are too many attributes. + try: + extra_attributes = ( + dict(_get_extra_attributes_from_request(cast_to, request_options)) + if self._include_extra_attributes + else {} + ) + except Exception: + logger.exception( + f"Failed to get extra attributes from request options of " + f"type {type(request_options)}" + ) + extra_attributes = {} + try: + span = self._tracer.start_span(span_name, attributes=attributes) + except Exception: + logger.exception("Failed to start span") + span = INVALID_SPAN + with trace_api.use_span( + span, + end_on_exit=False, + record_exception=False, + set_status_on_exception=False, + ) as span: + yield _WithSpan(span, extra_attributes) + + +class _Request(_WithTracer): + def __call__( + self, + wrapped: Callable[..., Any], + instance: Any, + args: Tuple[type, Any], + kwargs: Mapping[str, Any], + ) -> Any: + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return wrapped(*args, **kwargs) + try: + cast_to, request_options = _parse_request_args(args) + # E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" + span_name: str = cast_to.__name__.split(".")[-1] + except Exception: + logger.exception("Failed to parse request args") + return wrapped(*args, **kwargs) + with self._start_as_current_span( + span_name=span_name, + cast_to=cast_to, + request_options=request_options, + ) as with_span: + try: + response = wrapped(*args, **kwargs) + except Exception as exception: + status_code = trace_api.StatusCode.ERROR + with_span.record_exception(exception) + with_span.finish_tracing(status_code=status_code) + raise + try: + response = _finalize_response( + response=response, + with_span=with_span, + cast_to=cast_to, + request_options=request_options, + include_extra_attributes=self._include_extra_attributes, + ) + except Exception: + logger.exception(f"Failed to finalize response of type {type(response)}") + with_span.finish_tracing(status_code=None) + return response + + +class _AsyncRequest(_WithTracer): + async def __call__( + self, + wrapped: Callable[..., Awaitable[Any]], + instance: Any, + args: Tuple[type, Any], + kwargs: Mapping[str, Any], + ) -> Any: + if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): + return await wrapped(*args, **kwargs) + try: + cast_to, request_options = _parse_request_args(args) + # E.g. 
cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion"
+                span_name: str = cast_to.__name__.split(".")[-1]
+        except Exception:
+            logger.exception("Failed to parse request args")
+            return await wrapped(*args, **kwargs)
+        with self._start_as_current_span(
+            span_name=span_name,
+            cast_to=cast_to,
+            request_options=request_options,
+        ) as with_span:
+            try:
+                response = await wrapped(*args, **kwargs)
+            except Exception as exception:
+                status_code = trace_api.StatusCode.ERROR
+                with_span.record_exception(exception)
+                with_span.finish_tracing(status_code=status_code)
+                raise
+            try:
+                response = _finalize_response(
+                    response=response,
+                    with_span=with_span,
+                    cast_to=cast_to,
+                    request_options=request_options,
+                    include_extra_attributes=self._include_extra_attributes,
+                )
+            except Exception:
+                logger.exception(f"Failed to finalize response of type {type(response)}")
+                with_span.finish_tracing(status_code=None)
+        return response
+
+
+def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any]]:
+    # We don't use `signature(request).bind()` because `request` could have been monkey-patched
+    # (incorrectly) by others and the signature at runtime may not match the original.
+    # The targeted signature of `request` is here:
+    # https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L846-L847 # noqa: E501
+    cast_to: type = args[0]
+    options: Mapping[str, Any] = (
+        json_data
+        # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_models.py#L427 # noqa: E501
+        if hasattr(args[1], "json_data") and isinstance(json_data := args[1].json_data, Mapping)
+        else {}
+    )
+    # FIXME: Because request options is just a Mapping, it can contain any value as long as it
+    # serializes correctly in an HTTP request body. For example, Enum values may be present if a
+    # third-party library puts them there. Enums can turn into their intended string values via
+    # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we
+    # try to extract attributes. However, this round-trip seems expensive, so we opted to treat
+    # only the Enums that we know about: e.g. the message role can sometimes be an Enum, so we
+    # will convert it only when it's encountered.
+    # try:
+    #     options = json.loads(json.dumps(options))
+    # except Exception:
+    #     pass
+    return cast_to, options
+
+
+def _finalize_response(
+    response: Any,
+    with_span: _WithSpan,
+    cast_to: type,
+    request_options: Mapping[str, Any],
+    include_extra_attributes: bool = True,
+) -> Any:
+    """Monkey-patch the response object to trace the stream, or finish tracing if the response is
+    not a stream.
+    """
+    if hasattr(response, "parse") and callable(response.parse):
+        # `.request()` may be called under `.with_raw_response` and it's necessary to call
+        # `.parse()` to get back the usual response types.
+        # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501
+        try:
+            response.parse()
+        except Exception:
+            logger.exception(f"Failed to parse response of type {type(response)}")
+    if (
+        isinstance(response, (Stream, AsyncStream))
+        or hasattr(
+            # FIXME: Ideally we should not rely on a private attribute (but it may be impossible).
+            # The assumption here is that calling `.parse()` stores the stream object in `._parsed`
+            # and calling `.parse()` again will not overwrite the monkey-patched version.
+ # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 + response, + "_parsed", + ) + # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. + and isinstance(response._parsed, (Stream, AsyncStream)) + ): + # For streaming, we need (optional) accumulators to process each chunk iteration. + response_accumulator = _ResponseAccumulators.find(cast_to) + if hasattr(response, "_parsed") and isinstance( + parsed := response._parsed, (Stream, AsyncStream) + ): + # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. + response._parsed = _Stream( + stream=parsed, + with_span=with_span, + response_accumulator=response_accumulator, + include_extra_attributes=include_extra_attributes, + ) + return response + return _Stream( + stream=response, + with_span=with_span, + response_accumulator=response_accumulator, + include_extra_attributes=include_extra_attributes, + ) + _finish_tracing( + status_code=trace_api.StatusCode.OK, + with_span=with_span, + has_attributes=_ResponseAttributes( + response=response, + request_options=request_options, + include_extra_attributes=include_extra_attributes, + ), + ) + return response + + +class _ResponseAttributes: + __slots__ = ( + "_request_options", + "_response", + "_include_extra_attributes", + ) + + def __init__( + self, + response: Any, + request_options: Mapping[str, Any], + include_extra_attributes: bool = True, + ) -> None: + if hasattr(response, "parse") and callable(response.parse): + # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 + try: + response = response.parse() + except Exception: + logger.exception(f"Failed to parse response of type {type(response)}") + self._request_options = request_options + self._response = response + self._include_extra_attributes = include_extra_attributes + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + yield from _as_output_attributes(_io_value_and_type(self._response)) + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._include_extra_attributes: + yield from _get_extra_attributes_from_response( + self._response, + self._request_options, + ) + + +class _Accumulators(ABC): + _mapping: Mapping[type, type] + + def __init_subclass__(cls, mapping: Mapping[type, type], **kwargs: Any) -> None: + super().__init_subclass__(**kwargs) + cls._mapping = mapping + + @classmethod + def find(cls, cast_to: type) -> Optional[_ResponseAccumulator]: + if not isinstance(cast_to, Hashable): + # `cast_to` may not be hashable + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L172 # noqa: E501 + return None + try: + factory = cls._mapping.get(cast_to) + except Exception: + logger.exception(f"Failed to get factory for {cast_to}") + return None + return factory() if factory else None + + +class _ResponseAccumulators( + _Accumulators, + ABC, + mapping=MappingProxyType( + { + ChatCompletion: _ChatCompletionAccumulator, + Completion: _CompletionAccumulator, + } + ), +): + ... 
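The request wrappers above only produce spans once the instrumentor is registered against a tracer provider. A minimal usage sketch of that wiring, assuming the `OpenAIInstrumentor().instrument()` entry point from this package's `__init__.py`; the console exporter and model name below are illustrative choices, not part of this patch:

from openai import OpenAI
from openinference.instrumentation.openai import OpenAIInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Set up a tracer provider and register the instrumentor before making client calls.
tracer_provider = trace_sdk.TracerProvider()
tracer_provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace_api.set_tracer_provider(tracer_provider)
OpenAIInstrumentor().instrument()

client = OpenAI()  # reads OPENAI_API_KEY from the environment
# Streaming responses are wrapped by _Stream, so the span ends when iteration finishes.
for chunk in client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a haiku."}],
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")

Because `_finalize_response` wraps streaming responses in `_Stream`, the span for the streaming call in this sketch is finished only after the last chunk has been consumed.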
diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py new file mode 100644 index 000000000..5ae1340b8 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -0,0 +1,248 @@ +import warnings +from collections import defaultdict +from copy import deepcopy +from types import MappingProxyType +from typing import ( + Any, + Callable, + DefaultDict, + Dict, + Iterator, + List, + Mapping, + Optional, + Tuple, +) + +from openinference.instrumentation.openai._extra_attributes_from_response import ( + _get_extra_attributes_from_response, +) +from openinference.instrumentation.openai._utils import ( + _as_output_attributes, + _MimeType, + _ValueAndType, +) +from opentelemetry.util.types import AttributeValue +from typing_extensions import TypeAlias + +from openai.types import Completion +from openai.types.chat import ( + ChatCompletion, + ChatCompletionChunk, +) + +__all__ = ( + "_CompletionAccumulator", + "_ChatCompletionAccumulator", +) + +_ChoiceIndex: TypeAlias = int + + +class _ChatCompletionAccumulator: + __slots__ = ( + "_is_null", + "_values", + "_cached", + ) + + def __init__(self) -> None: + self._is_null = True + self._cached: Optional[ChatCompletion] = None + self._values = _ValuesAccumulator( + choices=_IndexedAccumulator( + lambda: _ValuesAccumulator( + message=_ValuesAccumulator( + content=_StringAccumulator(), + function_call=_ValuesAccumulator(arguments=_StringAccumulator()), + tool_calls=_IndexedAccumulator( + lambda: _ValuesAccumulator( + function=_ValuesAccumulator(arguments=_StringAccumulator()), + ) + ), + ), + ), + ), + ) + + def process_chunk(self, chunk: ChatCompletionChunk) -> None: + if not isinstance(chunk, ChatCompletionChunk): + return + self._is_null = False + self._cached = None + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump()` is only supported in Pydantic v2 + values = chunk.model_dump(exclude_unset=True) + for choice in values.get("choices", ()): + if delta := choice.pop("delta", None): + choice["message"] = delta + self._values += values + + def _construct(self) -> Optional[ChatCompletion]: + if self._is_null: + return None + if not self._cached: + self._cached = ChatCompletion.construct(**dict(self._values)) + return self._cached + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if not (chat_completion := self._construct()): + return + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + json_string = chat_completion.model_dump_json(exclude_unset=True) + yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + + def get_extra_attributes( + self, + request_options: Mapping[str, Any] = MappingProxyType({}), + ) -> Iterator[Tuple[str, AttributeValue]]: + if not (chat_completion := self._construct()): + return + yield from _get_extra_attributes_from_response( + chat_completion.model_copy(), + request_options, + ) + + +class _CompletionAccumulator: + __slots__ = ( + "_is_null", + "_values", + "_cached", + ) + + def __init__(self) -> None: + self._is_null = True + self._cached: Optional[Completion] = None + self._values = _ValuesAccumulator( + 
choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), + ) + + def process_chunk(self, chunk: Completion) -> None: + if not isinstance(chunk, Completion): + return + self._is_null = False + self._cached = None + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump()` is only supported in Pydantic v2 + values = chunk.model_dump(exclude_unset=True) + self._values += values + + def _construct(self) -> Optional[Completion]: + if self._is_null: + return None + if not self._cached: + self._cached = Completion.construct(**dict(self._values)) + return self._cached + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if not (completion := self._construct()): + return + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + json_string = completion.model_dump_json(exclude_unset=True) + yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + + def get_extra_attributes( + self, + request_options: Mapping[str, Any] = MappingProxyType({}), + ) -> Iterator[Tuple[str, AttributeValue]]: + if not (completion := self._construct()): + return + yield from _get_extra_attributes_from_response( + completion.model_copy(), + request_options, + ) + + +class _ValuesAccumulator: + __slots__ = ("_values",) + + def __init__(self, **values: Any) -> None: + self._values: Dict[str, Any] = values + + def __iter__(self) -> Iterator[Tuple[str, Any]]: + for key, value in self._values.items(): + if value is None: + continue + if isinstance(value, _ValuesAccumulator): + if dict_value := dict(value): + yield key, dict_value + elif isinstance(value, _IndexedAccumulator): + if list_value := list(value): + yield key, list_value + elif isinstance(value, _StringAccumulator): + if str_value := str(value): + yield key, str_value + else: + yield key, value + + def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_ValuesAccumulator": + if not values: + return self + for key in self._values.keys(): + if (value := values.get(key)) is None: + continue + self_value = self._values[key] + if isinstance(self_value, _ValuesAccumulator): + if isinstance(value, Mapping): + self_value += value + elif isinstance(self_value, _StringAccumulator): + if isinstance(value, str): + self_value += value + elif isinstance(self_value, _IndexedAccumulator): + if isinstance(value, List): + for v in value: + self_value += v + else: + self_value += value + elif isinstance(self_value, List) and isinstance(value, List): + self_value.extend(value) + else: + self._values[key] = value # replacement + for key in values.keys(): + if key in self._values or (value := values[key]) is None: + continue + value = deepcopy(value) + if isinstance(value, Mapping): + value = _ValuesAccumulator(**value) + self._values[key] = value # new entry + return self + + +class _StringAccumulator: + __slots__ = ("_fragments",) + + def __init__(self) -> None: + self._fragments: List[str] = [] + + def __str__(self) -> str: + return "".join(self._fragments) + + def __iadd__(self, value: Optional[str]) -> "_StringAccumulator": + if not value: + return self + self._fragments.append(value) + return self + + +class _IndexedAccumulator: + __slots__ = ("_indexed",) + + def __init__(self, factory: Callable[[], _ValuesAccumulator]) -> None: + self._indexed: DefaultDict[int, _ValuesAccumulator] = defaultdict(factory) + + def __iter__(self) -> Iterator[Dict[str, Any]]: + for _, 
values in sorted(self._indexed.items()): + yield dict(values) + + def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_IndexedAccumulator": + if not values or not hasattr(values, "get") or (index := values.get("index")) is None: + return self + self._indexed[index] += values + return self diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py new file mode 100644 index 000000000..3bba04c83 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -0,0 +1,149 @@ +import logging +from typing import ( + Any, + AsyncIterator, + Iterator, + Optional, + Protocol, + Tuple, + Union, +) + +from openinference.instrumentation.openai._utils import ( + _finish_tracing, +) +from openinference.instrumentation.openai._with_span import _WithSpan +from opentelemetry import trace as trace_api +from opentelemetry.util.types import AttributeValue +from wrapt import ObjectProxy + +from openai import AsyncStream, Stream + +__all__ = ( + "_Stream", + "_ResponseAccumulator", +) + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class _ResponseAccumulator(Protocol): + def process_chunk(self, chunk: Any) -> None: + ... + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + +class _Stream(ObjectProxy): # type: ignore + __slots__ = ( + "_self_with_span", + "_self_iteration_count", + "_self_is_finished", + "_self_include_extra_attributes", + "_self_response_accumulator", + ) + + def __init__( + self, + stream: Union[Stream[Any], AsyncStream[Any]], + with_span: _WithSpan, + response_accumulator: Optional[_ResponseAccumulator] = None, + include_extra_attributes: bool = True, + ) -> None: + super().__init__(stream) + self._self_with_span = with_span + self._self_iteration_count = 0 + self._self_is_finished = with_span.is_finished + self._self_include_extra_attributes = include_extra_attributes + self._self_response_accumulator = response_accumulator + + def __iter__(self) -> Iterator[Any]: + return self + + def __next__(self) -> Any: + # pass through mistaken calls + if not hasattr(self.__wrapped__, "__next__"): + self.__wrapped__.__next__() + iteration_is_finished = False + status_code: Optional[trace_api.StatusCode] = None + try: + chunk: Any = self.__wrapped__.__next__() + except Exception as exception: + iteration_is_finished = True + if isinstance(exception, StopIteration): + status_code = trace_api.StatusCode.OK + else: + status_code = trace_api.StatusCode.ERROR + self._self_with_span.record_exception(exception) + raise + else: + self._process_chunk(chunk) + status_code = trace_api.StatusCode.OK + return chunk + finally: + if iteration_is_finished and not self._self_is_finished: + self._finish_tracing(status_code=status_code) + + def __aiter__(self) -> AsyncIterator[Any]: + return self + + async def __anext__(self) -> Any: + # pass through mistaken calls + if not hasattr(self.__wrapped__, "__anext__"): + self.__wrapped__.__anext__() + iteration_is_finished = False + status_code: Optional[trace_api.StatusCode] = None + try: + chunk: Any = await self.__wrapped__.__anext__() + except Exception as exception: + iteration_is_finished = True + if isinstance(exception, StopAsyncIteration): + status_code = 
trace_api.StatusCode.OK + else: + status_code = trace_api.StatusCode.ERROR + self._self_with_span.record_exception(exception) + raise + else: + self._process_chunk(chunk) + status_code = trace_api.StatusCode.OK + return chunk + finally: + if iteration_is_finished and not self._self_is_finished: + self._finish_tracing(status_code=status_code) + + def _process_chunk(self, chunk: Any) -> None: + if not self._self_iteration_count: + try: + self._self_with_span.add_event("First Token Stream Event") + except Exception: + logger.exception("Failed to add event to span") + self._self_iteration_count += 1 + if self._self_response_accumulator is not None: + try: + self._self_response_accumulator.process_chunk(chunk) + except Exception: + logger.exception("Failed to accumulate response") + + def _finish_tracing( + self, + status_code: Optional[trace_api.StatusCode] = None, + ) -> None: + _finish_tracing( + status_code=status_code, + with_span=self._self_with_span, + has_attributes=self, + ) + self._self_is_finished = True + + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._self_response_accumulator is not None: + yield from self._self_response_accumulator.get_attributes() + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + if self._self_include_extra_attributes and self._self_response_accumulator is not None: + yield from self._self_response_accumulator.get_extra_attributes() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py new file mode 100644 index 000000000..b277236d0 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -0,0 +1,134 @@ +import json +import logging +import warnings +from enum import Enum +from importlib.metadata import version +from typing import ( + Any, + Iterator, + List, + Mapping, + NamedTuple, + Optional, + Protocol, + Sequence, + Tuple, + Union, + cast, +) + +from openinference.instrumentation.openai._with_span import _WithSpan +from openinference.semconv.trace import SpanAttributes +from opentelemetry import trace as trace_api +from opentelemetry.util.types import Attributes, AttributeValue + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) + + +class _MimeType(Enum): + text_plain = "text/plain" + application_json = "application/json" + + +class _ValueAndType(NamedTuple): + value: str + type: _MimeType + + +def _io_value_and_type(obj: Any) -> _ValueAndType: + if hasattr(obj, "model_dump_json") and callable(obj.model_dump_json): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 + value = obj.model_dump_json(exclude_unset=True) + assert isinstance(value, str) + except Exception: + logger.exception("Failed to get model dump json") + else: + return _ValueAndType(value, _MimeType.application_json) + if not isinstance(obj, str) and isinstance(obj, (Sequence, Mapping)): + try: + value = json.dumps(obj) + except Exception: + logger.exception("Failed to dump json") + else: + return _ValueAndType(value, _MimeType.application_json) + return _ValueAndType(str(obj), _MimeType.text_plain) + + +def _as_input_attributes( + value_and_type: Optional[_ValueAndType], +) -> 
Iterator[Tuple[str, AttributeValue]]: + if not value_and_type: + return + yield SpanAttributes.INPUT_VALUE, value_and_type.value + yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value + + +def _as_output_attributes( + value_and_type: Optional[_ValueAndType], +) -> Iterator[Tuple[str, AttributeValue]]: + if not value_and_type: + return + yield SpanAttributes.OUTPUT_VALUE, value_and_type.value + yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value + + +class _HasAttributes(Protocol): + def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: + ... + + +def _finish_tracing( + with_span: _WithSpan, + has_attributes: _HasAttributes, + status_code: Optional[trace_api.StatusCode] = None, +) -> None: + try: + attributes: Attributes = dict(has_attributes.get_attributes()) + except Exception: + logger.exception("Failed to get output value") + attributes = None + try: + extra_attributes: Attributes = dict(has_attributes.get_extra_attributes()) + except Exception: + logger.exception("Failed to get extra attributes") + extra_attributes = None + try: + with_span.finish_tracing( + status_code=status_code, + attributes=attributes, + extra_attributes=extra_attributes, + ) + except Exception: + logger.exception("Failed to finish tracing") + + +def _get_texts( + model_input: Optional[Union[str, List[str], List[int], List[List[int]]]], + model: Optional[str], +) -> Iterator[str]: + if not model_input: + return + if isinstance(model_input, str): + text = model_input + yield text + return + if not isinstance(model_input, Sequence): + return + if any(not isinstance(item, str) for item in model_input): + # FIXME: We can't decode tokens (List[int]) reliably because the model name is not reliable, + # e.g. for text-embedding-ada-002 (cl100k_base), OpenAI returns "text-embedding-ada-002-v2", + # and Azure returns "ada", which refers to a different model (r50k_base). We could use the + # request model name instead, but that doesn't work for Azure because Azure uses the + # deployment name (which differs from the model name). 
+ return + for text in cast(List[str], model_input): + yield text diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py new file mode 100644 index 000000000..e09e18830 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_with_span.py @@ -0,0 +1,82 @@ +import logging +from typing import Optional + +from opentelemetry import trace as trace_api +from opentelemetry.util.types import Attributes + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +class _WithSpan: + __slots__ = ( + "_span", + "_extra_attributes", + "_is_finished", + ) + + def __init__( + self, + span: trace_api.Span, + extra_attributes: Attributes = None, + ) -> None: + self._span = span + self._extra_attributes = extra_attributes + try: + self._is_finished = not self._span.is_recording() + except Exception: + logger.exception("Failed to check if span is recording") + self._is_finished = True + + @property + def is_finished(self) -> bool: + return self._is_finished + + def record_exception(self, exception: Exception) -> None: + if self._is_finished: + return + try: + self._span.record_exception(exception) + except Exception: + logger.exception("Failed to record exception on span") + + def add_event(self, name: str) -> None: + if self._is_finished: + return + try: + self._span.add_event(name) + except Exception: + logger.exception("Failed to add event to span") + + def finish_tracing( + self, + status_code: Optional[trace_api.StatusCode] = None, + attributes: Attributes = None, + extra_attributes: Attributes = None, + ) -> None: + if self._is_finished: + return + for mapping in ( + attributes, + self._extra_attributes, + extra_attributes, + ): + if not mapping: + continue + for key, value in mapping.items(): + if value is None: + continue + try: + self._span.set_attribute(key, value) + except Exception: + logger.exception("Failed to set attribute on span") + if status_code is not None: + try: + self._span.set_status(status_code) + except Exception: + logger.exception("Failed to set status code on span") + try: + self._span.end() + except Exception: + logger.exception("Failed to end span") + self._is_finished = True diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py new file mode 100644 index 000000000..1ac5bcfee --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/package.py @@ -0,0 +1,2 @@ +_instruments = ("openai >= 1.0.0",) +_supports_metrics = False diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py new file mode 100644 index 000000000..f102a9cad --- /dev/null +++ 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/version.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py new file mode 100644 index 000000000..0d7875094 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -0,0 +1,154 @@ +import pytest +from openai.types.chat import ChatCompletion, ChatCompletionChunk +from openinference.instrumentation.openai._response_accumulator import ( + _ChatCompletionAccumulator, +) + + +def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion): + accumulator = _ChatCompletionAccumulator() + for chunk in chat_completion_chunks: + accumulator.process_chunk(chunk) + assert accumulator._construct().model_dump( + exclude_unset=True + ) == desired_chat_completion.model_dump(exclude_unset=True) + + +@pytest.fixture +def desired_chat_completion(): + return ChatCompletion.construct( + **{ + "id": "xyz", + "choices": [ + { + "index": 0, + "finish_reason": "length", + "message": {"content": "A1", "role": "assistant"}, + }, + { + "index": 1, + "finish_reason": "stop", + "message": {"content": "B2", "role": "assistant"}, + }, + { + "index": 2, + }, + { + "index": 3, + "message": { + "tool_calls": [ + {"index": 0, "function": {"arguments": "C3"}}, + {"index": 1, "function": {"arguments": "D4"}}, + ] + }, + }, + ], + "created": 123, + "model": "ultra-turbo", + } + ) + + +@pytest.fixture +def chat_completion_chunks(): + chunks = [ + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"content": "A"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "D"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"content": "B"}, "finish_reason": None}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "C"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"role": "assistant"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"content": "2"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 0, "delta": {"content": "1"}, "finish_reason": None}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 1, "delta": {"role": "assistant"}}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"finish_reason": "length", "index": 0}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "4"}}]}, + }, + ], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"finish_reason": "stop", "index": 1}], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [ + { + "index": 3, + "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "3"}}]}, + }, + 
], + "created": 123, + "model": "ultra-turbo", + }, + { + "id": "xyz", + "choices": [{"index": 2, "delta": {"content": ""}}], + "created": 123, + "model": "ultra-turbo", + }, + ] + return [ChatCompletionChunk.construct(**chunk) for chunk in chunks] diff --git a/python/mypy.ini b/python/mypy.ini new file mode 100644 index 000000000..533084ef6 --- /dev/null +++ b/python/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +strict = true + +[mypy-wrapt] +ignore_missing_imports = True diff --git a/python/openinference-semantic-conventions/pyproject.toml b/python/openinference-semantic-conventions/pyproject.toml index 3581a2dc2..0128aec0f 100644 --- a/python/openinference-semantic-conventions/pyproject.toml +++ b/python/openinference-semantic-conventions/pyproject.toml @@ -41,4 +41,4 @@ include = [ ] [tool.hatch.build.targets.wheel] -packages = ["src/openinference"] \ No newline at end of file +packages = ["src/openinference"] diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py index 391e36c82..196b4674d 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py @@ -1,4 +1,3 @@ - class SpanAttributes: OUTPUT_VALUE = "output.value" OUTPUT_MIME_TYPE = "output.mime_type" @@ -17,47 +16,11 @@ class SpanAttributes: """ A list of objects containing embedding data, including the vector and represented piece of text. """ - EMBEDDING_MODEL_NAME = "embedding.model_name" - """ - The name of the embedding model. - """ - EMBEDDING_TEXT = "embedding.text" - """ - The text represented by the embedding. - """ - EMBEDDING_VECTOR = "embedding.vector" - """ - The embedding vector. - """ - TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" - """ - The name of function that is being called during a tool call. - """ - TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" - """ - The JSON string representing the arguments passed to the function - during a tool call. - """ - MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name" - """ - The function name that is a part of the message list. - This is populated for role 'function' or 'agent' as a mechanism to identify - the function that was called during the execution of a tool - """ - MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json" - """ - The JSON string representing the arguments passed to the function - during a function call - """ - MESSAGE_CONTENT = "message.content" - """ - The content of the message to the llm - """ LLM_FUNCTION_CALL = "llm.function_call" """ - For models and APIs that support function calling. Records attributes such as the function name and - arguments to the called function. + For models and APIs that support function calling. Records attributes such as the function + name and arguments to the called function. """ LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters" """ @@ -118,12 +81,71 @@ class SpanAttributes: """ RETRIEVAL_DOCUMENTS = "retrieval.documents" + + OPENINFERENCE_SPAN_KIND = "openinference.span.kind" + + +class MessageAttributes: + """ + Attributes for a message generated by a LLM + """ + + MESSAGE_ROLE = "message.role" + """ + The role of the message, such as "user", "agent", "function". 
+ """ + MESSAGE_CONTENT = "message.content" + """ + The content of the message to the llm + """ + MESSAGE_NAME = "message.name" + """ + The name of the message, often used to identify the function + that was used to generate the message. + """ + MESSAGE_TOOL_CALLS = "message.tool_calls" + """ + The tool calls generated by the model, such as function calls. + """ + MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name" + """ + The function name that is a part of the message list. + This is populated for role 'function' or 'agent' as a mechanism to identify + the function that was called during the execution of a tool + """ + MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json" + """ + The JSON string representing the arguments passed to the function + during a function call + """ + + +class DocumentAttributes: + """ + Attributes for a document + """ + DOCUMENT_ID = "document.id" + """ + The id of the document + """ DOCUMENT_SCORE = "document.score" + """ + The score of the document + """ DOCUMENT_CONTENT = "document.content" + """ + The content of the document + """ DOCUMENT_METADATA = "document.metadata" """ - Document metadata as a string representing a JSON object + The metadata of the document + """ + + +class RerankerAttributes: + """ + Attributes for a reranker """ RERANKER_INPUT_DOCUMENTS = "reranker.input_documents" @@ -147,23 +169,41 @@ class SpanAttributes: Top K parameter of the reranker """ - OPENINFERENCE_SPAN_KIND = "openinference.span.kind" -class MessageAttributes: +class EmbeddingAttributes: """ - Attributes for a message generated by a LLM + Attributes for an embedding """ - - MESSAGE_ROLE = "message.role" + + EMBEDDING_EMBEDDINGS = "embedding.embeddings" """ - The role of the message, such as "user", "agent", "function". + A list of objects containing embedding data, including the vector and represented piece of text. """ - MESSAGE_NAME = "message.name" + EMBEDDING_MODEL_NAME = "embedding.model_name" """ - The name of the message, often used to identify the function - that was used to generate the message. + The name of the embedding model. """ - MESSAGE_TOOL_CALLS = "message.tool_calls" + EMBEDDING_TEXT = "embedding.text" + """ + The text represented by the embedding. + """ + EMBEDDING_VECTOR = "embedding.vector" + """ + The embedding vector. + """ + + +class ToolCallAttributes: + """ + Attributes for a tool call + """ + + TOOL_CALL_FUNCTION_NAME = "tool_call.function.name" + """ + The name of function that is being called during a tool call. + """ + TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments" + """ + The JSON string representing the arguments passed to the function + during a tool call. """ - The tool calls generated by the model, such as function calls. 
- """ \ No newline at end of file diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/version.py b/python/openinference-semantic-conventions/src/openinference/semconv/version.py index b3c06d488..f102a9cad 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/version.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/version.py @@ -1 +1 @@ -__version__ = "0.0.1" \ No newline at end of file +__version__ = "0.0.1" diff --git a/python/ruff.toml b/python/ruff.toml new file mode 100644 index 000000000..afeb5281c --- /dev/null +++ b/python/ruff.toml @@ -0,0 +1,12 @@ +line-length = 100 +exclude = [ + ".git", + ".tox", + "dist", +] +ignore-init-module-imports = true +select = ["E", "F", "W", "I"] +target-version = "py38" + +[lint.isort] +force-single-line = false diff --git a/python/tox.ini b/python/tox.ini new file mode 100644 index 000000000..4819f6f3e --- /dev/null +++ b/python/tox.ini @@ -0,0 +1,29 @@ +[tox] +isolated_build = True +skipsdist = True +skip_missing_interpreters = True +envlist = + ruff + mypy-openai + py3{8,11}-test-openai + + +[testenv] +package = wheel +wheel_build_env = .pkg +deps = + test: pytest == 7.4.4 + ruff: ruff == 0.1.11 + mypy: mypy == 1.8.0 +changedir = + openai: instrumentation/openinference-instrumentation-openai/ +commands_pre = + py3{8,11}: python -m pip install -U pip setuptools wheel +; FIXME: installation below for semantic conventions can be removed once the package is stable and published + openai: pip install {toxinidir}/openinference-semantic-conventions + openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] +commands = + test: pytest {posargs} + ruff: ruff format . + ruff: ruff --fix . + mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From dc39934308f0a078b9e4aac2c8876eff7d6f70d4 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 10:28:34 -0800 Subject: [PATCH 02/44] ci default working directory --- .github/workflows/python-CI.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 935f74c45..9de6cc0bb 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -13,6 +13,10 @@ concurrency: group: test-python-${{ github.head_ref }} cancel-in-progress: true +defaults: + run: + working-directory: ./python + jobs: ci: name: CI Python From 0ffecc79664328ef361e92980267b4a6e0c18e3f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:22:23 -0800 Subject: [PATCH 03/44] clean up --- .github/workflows/python-CI.yaml | 4 +--- .../integration_tests/completions.py | 5 ++--- .../integration_tests/completions_async.py | 6 ++---- .../integration_tests/embeddings.py | 2 -- .../integration_tests/functions_and_tool_calls.py | 2 -- 5 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 9de6cc0bb..973b50550 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -16,7 +16,7 @@ concurrency: defaults: run: working-directory: ./python - + jobs: ci: name: CI Python @@ -40,5 +40,3 @@ jobs: - name: Run tests run: | tox run -e test-openai - - diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py index 8830ff8b3..efc031125 100644 --- 
a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -53,8 +53,8 @@ def default_tracer_provider() -> trace_sdk.TracerProvider: N = 3 # iteration i = 0 results in intentional BadRequestError HAIKU = "Write a haiku." HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a resume." -RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a résumé." +RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] CHAT_KWARGS = { "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": HAIKU}], @@ -206,4 +206,3 @@ def tasks(n, task, **kwargs): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py index 6263956bf..2fddf54bd 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -7,7 +7,6 @@ from contextlib import suppress from importlib.metadata import version from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -52,8 +51,8 @@ def default_tracer_provider() -> trace_sdk.TracerProvider: N = 3 # iteration i = 0 results in intentional BadRequestError HAIKU = "Write a haiku." HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a resume." -RESUME_TOKENS = [8144, 264, 6520, 39342, 13] +RESUME = "Write a résumé." 
+RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] CHAT_KWARGS = { "model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": HAIKU}], @@ -204,4 +203,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index ee91e2898..2b0a40ef9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -6,7 +6,6 @@ import logging from contextlib import suppress from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -139,4 +138,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py index a281938e5..5166a6da3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py @@ -9,7 +9,6 @@ from contextlib import suppress from importlib.metadata import version from itertools import chain -from time import sleep from opentelemetry import trace as trace_api from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -192,4 +191,3 @@ async def main(*tasks): mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") assert _EXPECTED_SPAN_COUNT == actual - sleep(1) # (if applicable) let the old exporter finish sending traces From b9e06548c52a1475f7e1bce8d4460ea5a27a8f47 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:49:01 -0800 Subject: [PATCH 04/44] clean up --- .../instrumentation/openai/__init__.py | 9 +-- .../openai/_response_accumulator.py | 56 +++++++-------- .../instrumentation/openai/_stream.py | 4 +- .../openai/test_response_accumulator.py | 68 +++++++++---------- 4 files changed, 61 insertions(+), 76 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 3217d6457..29547e0ef 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -18,14 +18,9 @@ class OpenAIInstrumentor(BaseInstrumentor): # type: ignore - """An instrumentor for openai.OpenAI.request and - openai.AsyncOpenAI.request """ - - __slots__ = ( - "_original_request", - 
"_original_async_request", - ) + An instrumentor for openai.OpenAI.request and openai.AsyncOpenAI.request + """ def instrumentation_dependencies(self) -> Collection[str]: return _instruments diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 5ae1340b8..01c3ecb5b 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -1,3 +1,4 @@ +import json import warnings from collections import defaultdict from copy import deepcopy @@ -7,6 +8,7 @@ Callable, DefaultDict, Dict, + Iterable, Iterator, List, Mapping, @@ -43,12 +45,12 @@ class _ChatCompletionAccumulator: __slots__ = ( "_is_null", "_values", - "_cached", + "_cached_result", ) def __init__(self) -> None: self._is_null = True - self._cached: Optional[ChatCompletion] = None + self._cached_result: Optional[Mapping[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator( lambda: _ValuesAccumulator( @@ -69,7 +71,7 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: if not isinstance(chunk, ChatCompletionChunk): return self._is_null = False - self._cached = None + self._cached_result = None with warnings.catch_warnings(): warnings.simplefilter("ignore") # `warnings=False` in `model_dump()` is only supported in Pydantic v2 @@ -79,30 +81,27 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: choice["message"] = delta self._values += values - def _construct(self) -> Optional[ChatCompletion]: + def _result(self) -> Optional[Mapping[str, Any]]: if self._is_null: return None - if not self._cached: - self._cached = ChatCompletion.construct(**dict(self._values)) - return self._cached + if not self._cached_result: + self._cached_result = MappingProxyType(dict(self._values)) + return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if not (chat_completion := self._construct()): + if not (result := self._result()): return - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 - json_string = chat_completion.model_dump_json(exclude_unset=True) + json_string = json.dumps(result) yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) def get_extra_attributes( self, request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: - if not (chat_completion := self._construct()): + if not (result := self._result()): return yield from _get_extra_attributes_from_response( - chat_completion.model_copy(), + ChatCompletion.construct(**result), request_options, ) @@ -111,12 +110,12 @@ class _CompletionAccumulator: __slots__ = ( "_is_null", "_values", - "_cached", + "_cached_result", ) def __init__(self) -> None: self._is_null = True - self._cached: Optional[Completion] = None + self._cached_result: Optional[Mapping[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) @@ -125,37 +124,34 @@ def process_chunk(self, chunk: Completion) -> None: if not isinstance(chunk, Completion): return self._is_null = False - 
self._cached = None + self._cached_result = None with warnings.catch_warnings(): warnings.simplefilter("ignore") # `warnings=False` in `model_dump()` is only supported in Pydantic v2 values = chunk.model_dump(exclude_unset=True) self._values += values - def _construct(self) -> Optional[Completion]: + def _result(self) -> Optional[Mapping[str, Any]]: if self._is_null: return None - if not self._cached: - self._cached = Completion.construct(**dict(self._values)) - return self._cached + if not self._cached_result: + self._cached_result = MappingProxyType(dict(self._values)) + return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if not (completion := self._construct()): + if not (result := self._result()): return - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - # `warnings=False` in `model_dump_json()` is only supported in Pydantic v2 - json_string = completion.model_dump_json(exclude_unset=True) + json_string = json.dumps(result) yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) def get_extra_attributes( self, request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: - if not (completion := self._construct()): + if not (result := self._result()): return yield from _get_extra_attributes_from_response( - completion.model_copy(), + Completion.construct(**result), request_options, ) @@ -196,12 +192,12 @@ def __iadd__(self, values: Optional[Mapping[str, Any]]) -> "_ValuesAccumulator": if isinstance(value, str): self_value += value elif isinstance(self_value, _IndexedAccumulator): - if isinstance(value, List): + if isinstance(value, Iterable): for v in value: self_value += v else: self_value += value - elif isinstance(self_value, List) and isinstance(value, List): + elif isinstance(self_value, List) and isinstance(value, Iterable): self_value.extend(value) else: self._values[key] = value # replacement diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 3bba04c83..5ac8e4f57 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -9,9 +9,7 @@ Union, ) -from openinference.instrumentation.openai._utils import ( - _finish_tracing, -) +from openinference.instrumentation.openai._utils import _finish_tracing from openinference.instrumentation.openai._with_span import _WithSpan from opentelemetry import trace as trace_api from opentelemetry.util.types import AttributeValue diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 0d7875094..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,52 +1,48 @@ import pytest -from openai.types.chat import ChatCompletion, ChatCompletionChunk +from openai.types.chat import 
ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( _ChatCompletionAccumulator, ) -def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion): +def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) - assert accumulator._construct().model_dump( - exclude_unset=True - ) == desired_chat_completion.model_dump(exclude_unset=True) + assert accumulator._result() == desired_chat_completion_result @pytest.fixture -def desired_chat_completion(): - return ChatCompletion.construct( - **{ - "id": "xyz", - "choices": [ - { - "index": 0, - "finish_reason": "length", - "message": {"content": "A1", "role": "assistant"}, - }, - { - "index": 1, - "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, - }, - { - "index": 2, +def desired_chat_completion_result(): + return { + "id": "xyz", + "choices": [ + { + "index": 0, + "finish_reason": "length", + "message": {"content": "A1", "role": "assistant"}, + }, + { + "index": 1, + "finish_reason": "stop", + "message": {"content": "B2", "role": "assistant"}, + }, + { + "index": 2, + }, + { + "index": 3, + "message": { + "tool_calls": [ + {"index": 0, "function": {"arguments": "C3"}}, + {"index": 1, "function": {"arguments": "D4"}}, + ] }, - { - "index": 3, - "message": { - "tool_calls": [ - {"index": 0, "function": {"arguments": "C3"}}, - {"index": 1, "function": {"arguments": "D4"}}, - ] - }, - }, - ], - "created": 123, - "model": "ultra-turbo", - } - ) + }, + ], + "created": 123, + "model": "ultra-turbo", + } @pytest.fixture From 31d3d1a0bc1782edfd46a02c14ec43f58841c179 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:51:12 -0800 Subject: [PATCH 05/44] clean up --- .../instrumentation/openai/_response_accumulator.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 01c3ecb5b..658e7d4bf 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -50,7 +50,7 @@ class _ChatCompletionAccumulator: def __init__(self) -> None: self._is_null = True - self._cached_result: Optional[Mapping[str, Any]] = None + self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator( lambda: _ValuesAccumulator( @@ -81,11 +81,11 @@ def process_chunk(self, chunk: ChatCompletionChunk) -> None: choice["message"] = delta self._values += values - def _result(self) -> Optional[Mapping[str, Any]]: + def _result(self) -> Optional[Dict[str, Any]]: if self._is_null: return None if not self._cached_result: - self._cached_result = MappingProxyType(dict(self._values)) + self._cached_result = dict(self._values) return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: @@ -115,7 +115,7 @@ class _CompletionAccumulator: def __init__(self) -> None: self._is_null = True - self._cached_result: Optional[Mapping[str, Any]] = None + self._cached_result: Optional[Dict[str, 
Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) @@ -131,11 +131,11 @@ def process_chunk(self, chunk: Completion) -> None: values = chunk.model_dump(exclude_unset=True) self._values += values - def _result(self) -> Optional[Mapping[str, Any]]: + def _result(self) -> Optional[Dict[str, Any]]: if self._is_null: return None if not self._cached_result: - self._cached_result = MappingProxyType(dict(self._values)) + self._cached_result = dict(self._values) return self._cached_result def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: From 83d56eb397f14e63b1e179476d1240142acf00c0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 4 Jan 2024 11:57:32 -0800 Subject: [PATCH 06/44] clean up --- .../instrumentation/openai/_response_accumulator.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 658e7d4bf..4e2abb9a5 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -25,7 +25,6 @@ _ValueAndType, ) from opentelemetry.util.types import AttributeValue -from typing_extensions import TypeAlias from openai.types import Completion from openai.types.chat import ( @@ -38,8 +37,6 @@ "_ChatCompletionAccumulator", ) -_ChoiceIndex: TypeAlias = int - class _ChatCompletionAccumulator: __slots__ = ( From 189f673dff9a71d2583f106576641acfa868fac6 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 09:50:27 -0800 Subject: [PATCH 07/44] clean up --- .../openinference/instrumentation/openai/_request.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 0f1d0790c..e99cf8dd9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -82,14 +82,21 @@ def _start_as_current_span( span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: +<<<<<<< Updated upstream input_attributes = _as_input_attributes(_io_value_and_type(request_options)) +======= + attributes.update(_as_input_attributes(_io_value_and_type(request_options))) +>>>>>>> Stashed changes except Exception: logger.exception( f"Failed to get input attributes from request options of " f"type {type(request_options)}" ) +<<<<<<< Updated upstream else: attributes.update(input_attributes) +======= +>>>>>>> Stashed changes # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: @@ -220,7 +227,11 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # serializes correctly in an HTTP request body. 
For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we +<<<<<<< Updated upstream # try to extract attributes. However, this round-trip seems expensive, so wa opted to treat +======= + # try to extract attributes. However, this round-trip seems expensive, so we opted to treat +>>>>>>> Stashed changes # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. # try: From 05a49fbfc094cbe6e7d565cbd724dfec9b5927bd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 09:50:38 -0800 Subject: [PATCH 08/44] update README --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3a1679af8..30f181e39 100644 --- a/README.md +++ b/README.md @@ -20,9 +20,10 @@ OpenInference provides a set of instrumentations for popular machine learning SD ## Python -| Package | Description | -| --------------------------------------------------------------------------------------------- | --------------------------------------------- | -| [`openinference-semantic-conventions`](./python/openinference-semantic-conventions/README.md) | Semantic conventions for tracing of LLM Apps. | +| Package | Description | +|--------------------------------------------------------------------------------------------------------------------|-----------------------------------------------| +| [`openinference-semantic-conventions`](./python/openinference-semantic-conventions/README.md) | Semantic conventions for tracing of LLM Apps. | +| [`openinference-instrumentation-openai`](./python/instrumentation/openinference-instrumentation-openai/README.rst) | OpenInference Instrumentation for OpenAI SDK. | ## JavaScript From 525e65c007365a2dd1d80e62bd8aa7389724e83e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Fri, 5 Jan 2024 10:04:14 -0800 Subject: [PATCH 09/44] clean up --- .../instrumentation/openai/_request.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index e99cf8dd9..de17d3077 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -82,21 +82,12 @@ def _start_as_current_span( span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: -<<<<<<< Updated upstream - input_attributes = _as_input_attributes(_io_value_and_type(request_options)) -======= attributes.update(_as_input_attributes(_io_value_and_type(request_options))) ->>>>>>> Stashed changes except Exception: logger.exception( f"Failed to get input attributes from request options of " f"type {type(request_options)}" ) -<<<<<<< Updated upstream - else: - attributes.update(input_attributes) -======= ->>>>>>> Stashed changes # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. 
try: @@ -227,11 +218,7 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # serializes correctly in an HTTP request body. For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we -<<<<<<< Updated upstream - # try to extract attributes. However, this round-trip seems expensive, so wa opted to treat -======= # try to extract attributes. However, this round-trip seems expensive, so we opted to treat ->>>>>>> Stashed changes # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. # try: From f395f585f8cf012479f752ea502c99de8422570b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 8 Jan 2024 16:08:39 -0800 Subject: [PATCH 10/44] fix indent --- .../openinference-instrumentation-openai/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 516092183..846687ab9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.rst" license = "Apache-2.0" requires-python = ">=3.8, <3.12" authors = [ - { name = "OpenInference Authors", email = "oss@arize.com" }, + { name = "OpenInference Authors", email = "oss@arize.com" }, ] classifiers = [ "Development Status :: 4 - Beta", From dbe7bc39cef9034064712c0123dde10d0284dc0a Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Mon, 8 Jan 2024 17:47:11 -0800 Subject: [PATCH 11/44] add OpenInferenceSpanKindValues --- .../src/openinference/semconv/trace/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py index 196b4674d..81331131d 100644 --- a/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py +++ b/python/openinference-semantic-conventions/src/openinference/semconv/trace/__init__.py @@ -1,3 +1,6 @@ +from enum import Enum + + class SpanAttributes: OUTPUT_VALUE = "output.value" OUTPUT_MIME_TYPE = "output.mime_type" @@ -207,3 +210,14 @@ class ToolCallAttributes: The JSON string representing the arguments passed to the function during a tool call. 
""" + + +class OpenInferenceSpanKindValues(Enum): + TOOL = "TOOL" + CHAIN = "CHAIN" + LLM = "LLM" + RETRIEVER = "RETRIEVER" + EMBEDDING = "EMBEDDING" + AGENT = "AGENT" + RERANKER = "RERANKER" + UNKNOWN = "UNKNOWN" From 4cd42cdb4053f9f28fdd650d37cd2d5a983ca18a Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 09:52:02 -0800 Subject: [PATCH 12/44] use OpenInferenceSpanKindValues --- .../src/openinference/instrumentation/openai/_request.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index de17d3077..f93e5cbdf 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -35,7 +35,7 @@ _io_value_and_type, ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes +from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY @@ -54,10 +54,6 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -_LLM_SPAN_KIND = "LLM" -_EMBEDDING_SPAN_KIND = "EMBEDDING" - - class _WithTracer(ABC): __slots__ = ( "_tracer", @@ -79,7 +75,7 @@ def _start_as_current_span( cast_to: type, request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: - span_kind = _EMBEDDING_SPAN_KIND if cast_to is CreateEmbeddingResponse else _LLM_SPAN_KIND + span_kind = OpenInferenceSpanKindValues.EMBEDDING if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: attributes.update(_as_input_attributes(_io_value_and_type(request_options))) From bc277810855e018dcb7ff71decc00d4c94f8fbaf Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:48:32 -0800 Subject: [PATCH 13/44] tox testing --- .github/workflows/python-CI.yaml | 12 ++---------- .../instrumentation/openai/_request.py | 9 +++++++-- python/tox.ini | 14 +++++++------- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 973b50550..a195b7fd4 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -30,13 +30,5 @@ jobs: python-version: "3.8" - name: Install tox run: pip install tox==4.11.4 - - name: Check style - run: | - tox run -e ruff - git diff --exit-code - - name: Run mypy - run: | - tox run -e mypy-openai - - name: Run tests - run: | - tox run -e test-openai + - name: Run tox + run: tox run-parallel diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index f93e5cbdf..086025cce 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -35,7 
+35,7 @@ _io_value_and_type, ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues +from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes from opentelemetry import context as context_api from opentelemetry import trace as trace_api from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY @@ -54,6 +54,7 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) + class _WithTracer(ABC): __slots__ = ( "_tracer", @@ -75,7 +76,11 @@ def _start_as_current_span( cast_to: type, request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: - span_kind = OpenInferenceSpanKindValues.EMBEDDING if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM + span_kind = ( + OpenInferenceSpanKindValues.EMBEDDING + if cast_to is CreateEmbeddingResponse + else OpenInferenceSpanKindValues.LLM + ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: attributes.update(_as_input_attributes(_io_value_and_type(request_options))) diff --git a/python/tox.ini b/python/tox.ini index 4819f6f3e..4b72f9dc1 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,18 +3,17 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - ruff - mypy-openai py3{8,11}-test-openai [testenv] package = wheel wheel_build_env = .pkg +allowlist_externals = git deps = test: pytest == 7.4.4 - ruff: ruff == 0.1.11 - mypy: mypy == 1.8.0 + test: ruff == 0.1.11 + test: mypy == 1.8.0 changedir = openai: instrumentation/openinference-instrumentation-openai/ commands_pre = @@ -23,7 +22,8 @@ commands_pre = openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = + test: ruff format . + test: ruff --fix . + test: git diff --exit-code + test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini test: pytest {posargs} - ruff: ruff format . - ruff: ruff --fix . 
- mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From 9c2096ec58e2cd604b62a4ec13c621a5467cd275 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:52:57 -0800 Subject: [PATCH 14/44] tox testing --- .../src/openinference/instrumentation/openai/_request.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 086025cce..4ae893308 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -78,8 +78,7 @@ def _start_as_current_span( ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING - if cast_to is CreateEmbeddingResponse - else OpenInferenceSpanKindValues.LLM + if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: From 1fb1038e5af7ef342056158d373ff6d976d2c30b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 11:56:06 -0800 Subject: [PATCH 15/44] tox testing --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 4b72f9dc1..f38fef824 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -5,7 +5,6 @@ skip_missing_interpreters = True envlist = py3{8,11}-test-openai - [testenv] package = wheel wheel_build_env = .pkg @@ -23,6 +22,7 @@ commands_pre = openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = test: ruff format . + test: git diff --exit-code test: ruff --fix . 
test: git diff --exit-code test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini From 0d142ad2e2b59fed7ba7d66138a50f0f89d783e3 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:11:47 -0800 Subject: [PATCH 16/44] wip --- .../src/openinference/instrumentation/openai/_request.py | 3 ++- python/tox.ini | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 4ae893308..086025cce 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -78,7 +78,8 @@ def _start_as_current_span( ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING - if cast_to is CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM + if cast_to is CreateEmbeddingResponse + else OpenInferenceSpanKindValues.LLM ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: diff --git a/python/tox.ini b/python/tox.ini index f38fef824..66d5a3da9 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,7 +3,7 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - py3{8,11}-test-openai + py3{8,11}-test-{semconv,openai} [testenv] package = wheel @@ -15,6 +15,7 @@ deps = test: mypy == 1.8.0 changedir = openai: instrumentation/openinference-instrumentation-openai/ + semconv: openinference-semantic-conventions commands_pre = py3{8,11}: python -m pip install -U pip setuptools wheel ; FIXME: installation below for semantic conventions can be removed once the package is stable and published From 775347287c71a243e9d76dbdcad8b2d1333086eb Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:17:59 -0800 Subject: [PATCH 17/44] tox testing --- python/dev-requirements.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 python/dev-requirements.txt diff --git a/python/dev-requirements.txt b/python/dev-requirements.txt new file mode 100644 index 000000000..e69de29bb From 4ed83fd1e63d722fcb035a0b54297b275820c56e Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:18:35 -0800 Subject: [PATCH 18/44] tox testing --- python/dev-requirements.txt | 3 +++ python/tox.ini | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/python/dev-requirements.txt b/python/dev-requirements.txt index e69de29bb..4abb5b680 100644 --- a/python/dev-requirements.txt +++ b/python/dev-requirements.txt @@ -0,0 +1,3 @@ +pytest == 7.4.4 +ruff == 0.1.11 +mypy == 1.8.0 diff --git a/python/tox.ini b/python/tox.ini index 66d5a3da9..e395a4e4b 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,28 +3,31 @@ isolated_build = True skipsdist = True skip_missing_interpreters = True envlist = - py3{8,11}-test-{semconv,openai} + py3{8,11}-ci-{semconv,openai} [testenv] package = wheel wheel_build_env = .pkg allowlist_externals = git deps = - test: pytest == 7.4.4 - test: ruff == 0.1.11 - test: mypy == 1.8.0 + -r dev-requirements.txt changedir = + semconv: openinference-semantic-conventions/ openai: instrumentation/openinference-instrumentation-openai/ - semconv: openinference-semantic-conventions commands_pre = py3{8,11}: python -m pip 
install -U pip setuptools wheel + semconv: pip install {toxinidir}/openinference-semantic-conventions ; FIXME: installation below for semantic conventions can be removed once the package is stable and published openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = - test: ruff format . - test: git diff --exit-code - test: ruff --fix . - test: git diff --exit-code - test: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ruff: ruff format . + ruff: ruff --fix . + mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini test: pytest {posargs} + ci: ruff format . + ci: git diff --exit-code + ci: ruff --fix . + ci: git diff --exit-code + ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ci: pytest {posargs} From 84390433280c6df6a5f4c94d210fb7dd89dde366 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:21:54 -0800 Subject: [PATCH 19/44] tox testing --- .../src/openinference/instrumentation/openai/_request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 086025cce..65e324015 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -77,9 +77,9 @@ def _start_as_current_span( request_options: Mapping[str, Any], ) -> Iterator[_WithSpan]: span_kind = ( - OpenInferenceSpanKindValues.EMBEDDING + OpenInferenceSpanKindValues.EMBEDDING.value if cast_to is CreateEmbeddingResponse - else OpenInferenceSpanKindValues.LLM + else OpenInferenceSpanKindValues.LLM.value ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: From ec12409861f956bf4f7bb8bb786da811f0adb9a0 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:33:11 -0800 Subject: [PATCH 20/44] tox testing --- .github/workflows/python-CI.yaml | 12 ++---------- .../openinference/semconv/trace/test_version.py | 8 ++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index a195b7fd4..131fe0898 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -6,12 +6,6 @@ on: pull_request: paths: - "python/**" - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -concurrency: - group: test-python-${{ github.head_ref }} - cancel-in-progress: true defaults: run: @@ -22,10 +16,8 @@ jobs: name: CI Python runs-on: ubuntu-latest steps: - - name: Checkout Repository - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: "3.8" - name: Install tox diff --git a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py new file mode 100644 index 000000000..1c7e13aed --- /dev/null +++ 
b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py @@ -0,0 +1,8 @@ +""" +This is a dummy test to ensure that every package has one test. +""" +from openinference.semconv.version import __version__ as semconv_version + + +def test_version(): + print(semconv_version) From 24570446b79d146823b17b7907310fbb23859c92 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 12:59:21 -0800 Subject: [PATCH 21/44] wip --- .../tests/openinference/semconv/trace/test_version.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py diff --git a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py b/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py deleted file mode 100644 index 1c7e13aed..000000000 --- a/python/openinference-semantic-conventions/tests/openinference/semconv/trace/test_version.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -This is a dummy test to ensure that every package has one test. -""" -from openinference.semconv.version import __version__ as semconv_version - - -def test_version(): - print(semconv_version) From b965721c25b181269f427b50a7b418bdc663bf70 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:09:24 -0800 Subject: [PATCH 22/44] wip --- .github/workflows/python-CI.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 131fe0898..04e797fad 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -20,7 +20,5 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.8" - - name: Install tox - run: pip install tox==4.11.4 - - name: Run tox - run: tox run-parallel + - run: pip install tox==4.11.4 + - run: tox run-parallel From f17680b9339af0544e554d63ec00c9723e2d63d7 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:11:23 -0800 Subject: [PATCH 23/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fc922dc96..fed07e6b0 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -25,7 +25,7 @@ def desired_chat_completion_result(): { "index": 1, "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, + "message": {"content": "B22", "role": "assistant"}, }, { "index": 2, From 479b2d6ec39024b61c4703a596630761abd71384 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:15:28 -0800 Subject: [PATCH 24/44] wip --- .github/workflows/python-CI.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 04e797fad..f26fceb3e 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: 
"3.8" + python-version: | + - 3.8 + - 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From 09fb90104d7b4d38740054c258e3d9f9c3e45952 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:18:05 -0800 Subject: [PATCH 25/44] wip --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index e395a4e4b..4c076d871 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -30,4 +30,4 @@ commands = ci: ruff --fix . ci: git diff --exit-code ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini - ci: pytest {posargs} + ci: pytest From f3211dd515fd5b7040ed5b4f464d8ba5ded0efce Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:19:15 -0800 Subject: [PATCH 26/44] wip --- .github/workflows/python-CI.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index f26fceb3e..c3791de04 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -20,7 +20,7 @@ jobs: - uses: actions/setup-python@v5 with: python-version: | - - 3.8 + - "3.8" - 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From c533f9f8a5012fe3347017521a434e8e4e1004f7 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:21:15 -0800 Subject: [PATCH 27/44] wip --- .github/workflows/python-CI.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index c3791de04..1cb868986 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,8 +19,6 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: | - - "3.8" - - 3.11 + python-version: "3.x" - run: pip install tox==4.11.4 - run: tox run-parallel From 7ed318b13ea10ad4b98416c0dab5fe44c252d76c Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:21:53 -0800 Subject: [PATCH 28/44] wip --- .github/workflows/python-CI.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-CI.yaml b/.github/workflows/python-CI.yaml index 1cb868986..0f2077d92 100644 --- a/.github/workflows/python-CI.yaml +++ b/.github/workflows/python-CI.yaml @@ -19,6 +19,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.x" + python-version: | + 3.8 + 3.11 - run: pip install tox==4.11.4 - run: tox run-parallel From 3c08576f2a9942164417ee811704bd8c4538ef54 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 13:24:04 -0800 Subject: [PATCH 29/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fed07e6b0..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -25,7 +25,7 @@ def desired_chat_completion_result(): { "index": 1, "finish_reason": "stop", - "message": {"content": "B22", "role": "assistant"}, + "message": {"content": "B2", "role": 
"assistant"}, }, { "index": 2, From ff84687f49084ed212a80c09686388f56f784bbd Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:27:50 -0800 Subject: [PATCH 30/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index fc922dc96..730cedca9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,3 +1,5 @@ +import sys + import pytest from openai.types.chat import ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( @@ -7,6 +9,7 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() + assert sys.version_info[1] == 8 for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result From 9acb9c2d89572505813c190afb764e6a0f401147 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:29:31 -0800 Subject: [PATCH 31/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 730cedca9..715b7f517 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -9,7 +9,6 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): accumulator = _ChatCompletionAccumulator() - assert sys.version_info[1] == 8 for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result From 164ca951f80f6f8811d5ba043bf350a77947bcdc Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:31:00 -0800 Subject: [PATCH 32/44] wip --- .../instrumentation/openai/test_response_accumulator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 715b7f517..fc922dc96 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -1,5 +1,3 @@ -import sys - import pytest from openai.types.chat import ChatCompletionChunk from openinference.instrumentation.openai._response_accumulator import ( From 
6d759550d298b3097d67722dd5715ab09d917e20 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:48:55 -0800 Subject: [PATCH 33/44] wip --- python/tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 4c076d871..aa8d4098e 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -1,7 +1,6 @@ [tox] isolated_build = True skipsdist = True -skip_missing_interpreters = True envlist = py3{8,11}-ci-{semconv,openai} From b70a56d939fc110075eb6c9e5b40bddbc40d5b06 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 14:59:25 -0800 Subject: [PATCH 34/44] wip --- python/tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index aa8d4098e..09453bc39 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -22,11 +22,11 @@ commands_pre = commands = ruff: ruff format . ruff: ruff --fix . - mypy: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} test: pytest {posargs} ci: ruff format . ci: git diff --exit-code ci: ruff --fix . ci: git diff --exit-code - ci: mypy --explicit-package-bases src --config-file {toxinidir}/mypy.ini + ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest From e3c538df4c0d786b47304f07d59eeec51a8d5b76 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:02:24 -0800 Subject: [PATCH 35/44] wip --- python/tox.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 09453bc39..00c795ce2 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -2,7 +2,8 @@ isolated_build = True skipsdist = True envlist = - py3{8,11}-ci-{semconv,openai} + py3{8,11}-ci-semconv + py3{8,11}-ci-openai [testenv] package = wheel From f344c328022041ffae553b9eeb1fa250acce657b Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:03:12 -0800 Subject: [PATCH 36/44] wip --- python/tox.ini | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index 00c795ce2..3d9091fa7 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -17,8 +17,6 @@ changedir = commands_pre = py3{8,11}: python -m pip install -U pip setuptools wheel semconv: pip install {toxinidir}/openinference-semantic-conventions -; FIXME: installation below for semantic conventions can be removed once the package is stable and published - openai: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = ruff: ruff format . From 1593c6dc5225720676cc60c7f2ee72ab7e934590 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:21:23 -0800 Subject: [PATCH 37/44] wip --- python/tox.ini | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tox.ini b/python/tox.ini index 3d9091fa7..e8241d1af 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -19,13 +19,13 @@ commands_pre = semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = - ruff: ruff format . - ruff: ruff --fix . + ruff: ruff format . --config {toxinidir}/ruff.toml + ruff: ruff . --fix --config {toxinidir}/ruff.toml mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} test: pytest {posargs} - ci: ruff format . + ci: ruff format . 
--config {toxinidir}/ruff.toml ci: git diff --exit-code - ci: ruff --fix . + ci: ruff . --fix --config {toxinidir}/ruff.toml ci: git diff --exit-code ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest From 444906be8cf9aac0f9690aacd27c6517e6d440c1 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:25:22 -0800 Subject: [PATCH 38/44] wip --- python/tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index e8241d1af..1bc7672e3 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -28,4 +28,4 @@ commands = ci: ruff . --fix --config {toxinidir}/ruff.toml ci: git diff --exit-code ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src - ci: pytest + ci: pytest tests From 5fd32ccc69acc67d7881e29b7da07e942d966d2f Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Tue, 9 Jan 2024 15:27:17 -0800 Subject: [PATCH 39/44] wip --- python/tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tox.ini b/python/tox.ini index 1bc7672e3..a38bd176f 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -15,7 +15,6 @@ changedir = semconv: openinference-semantic-conventions/ openai: instrumentation/openinference-instrumentation-openai/ commands_pre = - py3{8,11}: python -m pip install -U pip setuptools wheel semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] commands = From 9029fa8f3a2c57b0f4b478c95a1494beaf99352d Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 16:06:19 -0800 Subject: [PATCH 40/44] wip --- .../examples/chat_completion_async_stream.py | 3 - .../integration_tests/completions.py | 3 - .../integration_tests/completions_async.py | 3 - .../integration_tests/embeddings.py | 3 - .../pyproject.toml | 5 + .../instrumentation/openai/__init__.py | 27 +- .../openai/_extra_attributes_from_request.py | 16 +- .../openai/_extra_attributes_from_response.py | 16 +- .../instrumentation/openai/_request.py | 135 ++--- .../openai/_response_accumulator.py | 29 +- .../instrumentation/openai/_stream.py | 5 +- .../instrumentation/openai/_utils.py | 24 +- .../openai/test_instrumentor.py | 543 ++++++++++++++++++ .../openai/test_response_accumulator.py | 2 +- python/tox.ini | 14 +- 15 files changed, 662 insertions(+), 166 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py index 4c81584bc..1216e9ff3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. 
-""" import asyncio import openai diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py index efc031125..dc4226409 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import contextvars import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py index 2fddf54bd..2c94b0d78 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import asyncio import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index 2b0a40ef9..10b1c11ef 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -1,6 +1,3 @@ -""" -Phoenix collector should be running in the background. -""" import asyncio import inspect import logging diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 846687ab9..833c613d3 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -32,8 +32,13 @@ dependencies = [ ] [project.optional-dependencies] +instruments = [ + "openai >= 1.0.0", +] test = [ "openai == 1.0.0", + "opentelemetry-sdk", + "respx", ] [project.urls] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 29547e0ef..1cec04144 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -1,4 +1,5 @@ import logging +from importlib import import_module from typing import Any, Collection from openinference.instrumentation.openai._request import ( @@ -19,34 +20,36 @@ class OpenAIInstrumentor(BaseInstrumentor): # type: ignore """ - An instrumentor for openai.OpenAI.request and openai.AsyncOpenAI.request + An instrumentor for openai """ + __slots__ = ( + "_original_request", + "_original_async_request", + ) + def instrumentation_dependencies(self) -> Collection[str]: return _instruments def _instrument(self, **kwargs: Any) -> None: - if (include_extra_attributes := kwargs.get("include_extra_attributes")) is None: - include_extra_attributes = True if not (tracer_provider := kwargs.get("tracer_provider")): tracer_provider = 
trace_api.get_tracer_provider() tracer = trace_api.get_tracer(__name__, __version__, tracer_provider) + openai = import_module(_MODULE) + self._original_request = openai.OpenAI.request + self._original_async_request = openai.AsyncOpenAI.request wrap_function_wrapper( module=_MODULE, name="OpenAI.request", - wrapper=_Request( - tracer=tracer, - include_extra_attributes=include_extra_attributes, - ), + wrapper=_Request(tracer=tracer), ) wrap_function_wrapper( module=_MODULE, name="AsyncOpenAI.request", - wrapper=_AsyncRequest( - tracer=tracer, - include_extra_attributes=include_extra_attributes, - ), + wrapper=_AsyncRequest(tracer=tracer), ) def _uninstrument(self, **kwargs: Any) -> None: - pass + openai = import_module(_MODULE) + openai.OpenAI.request = self._original_request + openai.AsyncOpenAI.request = self._original_async_request diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py index 47f444ee8..625e30cc1 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py @@ -25,19 +25,19 @@ def _get_extra_attributes_from_request( cast_to: type, - request_options: Mapping[str, Any], + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: - if not isinstance(request_options, Mapping): + if not isinstance(request_parameters, Mapping): return if cast_to is ChatCompletion: - yield from _get_attributes_from_chat_completion_create_param(request_options) + yield from _get_attributes_from_chat_completion_create_param(request_parameters) elif cast_to is CreateEmbeddingResponse: - yield from _get_attributes_from_embedding_create_param(request_options) + yield from _get_attributes_from_embedding_create_param(request_parameters) elif cast_to is Completion: - yield from _get_attributes_from_completion_create_param(request_options) + yield from _get_attributes_from_completion_create_param(request_parameters) else: try: - yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_options) + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) except Exception: logger.exception("Failed to serialize request options") @@ -55,7 +55,9 @@ def _get_attributes_from_chat_completion_create_param( invocation_params.pop("tools", None) yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(invocation_params) if (input_messages := params.get("messages")) and isinstance(input_messages, Iterable): - for index, input_message in enumerate(input_messages): + # Use reversed() to get the last message first. This is because OTEL has a default limit of + # 128 attributes per span, and flattening increases the number of attributes very quickly. 
+ for index, input_message in reversed(list(enumerate(input_messages))): for key, value in _get_attributes_from_message_param(input_message): yield f"{SpanAttributes.LLM_INPUT_MESSAGES}.{index}.{key}", value diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py index 05d0d0a1d..ebab74ea9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -2,7 +2,7 @@ import logging from functools import singledispatch from importlib import import_module -from types import MappingProxyType, ModuleType +from types import ModuleType from typing import ( Any, Iterable, @@ -43,16 +43,16 @@ @singledispatch def _get_extra_attributes_from_response( response: Any, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: - # this is a fallback (for singledispatch) + # this is a fallback for @singledispatch yield from () @_get_extra_attributes_from_response.register def _( completion: ChatCompletion, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 if model := getattr(completion, "model", None): @@ -71,14 +71,14 @@ def _( @_get_extra_attributes_from_response.register def _( completion: Completion, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 if model := getattr(completion, "model", None): yield SpanAttributes.LLM_MODEL_NAME, model if usage := getattr(completion, "usage", None): yield from _get_attributes_from_completion_usage(usage) - if model_prompt := request_options.get("prompt"): + if model_prompt := request_parameters.get("prompt"): # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -90,7 +90,7 @@ def _( @_get_extra_attributes_from_response.register def _( response: CreateEmbeddingResponse, - request_options: Mapping[str, Any] = MappingProxyType({}), + request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 if usage := getattr(response, "usage", None): @@ -104,7 +104,7 @@ def _( continue for key, value in _get_attributes_from_embedding(embedding): yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value - embedding_input = request_options.get("input") + embedding_input = request_parameters.get("input") for index, text in 
enumerate(_get_texts(embedding_input, model)): # input: Required[Union[str, List[str], List[int], List[List[int]]]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 65e324015..1009e91ed 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -7,10 +7,8 @@ Awaitable, Callable, Dict, - Hashable, Iterator, Mapping, - Optional, Tuple, ) @@ -25,7 +23,6 @@ _CompletionAccumulator, ) from openinference.instrumentation.openai._stream import ( - _ResponseAccumulator, _Stream, ) from openinference.instrumentation.openai._utils import ( @@ -56,25 +53,17 @@ class _WithTracer(ABC): - __slots__ = ( - "_tracer", - "_include_extra_attributes", - ) + __slots__ = ("_tracer",) - def __init__( - self, - tracer: trace_api.Tracer, - include_extra_attributes: bool = True, - ) -> None: + def __init__(self, tracer: trace_api.Tracer) -> None: self._tracer = tracer - self._include_extra_attributes = include_extra_attributes @contextmanager def _start_as_current_span( self, span_name: str, cast_to: type, - request_options: Mapping[str, Any], + request_parameters: Mapping[str, Any], ) -> Iterator[_WithSpan]: span_kind = ( OpenInferenceSpanKindValues.EMBEDDING.value @@ -83,24 +72,20 @@ def _start_as_current_span( ) attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} try: - attributes.update(_as_input_attributes(_io_value_and_type(request_options))) + attributes.update(_as_input_attributes(_io_value_and_type(request_parameters))) except Exception: logger.exception( - f"Failed to get input attributes from request options of " - f"type {type(request_options)}" + f"Failed to get input attributes from request parameters of " + f"type {type(request_parameters)}" ) # Secondary attributes should be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: - extra_attributes = ( - dict(_get_extra_attributes_from_request(cast_to, request_options)) - if self._include_extra_attributes - else {} - ) + extra_attributes = dict(_get_extra_attributes_from_request(cast_to, request_parameters)) except Exception: logger.exception( f"Failed to get extra attributes from request options of " - f"type {type(request_options)}" + f"type {type(request_parameters)}" ) extra_attributes = {} try: @@ -128,7 +113,7 @@ def __call__( if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): return wrapped(*args, **kwargs) try: - cast_to, request_options = _parse_request_args(args) + cast_to, request_parameters = _parse_request_args(args) # E.g. 
cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" span_name: str = cast_to.__name__.split(".")[-1] except Exception: @@ -137,7 +122,7 @@ def __call__( with self._start_as_current_span( span_name=span_name, cast_to=cast_to, - request_options=request_options, + request_parameters=request_parameters, ) as with_span: try: response = wrapped(*args, **kwargs) @@ -151,8 +136,7 @@ def __call__( response=response, with_span=with_span, cast_to=cast_to, - request_options=request_options, - include_extra_attributes=self._include_extra_attributes, + request_parameters=request_parameters, ) except Exception: logger.exception(f"Failed to finalize response of type {type(response)}") @@ -171,7 +155,7 @@ async def __call__( if context_api.get_value(_SUPPRESS_INSTRUMENTATION_KEY): return await wrapped(*args, **kwargs) try: - cast_to, request_options = _parse_request_args(args) + cast_to, request_parameters = _parse_request_args(args) # E.g. cast_to = openai.types.chat.ChatCompletion => span_name = "ChatCompletion" span_name: str = cast_to.__name__.split(".")[-1] except Exception: @@ -180,7 +164,7 @@ async def __call__( with self._start_as_current_span( span_name=span_name, cast_to=cast_to, - request_options=request_options, + request_parameters=request_parameters, ) as with_span: try: response = await wrapped(*args, **kwargs) @@ -194,8 +178,7 @@ async def __call__( response=response, with_span=with_span, cast_to=cast_to, - request_options=request_options, - include_extra_attributes=self._include_extra_attributes, + request_parameters=request_parameters, ) except Exception: logger.exception(f"Failed to finalize response of type {type(response)}") @@ -209,13 +192,13 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # The targeted signature of `request` is here: # https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L846-L847 # noqa: E501 cast_to: type = args[0] - options: Mapping[str, Any] = ( + request_parameters: Mapping[str, Any] = ( json_data # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_models.py#L427 # noqa: E501 if hasattr(args[1], "json_data") and isinstance(json_data := args[1].json_data, Mapping) else {} ) - # FIXME: Because request options is just a Mapping, it can contain any value as long as it + # FIXME: Because request parameters is just a Mapping, it can contain any value as long as it # serializes correctly in an HTTP request body. For example, Enum values may be present if a # third-party library puts them there. Enums can turn into their intended string values via # `json.dumps` when the final HTTP request body is serialized, but can pose problems when we @@ -223,18 +206,25 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] # only the Enums that we know about: e.g. message role sometimes can be an Enum, so we will # convert it only when it's encountered. 
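    # For example, a message role supplied as an Enum member serializes cleanly in the final
    # HTTP request body, but is not a plain string when read here, so the role value is
    # converted to a string only at the point where it is extracted.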
# try: - # options = json.loads(json.dumps(options)) + # request_parameters = json.loads(json.dumps(request_parameters)) # except Exception: # pass - return cast_to, options + return cast_to, request_parameters + + +_RESPONSE_ACCUMULATOR_FACTORIES: Mapping[type, type] = MappingProxyType( + { + ChatCompletion: _ChatCompletionAccumulator, + Completion: _CompletionAccumulator, + } +) def _finalize_response( response: Any, with_span: _WithSpan, cast_to: type, - request_options: Mapping[str, Any], - include_extra_attributes: bool = True, + request_parameters: Mapping[str, Any], ) -> Any: """Monkey-patch the response object to trace the stream, or finish tracing if the response is not a stream. @@ -260,8 +250,18 @@ def _finalize_response( # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. and isinstance(response._parsed, (Stream, AsyncStream)) ): - # For streaming, we need (optional) accumulators to process each chunk iteration. - response_accumulator = _ResponseAccumulators.find(cast_to) + # For streaming, we need an (optional) accumulator to process each chunk iteration. + try: + response_accumulator_factory = _RESPONSE_ACCUMULATOR_FACTORIES.get(cast_to) + response_accumulator = ( + response_accumulator_factory(request_parameters) + if response_accumulator_factory + else None + ) + except Exception: + # E.g. cast_to may not be hashable + logger.exception(f"Failed to get response accumulator for {cast_to}") + response_accumulator = None if hasattr(response, "_parsed") and isinstance( parsed := response._parsed, (Stream, AsyncStream) ): @@ -270,22 +270,19 @@ def _finalize_response( stream=parsed, with_span=with_span, response_accumulator=response_accumulator, - include_extra_attributes=include_extra_attributes, ) return response return _Stream( stream=response, with_span=with_span, response_accumulator=response_accumulator, - include_extra_attributes=include_extra_attributes, ) _finish_tracing( status_code=trace_api.StatusCode.OK, with_span=with_span, has_attributes=_ResponseAttributes( + request_parameters=request_parameters, response=response, - request_options=request_options, - include_extra_attributes=include_extra_attributes, ), ) return response @@ -293,16 +290,14 @@ def _finalize_response( class _ResponseAttributes: __slots__ = ( - "_request_options", "_response", - "_include_extra_attributes", + "_request_parameters", ) def __init__( self, response: Any, - request_options: Mapping[str, Any], - include_extra_attributes: bool = True, + request_parameters: Mapping[str, Any], ) -> None: if hasattr(response, "parse") and callable(response.parse): # E.g. 
see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 @@ -310,50 +305,14 @@ def __init__( response = response.parse() except Exception: logger.exception(f"Failed to parse response of type {type(response)}") - self._request_options = request_options + self._request_parameters = request_parameters self._response = response - self._include_extra_attributes = include_extra_attributes def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from _as_output_attributes(_io_value_and_type(self._response)) def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if self._include_extra_attributes: - yield from _get_extra_attributes_from_response( - self._response, - self._request_options, - ) - - -class _Accumulators(ABC): - _mapping: Mapping[type, type] - - def __init_subclass__(cls, mapping: Mapping[type, type], **kwargs: Any) -> None: - super().__init_subclass__(**kwargs) - cls._mapping = mapping - - @classmethod - def find(cls, cast_to: type) -> Optional[_ResponseAccumulator]: - if not isinstance(cast_to, Hashable): - # `cast_to` may not be hashable - # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L172 # noqa: E501 - return None - try: - factory = cls._mapping.get(cast_to) - except Exception: - logger.exception(f"Failed to get factory for {cast_to}") - return None - return factory() if factory else None - - -class _ResponseAccumulators( - _Accumulators, - ABC, - mapping=MappingProxyType( - { - ChatCompletion: _ChatCompletionAccumulator, - Completion: _CompletionAccumulator, - } - ), -): - ... + yield from _get_extra_attributes_from_response( + self._response, + request_parameters=self._request_parameters, + ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index 4e2abb9a5..e2b8f6807 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -2,7 +2,6 @@ import warnings from collections import defaultdict from copy import deepcopy -from types import MappingProxyType from typing import ( Any, Callable, @@ -21,9 +20,9 @@ ) from openinference.instrumentation.openai._utils import ( _as_output_attributes, - _MimeType, _ValueAndType, ) +from openinference.semconv.trace import OpenInferenceMimeTypeValues from opentelemetry.util.types import AttributeValue from openai.types import Completion @@ -43,9 +42,11 @@ class _ChatCompletionAccumulator: "_is_null", "_values", "_cached_result", + "_request_parameters", ) - def __init__(self) -> None: + def __init__(self, request_parameters: Mapping[str, Any]) -> None: + self._request_parameters = request_parameters self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -89,17 +90,18 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return json_string = json.dumps(result) - yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + yield from _as_output_attributes( + _ValueAndType(json_string, 
OpenInferenceMimeTypeValues.JSON) + ) def get_extra_attributes( self, - request_options: Mapping[str, Any] = MappingProxyType({}), ) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return yield from _get_extra_attributes_from_response( ChatCompletion.construct(**result), - request_options, + self._request_parameters, ) @@ -108,9 +110,11 @@ class _CompletionAccumulator: "_is_null", "_values", "_cached_result", + "_request_parameters", ) - def __init__(self) -> None: + def __init__(self, request_parameters: Mapping[str, Any]) -> None: + self._request_parameters = request_parameters self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -139,17 +143,16 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return json_string = json.dumps(result) - yield from _as_output_attributes(_ValueAndType(json_string, _MimeType.application_json)) + yield from _as_output_attributes( + _ValueAndType(json_string, OpenInferenceMimeTypeValues.JSON) + ) - def get_extra_attributes( - self, - request_options: Mapping[str, Any] = MappingProxyType({}), - ) -> Iterator[Tuple[str, AttributeValue]]: + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return yield from _get_extra_attributes_from_response( Completion.construct(**result), - request_options, + self._request_parameters, ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 5ac8e4f57..6c3d36b9e 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -42,7 +42,6 @@ class _Stream(ObjectProxy): # type: ignore "_self_with_span", "_self_iteration_count", "_self_is_finished", - "_self_include_extra_attributes", "_self_response_accumulator", ) @@ -51,13 +50,11 @@ def __init__( stream: Union[Stream[Any], AsyncStream[Any]], with_span: _WithSpan, response_accumulator: Optional[_ResponseAccumulator] = None, - include_extra_attributes: bool = True, ) -> None: super().__init__(stream) self._self_with_span = with_span self._self_iteration_count = 0 self._self_is_finished = with_span.is_finished - self._self_include_extra_attributes = include_extra_attributes self._self_response_accumulator = response_accumulator def __iter__(self) -> Iterator[Any]: @@ -143,5 +140,5 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from self._self_response_accumulator.get_attributes() def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - if self._self_include_extra_attributes and self._self_response_accumulator is not None: + if self._self_response_accumulator is not None: yield from self._self_response_accumulator.get_extra_attributes() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py index b277236d0..a8b3f3ca9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py +++ 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -1,7 +1,6 @@ import json import logging import warnings -from enum import Enum from importlib.metadata import version from typing import ( Any, @@ -18,7 +17,7 @@ ) from openinference.instrumentation.openai._with_span import _WithSpan -from openinference.semconv.trace import SpanAttributes +from openinference.semconv.trace import OpenInferenceMimeTypeValues, SpanAttributes from opentelemetry import trace as trace_api from opentelemetry.util.types import Attributes, AttributeValue @@ -28,14 +27,9 @@ _OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) -class _MimeType(Enum): - text_plain = "text/plain" - application_json = "application/json" - - class _ValueAndType(NamedTuple): value: str - type: _MimeType + type: OpenInferenceMimeTypeValues def _io_value_and_type(obj: Any) -> _ValueAndType: @@ -49,15 +43,15 @@ def _io_value_and_type(obj: Any) -> _ValueAndType: except Exception: logger.exception("Failed to get model dump json") else: - return _ValueAndType(value, _MimeType.application_json) + return _ValueAndType(value, OpenInferenceMimeTypeValues.JSON) if not isinstance(obj, str) and isinstance(obj, (Sequence, Mapping)): try: value = json.dumps(obj) except Exception: logger.exception("Failed to dump json") else: - return _ValueAndType(value, _MimeType.application_json) - return _ValueAndType(str(obj), _MimeType.text_plain) + return _ValueAndType(value, OpenInferenceMimeTypeValues.JSON) + return _ValueAndType(str(obj), OpenInferenceMimeTypeValues.TEXT) def _as_input_attributes( @@ -66,7 +60,9 @@ def _as_input_attributes( if not value_and_type: return yield SpanAttributes.INPUT_VALUE, value_and_type.value - yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value + # it's TEXT by default, so we can skip to save one attribute + if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: + yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value def _as_output_attributes( @@ -75,7 +71,9 @@ def _as_output_attributes( if not value_and_type: return yield SpanAttributes.OUTPUT_VALUE, value_and_type.value - yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value + # it's TEXT by default, so we can skip to save one attribute + if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: + yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value class _HasAttributes(Protocol): diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py new file mode 100644 index 000000000..96ba01b91 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -0,0 +1,543 @@ +import asyncio +import json +import random +from contextlib import suppress +from importlib.metadata import version +from itertools import count +from typing import ( + Any, + AsyncIterator, + Dict, + Generator, + Iterable, + Iterator, + List, + Mapping, + Sequence, + Tuple, + cast, +) + +import openai +import pytest +from httpx import AsyncByteStream, Response +from openinference.instrumentation.openai import OpenAIInstrumentor +from openinference.semconv.trace import ( + MessageAttributes, + OpenInferenceMimeTypeValues, + OpenInferenceSpanKindValues, + SpanAttributes, + ToolCallAttributes, +) 
+from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.util.types import AttributeValue +from respx import MockRouter + + +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("is_stream", [False, True]) +@pytest.mark.parametrize("status_code", [200, 400]) +def test_chat_completions( + is_async: bool, + is_raw: bool, + is_stream, + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + completion_usage: Dict[str, Any], + model_name: str, + tool_calls_mock_stream, +) -> None: + input_messages: List[Dict[str, Any]] = get_messages() + output_messages: List[Dict[str, Any]] = ( + tool_calls_mock_stream[1] if is_stream else get_messages() + ) + invocation_parameters = { + "stream": is_stream, + "model": randstr(), + "temperature": random.random(), + "n": len(output_messages), + } + url = "https://api.openai.com/v1/chat/completions" + respx_kwargs = { + **( + {"stream": MockAsyncByteStream(tool_calls_mock_stream[0])} + if is_stream + else { + "json": { + "choices": [ + {"index": i, "message": message, "finish_reason": "stop"} + for i, message in enumerate(output_messages) + ], + "model": model_name, + "usage": completion_usage, + } + } + ), + } + respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) + create_kwargs = {"messages": input_messages, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").chat.completions + if is_async + else openai.OpenAI(api_key="sk-").chat.completions + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + async for _ in response: + pass + + asyncio.run(task()) + else: + response = create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + for _ in response: + pass + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.LLM.value + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert ( + OpenInferenceMimeTypeValues(attributes.pop(INPUT_MIME_TYPE, None)) + == OpenInferenceMimeTypeValues.JSON + ) + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + for prefix, messages in ( + (LLM_INPUT_MESSAGES, input_messages), + *(((LLM_OUTPUT_MESSAGES, output_messages),) if status_code == 200 else ()), + ): + for i, message in enumerate(messages): + assert attributes.pop(message_role(prefix, i), None) == message.get("role") + assert attributes.pop(message_content(prefix, i), None) == message.get("content") + if function_call := message.get("function_call"): + assert 
attributes.pop( + message_function_call_name(prefix, i), None + ) == function_call.get("name") + assert attributes.pop( + message_function_call_arguments(prefix, i), None + ) == function_call.get("arguments") + if _openai_version() >= (1, 1, 0) and (tool_calls := message.get("tool_calls")): + for j, tool_call in enumerate(tool_calls): + if function := tool_call.get("function"): + assert attributes.pop( + tool_call_function_name(prefix, i, j), None + ) == function.get("name") + assert attributes.pop( + tool_call_function_arguments(prefix, i, j), None + ) == function.get("arguments") + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert ( + OpenInferenceMimeTypeValues(attributes.pop(OUTPUT_MIME_TYPE, None)) + == OpenInferenceMimeTypeValues.JSON + ) + if not is_stream: + # Usage is not available for streaming in general. + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == completion_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == completion_usage["prompt_tokens"] + assert ( + attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) + == completion_usage["completion_tokens"] + ) + # We left out model_name from our mock stream. + assert attributes.pop(LLM_MODEL_NAME, None) == model_name + assert attributes == {} # this test accounts for all the attributes after popping them + + +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("is_stream", [True]) +@pytest.mark.parametrize("status_code", [200, 400]) +def test_completions( + is_async: bool, + is_raw: bool, + is_stream: bool, + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + completion_usage: Dict[str, Any], + model_name: str, + completions_mock_stream, +) -> None: + prompt: List[str] = get_texts() + output_texts: List[str] = completions_mock_stream[1] if is_stream else get_texts() + invocation_parameters = { + "stream": is_stream, + "model": randstr(), + "temperature": random.random(), + "n": len(output_texts), + } + url = "https://api.openai.com/v1/completions" + respx_kwargs = { + **( + {"stream": MockAsyncByteStream(completions_mock_stream[0])} + if is_stream + else { + "json": { + "choices": [ + {"index": i, "text": text, "finish_reason": "stop"} + for i, text in enumerate(output_texts) + ], + "model": model_name, + "usage": completion_usage, + } + } + ), + } + respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) + create_kwargs = {"prompt": prompt, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").completions + if is_async + else openai.OpenAI(api_key="sk-").completions + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + async for _ in response: + pass + + asyncio.run(task()) + else: + response = create(**create_kwargs) + response = response.parse() if is_raw else response + if is_stream: + for _ in response: + pass + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + 
attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.LLM.value + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert isinstance(attributes.pop(INPUT_MIME_TYPE, None), str) + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert isinstance(attributes.pop(OUTPUT_MIME_TYPE, None), str) + assert list(cast(Sequence[str], attributes.pop(LLM_PROMPTS, None))) == prompt + if not is_stream: + # Usage is not available for streaming in general. + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == completion_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == completion_usage["prompt_tokens"] + assert ( + attributes.pop(LLM_TOKEN_COUNT_COMPLETION, None) + == completion_usage["completion_tokens"] + ) + # We left out model_name from our mock stream. + assert attributes.pop(LLM_MODEL_NAME, None) == model_name + assert attributes == {} # this test accounts for all the attributes after popping them + + +@pytest.fixture(scope="function") +def in_memory_span_exporter() -> InMemorySpanExporter: + return InMemorySpanExporter() + + +@pytest.fixture(autouse=True) +def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, None, None]: + """ + Instruments OpenAI before each test to ensure that the patch is applied + before any tests are run. + """ + resource = Resource(attributes={}) + tracer_provider = trace_sdk.TracerProvider(resource=resource) + span_processor = SimpleSpanProcessor(span_exporter=in_memory_span_exporter) + tracer_provider.add_span_processor(span_processor=span_processor) + OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + yield + OpenAIInstrumentor().uninstrument() + + +@pytest.fixture +def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: + return ( + [ + b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "id": "call_amGrubFmr2FSPHeC5OPgwcNs", "function": {"arguments": "", "name": "get_current_weather"}, "type": "function"}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": ""}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "{\\"lo"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "{\\"lo"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "catio"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "catio"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "n\\": \\"B"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n\\": \\"B"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "osto"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "osto"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "n, MA"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n, MA"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "\\", 
\\"un"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\", \\"un"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "it\\":"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "it\\":"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": " \\"fah"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": " \\"fah"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "renhei"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "renhei"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 0, "function": {"arguments": "t\\"}"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "t\\"}"}, "index": 0}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "id": "call_6QTP4mLSYYzZwt3ZWj77vfZf", "function": {"arguments": "", "name": "get_current_weather"}, "type": "function"}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "{\\"lo"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "{\\"lo"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "catio"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "catio"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "n\\": \\"S"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "n\\": \\"S"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "an F"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "an F"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "ranci"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "ranci"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "sco, C"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "sco, C"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "A\\", "}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "A\\", "}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "\\"unit"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\"unit"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "\\": \\"fa"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "\\": \\"fa"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "hren"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "hren"}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "heit\\""}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": 
{"content": "heit\\""}, "index": 1}]}\n\n', + b'data: {"choices": [{"delta": {"tool_calls": [{"index": 1, "function": {"arguments": "}"}}]}, "index": 0}]}\n\n', # noqa: E501 + b'data: {"choices": [{"delta": {"content": "}"}, "index": 1}]}\n\n', + b'data: {"choices": [{"finish_reason": "tool_calls", "index": 0}]}\n\n', # noqa: E501 + b"data: [DONE]\n", + ], + [ + { + "role": "assistant", + "content": '{"location": "Boston, MA", "unit": "fahrenheit"}', + "tool_calls": [ + { + "id": "call_amGrubFmr2FSPHeC5OPgwcNs", + "function": { + "arguments": '{"location": "Boston, MA", "unit": "fahrenheit"}', + "name": "get_current_weather", + }, + "type": "function", + }, + { + "id": "call_6QTP4mLSYYzZwt3ZWj77vfZf", + "function": { + "arguments": '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + "name": "get_current_weather", + }, + "type": "function", + }, + ], + }, + { + "role": "assistant", + "content": '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + }, + ], + ) + + +@pytest.fixture +def completions_mock_stream() -> Tuple[List[bytes], List[str]]: + return ( + [ + b'data: {"choices": [{"text": "", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "{\\"lo", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "{\\"lo", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "catio", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "catio", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "n\\": \\"S", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "n\\": \\"B", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "an F", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "osto", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "ranci", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "n, MA", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "sco, C", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "\\", \\"un", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "A\\", ", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "it\\":", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "\\"unit", "index": 1}]}\n\n', + b'data: {"choices": [{"text": " \\"fah", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "\\": \\"fa", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "renhei", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "hren", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "t\\"}", "index": 0}]}\n\n', + b'data: {"choices": [{"text": "heit\\"", "index": 1}]}\n\n', + b'data: {"choices": [{"text": "}", "index": 1}]}\n\n', + b"data: [DONE]\n", + ], + [ + '{"location": "Boston, MA", "unit": "fahrenheit"}', + '{"location": "San Francisco, CA", "unit": "fahrenheit"}', + ], + ) + + +@pytest.fixture +def completion_usage() -> Dict[str, Any]: + prompt_tokens = random.randint(1, 1000) + completion_tokens = random.randint(1, 1000) + return { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } + + +@pytest.fixture +def model_name() -> str: + return randstr() + + +@pytest.fixture +def input_messages() -> List[Dict[str, Any]]: + return [{"role": randstr(), "content": randstr()} for _ in range(2)] + + +@pytest.fixture(scope="module") +def seed() -> Iterator[int]: + """ + Use rolling seeds to make debugging easier, because the rolling pseudo-random + values allow conditional breakpoints to be hit precisely (and repeatably). 
+ """ + return count() + + +@pytest.fixture(autouse=True) +def set_seed(seed: Iterator[int]) -> None: + random.seed(next(seed)) + yield + + +def randstr() -> str: + return str(random.random()) + + +def get_texts() -> List[str]: + return [randstr() for _ in range(2)] + + +def get_messages() -> List[Dict[str, Any]]: + messages: List[Dict[str, Any]] = [ + *[{"role": randstr(), "content": randstr()} for _ in range(2)], + *[ + {"role": randstr(), "function_call": {"arguments": randstr(), "name": randstr()}} + for _ in range(2) + ], + *( + [ + { + "role": randstr(), + "tool_calls": [ + {"function": {"arguments": randstr(), "name": randstr()}} for _ in range(2) + ], + } + for _ in range(2) + ] + if _openai_version() >= (1, 1, 0) + else [] + ), + ] + random.shuffle(messages) + return messages + + +def _openai_version() -> Tuple[int, int, int]: + return cast(Tuple[int, int, int], tuple(map(int, version("openai").split(".")[:3]))) + + +def message_role(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_ROLE}" + + +def message_content(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_CONTENT}" + + +def message_function_call_name(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_FUNCTION_CALL_NAME}" + + +def message_function_call_arguments(prefix: str, i: int) -> str: + return f"{prefix}.{i}.{MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON}" + + +def tool_call_function_name(prefix: str, i: int, j: int) -> str: + return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_NAME}" + + +def tool_call_function_arguments(prefix: str, i: int, j: int) -> str: + return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}" + + +class MockAsyncByteStream(AsyncByteStream): + def __init__(self, byte_stream: Iterable[bytes]): + self._byte_stream = byte_stream + + def __iter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string + + async def __aiter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string + + +OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND +INPUT_VALUE = SpanAttributes.INPUT_VALUE +INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE +OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE +OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE +LLM_INVOCATION_PARAMETERS = SpanAttributes.LLM_INVOCATION_PARAMETERS +LLM_MODEL_NAME = SpanAttributes.LLM_MODEL_NAME +LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL +LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT +LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION +LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES +LLM_OUTPUT_MESSAGES = SpanAttributes.LLM_OUTPUT_MESSAGES +LLM_PROMPTS = SpanAttributes.LLM_PROMPTS +MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE +MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT +MESSAGE_FUNCTION_CALL_NAME = MessageAttributes.MESSAGE_FUNCTION_CALL_NAME +MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = MessageAttributes.MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON +MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS +TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME +TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py index 
fc922dc96..c7e5d4ff0 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py @@ -6,7 +6,7 @@ def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): - accumulator = _ChatCompletionAccumulator() + accumulator = _ChatCompletionAccumulator({}) for chunk in chat_completion_chunks: accumulator.process_chunk(chunk) assert accumulator._result() == desired_chat_completion_result diff --git a/python/tox.ini b/python/tox.ini index a38bd176f..cb9edf390 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -3,12 +3,11 @@ isolated_build = True skipsdist = True envlist = py3{8,11}-ci-semconv - py3{8,11}-ci-openai + py3{8,11}-ci-{openai,openai-latest} [testenv] package = wheel wheel_build_env = .pkg -allowlist_externals = git deps = -r dev-requirements.txt changedir = @@ -17,14 +16,13 @@ changedir = commands_pre = semconv: pip install {toxinidir}/openinference-semantic-conventions openai: pip install {toxinidir}/instrumentation/openinference-instrumentation-openai[test] + openai-latest: pip install -U openai commands = ruff: ruff format . --config {toxinidir}/ruff.toml - ruff: ruff . --fix --config {toxinidir}/ruff.toml + ruff: ruff check . --fix --config {toxinidir}/ruff.toml mypy: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases {posargs:src} - test: pytest {posargs} - ci: ruff format . --config {toxinidir}/ruff.toml - ci: git diff --exit-code - ci: ruff . --fix --config {toxinidir}/ruff.toml - ci: git diff --exit-code + test: pytest {posargs:tests} + ci: ruff format . --diff --config {toxinidir}/ruff.toml + ci: ruff check . 
--diff --config {toxinidir}/ruff.toml ci: mypy --config-file {toxinidir}/mypy.ini --explicit-package-bases src ci: pytest tests From e5ccc6d8e66d2dc6a4bebff306fda399808105b5 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:22:42 -0800 Subject: [PATCH 41/44] wip --- .../integration_tests/embeddings.py | 27 +-- .../pyproject.toml | 1 + .../openai/_extra_attributes_from_response.py | 2 +- .../openai/test_instrumentor.py | 192 +++++++++++++----- 4 files changed, 162 insertions(+), 60 deletions(-) diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py index 10b1c11ef..61725aa1b 100644 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py @@ -85,7 +85,8 @@ async def embeddings(**kwargs): async def embeddings_with_raw_response(**kwargs): try: with suppress(openai.BadRequestError): - await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + response = await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) + response except Exception: logger.exception(f"{inspect.stack()[0][3]}({kwargs})") finally: @@ -99,18 +100,18 @@ async def main(*tasks): if __name__ == "__main__": asyncio.run( main( - embeddings(input="hello world"), - embeddings(input="hello world", encoding_format="float"), - embeddings(input="hello world", encoding_format="base64"), - embeddings(input=["hello", "world"]), - embeddings(input=["hello", "world"], encoding_format="float"), - embeddings(input=["hello", "world"], encoding_format="base64"), - embeddings(input=[15339, 1917]), - embeddings(input=[15339, 1917], encoding_format="float"), - embeddings(input=[15339, 1917], encoding_format="base64"), - embeddings(input=[[15339], [14957]]), - embeddings(input=[[15339], [14957]], encoding_format="float"), - embeddings(input=[[15339], [14957]], encoding_format="base64"), + # embeddings(input="hello world"), + # embeddings(input="hello world", encoding_format="float"), + # embeddings(input="hello world", encoding_format="base64"), + # embeddings(input=["hello", "world"]), + # embeddings(input=["hello", "world"], encoding_format="float"), + # embeddings(input=["hello", "world"], encoding_format="base64"), + # embeddings(input=[15339, 1917]), + # embeddings(input=[15339, 1917], encoding_format="float"), + # embeddings(input=[15339, 1917], encoding_format="base64"), + # embeddings(input=[[15339], [14957]]), + # embeddings(input=[[15339], [14957]], encoding_format="float"), + # embeddings(input=[[15339], [14957]], encoding_format="base64"), embeddings_with_raw_response(input="hello world"), embeddings_with_raw_response(input="hello world", encoding_format="float"), embeddings_with_raw_response(input="hello world", encoding_format="base64"), diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 833c613d3..1fbe6ea67 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -39,6 +39,7 @@ test = [ "openai == 1.0.0", "opentelemetry-sdk", "respx", + "numpy", ] [project.urls] diff --git 
a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py index ebab74ea9..e7f7ad63d 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py @@ -97,7 +97,7 @@ def _( yield from _get_attributes_from_embedding_usage(usage) # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L23 # noqa: E501 if model := getattr(response, "model"): - yield f"{EmbeddingAttributes.EMBEDDING_MODEL_NAME}", model + yield f"{SpanAttributes.EMBEDDING_MODEL_NAME}", model if (data := getattr(response, "data", None)) and isinstance(data, Iterable): for embedding in data: if (index := getattr(embedding, "index", None)) is None: diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 96ba01b91..8bf9aad6e 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -15,6 +15,7 @@ Mapping, Sequence, Tuple, + Union, cast, ) @@ -23,6 +24,7 @@ from httpx import AsyncByteStream, Response from openinference.instrumentation.openai import OpenAIInstrumentor from openinference.semconv.trace import ( + EmbeddingAttributes, MessageAttributes, OpenInferenceMimeTypeValues, OpenInferenceSpanKindValues, @@ -170,7 +172,7 @@ async def task() -> None: @pytest.mark.parametrize("is_async", [False, True]) @pytest.mark.parametrize("is_raw", [False, True]) -@pytest.mark.parametrize("is_stream", [True]) +@pytest.mark.parametrize("is_stream", [False, True]) @pytest.mark.parametrize("status_code", [200, 400]) def test_completions( is_async: bool, @@ -268,6 +270,100 @@ async def task() -> None: assert attributes == {} # this test accounts for all the attributes after popping them +@pytest.mark.parametrize("is_async", [False, True]) +@pytest.mark.parametrize("is_raw", [False, True]) +@pytest.mark.parametrize("status_code", [200, 400]) +@pytest.mark.parametrize("encoding_format", ["float", "base64"]) +@pytest.mark.parametrize("input_text", ["hello", ["hello", "world"]]) +def test_embeddings( + is_async: bool, + is_raw: bool, + encoding_format: str, + input_text: Union[str, List[str]], + status_code: int, + respx_mock: MockRouter, + in_memory_span_exporter: InMemorySpanExporter, + model_name: str, +) -> None: + invocation_parameters = { + "model": randstr(), + "encoding_format": encoding_format, + } + embedding_model_name = randstr() + embedding_usage = { + "prompt_tokens": random.randint(10, 100), + "total_tokens": random.randint(10, 100), + } + output_embeddings = [("AACAPwAAAEA=", (1.0, 2.0)), ((2.0, 3.0), (2.0, 3.0))] + url = "https://api.openai.com/v1/embeddings" + respx_mock.post(url).mock( + return_value=Response( + status_code=status_code, + json={ + "object": "list", + "data": [ + {"object": 
"embedding", "index": i, "embedding": embedding[0]} + for i, embedding in enumerate(output_embeddings) + ], + "model": embedding_model_name, + "usage": embedding_usage, + }, + ) + ) + create_kwargs = {"input": input_text, **invocation_parameters} + completions = ( + openai.AsyncOpenAI(api_key="sk-").embeddings + if is_async + else openai.OpenAI(api_key="sk-").embeddings + ) + create = completions.with_raw_response.create if is_raw else completions.create + with suppress(openai.BadRequestError): + if is_async: + + async def task() -> None: + response = await create(**create_kwargs) + _ = response.parse() if is_raw else response + + asyncio.run(task()) + else: + response = create(**create_kwargs) + _ = response.parse() if is_raw else response + spans = in_memory_span_exporter.get_finished_spans() + assert len(spans) == 1 + span: ReadableSpan = spans[0] + if status_code == 200: + assert span.status.is_ok + elif status_code == 400: + assert not span.status.is_ok and not span.status.is_unset + assert len(span.events) == 1 + event = span.events[0] + assert event.name == "exception" + attributes = dict(cast(Mapping[str, AttributeValue], span.attributes)) + assert ( + attributes.pop(OPENINFERENCE_SPAN_KIND, None) == OpenInferenceSpanKindValues.EMBEDDING.value + ) + assert ( + json.loads(cast(str, attributes.pop(LLM_INVOCATION_PARAMETERS, None))) + == invocation_parameters + ) + assert isinstance(attributes.pop(INPUT_VALUE, None), str) + assert isinstance(attributes.pop(INPUT_MIME_TYPE, None), str) + if status_code == 200: + assert isinstance(attributes.pop(OUTPUT_VALUE, None), str) + assert isinstance(attributes.pop(OUTPUT_MIME_TYPE, None), str) + assert attributes.pop(EMBEDDING_MODEL_NAME, None) == embedding_model_name + assert attributes.pop(LLM_TOKEN_COUNT_TOTAL, None) == embedding_usage["total_tokens"] + assert attributes.pop(LLM_TOKEN_COUNT_PROMPT, None) == embedding_usage["prompt_tokens"] + for i, text in enumerate(input_text if isinstance(input_text, list) else [input_text]): + assert attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_TEXT}", None) == text + for i, embedding in enumerate(output_embeddings): + assert ( + attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) + == embedding[1] + ) + assert attributes == {} # this test accounts for all the attributes after popping them + + @pytest.fixture(scope="function") def in_memory_span_exporter() -> InMemorySpanExporter: return InMemorySpanExporter() @@ -288,6 +384,42 @@ def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, OpenAIInstrumentor().uninstrument() +@pytest.fixture(scope="module") +def seed() -> Iterator[int]: + """ + Use rolling seeds to make debugging easier, because the rolling pseudo-random + values allow conditional breakpoints to be hit precisely (and repeatably). 
+ """ + return count() + + +@pytest.fixture(autouse=True) +def set_seed(seed: Iterator[int]) -> None: + random.seed(next(seed)) + yield + + +@pytest.fixture +def completion_usage() -> Dict[str, Any]: + prompt_tokens = random.randint(1, 1000) + completion_tokens = random.randint(1, 1000) + return { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens, + } + + +@pytest.fixture +def model_name() -> str: + return randstr() + + +@pytest.fixture +def input_messages() -> List[Dict[str, Any]]: + return [{"role": randstr(), "content": randstr()} for _ in range(2)] + + @pytest.fixture def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: return ( @@ -411,40 +543,17 @@ def completions_mock_stream() -> Tuple[List[bytes], List[str]]: ) -@pytest.fixture -def completion_usage() -> Dict[str, Any]: - prompt_tokens = random.randint(1, 1000) - completion_tokens = random.randint(1, 1000) - return { - "prompt_tokens": prompt_tokens, - "completion_tokens": completion_tokens, - "total_tokens": prompt_tokens + completion_tokens, - } - - -@pytest.fixture -def model_name() -> str: - return randstr() - - -@pytest.fixture -def input_messages() -> List[Dict[str, Any]]: - return [{"role": randstr(), "content": randstr()} for _ in range(2)] - - -@pytest.fixture(scope="module") -def seed() -> Iterator[int]: - """ - Use rolling seeds to make debugging easier, because the rolling pseudo-random - values allow conditional breakpoints to be hit precisely (and repeatably). - """ - return count() +class MockAsyncByteStream(AsyncByteStream): + def __init__(self, byte_stream: Iterable[bytes]): + self._byte_stream = byte_stream + def __iter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string -@pytest.fixture(autouse=True) -def set_seed(seed: Iterator[int]) -> None: - random.seed(next(seed)) - yield + async def __aiter__(self) -> AsyncIterator[bytes]: + for byte_string in self._byte_stream: + yield byte_string def randstr() -> str: @@ -508,19 +617,6 @@ def tool_call_function_arguments(prefix: str, i: int, j: int) -> str: return f"{prefix}.{i}.{MESSAGE_TOOL_CALLS}.{j}.{TOOL_CALL_FUNCTION_ARGUMENTS_JSON}" -class MockAsyncByteStream(AsyncByteStream): - def __init__(self, byte_stream: Iterable[bytes]): - self._byte_stream = byte_stream - - def __iter__(self) -> AsyncIterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string - - async def __aiter__(self) -> AsyncIterator[bytes]: - for byte_string in self._byte_stream: - yield byte_string - - OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND INPUT_VALUE = SpanAttributes.INPUT_VALUE INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE @@ -541,3 +637,7 @@ async def __aiter__(self) -> AsyncIterator[bytes]: MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON +EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS +EMBEDDING_MODEL_NAME = SpanAttributes.EMBEDDING_MODEL_NAME +EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR +EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT From c20f488d0e81e5c391caccfb9d8dd206e7e8aac1 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:23:14 -0800 Subject: [PATCH 42/44] wip --- .../instrumentation/openai/test_instrumentor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 8bf9aad6e..6480cbf22 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -167,7 +167,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.mark.parametrize("is_async", [False, True]) @@ -267,7 +267,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.mark.parametrize("is_async", [False, True]) @@ -361,7 +361,7 @@ async def task() -> None: attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) == embedding[1] ) - assert attributes == {} # this test accounts for all the attributes after popping them + assert attributes == {} # this test should account for all the attributes after popping them @pytest.fixture(scope="function") From 6bd830ce5cf6deb623a77fbde53cda6ebbd0e692 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Wed, 10 Jan 2024 17:26:30 -0800 Subject: [PATCH 43/44] wip --- .../integration_tests/completions.py | 205 ------------------ .../integration_tests/completions_async.py | 202 ----------------- .../integration_tests/embeddings.py | 138 ------------ .../functions_and_tool_calls.py | 193 ----------------- 4 files changed, 738 deletions(-) delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py delete mode 100644 python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py deleted file mode 100644 index dc4226409..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions.py +++ /dev/null @@ -1,205 +0,0 @@ -import contextvars -import inspect -import logging -import threading -from contextlib import suppress -from importlib.metadata import version -from itertools import chain -from time import sleep - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from 
opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.OpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -HAIKU = "Write a haiku." -HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a résumé." -RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] -CHAT_KWARGS = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": HAIKU}], - "max_tokens": 20, - "temperature": 2, - **( - { - "logprobs": True, - "top_logprobs": 5, - } - if _OPENAI_VERSION >= (1, 5, 0) - else {} - ), -} -COMP_KWARGS = { - "model": "gpt-3.5-turbo-instruct", - "prompt": HAIKU, - "max_tokens": 20, - "temperature": 2, - "logprobs": 5, -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 -_LOCK = threading.Lock() - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - with _LOCK: - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response: - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response: - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - 
response = CLIENT.chat.completions.with_raw_response.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response.parse(): - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = CLIENT.completions.with_raw_response.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - for _ in response.parse(): - sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def tasks(n, task, **kwargs): - for i in range(n): # i = 0 results in intentional BadRequestError - ctx = contextvars.copy_context() - yield threading.Thread( - target=ctx.run, - args=(task,), - kwargs={"n": i, **kwargs}, - ) - - -if __name__ == "__main__": - threads = list( - chain( - tasks(N, completions), - tasks(N, completions_with_raw_response), - tasks(N, completions, stream=True), - tasks(N, completions_with_raw_response, stream=True), - tasks(N, completions, prompt=[HAIKU, RESUME]), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), - tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions, prompt=HAIKU_TOKENS), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), - tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), - tasks( - N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True - ), - tasks(N, chat_completions), - tasks(N, chat_completions_with_raw_response), - tasks(N, chat_completions, stream=True), - tasks(N, chat_completions_with_raw_response, stream=True), - ) - ) - [thread.start() for thread in threads] - [thread.join() for thread in threads] - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py deleted file mode 100644 index 2c94b0d78..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/completions_async.py +++ /dev/null @@ -1,202 +0,0 @@ -import asyncio -import inspect -import logging -from contextlib import suppress -from importlib.metadata import version -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import 
InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -HAIKU = "Write a haiku." -HAIKU_TOKENS = [8144, 264, 6520, 39342, 13] -RESUME = "Write a résumé." -RESUME_TOKENS = [8144, 264, 9517, 1264, 978, 13] -CHAT_KWARGS = { - "model": "gpt-3.5-turbo", - "messages": [{"role": "user", "content": HAIKU}], - "max_tokens": 20, - "temperature": 2, - **( - { - "logprobs": True, - "top_logprobs": 5, - } - if _OPENAI_VERSION >= (1, 5, 0) - else {} - ), -} -COMP_KWARGS = { - "model": "gpt-3.5-turbo-instruct", - "prompt": HAIKU, - "max_tokens": 20, - "temperature": 2, - "logprobs": 5, -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -async def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.create(**{**CHAT_KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.completions.create(**{**COMP_KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await 
CLIENT.chat.completions.with_raw_response.create( - **{**CHAT_KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.completions.with_raw_response.create( - **{**COMP_KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -def tasks(n, task, **kwargs): - return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError - - -async def main(*tasks): - await asyncio.gather(*chain.from_iterable(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - tasks(N, completions), - tasks(N, completions_with_raw_response), - tasks(N, completions, stream=True), - tasks(N, completions_with_raw_response, stream=True), - tasks(N, completions, prompt=[HAIKU, RESUME]), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME]), - tasks(N, completions, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions_with_raw_response, prompt=[HAIKU, RESUME], stream=True), - tasks(N, completions, prompt=HAIKU_TOKENS), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS), - tasks(N, completions, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions_with_raw_response, prompt=HAIKU_TOKENS, stream=True), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS]), - tasks(N, completions, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True), - tasks( - N, completions_with_raw_response, prompt=[HAIKU_TOKENS, RESUME_TOKENS], stream=True - ), - tasks(N, chat_completions), - tasks(N, chat_completions_with_raw_response), - tasks(N, chat_completions, stream=True), - tasks(N, chat_completions_with_raw_response, stream=True), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py deleted file mode 100644 index 61725aa1b..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/embeddings.py +++ /dev/null @@ -1,138 +0,0 @@ -import asyncio -import inspect -import logging -from contextlib import suppress -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = 
Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. -if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -KWARGS = { - "model": "text-embedding-ada-002", -} - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(kwargs): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}({kwargs})") - - -async def embeddings(**kwargs): - try: - with suppress(openai.BadRequestError): - await CLIENT.embeddings.create(**{**KWARGS, **kwargs}) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def embeddings_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.embeddings.with_raw_response.create(**{**KWARGS, **kwargs}) - response - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count(kwargs) - - -async def main(*tasks): - await asyncio.gather(*chain(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - # embeddings(input="hello world"), - # embeddings(input="hello world", encoding_format="float"), - # embeddings(input="hello world", encoding_format="base64"), - # embeddings(input=["hello", "world"]), - # embeddings(input=["hello", "world"], encoding_format="float"), - # embeddings(input=["hello", "world"], encoding_format="base64"), - # embeddings(input=[15339, 1917]), - # embeddings(input=[15339, 1917], encoding_format="float"), - # embeddings(input=[15339, 1917], encoding_format="base64"), - # embeddings(input=[[15339], [14957]]), - # embeddings(input=[[15339], [14957]], encoding_format="float"), - # embeddings(input=[[15339], [14957]], encoding_format="base64"), - embeddings_with_raw_response(input="hello world"), - embeddings_with_raw_response(input="hello world", encoding_format="float"), - embeddings_with_raw_response(input="hello world", 
encoding_format="base64"), - embeddings_with_raw_response(input=["hello", "world"]), - embeddings_with_raw_response(input=["hello", "world"], encoding_format="float"), - embeddings_with_raw_response(input=["hello", "world"], encoding_format="base64"), - embeddings_with_raw_response(input=[15339, 1917]), - embeddings_with_raw_response(input=[15339, 1917], encoding_format="float"), - embeddings_with_raw_response(input=[15339, 1917], encoding_format="base64"), - embeddings_with_raw_response(input=[[15339], [14957]]), - embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="float"), - embeddings_with_raw_response(input=[[15339], [14957]], encoding_format="base64"), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "EMBEDDING" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual diff --git a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py b/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py deleted file mode 100644 index 5166a6da3..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/integration_tests/functions_and_tool_calls.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -Phoenix collector should be running in the background. - -tools requires openai>=1.1.0 -""" -import asyncio -import inspect -import logging -from contextlib import suppress -from importlib.metadata import version -from itertools import chain - -from opentelemetry import trace as trace_api -from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter -from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.sdk.resources import Resource -from opentelemetry.sdk.trace.export import SimpleSpanProcessor -from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter - - -def default_tracer_provider() -> trace_sdk.TracerProvider: - resource = Resource(attributes={}) - tracer_provider = trace_sdk.TracerProvider(resource=resource) - span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") - span_processor = SimpleSpanProcessor(span_exporter=span_exporter) - tracer_provider.add_span_processor(span_processor=span_processor) - return tracer_provider - - -# Instrument httpx to show that it can show up as a child span. -# Note that it must be instrumented before it's imported by openai. -HTTPXClientInstrumentor().instrument() - -# To instrument httpx, it must be monkey-patched before it is imported by openai, so we do it -# like this to prevent the imports from being re-formatted to the top of file. 
-if True: - import openai - from openinference.instrumentation.openai import OpenAIInstrumentor - from openinference.semconv.trace import SpanAttributes - -CLIENT = openai.AsyncOpenAI() - -tracer_provider = default_tracer_provider() -in_memory_span_exporter = InMemorySpanExporter() -tracer_provider.add_span_processor(SimpleSpanProcessor(in_memory_span_exporter)) -trace_api.set_tracer_provider(tracer_provider=tracer_provider) - -OpenAIInstrumentor().instrument() - -_OPENAI_VERSION = tuple(map(int, version("openai").split(".")[:3])) - -N = 3 # iteration i = 0 results in intentional BadRequestError -KWARGS = { - "model": "gpt-4", - "messages": [ - { - "role": "user", - "content": "What's the current time and weather in San Francisco, CA?", - } - ], - "max_tokens": 20, - "temperature": 0, -} -TOOLS = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g., San Francisco, CA", - }, - }, - "required": ["location"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "get_current_time", - "description": "Get the current time in a given location", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g., San Francisco, CA", - }, - }, - "required": ["location"], - }, - }, - }, -] -FUNCTIONS = [tool["function"] for tool in TOOLS] - -for k, v in logging.root.manager.loggerDict.items(): - if k.startswith("openinference.instrumentation.openai") and isinstance(v, logging.Logger): - v.setLevel(logging.DEBUG) - v.handlers.clear() - v.addHandler(logging.StreamHandler()) - -logger = logging.getLogger(__name__) - -_EXPECTED_SPAN_COUNT = 0 - - -def _print_span_count(): - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - global _EXPECTED_SPAN_COUNT - _EXPECTED_SPAN_COUNT += 1 - mark = "✅" if _EXPECTED_SPAN_COUNT <= actual else "❌" - name = inspect.stack()[1][3] - print(f"{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual}; {name}") - - -async def chat_completions(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.create(**{**KWARGS, **kwargs}) - if kwargs.get("stream"): - async for _ in response: - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count() - - -async def chat_completions_with_raw_response(**kwargs): - try: - with suppress(openai.BadRequestError): - response = await CLIENT.chat.completions.with_raw_response.create( - **{**KWARGS, **kwargs} - ) - if kwargs.get("stream"): - async for _ in response.parse(): - await asyncio.sleep(0.005) - except Exception: - logger.exception(f"{inspect.stack()[0][3]}({kwargs})") - finally: - _print_span_count() - - -def tasks(n, task, **kwargs): - return [task(n=i, **kwargs) for i in range(n)] # i = 0 results in intentional BadRequestError - - -async def main(*tasks): - await asyncio.gather(*chain.from_iterable(tasks)) - - -if __name__ == "__main__": - asyncio.run( - main( - tasks(N, chat_completions, functions=FUNCTIONS), - tasks(N, chat_completions, functions=FUNCTIONS, stream=True), - tasks(N, chat_completions_with_raw_response, functions=FUNCTIONS), - tasks(N, 
chat_completions_with_raw_response, functions=FUNCTIONS, stream=True), - *( - [ - tasks(N, chat_completions, tools=TOOLS), - tasks(N, chat_completions, tools=TOOLS, stream=True), - tasks(N, chat_completions_with_raw_response, tools=TOOLS), - tasks(N, chat_completions_with_raw_response, tools=TOOLS, stream=True), - ] - if _OPENAI_VERSION >= (1, 1, 0) - else () - ), - ) - ) - spans = in_memory_span_exporter.get_finished_spans() - llm_spans = [ - span - for span in spans - if span.attributes.get(SpanAttributes.OPENINFERENCE_SPAN_KIND) == "LLM" - ] - actual = len(llm_spans) - mark = "✅" if _EXPECTED_SPAN_COUNT == actual else "❌" - print(f"\n{mark} expected {_EXPECTED_SPAN_COUNT}; actual {actual};") - assert _EXPECTED_SPAN_COUNT == actual From 50be6c8130a5e06fe27cd1ca338a30260a3aa176 Mon Sep 17 00:00:00 2001 From: Roger Yang Date: Thu, 11 Jan 2024 08:36:49 -0800 Subject: [PATCH 44/44] wip --- .../examples/chat_completions.py | 25 ++ ...am.py => chat_completions_async_stream.py} | 0 .../examples/embeddings.py | 24 ++ .../examples/with_httpx_instrumentor.py | 29 ++ .../pyproject.toml | 1 + .../instrumentation/openai/__init__.py | 4 +- .../instrumentation/openai/_request.py | 301 +++++++++++------- ...st.py => _request_attributes_extractor.py} | 61 ++-- .../openai/_response_accumulator.py | 75 +++-- ...e.py => _response_attributes_extractor.py} | 73 +++-- .../instrumentation/openai/_stream.py | 6 +- .../instrumentation/openai/_utils.py | 6 +- .../openai/test_instrumentor.py | 82 +++-- .../openai/test_response_accumulator.py | 150 --------- 14 files changed, 461 insertions(+), 376 deletions(-) create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py rename python/instrumentation/openinference-instrumentation-openai/examples/{chat_completion_async_stream.py => chat_completions_async_stream.py} (100%) create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py create mode 100644 python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py rename python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/{_extra_attributes_from_request.py => _request_attributes_extractor.py} (77%) rename python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/{_extra_attributes_from_response.py => _response_attributes_extractor.py} (80%) delete mode 100644 python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py new file mode 100644 index 000000000..39c845f70 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions.py @@ -0,0 +1,25 @@ +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor 
= SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + response = openai.OpenAI().chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + ) + print(response.choices[0].message.content) diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py b/python/instrumentation/openinference-instrumentation-openai/examples/chat_completions_async_stream.py similarity index 100% rename from python/instrumentation/openinference-instrumentation-openai/examples/chat_completion_async_stream.py rename to python/instrumentation/openinference-instrumentation-openai/examples/chat_completions_async_stream.py diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py b/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py new file mode 100644 index 000000000..812d721e0 --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/embeddings.py @@ -0,0 +1,24 @@ +import openai +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + response = openai.OpenAI().embeddings.create( + model="text-embedding-ada-002", + input="hello world", + ) + print(response.data[0].embedding) diff --git a/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py new file mode 100644 index 000000000..9ba20b9fd --- /dev/null +++ b/python/instrumentation/openinference-instrumentation-openai/examples/with_httpx_instrumentor.py @@ -0,0 +1,29 @@ +from importlib import import_module + +from openinference.instrumentation.openai import OpenAIInstrumentor +from opentelemetry import trace as trace_api +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace.export import SimpleSpanProcessor + +resource = Resource(attributes={}) +tracer_provider = trace_sdk.TracerProvider(resource=resource) +span_exporter = OTLPSpanExporter(endpoint="http://127.0.0.1:6006/v1/traces") +span_processor = SimpleSpanProcessor(span_exporter=span_exporter) +tracer_provider.add_span_processor(span_processor=span_processor) +trace_api.set_tracer_provider(tracer_provider=tracer_provider) + +HTTPXClientInstrumentor().instrument() 
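+# Note: httpx is instrumented before `openai` is imported (hence the deferred
+# `import_module("openai")` below), so that the underlying HTTP request can show
+# up as a child span of the OpenAI span.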
+OpenAIInstrumentor().instrument() + + +if __name__ == "__main__": + openai = import_module("openai") + response = openai.OpenAI().chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Write a haiku."}], + max_tokens=20, + ) + print(response.choices[0].message.content) diff --git a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml index 1fbe6ea67..33add988d 100644 --- a/python/instrumentation/openinference-instrumentation-openai/pyproject.toml +++ b/python/instrumentation/openinference-instrumentation-openai/pyproject.toml @@ -38,6 +38,7 @@ instruments = [ test = [ "openai == 1.0.0", "opentelemetry-sdk", + "opentelemetry-instrumentation-httpx", "respx", "numpy", ] diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py index 1cec04144..e3b4faf8c 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/__init__.py @@ -41,12 +41,12 @@ def _instrument(self, **kwargs: Any) -> None: wrap_function_wrapper( module=_MODULE, name="OpenAI.request", - wrapper=_Request(tracer=tracer), + wrapper=_Request(tracer=tracer, openai=openai), ) wrap_function_wrapper( module=_MODULE, name="AsyncOpenAI.request", - wrapper=_AsyncRequest(tracer=tracer), + wrapper=_AsyncRequest(tracer=tracer, openai=openai), ) def _uninstrument(self, **kwargs: Any) -> None: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py index 1009e91ed..7e16a6ae1 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request.py @@ -1,28 +1,29 @@ import logging from abc import ABC from contextlib import contextmanager -from types import MappingProxyType +from types import ModuleType from typing import ( Any, Awaitable, Callable, - Dict, + Iterable, Iterator, Mapping, Tuple, ) -from openinference.instrumentation.openai._extra_attributes_from_request import ( - _get_extra_attributes_from_request, -) -from openinference.instrumentation.openai._extra_attributes_from_response import ( - _get_extra_attributes_from_response, +from openinference.instrumentation.openai._request_attributes_extractor import ( + _RequestAttributesExtractor, ) from openinference.instrumentation.openai._response_accumulator import ( _ChatCompletionAccumulator, _CompletionAccumulator, ) +from openinference.instrumentation.openai._response_attributes_extractor import ( + _ResponseAttributesExtractor, +) from openinference.instrumentation.openai._stream import ( + _ResponseAccumulator, _Stream, ) from openinference.instrumentation.openai._utils import ( @@ -38,10 +39,7 @@ from opentelemetry.context import _SUPPRESS_INSTRUMENTATION_KEY from opentelemetry.trace import INVALID_SPAN from opentelemetry.util.types import AttributeValue - -from openai import AsyncStream, Stream -from openai.types import Completion, 
CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +from typing_extensions import TypeAlias __all__ = ( "_Request", @@ -53,56 +51,180 @@ class _WithTracer(ABC): - __slots__ = ("_tracer",) - - def __init__(self, tracer: trace_api.Tracer) -> None: + def __init__(self, tracer: trace_api.Tracer, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) self._tracer = tracer @contextmanager def _start_as_current_span( self, span_name: str, - cast_to: type, - request_parameters: Mapping[str, Any], + attributes: Iterable[Tuple[str, AttributeValue]], + extra_attributes: Iterable[Tuple[str, AttributeValue]], ) -> Iterator[_WithSpan]: - span_kind = ( + # Because OTEL has a default limit of 128 attributes, we split our attributes into + # two tiers, where the addition of "extra_attributes" is deferred until the end + # and only after the "attributes" are added. + try: + span = self._tracer.start_span(name=span_name, attributes=dict(attributes)) + except Exception: + logger.exception("Failed to start span") + span = INVALID_SPAN + with trace_api.use_span( + span, + end_on_exit=False, + record_exception=False, + set_status_on_exception=False, + ) as span: + yield _WithSpan(span=span, extra_attributes=dict(extra_attributes)) + + +_RequestParameters: TypeAlias = Mapping[str, Any] + + +class _WithOpenAI(ABC): + __slots__ = ( + "_openai", + "_stream_types", + "_request_attributes_extractor", + "_response_attributes_extractor", + "_response_accumulator_factories", + ) + + def __init__(self, openai: ModuleType, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._openai = openai + self._stream_types = (openai.Stream, openai.AsyncStream) + self._request_attributes_extractor = _RequestAttributesExtractor(openai=openai) + self._response_attributes_extractor = _ResponseAttributesExtractor(openai=openai) + self._response_accumulator_factories: Mapping[ + type, Callable[[_RequestParameters], _ResponseAccumulator] + ] = { + openai.types.Completion: lambda request_parameters: _CompletionAccumulator( + request_parameters=request_parameters, + completion_type=openai.types.Completion, + response_attributes_extractor=self._response_attributes_extractor, + ), + openai.types.chat.ChatCompletion: lambda request_parameters: _ChatCompletionAccumulator( + request_parameters=request_parameters, + chat_completion_type=openai.types.chat.ChatCompletion, + response_attributes_extractor=self._response_attributes_extractor, + ), + } + + def _get_span_kind(self, cast_to: type) -> str: + return ( OpenInferenceSpanKindValues.EMBEDDING.value - if cast_to is CreateEmbeddingResponse + if cast_to is self._openai.types.CreateEmbeddingResponse else OpenInferenceSpanKindValues.LLM.value ) - attributes: Dict[str, AttributeValue] = {SpanAttributes.OPENINFERENCE_SPAN_KIND: span_kind} + + def _get_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + yield SpanAttributes.OPENINFERENCE_SPAN_KIND, self._get_span_kind(cast_to=cast_to) try: - attributes.update(_as_input_attributes(_io_value_and_type(request_parameters))) + yield from _as_input_attributes(_io_value_and_type(request_parameters)) except Exception: logger.exception( f"Failed to get input attributes from request parameters of " f"type {type(request_parameters)}" ) + + def _get_extra_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: # Secondary attributes should 
be added after input and output to ensure # that input and output are not dropped if there are too many attributes. try: - extra_attributes = dict(_get_extra_attributes_from_request(cast_to, request_parameters)) + yield from self._request_attributes_extractor.get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ) except Exception: logger.exception( f"Failed to get extra attributes from request options of " f"type {type(request_parameters)}" ) - extra_attributes = {} - try: - span = self._tracer.start_span(span_name, attributes=attributes) - except Exception: - logger.exception("Failed to start span") - span = INVALID_SPAN - with trace_api.use_span( - span, - end_on_exit=False, - record_exception=False, - set_status_on_exception=False, - ) as span: - yield _WithSpan(span, extra_attributes) + def _is_streaming(self, response: Any) -> bool: + return isinstance(response, self._stream_types) -class _Request(_WithTracer): + def _finalize_response( + self, + response: Any, + with_span: _WithSpan, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Any: + """ + Monkey-patch the response object to trace the stream, or finish tracing if the response is + not a stream. + """ + + if hasattr(response, "parse") and callable(response.parse): + # `.request()` may be called under `.with_raw_response` and it's necessary to call + # `.parse()` to get back the usual response types. + # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 + try: + response.parse() + except Exception: + logger.exception(f"Failed to parse response of type {type(response)}") + if ( + self._is_streaming(response) + or hasattr( + # FIXME: Ideally we should not rely on a private attribute (but it may be impossible). + # The assumption here is that calling `.parse()` stores the stream object in `._parsed` + # and calling `.parse()` again will not overwrite the monkey-patched version. + # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 + response, + "_parsed", + ) + # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. + and self._is_streaming(response._parsed) + ): + # For streaming, we need an (optional) accumulator to process each chunk iteration. + try: + response_accumulator_factory = self._response_accumulator_factories.get(cast_to) + response_accumulator = ( + response_accumulator_factory(request_parameters) + if response_accumulator_factory + else None + ) + except Exception: + # Note that cast_to may not be hashable. + logger.exception(f"Failed to get response accumulator for {cast_to}") + response_accumulator = None + if hasattr(response, "_parsed") and self._is_streaming(parsed := response._parsed): + # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. 
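+                # If that assumption holds, a later `.parse()` call returns the traced
+                # stream, and the wrapper can end the span once the stream is exhausted.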
+ response._parsed = _Stream( + stream=parsed, + with_span=with_span, + response_accumulator=response_accumulator, + ) + return response + return _Stream( + stream=response, + with_span=with_span, + response_accumulator=response_accumulator, + ) + _finish_tracing( + status_code=trace_api.StatusCode.OK, + with_span=with_span, + has_attributes=_ResponseAttributes( + request_parameters=request_parameters, + response=response, + response_attributes_extractor=self._response_attributes_extractor, + ), + ) + return response + + +class _Request(_WithTracer, _WithOpenAI): def __call__( self, wrapped: Callable[..., Any], @@ -121,8 +243,14 @@ def __call__( return wrapped(*args, **kwargs) with self._start_as_current_span( span_name=span_name, - cast_to=cast_to, - request_parameters=request_parameters, + attributes=self._get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), + extra_attributes=self._get_extra_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), ) as with_span: try: response = wrapped(*args, **kwargs) @@ -132,7 +260,7 @@ def __call__( with_span.finish_tracing(status_code=status_code) raise try: - response = _finalize_response( + response = self._finalize_response( response=response, with_span=with_span, cast_to=cast_to, @@ -144,7 +272,7 @@ def __call__( return response -class _AsyncRequest(_WithTracer): +class _AsyncRequest(_WithTracer, _WithOpenAI): async def __call__( self, wrapped: Callable[..., Awaitable[Any]], @@ -163,8 +291,14 @@ async def __call__( return await wrapped(*args, **kwargs) with self._start_as_current_span( span_name=span_name, - cast_to=cast_to, - request_parameters=request_parameters, + attributes=self._get_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), + extra_attributes=self._get_extra_attributes_from_request( + cast_to=cast_to, + request_parameters=request_parameters, + ), ) as with_span: try: response = await wrapped(*args, **kwargs) @@ -174,7 +308,7 @@ async def __call__( with_span.finish_tracing(status_code=status_code) raise try: - response = _finalize_response( + response = self._finalize_response( response=response, with_span=with_span, cast_to=cast_to, @@ -212,92 +346,18 @@ def _parse_request_args(args: Tuple[type, Any]) -> Tuple[type, Mapping[str, Any] return cast_to, request_parameters -_RESPONSE_ACCUMULATOR_FACTORIES: Mapping[type, type] = MappingProxyType( - { - ChatCompletion: _ChatCompletionAccumulator, - Completion: _CompletionAccumulator, - } -) - - -def _finalize_response( - response: Any, - with_span: _WithSpan, - cast_to: type, - request_parameters: Mapping[str, Any], -) -> Any: - """Monkey-patch the response object to trace the stream, or finish tracing if the response is - not a stream. - """ - if hasattr(response, "parse") and callable(response.parse): - # `.request()` may be called under `.with_raw_response` and it's necessary to call - # `.parse()` to get back the usual response types. - # E.g. see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 - try: - response.parse() - except Exception: - logger.exception(f"Failed to parse response of type {type(response)}") - if ( - isinstance(response, (Stream, AsyncStream)) - or hasattr( - # FIXME: Ideally we should not rely on a private attribute (but it may be impossible). 
- # The assumption here is that calling `.parse()` stores the stream object in `._parsed` - # and calling `.parse()` again will not overwrite the monkey-patched version. - # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_response.py#L65 # noqa: E501 - response, - "_parsed", - ) - # Note that we must have called `.parse()` beforehand, otherwise `._parsed` is None. - and isinstance(response._parsed, (Stream, AsyncStream)) - ): - # For streaming, we need an (optional) accumulator to process each chunk iteration. - try: - response_accumulator_factory = _RESPONSE_ACCUMULATOR_FACTORIES.get(cast_to) - response_accumulator = ( - response_accumulator_factory(request_parameters) - if response_accumulator_factory - else None - ) - except Exception: - # E.g. cast_to may not be hashable - logger.exception(f"Failed to get response accumulator for {cast_to}") - response_accumulator = None - if hasattr(response, "_parsed") and isinstance( - parsed := response._parsed, (Stream, AsyncStream) - ): - # Monkey-patch a private attribute assumed to be caching the output of `.parse()`. - response._parsed = _Stream( - stream=parsed, - with_span=with_span, - response_accumulator=response_accumulator, - ) - return response - return _Stream( - stream=response, - with_span=with_span, - response_accumulator=response_accumulator, - ) - _finish_tracing( - status_code=trace_api.StatusCode.OK, - with_span=with_span, - has_attributes=_ResponseAttributes( - request_parameters=request_parameters, - response=response, - ), - ) - return response - - class _ResponseAttributes: __slots__ = ( "_response", "_request_parameters", + "_response_attributes_extractor", ) def __init__( self, response: Any, request_parameters: Mapping[str, Any], + response_attributes_extractor: _ResponseAttributesExtractor, ) -> None: if hasattr(response, "parse") and callable(response.parse): # E.g. 
see https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/_base_client.py#L518 # noqa: E501 @@ -307,12 +367,13 @@ def __init__( logger.exception(f"Failed to parse response of type {type(response)}") self._request_parameters = request_parameters self._response = response + self._response_attributes_extractor = response_attributes_extractor def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: yield from _as_output_attributes(_io_value_and_type(self._response)) def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: - yield from _get_extra_attributes_from_response( - self._response, + yield from self._response_attributes_extractor.get_attributes_from_response( + response=self._response, request_parameters=self._request_parameters, ) diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py similarity index 77% rename from python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py rename to python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py index 625e30cc1..324346160 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_request.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_request_attributes_extractor.py @@ -1,45 +1,66 @@ import json import logging from enum import Enum +from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Iterable, Iterator, List, Mapping, Tuple, + Type, ) from openinference.instrumentation.openai._utils import _OPENAI_VERSION from openinference.semconv.trace import MessageAttributes, SpanAttributes, ToolCallAttributes from opentelemetry.util.types import AttributeValue -from openai.types import Completion, CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +if TYPE_CHECKING: + from openai.types import Completion, CreateEmbeddingResponse + from openai.types.chat import ChatCompletion -__all__ = ("_get_extra_attributes_from_request",) +__all__ = ("_RequestAttributesExtractor",) logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -def _get_extra_attributes_from_request( - cast_to: type, - request_parameters: Mapping[str, Any], -) -> Iterator[Tuple[str, AttributeValue]]: - if not isinstance(request_parameters, Mapping): - return - if cast_to is ChatCompletion: - yield from _get_attributes_from_chat_completion_create_param(request_parameters) - elif cast_to is CreateEmbeddingResponse: - yield from _get_attributes_from_embedding_create_param(request_parameters) - elif cast_to is Completion: - yield from _get_attributes_from_completion_create_param(request_parameters) - else: - try: - yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) - except Exception: - logger.exception("Failed to serialize request options") +class _RequestAttributesExtractor: + __slots__ = ( + "_openai", + "_chat_completion_type", + "_completion_type", + "_create_embedding_response_type", + ) + + def __init__(self, openai: ModuleType) -> None: + self._openai = openai + self._chat_completion_type: Type["ChatCompletion"] = 
openai.types.chat.ChatCompletion + self._completion_type: Type["Completion"] = openai.types.Completion + self._create_embedding_response_type: Type[ + "CreateEmbeddingResponse" + ] = openai.types.CreateEmbeddingResponse + + def get_attributes_from_request( + self, + cast_to: type, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + if not isinstance(request_parameters, Mapping): + return + if cast_to is self._chat_completion_type: + yield from _get_attributes_from_chat_completion_create_param(request_parameters) + elif cast_to is self._create_embedding_response_type: + yield from _get_attributes_from_embedding_create_param(request_parameters) + elif cast_to is self._completion_type: + yield from _get_attributes_from_completion_create_param(request_parameters) + else: + try: + yield SpanAttributes.LLM_INVOCATION_PARAMETERS, json.dumps(request_parameters) + except Exception: + logger.exception("Failed to serialize request options") def _get_attributes_from_chat_completion_create_param( diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py index e2b8f6807..04e43f740 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_accumulator.py @@ -3,6 +3,7 @@ from collections import defaultdict from copy import deepcopy from typing import ( + TYPE_CHECKING, Any, Callable, DefaultDict, @@ -12,12 +13,11 @@ List, Mapping, Optional, + Protocol, Tuple, + Type, ) -from openinference.instrumentation.openai._extra_attributes_from_response import ( - _get_extra_attributes_from_response, -) from openinference.instrumentation.openai._utils import ( _as_output_attributes, _ValueAndType, @@ -25,11 +25,9 @@ from openinference.semconv.trace import OpenInferenceMimeTypeValues from opentelemetry.util.types import AttributeValue -from openai.types import Completion -from openai.types.chat import ( - ChatCompletion, - ChatCompletionChunk, -) +if TYPE_CHECKING: + from openai.types import Completion + from openai.types.chat import ChatCompletion, ChatCompletionChunk __all__ = ( "_CompletionAccumulator", @@ -37,16 +35,34 @@ ) +class _CanGetAttributesFromResponse(Protocol): + def get_attributes_from_response( + self, + response: Any, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + ... 
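+# The Protocol above is the structural interface the injected extractor must satisfy:
+# any object exposing a matching `get_attributes_from_response` method can be passed
+# to the accumulators below, without importing the concrete extractor class here.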
+ + class _ChatCompletionAccumulator: __slots__ = ( "_is_null", "_values", "_cached_result", "_request_parameters", + "_response_attributes_extractor", + "_chat_completion_type", ) - def __init__(self, request_parameters: Mapping[str, Any]) -> None: + def __init__( + self, + request_parameters: Mapping[str, Any], + chat_completion_type: Type["ChatCompletion"], + response_attributes_extractor: Optional[_CanGetAttributesFromResponse] = None, + ) -> None: + self._chat_completion_type = chat_completion_type self._request_parameters = request_parameters + self._response_attributes_extractor = response_attributes_extractor self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( @@ -65,9 +81,7 @@ def __init__(self, request_parameters: Mapping[str, Any]) -> None: ), ) - def process_chunk(self, chunk: ChatCompletionChunk) -> None: - if not isinstance(chunk, ChatCompletionChunk): - return + def process_chunk(self, chunk: "ChatCompletionChunk") -> None: self._is_null = False self._cached_result = None with warnings.catch_warnings(): @@ -94,15 +108,14 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: _ValueAndType(json_string, OpenInferenceMimeTypeValues.JSON) ) - def get_extra_attributes( - self, - ) -> Iterator[Tuple[str, AttributeValue]]: + def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return - yield from _get_extra_attributes_from_response( - ChatCompletion.construct(**result), - self._request_parameters, - ) + if self._response_attributes_extractor: + yield from self._response_attributes_extractor.get_attributes_from_response( + self._chat_completion_type.construct(**result), + self._request_parameters, + ) class _CompletionAccumulator: @@ -111,19 +124,26 @@ class _CompletionAccumulator: "_values", "_cached_result", "_request_parameters", + "_response_attributes_extractor", + "_completion_type", ) - def __init__(self, request_parameters: Mapping[str, Any]) -> None: + def __init__( + self, + request_parameters: Mapping[str, Any], + completion_type: Type["Completion"], + response_attributes_extractor: Optional[_CanGetAttributesFromResponse] = None, + ) -> None: + self._completion_type = completion_type self._request_parameters = request_parameters + self._response_attributes_extractor = response_attributes_extractor self._is_null = True self._cached_result: Optional[Dict[str, Any]] = None self._values = _ValuesAccumulator( choices=_IndexedAccumulator(lambda: _ValuesAccumulator(text=_StringAccumulator())), ) - def process_chunk(self, chunk: Completion) -> None: - if not isinstance(chunk, Completion): - return + def process_chunk(self, chunk: "Completion") -> None: self._is_null = False self._cached_result = None with warnings.catch_warnings(): @@ -150,10 +170,11 @@ def get_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: def get_extra_attributes(self) -> Iterator[Tuple[str, AttributeValue]]: if not (result := self._result()): return - yield from _get_extra_attributes_from_response( - Completion.construct(**result), - self._request_parameters, - ) + if self._response_attributes_extractor: + yield from self._response_attributes_extractor.get_attributes_from_response( + self._completion_type.construct(**result), + self._request_parameters, + ) class _ValuesAccumulator: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py 
b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py similarity index 80% rename from python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py rename to python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py index e7f7ad63d..7d07f558a 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_extra_attributes_from_response.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_response_attributes_extractor.py @@ -1,9 +1,9 @@ import base64 import logging -from functools import singledispatch from importlib import import_module from types import ModuleType from typing import ( + TYPE_CHECKING, Any, Iterable, Iterator, @@ -11,6 +11,7 @@ Optional, Sequence, Tuple, + Type, ) from openinference.instrumentation.openai._utils import ( @@ -25,10 +26,11 @@ ) from opentelemetry.util.types import AttributeValue -from openai.types import Completion, CreateEmbeddingResponse -from openai.types.chat import ChatCompletion +if TYPE_CHECKING: + from openai.types import Completion, CreateEmbeddingResponse + from openai.types.chat import ChatCompletion -__all__ = ("_get_extra_attributes_from_response",) +__all__ = ("_ResponseAttributesExtractor",) logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) @@ -40,18 +42,48 @@ _NUMPY = None -@singledispatch -def _get_extra_attributes_from_response( - response: Any, - request_parameters: Mapping[str, Any], -) -> Iterator[Tuple[str, AttributeValue]]: - # this is a fallback for @singledispatch - yield from () +class _ResponseAttributesExtractor: + __slots__ = ( + "_openai", + "_chat_completion_type", + "_completion_type", + "_create_embedding_response_type", + ) + + def __init__(self, openai: ModuleType) -> None: + self._openai = openai + self._chat_completion_type: Type["ChatCompletion"] = openai.types.chat.ChatCompletion + self._completion_type: Type["Completion"] = openai.types.Completion + self._create_embedding_response_type: Type[ + "CreateEmbeddingResponse" + ] = openai.types.CreateEmbeddingResponse + + def get_attributes_from_response( + self, + response: Any, + request_parameters: Mapping[str, Any], + ) -> Iterator[Tuple[str, AttributeValue]]: + if isinstance(response, self._chat_completion_type): + yield from _get_attributes_from_chat_completion( + completion=response, + request_parameters=request_parameters, + ) + elif isinstance(response, self._create_embedding_response_type): + yield from _get_attributes_from_create_embedding_response( + response=response, + request_parameters=request_parameters, + ) + elif isinstance(response, self._completion_type): + yield from _get_attributes_from_completion( + completion=response, + request_parameters=request_parameters, + ) + else: + yield from () -@_get_extra_attributes_from_response.register -def _( - completion: ChatCompletion, +def _get_attributes_from_chat_completion( + completion: "ChatCompletion", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/chat/chat_completion.py#L40 # noqa: E501 @@ -68,9 +100,8 @@ def _( yield f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.{index}.{key}", value 
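# --- Editor's note (not part of the patch): the rename above also swaps
# functools.singledispatch for plain isinstance dispatch against types resolved
# from the injected openai module, so the extractor module no longer needs a
# top-level `import openai`. A hedged sketch of that pattern, with illustrative
# names only:
from importlib import import_module
from types import ModuleType
from typing import Any, Iterator, Tuple


class _LazyDispatcher:
    def __init__(self, openai: ModuleType) -> None:
        # Resolve the response classes once from the already-imported module.
        self._chat_completion_type = openai.types.chat.ChatCompletion
        self._completion_type = openai.types.Completion

    def describe(self, response: Any) -> Iterator[Tuple[str, str]]:
        if isinstance(response, self._chat_completion_type):
            yield "kind", "chat.completion"
        elif isinstance(response, self._completion_type):
            yield "kind", "text_completion"
        # Anything else yields nothing, mirroring the extractor's fallback above.


# Usage (assumes the openai package is installed):
# dispatcher = _LazyDispatcher(import_module("openai"))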
-@_get_extra_attributes_from_response.register -def _( - completion: Completion, +def _get_attributes_from_completion( + completion: "Completion", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion.py#L13 # noqa: E501 @@ -79,6 +110,7 @@ def _( if usage := getattr(completion, "usage", None): yield from _get_attributes_from_completion_usage(usage) if model_prompt := request_parameters.get("prompt"): + # FIXME: this step should move to request attributes extractor if decoding is not necessary. # prompt: Required[Union[str, List[str], List[int], List[List[int]], None]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/completion_create_params.py#L38 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -87,9 +119,8 @@ def _( yield SpanAttributes.LLM_PROMPTS, prompts -@_get_extra_attributes_from_response.register -def _( - response: CreateEmbeddingResponse, +def _get_attributes_from_create_embedding_response( + response: "CreateEmbeddingResponse", request_parameters: Mapping[str, Any], ) -> Iterator[Tuple[str, AttributeValue]]: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/create_embedding_response.py#L20 # noqa: E501 @@ -106,6 +137,7 @@ def _( yield f"{SpanAttributes.EMBEDDING_EMBEDDINGS}.{index}.{key}", value embedding_input = request_parameters.get("input") for index, text in enumerate(_get_texts(embedding_input, model)): + # FIXME: this step should move to request attributes extractor if decoding is not necessary. # input: Required[Union[str, List[str], List[int], List[List[int]]]] # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/types/embedding_create_params.py#L12 # noqa: E501 # FIXME: tokens (List[int], List[List[int]]) can't be decoded reliably because model @@ -130,6 +162,7 @@ def _get_attributes_from_embedding( vector = list(_vector) yield f"{EmbeddingAttributes.EMBEDDING_VECTOR}", vector elif isinstance(_vector, str) and _vector and _NUMPY: + # FIXME: this step should be removed if decoding is not necessary. 
try: # See https://github.com/openai/openai-python/blob/f1c7d714914e3321ca2e72839fe2d132a8646e7f/src/openai/resources/embeddings.py#L100 # noqa: E501 vector = _NUMPY.frombuffer(base64.b64decode(_vector), dtype="float32").tolist() diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py index 6c3d36b9e..34576a9c9 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_stream.py @@ -1,5 +1,6 @@ import logging from typing import ( + TYPE_CHECKING, Any, AsyncIterator, Iterator, @@ -15,7 +16,8 @@ from opentelemetry.util.types import AttributeValue from wrapt import ObjectProxy -from openai import AsyncStream, Stream +if TYPE_CHECKING: + from openai import AsyncStream, Stream __all__ = ( "_Stream", @@ -47,7 +49,7 @@ class _Stream(ObjectProxy): # type: ignore def __init__( self, - stream: Union[Stream[Any], AsyncStream[Any]], + stream: Union["Stream[Any]", "AsyncStream[Any]"], with_span: _WithSpan, response_accumulator: Optional[_ResponseAccumulator] = None, ) -> None: diff --git a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py index a8b3f3ca9..ab8ddb803 100644 --- a/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py +++ b/python/instrumentation/openinference-instrumentation-openai/src/openinference/instrumentation/openai/_utils.py @@ -60,7 +60,7 @@ def _as_input_attributes( if not value_and_type: return yield SpanAttributes.INPUT_VALUE, value_and_type.value - # it's TEXT by default, so we can skip to save one attribute + # It's assumed to be TEXT by default, so we can skip to save one attribute. if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: yield SpanAttributes.INPUT_MIME_TYPE, value_and_type.type.value @@ -71,7 +71,7 @@ def _as_output_attributes( if not value_and_type: return yield SpanAttributes.OUTPUT_VALUE, value_and_type.value - # it's TEXT by default, so we can skip to save one attribute + # It's assumed to be TEXT by default, so we can skip to save one attribute. 
if value_and_type.type is not OpenInferenceMimeTypeValues.TEXT: yield SpanAttributes.OUTPUT_MIME_TYPE, value_and_type.type.value @@ -92,7 +92,7 @@ def _finish_tracing( try: attributes: Attributes = dict(has_attributes.get_attributes()) except Exception: - logger.exception("Failed to get output value") + logger.exception("Failed to get attributes") attributes = None try: extra_attributes: Attributes = dict(has_attributes.get_extra_attributes()) diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py index 6480cbf22..5d55e44fe 100644 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py +++ b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_instrumentor.py @@ -1,7 +1,9 @@ import asyncio import json +import logging import random from contextlib import suppress +from importlib import import_module from importlib.metadata import version from itertools import count from typing import ( @@ -19,7 +21,6 @@ cast, ) -import openai import pytest from httpx import AsyncByteStream, Response from openinference.instrumentation.openai import OpenAIInstrumentor @@ -31,6 +32,8 @@ SpanAttributes, ToolCallAttributes, ) +from opentelemetry import trace as trace_api +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor from opentelemetry.sdk import trace as trace_sdk from opentelemetry.sdk.resources import Resource from opentelemetry.sdk.trace import ReadableSpan @@ -39,6 +42,12 @@ from opentelemetry.util.types import AttributeValue from respx import MockRouter +for name, logger in logging.root.manager.loggerDict.items(): + if name.startswith("openinference.") and isinstance(logger, logging.Logger): + logger.setLevel(logging.DEBUG) + logger.handlers.clear() + logger.addHandler(logging.StreamHandler()) + @pytest.mark.parametrize("is_async", [False, True]) @pytest.mark.parametrize("is_raw", [False, True]) @@ -47,17 +56,17 @@ def test_chat_completions( is_async: bool, is_raw: bool, - is_stream, + is_stream: bool, status_code: int, respx_mock: MockRouter, in_memory_span_exporter: InMemorySpanExporter, completion_usage: Dict[str, Any], model_name: str, - tool_calls_mock_stream, + chat_completion_mock_stream: Tuple[List[bytes], List[Dict[str, Any]]], ) -> None: input_messages: List[Dict[str, Any]] = get_messages() output_messages: List[Dict[str, Any]] = ( - tool_calls_mock_stream[1] if is_stream else get_messages() + chat_completion_mock_stream[1] if is_stream else get_messages() ) invocation_parameters = { "stream": is_stream, @@ -66,9 +75,9 @@ def test_chat_completions( "n": len(output_messages), } url = "https://api.openai.com/v1/chat/completions" - respx_kwargs = { + respx_kwargs: Dict[str, Any] = { **( - {"stream": MockAsyncByteStream(tool_calls_mock_stream[0])} + {"stream": MockAsyncByteStream(chat_completion_mock_stream[0])} if is_stream else { "json": { @@ -84,6 +93,7 @@ def test_chat_completions( } respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) create_kwargs = {"messages": input_messages, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").chat.completions if is_async @@ -108,8 +118,8 @@ async def task() -> None: for _ in response: pass spans = 
in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -167,7 +177,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes @pytest.mark.parametrize("is_async", [False, True]) @@ -183,10 +193,10 @@ def test_completions( in_memory_span_exporter: InMemorySpanExporter, completion_usage: Dict[str, Any], model_name: str, - completions_mock_stream, + completion_mock_stream: Tuple[List[bytes], List[str]], ) -> None: prompt: List[str] = get_texts() - output_texts: List[str] = completions_mock_stream[1] if is_stream else get_texts() + output_texts: List[str] = completion_mock_stream[1] if is_stream else get_texts() invocation_parameters = { "stream": is_stream, "model": randstr(), @@ -194,9 +204,9 @@ def test_completions( "n": len(output_texts), } url = "https://api.openai.com/v1/completions" - respx_kwargs = { + respx_kwargs: Dict[str, Any] = { **( - {"stream": MockAsyncByteStream(completions_mock_stream[0])} + {"stream": MockAsyncByteStream(completion_mock_stream[0])} if is_stream else { "json": { @@ -212,6 +222,7 @@ def test_completions( } respx_mock.post(url).mock(return_value=Response(status_code=status_code, **respx_kwargs)) create_kwargs = {"prompt": prompt, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").completions if is_async @@ -236,8 +247,8 @@ async def task() -> None: for _ in response: pass spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -267,7 +278,7 @@ async def task() -> None: ) # We left out model_name from our mock stream. 
assert attributes.pop(LLM_MODEL_NAME, None) == model_name - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes @pytest.mark.parametrize("is_async", [False, True]) @@ -311,6 +322,7 @@ def test_embeddings( ) ) create_kwargs = {"input": input_text, **invocation_parameters} + openai = import_module("openai") completions = ( openai.AsyncOpenAI(api_key="sk-").embeddings if is_async @@ -329,8 +341,8 @@ async def task() -> None: response = create(**create_kwargs) _ = response.parse() if is_raw else response spans = in_memory_span_exporter.get_finished_spans() - assert len(spans) == 1 - span: ReadableSpan = spans[0] + assert len(spans) == 2 # first span should be from the httpx instrumentor + span: ReadableSpan = spans[1] if status_code == 200: assert span.status.is_ok elif status_code == 400: @@ -361,40 +373,46 @@ async def task() -> None: attributes.pop(f"{EMBEDDING_EMBEDDINGS}.{i}.{EMBEDDING_VECTOR}", None) == embedding[1] ) - assert attributes == {} # this test should account for all the attributes after popping them + assert attributes == {} # test should account for all span attributes -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def in_memory_span_exporter() -> InMemorySpanExporter: return InMemorySpanExporter() -@pytest.fixture(autouse=True) -def instrument(in_memory_span_exporter: InMemorySpanExporter) -> Generator[None, None, None]: - """ - Instruments OpenAI before each test to ensure that the patch is applied - before any tests are run. - """ +@pytest.fixture(scope="module") +def tracer_provider(in_memory_span_exporter: InMemorySpanExporter) -> trace_api.TracerProvider: resource = Resource(attributes={}) tracer_provider = trace_sdk.TracerProvider(resource=resource) span_processor = SimpleSpanProcessor(span_exporter=in_memory_span_exporter) tracer_provider.add_span_processor(span_processor=span_processor) + HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider) + return tracer_provider + + +@pytest.fixture(autouse=True) +def instrument( + tracer_provider: trace_api.TracerProvider, + in_memory_span_exporter: InMemorySpanExporter, +) -> Generator[None, None, None]: OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) yield OpenAIInstrumentor().uninstrument() + in_memory_span_exporter.clear() @pytest.fixture(scope="module") def seed() -> Iterator[int]: """ - Use rolling seeds to make debugging easier, because the rolling pseudo-random - values allow conditional breakpoints to be hit precisely (and repeatably). + Use rolling seeds to help debugging, because the rolling pseudo-random values + allow conditional breakpoints to be hit precisely (and repeatably). 
""" return count() @pytest.fixture(autouse=True) -def set_seed(seed: Iterator[int]) -> None: +def set_seed(seed: Iterator[int]) -> Iterator[None]: random.seed(next(seed)) yield @@ -421,7 +439,7 @@ def input_messages() -> List[Dict[str, Any]]: @pytest.fixture -def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: +def chat_completion_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: return ( [ b'data: {"choices": [{"delta": {"role": "assistant"}, "index": 0}]}\n\n', @@ -508,7 +526,7 @@ def tool_calls_mock_stream() -> Tuple[List[bytes], List[Dict[str, Any]]]: @pytest.fixture -def completions_mock_stream() -> Tuple[List[bytes], List[str]]: +def completion_mock_stream() -> Tuple[List[bytes], List[str]]: return ( [ b'data: {"choices": [{"text": "", "index": 0}]}\n\n', @@ -547,7 +565,7 @@ class MockAsyncByteStream(AsyncByteStream): def __init__(self, byte_stream: Iterable[bytes]): self._byte_stream = byte_stream - def __iter__(self) -> AsyncIterator[bytes]: + def __iter__(self) -> Iterator[bytes]: for byte_string in self._byte_stream: yield byte_string diff --git a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py b/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py deleted file mode 100644 index c7e5d4ff0..000000000 --- a/python/instrumentation/openinference-instrumentation-openai/tests/openinference/instrumentation/openai/test_response_accumulator.py +++ /dev/null @@ -1,150 +0,0 @@ -import pytest -from openai.types.chat import ChatCompletionChunk -from openinference.instrumentation.openai._response_accumulator import ( - _ChatCompletionAccumulator, -) - - -def test_chat_completion_accumulator(chat_completion_chunks, desired_chat_completion_result): - accumulator = _ChatCompletionAccumulator({}) - for chunk in chat_completion_chunks: - accumulator.process_chunk(chunk) - assert accumulator._result() == desired_chat_completion_result - - -@pytest.fixture -def desired_chat_completion_result(): - return { - "id": "xyz", - "choices": [ - { - "index": 0, - "finish_reason": "length", - "message": {"content": "A1", "role": "assistant"}, - }, - { - "index": 1, - "finish_reason": "stop", - "message": {"content": "B2", "role": "assistant"}, - }, - { - "index": 2, - }, - { - "index": 3, - "message": { - "tool_calls": [ - {"index": 0, "function": {"arguments": "C3"}}, - {"index": 1, "function": {"arguments": "D4"}}, - ] - }, - }, - ], - "created": 123, - "model": "ultra-turbo", - } - - -@pytest.fixture -def chat_completion_chunks(): - chunks = [ - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"content": "A"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "D"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"content": "B"}, "finish_reason": None}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "C"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"role": "assistant"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"content": "2"}}], - "created": 123, - "model": "ultra-turbo", 
- }, - { - "id": "xyz", - "choices": [{"index": 0, "delta": {"content": "1"}, "finish_reason": None}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 1, "delta": {"role": "assistant"}}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"finish_reason": "length", "index": 0}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 1, "function": {"arguments": "4"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"finish_reason": "stop", "index": 1}], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [ - { - "index": 3, - "delta": {"tool_calls": [{"index": 0, "function": {"arguments": "3"}}]}, - }, - ], - "created": 123, - "model": "ultra-turbo", - }, - { - "id": "xyz", - "choices": [{"index": 2, "delta": {"content": ""}}], - "created": 123, - "model": "ultra-turbo", - }, - ] - return [ChatCompletionChunk.construct(**chunk) for chunk in chunks]
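# --- Editor's note (not part of the patch): the deleted test above constructed
# _ChatCompletionAccumulator({}), which no longer matches the new constructor.
# A hedged sketch of how an equivalent test could be reinstated against the new
# signature (chat_completion_type is now passed explicitly); the expected merge
# behavior is inferred from the deleted fixtures and may need adjustment.
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openinference.instrumentation.openai._response_accumulator import (
    _ChatCompletionAccumulator,
)


def test_chat_completion_accumulator_merges_chunks() -> None:
    chunks = [
        ChatCompletionChunk.construct(
            id="xyz",
            choices=[{"index": 0, "delta": {"role": "assistant"}}],
            created=123,
            model="ultra-turbo",
        ),
        ChatCompletionChunk.construct(
            id="xyz",
            choices=[{"index": 0, "delta": {"content": "A1"}, "finish_reason": "stop"}],
            created=123,
            model="ultra-turbo",
        ),
    ]
    accumulator = _ChatCompletionAccumulator(
        request_parameters={},
        chat_completion_type=ChatCompletion,
    )
    for chunk in chunks:
        accumulator.process_chunk(chunk)
    result = accumulator._result()
    assert result is not None
    # The deltas for choice index 0 should be merged into a single message.
    assert result["choices"][0]["message"]["content"] == "A1"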