
add mime type to attachment info (#1341)
isahers1 authored Jan 10, 2025
1 parent 2fa13b8 commit 29a77b0
Showing 4 changed files with 113 additions and 1 deletion.
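
What the change means for callers, in brief: each entry in an example's attachments dict now carries the attachment's MIME type next to its presigned URL and reader. Below is a minimal sketch adapted from the integration tests added in this commit — the dataset and attachment names are illustrative, a configured Client (API key in the environment) is assumed, and the ExampleUploadWithAttachments import path is inferred from the SDK's schema module:

from uuid import uuid4

from langsmith import Client
from langsmith.schemas import ExampleUploadWithAttachments

client = Client()

# Create a throwaway dataset and upload one example with a plain-text attachment.
dataset = client.create_dataset(dataset_name="mime-type-demo-" + uuid4().hex[:4])
client.upload_examples_multipart(
    dataset_id=dataset.id,
    uploads=[
        ExampleUploadWithAttachments(
            inputs={"text": "hello world"},
            outputs={"response": "hi there"},
            attachments={"notes": ("text/plain", b"test content")},
        )
    ],
)

# After this commit, AttachmentInfo exposes the MIME type next to the reader.
example = next(client.list_examples(dataset_id=dataset.id, include_attachments=True))
print(example.attachments["notes"]["mime_type"])  # -> "text/plain"

client.delete_dataset(dataset_id=dataset.id)

Because the client fills the field with value.get("mime_type"), an example whose server response lacks the key would surface None here rather than raising.
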
2 changes: 2 additions & 0 deletions python/langsmith/client.py
@@ -4315,6 +4315,7 @@ def read_example(
attachments[key.removeprefix("attachment.")] = {
    "presigned_url": value["presigned_url"],
    "reader": reader,
+   "mime_type": value.get("mime_type"),
}

return ls_schemas.Example(
@@ -4449,6 +4450,7 @@ def list_examples(
attachments[key.removeprefix("attachment.")] = {
    "presigned_url": value["presigned_url"],
    "reader": reader,
+   "mime_type": value.get("mime_type"),
}

yield ls_schemas.Example(
1 change: 1 addition & 0 deletions python/langsmith/evaluation/_runner.py
@@ -2247,6 +2247,7 @@ def _reset_example_attachments(example: schemas.Example) -> schemas.Example:
new_attachments[key] = {
    "presigned_url": attachment["presigned_url"],
    "reader": reader,
+   "mime_type": attachment.get("mime_type"),
}

# Create a new Example instance with the updated attachments
2 changes: 1 addition & 1 deletion python/langsmith/schemas.py
@@ -131,7 +131,7 @@ class AttachmentInfo(TypedDict):

    presigned_url: str
    reader: BinaryIOLike
-   # TODO: add mime type
+   mime_type: str


class Example(ExampleBase):
109 changes: 109 additions & 0 deletions python/tests/integration_tests/test_client.py
@@ -1286,6 +1286,115 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
    langchain_client.delete_dataset(dataset_id=dataset.id)


def test_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly."""
    dataset_name = "__test_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    example = next(
        langchain_client.list_examples(dataset_id=dataset.id, include_attachments=True)
    )
    assert example.attachments["test_file"]["mime_type"] == "text/plain"

    example = langchain_client.read_example(example_id=example.id)
    assert example.attachments["test_file"]["mime_type"] == "text/plain"

    langchain_client.delete_dataset(dataset_id=dataset.id)


def test_evaluate_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly when evaluating."""
    dataset_name = "__test_evaluate_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {"answer": "hi there"}

    def evaluator(
        outputs: dict, reference_outputs: dict, attachments: dict
    ) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {
            "score": float(
                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
            )
        }

    langchain_client.evaluate(target, data=dataset_name, evaluators=[evaluator])

    langchain_client.delete_dataset(dataset_name=dataset_name)


async def test_aevaluate_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly when evaluating."""
    dataset_name = "__test_evaluate_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    async def target(
        inputs: Dict[str, Any], attachments: Dict[str, Any]
    ) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {"answer": "hi there"}

    async def evaluator(
        outputs: dict, reference_outputs: dict, attachments: dict
    ) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {
            "score": float(
                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
            )
        }

    await langchain_client.aevaluate(target, data=dataset_name, evaluators=[evaluator])

    langchain_client.delete_dataset(dataset_name=dataset_name)


def test_evaluate_with_attachments_multiple_evaluators(
    langchain_client: Client,
) -> None:
