diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 7ded2be8..ef29836b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.7.7"
+  ".": "0.7.8"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b1709d72..52ff46e1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
 # Changelog
 
+## 0.7.8 (2023-12-12)
+
+Full Changelog: [v0.7.7...v0.7.8](https://github.com/anthropics/anthropic-sdk-python/compare/v0.7.7...v0.7.8)
+
+### Bug Fixes
+
+* avoid leaking memory when Client.with_options is used ([#275](https://github.com/anthropics/anthropic-sdk-python/issues/275)) ([5e51ebd](https://github.com/anthropics/anthropic-sdk-python/commit/5e51ebdbc6e5c23c8c237b5e0231ef66f585f964))
+* **client:** correct base_url setter implementation ([#265](https://github.com/anthropics/anthropic-sdk-python/issues/265)) ([29d0c8b](https://github.com/anthropics/anthropic-sdk-python/commit/29d0c8b0eb174b499a904e02cce7fe7a6aaa1a01))
+* **client:** ensure retried requests are closed ([#261](https://github.com/anthropics/anthropic-sdk-python/issues/261)) ([5d9aa75](https://github.com/anthropics/anthropic-sdk-python/commit/5d9aa754ace5d53eb90c1055dd6b1ca8e7deee4f))
+* **errors:** properly assign APIError.body ([#274](https://github.com/anthropics/anthropic-sdk-python/issues/274)) ([342846f](https://github.com/anthropics/anthropic-sdk-python/commit/342846fa4d424a4d18dd2289d2b652bf53c97901))
+
+
+### Chores
+
+* **internal:** enable more lint rules ([#273](https://github.com/anthropics/anthropic-sdk-python/issues/273)) ([0ac62bc](https://github.com/anthropics/anthropic-sdk-python/commit/0ac62bc127ddf0367561427836ff19c1272fb0e1))
+* **internal:** reformat imports ([#270](https://github.com/anthropics/anthropic-sdk-python/issues/270)) ([dc55724](https://github.com/anthropics/anthropic-sdk-python/commit/dc55724673dfa59911a05fe4827b8804beba0b05))
+* **internal:** reformat imports ([#272](https://github.com/anthropics/anthropic-sdk-python/issues/272)) ([0d82ce4](https://github.com/anthropics/anthropic-sdk-python/commit/0d82ce4784c3a6c9599e6c09b8190e97ea028dc3))
+* **internal:** remove unused file ([#264](https://github.com/anthropics/anthropic-sdk-python/issues/264)) ([1bfc69b](https://github.com/anthropics/anthropic-sdk-python/commit/1bfc69b0e2a1eb79598409cbfcba060f699d28a7))
+* **internal:** replace string concatenation with f-strings ([#263](https://github.com/anthropics/anthropic-sdk-python/issues/263)) ([f545c35](https://github.com/anthropics/anthropic-sdk-python/commit/f545c350dd802079d057d34ff29444e32dc7bdcb))
+* **internal:** update formatting ([#271](https://github.com/anthropics/anthropic-sdk-python/issues/271)) ([802ab59](https://github.com/anthropics/anthropic-sdk-python/commit/802ab59401b06986b8023e9ef0d0f9e0d6858b86))
+* **package:** lift anyio v4 restriction ([#266](https://github.com/anthropics/anthropic-sdk-python/issues/266)) ([a217e99](https://github.com/anthropics/anthropic-sdk-python/commit/a217e9955569852d35ab1bc1351dd66ba807fc44))
+
+
+### Documentation
+
+* update examples to show claude-2.1 ([#276](https://github.com/anthropics/anthropic-sdk-python/issues/276)) ([8f562f4](https://github.com/anthropics/anthropic-sdk-python/commit/8f562f47f13ffaaab93f08b9b4c59d06e4a18b6c))
+
+
+### Refactors
+
+* **client:** simplify cleanup ([#278](https://github.com/anthropics/anthropic-sdk-python/issues/278)) ([3611ae2](https://github.com/anthropics/anthropic-sdk-python/commit/3611ae24d93fa33e55f2e9193a3c787bfd041da5))
+* simplify internal error handling ([#279](https://github.com/anthropics/anthropic-sdk-python/issues/279)) ([993b51a](https://github.com/anthropics/anthropic-sdk-python/commit/993b51aa4f41bae3938a12d60919065c4865a734))
+
 ## 0.7.7 (2023-11-29)
 
 Full Changelog: [v0.7.6...v0.7.7](https://github.com/anthropics/anthropic-sdk-python/compare/v0.7.6...v0.7.7)
diff --git a/README.md b/README.md
index d134e1af..749e9e4a 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,7 @@ anthropic = Anthropic(
 )
 
 completion = anthropic.completions.create(
-    model="claude-2",
+    model="claude-2.1",
     max_tokens_to_sample=300,
     prompt=f"{HUMAN_PROMPT} how does a court case get to the Supreme Court?{AI_PROMPT}",
 )
@@ -122,7 +122,7 @@ anthropic = AsyncAnthropic(
 
 async def main():
     completion = await anthropic.completions.create(
-        model="claude-2",
+        model="claude-2.1",
         max_tokens_to_sample=300,
         prompt=f"{HUMAN_PROMPT} how does a court case get to the Supreme Court?{AI_PROMPT}",
     )
@@ -146,7 +146,7 @@ anthropic = Anthropic()
 stream = anthropic.completions.create(
     prompt=f"{HUMAN_PROMPT} Your prompt here{AI_PROMPT}",
     max_tokens_to_sample=300,
-    model="claude-2",
+    model="claude-2.1",
     stream=True,
 )
 for completion in stream:
@@ -163,7 +163,7 @@ anthropic = AsyncAnthropic()
 stream = await anthropic.completions.create(
     prompt=f"{HUMAN_PROMPT} Your prompt here{AI_PROMPT}",
     max_tokens_to_sample=300,
-    model="claude-2",
+    model="claude-2.1",
     stream=True,
 )
 async for completion in stream:
@@ -206,7 +206,7 @@ try:
     client.completions.create(
         prompt=f"{anthropic.HUMAN_PROMPT} Your prompt here{anthropic.AI_PROMPT}",
         max_tokens_to_sample=300,
-        model="claude-2",
+        model="claude-2.1",
     )
 except anthropic.APIConnectionError as e:
     print("The server could not be reached")
@@ -253,7 +253,7 @@ anthropic = Anthropic(
 anthropic.with_options(max_retries=5).completions.create(
     prompt=f"{HUMAN_PROMPT} Can you help me effectively ask for a raise at work?{AI_PROMPT}",
     max_tokens_to_sample=300,
-    model="claude-2",
+    model="claude-2.1",
 )
 ```
 
@@ -280,7 +280,7 @@ anthropic = Anthropic(
 anthropic.with_options(timeout=5 * 1000).completions.create(
     prompt=f"{HUMAN_PROMPT} Where can I get a good coffee in my neighbourhood?{AI_PROMPT}",
     max_tokens_to_sample=300,
-    model="claude-2",
+    model="claude-2.1",
 )
 ```
 
@@ -338,7 +338,7 @@ from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
 anthropic = Anthropic()
 
 response = anthropic.completions.with_raw_response.create(
-    model="claude-2",
+    model="claude-2.1",
     max_tokens_to_sample=300,
     prompt=f"{HUMAN_PROMPT} how does a court case get to the Supreme Court?{AI_PROMPT}",
 )
diff --git a/examples/demo_async.py b/examples/demo_async.py
index e4ec78d0..49fd5191 100644
--- a/examples/demo_async.py
+++ b/examples/demo_async.py
@@ -10,7 +10,7 @@ async def main() -> None:
     client = AsyncAnthropic()
 
     res = await client.completions.create(
-        model="claude-2",
+        model="claude-2.1",
         prompt=f"{anthropic.HUMAN_PROMPT} how does a court case get to the Supreme Court? {anthropic.AI_PROMPT}",
         max_tokens_to_sample=1000,
     )
diff --git a/examples/demo_sync.py b/examples/demo_sync.py
index 0c1c0084..e386e2ad 100644
--- a/examples/demo_sync.py
+++ b/examples/demo_sync.py
@@ -8,7 +8,7 @@ def main() -> None:
     client = Anthropic()
 
     res = client.completions.create(
-        model="claude-2",
+        model="claude-2.1",
         prompt=f"{anthropic.HUMAN_PROMPT} how does a court case get to the Supreme Court? {anthropic.AI_PROMPT}",
         max_tokens_to_sample=1000,
     )
diff --git a/examples/streaming.py b/examples/streaming.py
index 2e6d85dd..a738aad9 100644
--- a/examples/streaming.py
+++ b/examples/streaming.py
@@ -15,7 +15,7 @@
 def sync_stream() -> None:
     stream = client.completions.create(
         prompt=f"{HUMAN_PROMPT} {question}{AI_PROMPT}",
-        model="claude-2",
+        model="claude-2.1",
         stream=True,
         max_tokens_to_sample=300,
     )
@@ -29,7 +29,7 @@ def sync_stream() -> None:
 async def async_stream() -> None:
     stream = await async_client.completions.create(
         prompt=f"{HUMAN_PROMPT} {question}{AI_PROMPT}",
-        model="claude-2",
+        model="claude-2.1",
         stream=True,
         max_tokens_to_sample=300,
     )
diff --git a/pyproject.toml b/pyproject.toml
index 678097f4..7539f3be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "anthropic"
-version = "0.7.7"
+version = "0.7.8"
 description = "The official Python library for the anthropic API"
 readme = "README.md"
 license = "MIT"
@@ -11,7 +11,7 @@ dependencies = [
     "httpx>=0.23.0, <1",
     "pydantic>=1.9.0, <3",
     "typing-extensions>=4.5, <5",
-    "anyio>=3.5.0, <4",
+    "anyio>=3.5.0, <5",
     "distro>=1.7.0, <2",
     "sniffio",
     "tokenizers >= 0.13.0"
@@ -45,17 +45,18 @@ Repository = "https://github.com/anthropics/anthropic-sdk-python"
 
 [tool.rye]
 managed = true
+# version pins are in requirements-dev.lock
 dev-dependencies = [
-    "pyright==1.1.332",
-    "mypy==1.7.1",
-    "black==23.3.0",
-    "respx==0.19.2",
-    "pytest==7.1.1",
-    "pytest-asyncio==0.21.1",
-    "ruff==0.0.282",
-    "isort==5.10.1",
-    "time-machine==2.9.0",
-    "nox==2023.4.22",
+    "pyright",
+    "mypy",
+    "black",
+    "respx",
+    "pytest",
+    "pytest-asyncio",
+    "ruff",
+    "isort",
+    "time-machine",
+    "nox",
     "dirty-equals>=0.6.0",
 
 ]
@@ -80,7 +81,7 @@ typecheck = { chain = [
 ]}
 "typecheck:pyright" = "pyright"
 "typecheck:verify-types" = "pyright --verifytypes anthropic --ignoreexternal"
-"typecheck:mypy" = "mypy --enable-incomplete-feature=Unpack ."
+"typecheck:mypy" = "mypy ."
 
 [build-system]
 requires = ["hatchling"]
@@ -132,9 +133,11 @@ extra_standard_library = ["typing_extensions"]
 
 [tool.ruff]
 line-length = 120
-format = "grouped"
+output-format = "grouped"
 target-version = "py37"
 select = [
+  # bugbear rules
+  "B",
   # remove unused imports
   "F401",
   # bare except statements
@@ -145,6 +148,10 @@ select = [
   "T201",
   "T203",
 ]
+ignore = [
+  # mutable defaults
+  "B006",
+]
 unfixable = [
   # disable auto fix for print statements
   "T201",
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 31342685..f1d546fc 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -8,7 +8,7 @@
 
 -e file:.
 annotated-types==0.6.0
-anyio==3.7.1
+anyio==4.1.0
 argcomplete==3.1.2
 attrs==23.1.0
 black==23.3.0
@@ -21,10 +21,10 @@ distlib==0.3.7
 distro==1.8.0
 exceptiongroup==1.1.3
 filelock==3.12.4
-fsspec==2023.10.0
-h11==0.12.0
-httpcore==0.15.0
-httpx==0.23.0
+fsspec==2023.12.1
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.25.2
 huggingface-hub==0.16.4
 idna==3.4
 iniconfig==2.0.0
@@ -47,9 +47,8 @@ python-dateutil==2.8.2
 pytz==2023.3.post1
 pyyaml==6.0.1
 requests==2.31.0
-respx==0.19.2
-rfc3986==1.5.0
-ruff==0.0.282
+respx==0.20.2
+ruff==0.1.7
 six==1.16.0
 sniffio==1.3.0
 time-machine==2.9.0
diff --git a/requirements.lock b/requirements.lock
index 6386a2f7..54d268da 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -8,16 +8,16 @@
 
 -e file:.
 annotated-types==0.6.0
-anyio==3.7.1
+anyio==4.1.0
 certifi==2023.7.22
-charset-normalizer==3.3.0
+charset-normalizer==3.3.2
 distro==1.8.0
 exceptiongroup==1.1.3
-filelock==3.12.4
-fsspec==2023.9.2
-h11==0.12.0
-httpcore==0.15.0
-httpx==0.23.0
+filelock==3.13.1
+fsspec==2023.12.0
+h11==0.14.0
+httpcore==1.0.2
+httpx==0.25.2
 huggingface-hub==0.16.4
 idna==3.4
 packaging==23.2
@@ -25,9 +25,8 @@ pydantic==2.4.2
 pydantic-core==2.10.1
 pyyaml==6.0.1
 requests==2.31.0
-rfc3986==1.5.0
 sniffio==1.3.0
 tokenizers==0.14.0
 tqdm==4.66.1
 typing-extensions==4.8.0
-urllib3==2.0.7
+urllib3==2.1.0
diff --git a/src/anthropic/__init__.py b/src/anthropic/__init__.py
index 01b0a81c..3bae0eba 100644
--- a/src/anthropic/__init__.py
+++ b/src/anthropic/__init__.py
@@ -79,7 +79,7 @@
 for __name in __all__:
     if not __name.startswith("__"):
         try:
-            setattr(__locals[__name], "__module__", "anthropic")
+            __locals[__name].__module__ = "anthropic"
         except (TypeError, AttributeError):
             # Some of our exported symbols are builtins which we can't set attributes for.
             pass
diff --git a/src/anthropic/_base_client.py b/src/anthropic/_base_client.py
index a168301f..92189617 100644
--- a/src/anthropic/_base_client.py
+++ b/src/anthropic/_base_client.py
@@ -5,6 +5,7 @@
 import time
 import uuid
 import email
+import asyncio
 import inspect
 import logging
 import platform
@@ -72,6 +73,7 @@
     DEFAULT_TIMEOUT,
     DEFAULT_MAX_RETRIES,
     RAW_RESPONSE_HEADER,
+    STREAMED_RAW_RESPONSE_HEADER,
 )
 from ._streaming import Stream, AsyncStream
 from ._exceptions import (
@@ -363,14 +365,21 @@ def _make_status_error_from_response(
         self,
         response: httpx.Response,
     ) -> APIStatusError:
-        err_text = response.text.strip()
-        body = err_text
+        if response.is_closed and not response.is_stream_consumed:
+            # We can't read the response body as it has been closed
+            # before it was read. This can happen if an event hook
+            # raises a status error.
+            body = None
+            err_msg = f"Error code: {response.status_code}"
+        else:
+            err_text = response.text.strip()
+            body = err_text
 
-        try:
-            body = json.loads(err_text)
-            err_msg = f"Error code: {response.status_code} - {body}"
-        except Exception:
-            err_msg = err_text or f"Error code: {response.status_code}"
+            try:
+                body = json.loads(err_text)
+                err_msg = f"Error code: {response.status_code} - {body}"
+            except Exception:
+                err_msg = err_text or f"Error code: {response.status_code}"
 
         return self._make_status_error(err_msg, body=body, response=response)
 
@@ -395,14 +404,12 @@ def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
         headers_dict = _merge_mappings(self.default_headers, custom_headers)
         self._validate_headers(headers_dict, custom_headers)
 
+        # headers are case-insensitive while dictionaries are not.
         headers = httpx.Headers(headers_dict)
 
         idempotency_header = self._idempotency_header
         if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
-            if not options.idempotency_key:
-                options.idempotency_key = self._idempotency_key()
-
-            headers[idempotency_header] = options.idempotency_key
+            headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
 
         return headers
 
@@ -534,6 +541,12 @@ def _process_response_data(
         except pydantic.ValidationError as err:
             raise APIResponseValidationError(response=response, body=data) from err
 
+    def _should_stream_response_body(self, *, request: httpx.Request) -> bool:
+        if request.headers.get(STREAMED_RAW_RESPONSE_HEADER) == "true":
+            return True
+
+        return False
+
     @property
     def qs(self) -> Querystring:
         return Querystring()
@@ -578,18 +591,10 @@ def base_url(self) -> URL:
 
     @base_url.setter
     def base_url(self, url: URL | str) -> None:
-        self._client.base_url = url if isinstance(url, URL) else URL(url)
+        self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url))
 
-    @lru_cache(maxsize=None)
     def platform_headers(self) -> Dict[str, str]:
-        return {
-            "X-Stainless-Lang": "python",
-            "X-Stainless-Package-Version": self._version,
-            "X-Stainless-OS": str(get_platform()),
-            "X-Stainless-Arch": str(get_architecture()),
-            "X-Stainless-Runtime": platform.python_implementation(),
-            "X-Stainless-Runtime-Version": platform.python_version(),
-        }
+        return platform_headers(self._version)
 
     def _calculate_retry_timeout(
         self,
@@ -606,7 +611,7 @@ def _calculate_retry_timeout(
             if response_headers is not None:
                 retry_header = response_headers.get("retry-after")
                 try:
-                    retry_after = int(retry_header)
+                    retry_after = float(retry_header)
                 except Exception:
                     retry_date_tuple = email.utils.parsedate_tz(retry_header)
                     if retry_date_tuple is None:
@@ -668,9 +673,16 @@ def _idempotency_key(self) -> str:
         return f"stainless-python-retry-{uuid.uuid4()}"
 
 
+class SyncHttpxClientWrapper(httpx.Client):
+    def __del__(self) -> None:
+        try:
+            self.close()
+        except Exception:
+            pass
+
+
 class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
     _client: httpx.Client
-    _has_custom_http_client: bool
     _default_stream_cls: type[Stream[Any]] | None = None
 
     def __init__(
@@ -743,7 +755,7 @@ def __init__(
             custom_headers=custom_headers,
             _strict_response_validation=_strict_response_validation,
         )
-        self._client = http_client or httpx.Client(
+        self._client = http_client or SyncHttpxClientWrapper(
             base_url=base_url,
             # cast to a valid type because mypy doesn't understand our type narrowing
             timeout=cast(Timeout, timeout),
@@ -751,7 +763,6 @@ def __init__(
             transport=transport,
             limits=limits,
         )
-        self._has_custom_http_client = bool(http_client)
 
     def is_closed(self) -> bool:
         return self._client.is_closed
@@ -863,27 +874,24 @@ def _request(
         self._prepare_request(request)
 
         try:
-            response = self._client.send(request, auth=self.custom_auth, stream=stream)
-            log.debug(
-                'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+            response = self._client.send(
+                request,
+                auth=self.custom_auth,
+                stream=stream or self._should_stream_response_body(request=request),
             )
-            response.raise_for_status()
-        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
-            if retries > 0 and self._should_retry(err.response):
+        except httpx.TimeoutException as err:
+            if retries > 0:
                 return self._retry_request(
                     options,
                     cast_to,
                     retries,
-                    err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
+                    response_headers=None,
                 )
 
-            # If the response is streamed then we need to explicitly read the response
-            # to completion before attempting to access the response text.
-            err.response.read()
-            raise self._make_status_error_from_response(err.response) from None
-        except httpx.TimeoutException as err:
+            raise APITimeoutError(request=request) from err
+        except Exception as err:
             if retries > 0:
                 return self._retry_request(
                     options,
@@ -891,18 +899,35 @@ def _request(
                     retries,
                     stream=stream,
                     stream_cls=stream_cls,
+                    response_headers=None,
                 )
-            raise APITimeoutError(request=request) from err
-        except Exception as err:
-            if retries > 0:
+
+            raise APIConnectionError(request=request) from err
+
+        log.debug(
+            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
+            if retries > 0 and self._should_retry(err.response):
+                err.response.close()
                 return self._retry_request(
                     options,
                     cast_to,
                     retries,
+                    err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
                 )
-            raise APIConnectionError(request=request) from err
+
+            # If the response is streamed then we need to explicitly read the response
+            # to completion before attempting to access the response text.
+            if not err.response.is_closed:
+                err.response.read()
+
+            raise self._make_status_error_from_response(err.response) from None
 
         return self._process_response(
             cast_to=cast_to,
@@ -917,7 +942,7 @@ def _retry_request(
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
         remaining_retries: int,
-        response_headers: Optional[httpx.Headers] = None,
+        response_headers: httpx.Headers | None,
         *,
         stream: bool,
         stream_cls: type[_StreamT] | None,
@@ -1112,9 +1137,17 @@ def get_api_list(
         return self._request_api_list(model, page, opts)
 
 
+class AsyncHttpxClientWrapper(httpx.AsyncClient):
+    def __del__(self) -> None:
+        try:
+            # TODO(someday): support non-asyncio runtimes here
+            asyncio.get_running_loop().create_task(self.aclose())
+        except Exception:
+            pass
+
+
 class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
     _client: httpx.AsyncClient
-    _has_custom_http_client: bool
     _default_stream_cls: type[AsyncStream[Any]] | None = None
 
     def __init__(
@@ -1187,7 +1220,7 @@ def __init__(
             custom_headers=custom_headers,
             _strict_response_validation=_strict_response_validation,
         )
-        self._client = http_client or httpx.AsyncClient(
+        self._client = http_client or AsyncHttpxClientWrapper(
             base_url=base_url,
             # cast to a valid type because mypy doesn't understand our type narrowing
             timeout=cast(Timeout, timeout),
@@ -1195,7 +1228,6 @@ def __init__(
             transport=transport,
             limits=limits,
         )
-        self._has_custom_http_client = bool(http_client)
 
     def is_closed(self) -> bool:
         return self._client.is_closed
@@ -1304,13 +1336,45 @@ async def _request(
         await self._prepare_request(request)
 
         try:
-            response = await self._client.send(request, auth=self.custom_auth, stream=stream)
-            log.debug(
-                'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+            response = await self._client.send(
+                request,
+                auth=self.custom_auth,
+                stream=stream or self._should_stream_response_body(request=request),
             )
+        except httpx.TimeoutException as err:
+            if retries > 0:
+                return await self._retry_request(
+                    options,
+                    cast_to,
+                    retries,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            raise APITimeoutError(request=request) from err
+        except Exception as err:
+            if retries > 0:
+                return await self._retry_request(
+                    options,
+                    cast_to,
+                    retries,
+                    stream=stream,
+                    stream_cls=stream_cls,
+                    response_headers=None,
+                )
+
+            raise APIConnectionError(request=request) from err
+
+        log.debug(
+            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
+        )
+
+        try:
             response.raise_for_status()
         except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
             if retries > 0 and self._should_retry(err.response):
+                await err.response.aclose()
                 return await self._retry_request(
                     options,
                     cast_to,
@@ -1322,20 +1386,10 @@ async def _request(
 
             # If the response is streamed then we need to explicitly read the response
             # to completion before attempting to access the response text.
-            await err.response.aread()
+            if not err.response.is_closed:
+                await err.response.aread()
+
             raise self._make_status_error_from_response(err.response) from None
-        except httpx.ConnectTimeout as err:
-            if retries > 0:
-                return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls)
-            raise APITimeoutError(request=request) from err
-        except httpx.TimeoutException as err:
-            if retries > 0:
-                return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls)
-            raise APITimeoutError(request=request) from err
-        except Exception as err:
-            if retries > 0:
-                return await self._retry_request(options, cast_to, retries, stream=stream, stream_cls=stream_cls)
-            raise APIConnectionError(request=request) from err
 
         return self._process_response(
             cast_to=cast_to,
@@ -1350,7 +1404,7 @@ async def _retry_request(
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
         remaining_retries: int,
-        response_headers: Optional[httpx.Headers] = None,
+        response_headers: httpx.Headers | None,
         *,
         stream: bool,
         stream_cls: type[_AsyncStreamT] | None,
@@ -1631,6 +1685,18 @@ def get_platform() -> Platform:
     return "Unknown"
 
 
+@lru_cache(maxsize=None)
+def platform_headers(version: str) -> Dict[str, str]:
+    return {
+        "X-Stainless-Lang": "python",
+        "X-Stainless-Package-Version": version,
+        "X-Stainless-OS": str(get_platform()),
+        "X-Stainless-Arch": str(get_architecture()),
+        "X-Stainless-Runtime": platform.python_implementation(),
+        "X-Stainless-Runtime-Version": platform.python_version(),
+    }
+
+
 class OtherArch:
     def __init__(self, name: str) -> None:
         self.name = name
diff --git a/src/anthropic/_client.py b/src/anthropic/_client.py
index 33648605..be2ac5ea 100644
--- a/src/anthropic/_client.py
+++ b/src/anthropic/_client.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import os
-import asyncio
 from typing import Any, Union, Mapping
 from typing_extensions import Self, override
 
@@ -34,6 +33,8 @@
     DEFAULT_MAX_RETRIES,
     SyncAPIClient,
     AsyncAPIClient,
+    SyncHttpxClientWrapper,
+    AsyncHttpxClientWrapper,
 )
 
 __all__ = [
@@ -222,7 +223,7 @@ def copy(
             if http_client is not None:
                 raise ValueError("The 'http_client' argument is mutually exclusive with 'connection_pool_limits'")
 
-            if self._has_custom_http_client:
+            if not isinstance(self._client, SyncHttpxClientWrapper):
                 raise ValueError(
                     "A custom HTTP client has been set and is mutually exclusive with the 'connection_pool_limits' argument"
                 )
@@ -239,7 +240,7 @@ def copy(
         return self.__class__(
             api_key=api_key or self.api_key,
             auth_token=auth_token or self.auth_token,
-            base_url=base_url or str(self.base_url),
+            base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
             http_client=http_client,
             connection_pool_limits=connection_pool_limits,
@@ -253,16 +254,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy
 
-    def __del__(self) -> None:
-        if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"):
-            # this can happen if the '__init__' method raised an error
-            return
-
-        if self._has_custom_http_client:
-            return
-
-        self.close()
-
     def count_tokens(
         self,
         text: str,
@@ -483,7 +474,7 @@ def copy(
             if http_client is not None:
                 raise ValueError("The 'http_client' argument is mutually exclusive with 'connection_pool_limits'")
 
-            if self._has_custom_http_client:
+            if not isinstance(self._client, AsyncHttpxClientWrapper):
                 raise ValueError(
                     "A custom HTTP client has been set and is mutually exclusive with the 'connection_pool_limits' argument"
                 )
@@ -500,7 +491,7 @@ def copy(
         return self.__class__(
             api_key=api_key or self.api_key,
             auth_token=auth_token or self.auth_token,
-            base_url=base_url or str(self.base_url),
+            base_url=base_url or self.base_url,
             timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
             http_client=http_client,
             connection_pool_limits=connection_pool_limits,
@@ -514,19 +505,6 @@ def copy(
     # client.with_options(timeout=10).foo.create(...)
     with_options = copy
 
-    def __del__(self) -> None:
-        if not hasattr(self, "_has_custom_http_client") or not hasattr(self, "close"):
-            # this can happen if the '__init__' method raised an error
-            return
-
-        if self._has_custom_http_client:
-            return
-
-        try:
-            asyncio.get_running_loop().create_task(self.close())
-        except Exception:
-            pass
-
     async def count_tokens(
         self,
         text: str,
diff --git a/src/anthropic/_constants.py b/src/anthropic/_constants.py
index 7343a7a4..4e59e854 100644
--- a/src/anthropic/_constants.py
+++ b/src/anthropic/_constants.py
@@ -3,6 +3,7 @@
 import httpx
 
 RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
+STREAMED_RAW_RESPONSE_HEADER = "X-Stainless-Streamed-Raw-Response"
 
 # default timeout is 10 minutes
 DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0)
diff --git a/src/anthropic/_exceptions.py b/src/anthropic/_exceptions.py
index 0e2d5252..47568a94 100644
--- a/src/anthropic/_exceptions.py
+++ b/src/anthropic/_exceptions.py
@@ -41,6 +41,7 @@ def __init__(self, message: str, request: httpx.Request, *, body: object | None)
         super().__init__(message)
         self.request = request
         self.message = message
+        self.body = body
 
 
 class APIResponseValidationError(APIError):
diff --git a/src/anthropic/_streaming.py b/src/anthropic/_streaming.py
index f2d2246a..5be75e9a 100644
--- a/src/anthropic/_streaming.py
+++ b/src/anthropic/_streaming.py
@@ -70,7 +70,7 @@ def __stream__(self) -> Iterator[ResponseT]:
                 )
 
         # Ensure the entire stream is consumed
-        for sse in iterator:
+        for _sse in iterator:
             ...
 
 
@@ -132,7 +132,7 @@ async def __stream__(self) -> AsyncIterator[ResponseT]:
                 )
 
         # Ensure the entire stream is consumed
-        async for sse in iterator:
+        async for _sse in iterator:
             ...
 
 
diff --git a/src/anthropic/_types.py b/src/anthropic/_types.py
index 7e95adbe..6c001136 100644
--- a/src/anthropic/_types.py
+++ b/src/anthropic/_types.py
@@ -44,6 +44,7 @@
 
 
 class BinaryResponseContent(ABC):
+    @abstractmethod
     def __init__(
         self,
         response: Any,
diff --git a/src/anthropic/_utils/_utils.py b/src/anthropic/_utils/_utils.py
index d2bfc91a..c874d368 100644
--- a/src/anthropic/_utils/_utils.py
+++ b/src/anthropic/_utils/_utils.py
@@ -194,8 +194,8 @@ def extract_type_arg(typ: type, index: int) -> type:
     args = get_args(typ)
     try:
         return cast(type, args[index])
-    except IndexError:
-        raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not")
+    except IndexError as err:
+        raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err
 
 
 def deepcopy_minimal(item: _T) -> _T:
@@ -230,7 +230,7 @@ def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> s
 
 def quote(string: str) -> str:
     """Add single quotation marks around the given string. Does *not* do any escaping."""
-    return "'" + string + "'"
+    return f"'{string}'"
 
 
 def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]:
@@ -275,7 +275,9 @@ def wrapper(*args: object, **kwargs: object) -> object:
                 try:
                     given_params.add(positional[i])
                 except IndexError:
-                    raise TypeError(f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given")
+                    raise TypeError(
+                        f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given"
+                    ) from None
 
             for key in kwargs.keys():
                 given_params.add(key)
diff --git a/src/anthropic/_version.py b/src/anthropic/_version.py
index 4a703aca..d324b798 100644
--- a/src/anthropic/_version.py
+++ b/src/anthropic/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless.
 
 __title__ = "anthropic"
-__version__ = "0.7.7"  # x-release-please-version
+__version__ = "0.7.8"  # x-release-please-version
diff --git a/src/anthropic/pagination.py b/src/anthropic/pagination.py
deleted file mode 100644
index 2b7c4e7b..00000000
--- a/src/anthropic/pagination.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# File generated from our OpenAPI spec by Stainless.
-
-from typing import TypeVar
-
-from ._models import BaseModel
-
-_BaseModelT = TypeVar("_BaseModelT", bound=BaseModel)
diff --git a/src/anthropic/resources/completions.py b/src/anthropic/resources/completions.py
index a603634d..15d3d153 100644
--- a/src/anthropic/resources/completions.py
+++ b/src/anthropic/resources/completions.py
@@ -33,7 +33,7 @@ def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
         stop_sequences: List[str] | NotGiven = NOT_GIVEN,
@@ -62,7 +62,7 @@ def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -125,7 +125,7 @@ def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         stream: Literal[True],
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
@@ -154,7 +154,7 @@ def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -217,7 +217,7 @@ def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         stream: bool,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
@@ -246,7 +246,7 @@ def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -309,7 +309,7 @@ def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
         stop_sequences: List[str] | NotGiven = NOT_GIVEN,
@@ -361,7 +361,7 @@ async def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
         stop_sequences: List[str] | NotGiven = NOT_GIVEN,
@@ -390,7 +390,7 @@ async def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -453,7 +453,7 @@ async def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         stream: Literal[True],
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
@@ -482,7 +482,7 @@ async def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -545,7 +545,7 @@ async def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         stream: bool,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
@@ -574,7 +574,7 @@ async def create(
               As we improve Claude, we develop new versions of it that you can query. This
               parameter controls which version of Claude answers your request. Right now we
               are offering two model families: Claude, and Claude Instant. You can use them by
-              setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+              setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
               [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
               additional details.
 
@@ -637,7 +637,7 @@ async def create(
         self,
         *,
         max_tokens_to_sample: int,
-        model: Union[str, Literal["claude-2", "claude-instant-1"]],
+        model: Union[str, Literal["claude-2.1", "claude-instant-1"]],
         prompt: str,
         metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
         stop_sequences: List[str] | NotGiven = NOT_GIVEN,
diff --git a/src/anthropic/types/completion_create_params.py b/src/anthropic/types/completion_create_params.py
index 3dc40e37..ba1ebbe6 100644
--- a/src/anthropic/types/completion_create_params.py
+++ b/src/anthropic/types/completion_create_params.py
@@ -25,13 +25,13 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     only specifies the absolute maximum number of tokens to generate.
     """
 
-    model: Required[Union[str, Literal["claude-2", "claude-instant-1"]]]
+    model: Required[Union[str, Literal["claude-2.1", "claude-instant-1"]]]
     """The model that will complete your prompt.
 
     As we improve Claude, we develop new versions of it that you can query. This
     parameter controls which version of Claude answers your request. Right now we
     are offering two model families: Claude, and Claude Instant. You can use them by
-    setting `model` to `"claude-2"` or `"claude-instant-1"`, respectively. See
+    setting `model` to `"claude-2.1"` or `"claude-instant-1"`, respectively. See
     [models](https://docs.anthropic.com/claude/reference/selecting-a-model) for
     additional details.
     """
diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py
index bc152842..f96563c9 100644
--- a/tests/api_resources/test_completions.py
+++ b/tests/api_resources/test_completions.py
@@ -24,7 +24,7 @@ class TestCompletions:
     def test_method_create_overload_1(self, client: Anthropic) -> None:
         completion = client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
         )
         assert_matches_type(Completion, completion, path=["response"])
@@ -33,7 +33,7 @@ def test_method_create_overload_1(self, client: Anthropic) -> None:
     def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> None:
         completion = client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
             stop_sequences=["string", "string", "string"],
@@ -48,7 +48,7 @@ def test_method_create_with_all_params_overload_1(self, client: Anthropic) -> No
     def test_raw_response_create_overload_1(self, client: Anthropic) -> None:
         response = client.completions.with_raw_response.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -59,7 +59,7 @@ def test_raw_response_create_overload_1(self, client: Anthropic) -> None:
     def test_method_create_overload_2(self, client: Anthropic) -> None:
         client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
         )
@@ -68,7 +68,7 @@ def test_method_create_overload_2(self, client: Anthropic) -> None:
     def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> None:
         client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
             metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
@@ -82,7 +82,7 @@ def test_method_create_with_all_params_overload_2(self, client: Anthropic) -> No
     def test_raw_response_create_overload_2(self, client: Anthropic) -> None:
         response = client.completions.with_raw_response.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
         )
@@ -99,7 +99,7 @@ class TestAsyncCompletions:
     async def test_method_create_overload_1(self, client: AsyncAnthropic) -> None:
         completion = await client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
         )
         assert_matches_type(Completion, completion, path=["response"])
@@ -108,7 +108,7 @@ async def test_method_create_overload_1(self, client: AsyncAnthropic) -> None:
     async def test_method_create_with_all_params_overload_1(self, client: AsyncAnthropic) -> None:
         completion = await client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
             stop_sequences=["string", "string", "string"],
@@ -123,7 +123,7 @@ async def test_method_create_with_all_params_overload_1(self, client: AsyncAnthr
     async def test_raw_response_create_overload_1(self, client: AsyncAnthropic) -> None:
         response = await client.completions.with_raw_response.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
         )
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -134,7 +134,7 @@ async def test_raw_response_create_overload_1(self, client: AsyncAnthropic) -> N
     async def test_method_create_overload_2(self, client: AsyncAnthropic) -> None:
         await client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
         )
@@ -143,7 +143,7 @@ async def test_method_create_overload_2(self, client: AsyncAnthropic) -> None:
     async def test_method_create_with_all_params_overload_2(self, client: AsyncAnthropic) -> None:
         await client.completions.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
             metadata={"user_id": "13803d75-b4b5-4c3e-b2a2-6f21399b021b"},
@@ -157,7 +157,7 @@ async def test_method_create_with_all_params_overload_2(self, client: AsyncAnthr
     async def test_raw_response_create_overload_2(self, client: AsyncAnthropic) -> None:
         response = await client.completions.with_raw_response.create(
             max_tokens_to_sample=256,
-            model="claude-2",
+            model="claude-2.1",
             prompt="\n\nHuman: Hello, world!\n\nAssistant:",
             stream=True,
         )
diff --git a/tests/test_client.py b/tests/test_client.py
index 3beacc06..f474752c 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -2,11 +2,13 @@
 
 from __future__ import annotations
 
+import gc
 import os
 import json
 import asyncio
 import inspect
-from typing import Any, Dict, Union, cast
+import tracemalloc
+from typing import Any, Union, cast
 from unittest import mock
 
 import httpx
@@ -19,7 +21,11 @@
 from anthropic._client import Anthropic, AsyncAnthropic
 from anthropic._models import BaseModel, FinalRequestOptions
 from anthropic._streaming import Stream, AsyncStream
-from anthropic._exceptions import APIResponseValidationError
+from anthropic._exceptions import (
+    APIStatusError,
+    APITimeoutError,
+    APIResponseValidationError,
+)
 from anthropic._base_client import (
     DEFAULT_TIMEOUT,
     HTTPX_DEFAULT_TIMEOUT,
@@ -39,6 +45,18 @@ def _get_params(client: BaseClient[Any, Any]) -> dict[str, str]:
     return dict(url.params)
 
 
+def _low_retry_timeout(*_args: Any, **_kwargs: Any) -> float:
+    return 0.1  # fixed small value; patched over BaseClient._calculate_retry_timeout by the retry tests in this file
+
+
+def _get_open_connections(client: Anthropic | AsyncAnthropic) -> int:
+    """Return how many requests the pool of the client's httpx transport is currently tracking."""
+    transport = client._client._transport
+    assert isinstance(transport, (httpx.HTTPTransport, httpx.AsyncHTTPTransport))
+    pool = transport._pool
+    return len(pool._requests)
+
+
 class TestAnthropic:
     client = Anthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
 
@@ -172,6 +190,67 @@ def test_copy_signature(self) -> None:
             copy_param = copy_signature.parameters.get(name)
             assert copy_param is not None, f"copy() signature is missing the {name} param"
 
+    def test_copy_build_request(self) -> None:
+        """Ensure that building a request on a copied client leaves no allocation behind on every call."""
+        options = FinalRequestOptions(method="get", url="/foo")
+
+        def build_request(options: FinalRequestOptions) -> None:
+            client = self.client.copy()
+            client._build_request(options)
+
+        # ensure that the machinery is warmed up before tracing starts.
+        build_request(options)
+        gc.collect()
+
+        ITERATIONS = 10
+        tracemalloc.start(1000)
+        try:
+            snapshot_before = tracemalloc.take_snapshot()
+            for _ in range(ITERATIONS):
+                build_request(options)
+                gc.collect()
+            snapshot_after = tracemalloc.take_snapshot()
+        finally:
+            # always stop tracing, otherwise a failure here would leave tracemalloc enabled for later tests.
+            tracemalloc.stop()
+
+        def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None:
+            if diff.count == 0:
+                # Avoid false positives by considering only leaks (i.e. allocations that persist).
+                return
+
+            if diff.count % ITERATIONS != 0:
+                # Avoid false positives by considering only leaks that appear per iteration.
+                return
+
+            for frame in diff.traceback:
+                if any(
+                    frame.filename.endswith(fragment)
+                    for fragment in [
+                        # to_raw_response_wrapper leaks through the @functools.wraps() decorator.
+                        #
+                        # removing the decorator fixes the leak for reasons we don't understand.
+                        "anthropic/_response.py",
+                        # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason.
+                        "anthropic/_compat.py",
+                        # Standard library leaks we don't care about.
+                        "/logging/__init__.py",
+                    ]
+                ):
+                    return
+
+            leaks.append(diff)
+
+        leaks: list[tracemalloc.StatisticDiff] = []
+        for diff in snapshot_after.compare_to(snapshot_before, "traceback"):
+            add_leak(leaks, diff)
+        if leaks:
+            for leak in leaks:
+                print("MEMORY LEAK:", leak)
+                for frame in leak.traceback:
+                    print(frame)
+            raise AssertionError()
+
     def test_request_timeout(self) -> None:
         request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
         timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -343,7 +422,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"my_query_param": "Foo"}
 
         # if both `query` and `extra_query` are given, they are merged
@@ -357,7 +436,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"bar": "1", "foo": "2"}
 
         # `extra_query` takes priority over `query` when keys clash
@@ -371,7 +450,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"foo": "2"}
 
     @pytest.mark.respx(base_url=base_url)
@@ -431,6 +510,14 @@ class Model(BaseModel):
         assert isinstance(response, Model)
         assert response.foo == 2
 
+    def test_base_url_setter(self) -> None:
+        """Verify that `base_url` compares equal to the slash-terminated URL after construction and assignment."""
+        client = Anthropic(base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True)
+        assert client.base_url == "https://example.com/from_init/"
+
+        client.base_url = "https://example.com/from_setter"  # type: ignore[assignment]
+        assert client.base_url == "https://example.com/from_setter/"
+
     def test_base_url_env(self) -> None:
         with update_env(ANTHROPIC_BASE_URL="http://localhost:5000/from/env"):
             client = Anthropic(api_key=api_key, _strict_response_validation=True)
@@ -510,7 +597,9 @@ def test_transport_option_is_deprecated(self) -> None:
             DeprecationWarning,
             match="The `transport` argument is deprecated. The `http_client` argument should be passed instead",
         ):
-            transport = httpx.MockTransport(lambda: None)
+            transport = httpx.MockTransport(
+                lambda: None,  # type: ignore
+            )
 
             client = Anthropic(
                 base_url=base_url, api_key=api_key, _strict_response_validation=True, transport=transport
@@ -526,7 +615,9 @@ def test_transport_option_mutually_exclusive_with_http_client(self) -> None:
                         base_url=base_url,
                         api_key=api_key,
                         _strict_response_validation=True,
-                        transport=httpx.MockTransport(lambda: None),
+                        transport=httpx.MockTransport(
+                            lambda: None,  # type: ignore
+                        ),
                         http_client=http_client,
                     )
 
@@ -594,14 +685,6 @@ def test_proxies_option_mutually_exclusive_with_http_client(self) -> None:
                         http_client=http_client,
                     )
 
-    def test_client_del(self) -> None:
-        client = Anthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
-        assert not client.is_closed()
-
-        client.__del__()
-
-        assert client.is_closed()
-
     def test_copied_client_does_not_close_http(self) -> None:
         client = Anthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
         assert not client.is_closed()
@@ -609,9 +692,8 @@ def test_copied_client_does_not_close_http(self) -> None:
         copied = client.copy()
         assert copied is not client
 
-        copied.__del__()
+        del copied
 
-        assert not copied.is_closed()
         assert not client.is_closed()
 
     def test_client_context_manager(self) -> None:
@@ -690,6 +772,44 @@ def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]
 
+    @mock.patch("anthropic._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        """The mocked route always times out; the pool must track no request once the call has failed."""
+        respx_mock.post("/v1/complete").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+        with pytest.raises(APITimeoutError):
+            self.client.post(
+                "/v1/complete",
+                body=dict(
+                    max_tokens_to_sample=300,
+                    model="claude-2.1",
+                    prompt="\n\nHuman:Where can I get a good coffee in my neighbourhood?\n\nAssistant:",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )
+
+        assert _get_open_connections(self.client) == 0
+
+    @mock.patch("anthropic._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        """The mocked route always answers HTTP 500; the pool must track no request once the call has failed."""
+        respx_mock.post("/v1/complete").mock(return_value=httpx.Response(500))
+        with pytest.raises(APIStatusError):
+            self.client.post(
+                "/v1/complete",
+                body=dict(
+                    max_tokens_to_sample=300,
+                    model="claude-2.1",
+                    prompt="\n\nHuman:Where can I get a good coffee in my neighbourhood?\n\nAssistant:",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )
+
+        assert _get_open_connections(self.client) == 0
+
 
 class TestAsyncAnthropic:
     client = AsyncAnthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
@@ -826,6 +946,67 @@ def test_copy_signature(self) -> None:
             copy_param = copy_signature.parameters.get(name)
             assert copy_param is not None, f"copy() signature is missing the {name} param"
 
+    def test_copy_build_request(self) -> None:
+        """Ensure that building a request on a copied client leaves no allocation behind on every call."""
+        options = FinalRequestOptions(method="get", url="/foo")
+
+        def build_request(options: FinalRequestOptions) -> None:
+            client = self.client.copy()
+            client._build_request(options)
+
+        # ensure that the machinery is warmed up before tracing starts.
+        build_request(options)
+        gc.collect()
+
+        ITERATIONS = 10
+        tracemalloc.start(1000)
+        try:
+            snapshot_before = tracemalloc.take_snapshot()
+            for _ in range(ITERATIONS):
+                build_request(options)
+                gc.collect()
+            snapshot_after = tracemalloc.take_snapshot()
+        finally:
+            # always stop tracing, otherwise a failure here would leave tracemalloc enabled for later tests.
+            tracemalloc.stop()
+
+        def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.StatisticDiff) -> None:
+            if diff.count == 0:
+                # Avoid false positives by considering only leaks (i.e. allocations that persist).
+                return
+
+            if diff.count % ITERATIONS != 0:
+                # Avoid false positives by considering only leaks that appear per iteration.
+                return
+
+            for frame in diff.traceback:
+                if any(
+                    frame.filename.endswith(fragment)
+                    for fragment in [
+                        # to_raw_response_wrapper leaks through the @functools.wraps() decorator.
+                        #
+                        # removing the decorator fixes the leak for reasons we don't understand.
+                        "anthropic/_response.py",
+                        # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason.
+                        "anthropic/_compat.py",
+                        # Standard library leaks we don't care about.
+                        "/logging/__init__.py",
+                    ]
+                ):
+                    return
+
+            leaks.append(diff)
+
+        leaks: list[tracemalloc.StatisticDiff] = []
+        for diff in snapshot_after.compare_to(snapshot_before, "traceback"):
+            add_leak(leaks, diff)
+        if leaks:
+            for leak in leaks:
+                print("MEMORY LEAK:", leak)
+                for frame in leak.traceback:
+                    print(frame)
+            raise AssertionError()
+
     async def test_request_timeout(self) -> None:
         request = self.client._build_request(FinalRequestOptions(method="get", url="/foo"))
         timeout = httpx.Timeout(**request.extensions["timeout"])  # type: ignore
@@ -997,7 +1178,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"my_query_param": "Foo"}
 
         # if both `query` and `extra_query` are given, they are merged
@@ -1011,7 +1192,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"bar": "1", "foo": "2"}
 
         # `extra_query` takes priority over `query` when keys clash
@@ -1025,7 +1206,7 @@ def test_request_extra_query(self) -> None:
                 ),
             ),
         )
-        params = cast(Dict[str, str], dict(request.url.params))
+        params = dict(request.url.params)
         assert params == {"foo": "2"}
 
     @pytest.mark.respx(base_url=base_url)
@@ -1085,6 +1266,16 @@ class Model(BaseModel):
         assert isinstance(response, Model)
         assert response.foo == 2
 
+    def test_base_url_setter(self) -> None:
+        """Verify that `base_url` compares equal to the slash-terminated URL after construction and assignment."""
+        client = AsyncAnthropic(
+            base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True
+        )
+        assert client.base_url == "https://example.com/from_init/"
+
+        client.base_url = "https://example.com/from_setter"  # type: ignore[assignment]
+        assert client.base_url == "https://example.com/from_setter/"
+
     def test_base_url_env(self) -> None:
         with update_env(ANTHROPIC_BASE_URL="http://localhost:5000/from/env"):
             client = AsyncAnthropic(api_key=api_key, _strict_response_validation=True)
@@ -1170,7 +1361,9 @@ def test_transport_option_is_deprecated(self) -> None:
             DeprecationWarning,
             match="The `transport` argument is deprecated. The `http_client` argument should be passed instead",
         ):
-            transport = httpx.MockTransport(lambda: None)
+            transport = httpx.MockTransport(
+                lambda: None,  # type: ignore
+            )
 
             client = AsyncAnthropic(
                 base_url=base_url, api_key=api_key, _strict_response_validation=True, transport=transport
@@ -1186,7 +1379,9 @@ async def test_transport_option_mutually_exclusive_with_http_client(self) -> Non
                         base_url=base_url,
                         api_key=api_key,
                         _strict_response_validation=True,
-                        transport=httpx.MockTransport(lambda: None),
+                        transport=httpx.MockTransport(
+                            lambda: None,  # type: ignore
+                        ),
                         http_client=http_client,
                     )
 
@@ -1256,15 +1451,6 @@ async def test_proxies_option_mutually_exclusive_with_http_client(self) -> None:
                         http_client=http_client,
                     )
 
-    async def test_client_del(self) -> None:
-        client = AsyncAnthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
-        assert not client.is_closed()
-
-        client.__del__()
-
-        await asyncio.sleep(0.2)
-        assert client.is_closed()
-
     async def test_copied_client_does_not_close_http(self) -> None:
         client = AsyncAnthropic(base_url=base_url, api_key=api_key, _strict_response_validation=True)
         assert not client.is_closed()
@@ -1272,10 +1458,9 @@ async def test_copied_client_does_not_close_http(self) -> None:
         copied = client.copy()
         assert copied is not client
 
-        copied.__del__()
+        del copied
 
         await asyncio.sleep(0.2)
-        assert not copied.is_closed()
         assert not client.is_closed()
 
     async def test_client_context_manager(self) -> None:
@@ -1357,3 +1542,41 @@ async def test_parse_retry_after_header(self, remaining_retries: int, retry_afte
         options = FinalRequestOptions(method="get", url="/foo", max_retries=3)
         calculated = client._calculate_retry_timeout(remaining_retries, options, headers)
         assert calculated == pytest.approx(timeout, 0.5 * 0.875)  # pyright: ignore[reportUnknownMemberType]
+
+    @mock.patch("anthropic._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/v1/complete").mock(side_effect=httpx.TimeoutException("Test timeout error"))
+
+        with pytest.raises(APITimeoutError):
+            await self.client.post(
+                "/v1/complete",
+                body=dict(
+                    max_tokens_to_sample=300,
+                    model="claude-2.1",
+                    prompt="\n\nHuman:Where can I get a good coffee in my neighbourhood?\n\nAssistant:",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )
+
+        assert _get_open_connections(self.client) == 0
+
+    @mock.patch("anthropic._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> None:
+        respx_mock.post("/v1/complete").mock(return_value=httpx.Response(500))
+
+        with pytest.raises(APIStatusError):
+            await self.client.post(
+                "/v1/complete",
+                body=dict(
+                    max_tokens_to_sample=300,
+                    model="claude-2.1",
+                    prompt="\n\nHuman:Where can I get a good coffee in my neighbourhood?\n\nAssistant:",
+                ),
+                cast_to=httpx.Response,
+                options={"headers": {"X-Stainless-Streamed-Raw-Response": "true"}},
+            )
+
+        assert _get_open_connections(self.client) == 0
diff --git a/tests/test_required_args.py b/tests/test_required_args.py
index c506e4ef..fc774314 100644
--- a/tests/test_required_args.py
+++ b/tests/test_required_args.py
@@ -43,7 +43,7 @@ def foo(*, a: str | None = None) -> str | None:
 def test_multiple_params() -> None:
     @required_args(["a", "b", "c"])
     def foo(a: str = "", *, b: str = "", c: str = "") -> str | None:
-        return a + " " + b + " " + c
+        return f"{a} {b} {c}"
 
     assert foo(a="a", b="b", c="c") == "a b c"
 
diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py
index 620bb806..8a2a9cbd 100644
--- a/tests/test_utils/test_proxy.py
+++ b/tests/test_utils/test_proxy.py
@@ -19,5 +19,5 @@ def test_recursive_proxy() -> None:
     assert repr(proxy) == "RecursiveLazyProxy"
     assert str(proxy) == "RecursiveLazyProxy"
     assert dir(proxy) == []
-    assert getattr(type(proxy), "__name__") == "RecursiveLazyProxy"
+    assert type(proxy).__name__ == "RecursiveLazyProxy"
     assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy"
diff --git a/tests/utils.py b/tests/utils.py
index 348363a5..3a5830b4 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -91,7 +91,7 @@ def assert_matches_type(
                 traceback.print_exc()
                 continue
 
-        assert False, "Did not match any variants"
+        raise AssertionError("Did not match any variants")
     elif issubclass(origin, BaseModel):
         assert isinstance(value, type_)
         assert assert_matches_model(type_, cast(Any, value), path=path)