Skip to content

Commit

Permalink
feat: enable ModuleType and str for backend in .lazy() method (#…
Browse files Browse the repository at this point in the history
…1914)

feat: enable ModuleType and str for backend in .lazy() method
  • Loading branch information
FBruzzesi authored Feb 2, 2025
1 parent 5943b3a commit adb5a4a
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 11 deletions.
23 changes: 19 additions & 4 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,17 +501,31 @@ def __arrow_c_stream__(self: Self, requested_schema: object | None = None) -> ob
pa_table = self.to_arrow()
return pa_table.__arrow_c_stream__(requested_schema=requested_schema)

def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any]:
def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
If `backend` is specified, then a conversion between different backends
might be triggered.
If a library does not support lazy execution and `backend` is not specified,
then this will only restrict the API to lazy-only operations. This is useful
if you want to ensure that you write dataframe-agnostic code which all has
the possibility of running entirely lazily.
Arguments:
backend: specifies which lazy backend to collect to. This will be the underlying
backend for the resulting Narwhals LazyFrame.
`backend` can be specified in various ways:
- As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB`
or `POLARS`.
- As a string: `"dask"`, `"duckdb"` or `"polars"`
- Directly as a module `dask.dataframe`, `duckdb` or `polars`.
backend: The (lazy) implementation to convert to. If not specified, and the
given library does not support lazy execution, then this will restrict
the API to lazy-only operations.
Expand Down Expand Up @@ -552,19 +566,20 @@ def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any]
|└───────┴───────┘ |
└──────────────────┘
"""
lazy_backend = None if backend is None else Implementation.from_backend(backend)
supported_lazy_backends = (
Implementation.DASK,
Implementation.DUCKDB,
Implementation.POLARS,
)
if backend is not None and backend not in supported_lazy_backends:
if lazy_backend is not None and lazy_backend not in supported_lazy_backends:
msg = (
"Not-supported backend."
f"\n\nExpected one of {supported_lazy_backends} or `None`, got {backend}"
f"\n\nExpected one of {supported_lazy_backends} or `None`, got {lazy_backend}"
)
raise ValueError(msg)
return self._lazyframe(
self._compliant_frame.lazy(backend=backend),
self._compliant_frame.lazy(backend=lazy_backend),
level="lazy",
)

Expand Down
16 changes: 15 additions & 1 deletion narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,31 @@ def __getitem__(self: Self, item: tuple[slice, slice]) -> Self: ...
def __getitem__(self: Self, item: Any) -> Any:
return super().__getitem__(item)

def lazy(self: Self, *, backend: Implementation | None = None) -> LazyFrame[Any]:
def lazy(
self: Self,
*,
backend: ModuleType | Implementation | str | None = None,
) -> LazyFrame[Any]:
"""Restrict available API methods to lazy-only ones.
If `backend` is specified, then a conversion between different backends
might be triggered.
If a library does not support lazy execution and `backend` is not specified,
then this will only restrict the API to lazy-only operations. This is useful
if you want to ensure that you write dataframe-agnostic code which all has
the possibility of running entirely lazily.
Arguments:
backend: specifies which lazy backend to collect to. This will be the underlying
backend for the resulting Narwhals LazyFrame.
`backend` can be specified in various ways:
- As `Implementation.<BACKEND>` with `BACKEND` being `DASK`, `DUCKDB`
or `POLARS`.
- As a string: `"dask"`, `"duckdb"` or `"polars"`
- Directly as a module `dask.dataframe`, `duckdb` or `polars`.
backend: The (lazy) implementation to convert to. If not specified, and the
given library does not support lazy execution, then this will restrict
the API to lazy-only operations.
Expand Down
44 changes: 38 additions & 6 deletions tests/frame/lazy_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

from typing import TYPE_CHECKING

import pandas as pd
import polars as pl
import pyarrow as pa
import pytest

import narwhals as nw
import narwhals.stable.v1 as nw_v1
from narwhals.dependencies import get_cudf
from narwhals.dependencies import get_modin
from narwhals.utils import Implementation

if TYPE_CHECKING:
Expand All @@ -15,33 +20,60 @@
data = {"a": [1, 2, 3]}


def test_lazy(constructor_eager: ConstructorEager) -> None:
def test_lazy_to_default(constructor_eager: ConstructorEager) -> None:
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.lazy()
assert isinstance(result, nw.LazyFrame)
df = nw_v1.from_native(constructor_eager(data), eager_only=True)
result = df.lazy()
assert isinstance(result, nw_v1.LazyFrame)

if "polars" in str(constructor_eager):
expected_cls = pl.LazyFrame
elif "pandas" in str(constructor_eager):
expected_cls = pd.DataFrame
elif "modin" in str(constructor_eager):
mpd = get_modin()
expected_cls = mpd.DataFrame
elif "cudf" in str(constructor_eager):
cudf = get_cudf()
expected_cls = cudf.DataFrame
else: # pyarrow
expected_cls = pa.Table

assert isinstance(result.to_native(), expected_cls)


@pytest.mark.parametrize(
"backend", [Implementation.POLARS, Implementation.DUCKDB, Implementation.DASK]
"backend",
[
Implementation.POLARS,
Implementation.DUCKDB,
Implementation.DASK,
"polars",
"duckdb",
"dask",
],
)
def test_lazy_backend(
request: pytest.FixtureRequest,
constructor_eager: ConstructorEager,
backend: Implementation,
backend: Implementation | str,
) -> None:
if "modin" in str(constructor_eager):
request.applymarker(pytest.mark.xfail)
if backend is Implementation.DASK:
if (backend is Implementation.DASK) or backend == "dask":
pytest.importorskip("dask")
if backend is Implementation.DUCKDB:
if (backend is Implementation.DUCKDB) or backend == "duckdb":
pytest.importorskip("duckdb")
df = nw.from_native(constructor_eager(data), eager_only=True)
result = df.lazy(backend=backend)
assert isinstance(result, nw.LazyFrame)
assert result.implementation == backend

expected = (
Implementation.from_string(backend) if isinstance(backend, str) else backend
)
assert result.implementation == expected


def test_lazy_backend_invalid(constructor_eager: ConstructorEager) -> None:
Expand Down

0 comments on commit adb5a4a

Please sign in to comment.