Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust,python): Add config to specify GPU polars as the default engine #20717

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/polars-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ pub fn verbose() -> bool {
std::env::var("POLARS_VERBOSE").as_deref().unwrap_or("") == "1"
}

/// Returns the name of the engine that should be used by default.
///
/// The value is read from the `POLARS_DEFAULT_ENGINE` environment variable and
/// returned verbatim (it is not validated here). If the variable cannot be
/// read — it is unset or does not contain valid Unicode — `"cpu"` is returned.
pub fn get_default_engine() -> String {
    match std::env::var("POLARS_DEFAULT_ENGINE") {
        Ok(engine) => engine,
        // Any lookup failure falls back to the CPU engine.
        Err(_) => String::from("cpu"),
    }
}

Comment on lines +36 to +39
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question: why even change polars-core when changing the engine is a Python-specific feature?

pub fn get_file_prefetch_size() -> usize {
std::env::var("POLARS_PREFETCH_SIZE")
.map(|s| s.parse::<usize>().expect("integer"))
Expand Down
6 changes: 6 additions & 0 deletions crates/polars-python/src/functions/utils.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
use polars::prelude::_set_check_length;
use polars_core::config::get_default_engine as _get_default_engine;
use pyo3::prelude::*;

/// Python-exposed wrapper that forwards `check` to `_set_check_length`.
#[pyfunction]
pub fn check_length(check: bool) {
    // NOTE(review): the safety contract of `_set_check_length` is not visible
    // from this file — confirm against its definition in `polars::prelude`.
    unsafe {
        _set_check_length(check);
    }
}

/// Python-exposed accessor for the default engine name.
///
/// Delegates to `polars_core::config::get_default_engine` (imported here as
/// `_get_default_engine`) and wraps its result. As written, this always
/// returns `Ok(Some(..))`.
#[pyfunction]
pub fn get_default_engine() -> PyResult<Option<String>> {
    let engine = _get_default_engine();
    Ok(Some(engine))
}
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Config options
Config.set_ascii_tables
Config.set_auto_structify
Config.set_decimal_separator
Config.set_default_engine
Config.set_float_precision
Config.set_fmt_float
Config.set_fmt_str_lengths
Expand Down
38 changes: 38 additions & 0 deletions py-polars/polars/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
"POLARS_TABLE_WIDTH",
"POLARS_VERBOSE",
"POLARS_MAX_EXPR_DEPTH",
"POLARS_DEFAULT_ENGINE",
}

# vars that set the rust env directly should declare themselves here as the Config
Expand Down Expand Up @@ -140,6 +141,7 @@ class ConfigParameters(TypedDict, total=False):
set_trim_decimal_zeros: bool | None
set_verbose: bool | None
set_expr_depth_warning: int
set_default_engine: Literal["cpu", "gpu"] | None


class Config(contextlib.ContextDecorator):
Expand Down Expand Up @@ -1450,3 +1452,39 @@ def set_expr_depth_warning(cls, limit: int) -> type[Config]:

os.environ["POLARS_MAX_EXPR_DEPTH"] = str(limit)
return cls

@classmethod
def set_default_engine(cls, engine: Literal["cpu", "gpu"] | None) -> type[Config]:
    """
    Set which engine to use by default.

    The choice is stored in the ``POLARS_DEFAULT_ENGINE`` environment variable.

    Parameters
    ----------
    engine : {None, 'cpu', 'gpu'}
        The default engine to use on all ``.collect()`` calls. Passing ``None``
        removes the ``POLARS_DEFAULT_ENGINE`` environment variable.

    Raises
    ------
    ValueError
        If `engine` is not one of ``"cpu"``, ``"gpu"``, or ``None``.

    Examples
    --------
    >>> pl.Config.set_default_engine("gpu")  # doctest: +SKIP
    >>> pl.Config.set_verbose(True)  # doctest: +SKIP
    >>> lf = pl.LazyFrame({"v": [1, 2, 3], "v2": [4, 5, 6]})  # doctest: +SKIP
    >>> lf.max().collect()  # doctest: +SKIP
    run PythonScanExec
    run UdfExec
    run StackExec
    shape: (1, 2)
    ┌─────┬─────┐
    │ v   ┆ v2  │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 3   ┆ 6   │
    └─────┴─────┘
    """
    if engine is None:
        os.environ.pop("POLARS_DEFAULT_ENGINE", None)
        return cls

    # Reject unknown values up front: the env var is only compared against
    # known engine names later, so a typo would otherwise be silently ignored.
    if engine not in ("cpu", "gpu"):
        msg = f"invalid engine {engine!r}; expected 'cpu', 'gpu', or None"
        raise ValueError(msg)

    os.environ["POLARS_DEFAULT_ENGINE"] = engine
    return cls
3 changes: 3 additions & 0 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
from polars.lazyframe.engine_config import GPUEngine
from polars.lazyframe.group_by import LazyGroupBy
from polars.lazyframe.in_process import InProcessQuery
from polars.polars import get_default_engine
from polars.schema import Schema
from polars.selectors import by_dtype, expand_selector

Expand Down Expand Up @@ -2003,6 +2004,8 @@ def collect(
if not (is_config_obj or engine in ("cpu", "gpu")):
msg = f"Invalid engine argument {engine=}"
raise ValueError(msg)
if get_default_engine() == "gpu": # pragma: no cover
is_gpu = True # pragma: no cover
if (streaming or background or new_streaming) and is_gpu:
issue_warning(
"GPU engine does not support streaming or background collection, "
Expand Down
2 changes: 2 additions & 0 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ fn polars(py: Python, m: &Bound<PyModule>) -> PyResult<()> {
// Functions: other
m.add_wrapped(wrap_pyfunction!(functions::check_length))
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::get_default_engine))
.unwrap();

#[cfg(feature = "sql")]
m.add_wrapped(wrap_pyfunction!(functions::sql_expr))
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,7 @@ def test_warn_unstable(recwarn: pytest.WarningsRecorder) -> None:
@pytest.mark.parametrize(
("environment_variable", "config_setting", "value", "expected"),
[
("POLARS_DEFAULT_ENGINE", "set_default_engine", "gpu", "gpu"),
("POLARS_AUTO_STRUCTIFY", "set_auto_structify", True, "1"),
("POLARS_FMT_MAX_COLS", "set_tbl_cols", 12, "12"),
("POLARS_FMT_MAX_ROWS", "set_tbl_rows", 3, "3"),
Expand Down
Loading