From 269ce9b2692e3192e324b6cc1e005283e890d5ee Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 14 Jan 2025 20:16:05 +0000 Subject: [PATCH 1/3] [FEA] Add config option to specify GPU polars as the default engine --- crates/polars-core/src/config.rs | 7 ++++ crates/polars-python/src/functions/utils.rs | 6 ++++ py-polars/polars/config.py | 39 +++++++++++++++++++++ py-polars/polars/lazyframe/frame.py | 3 ++ py-polars/src/lib.rs | 2 ++ py-polars/tests/unit/test_config.py | 1 + 6 files changed, 58 insertions(+) diff --git a/crates/polars-core/src/config.rs b/crates/polars-core/src/config.rs index 919810811188..6ee193263857 100644 --- a/crates/polars-core/src/config.rs +++ b/crates/polars-core/src/config.rs @@ -33,6 +33,13 @@ pub fn verbose() -> bool { std::env::var("POLARS_VERBOSE").as_deref().unwrap_or("") == "1" } +pub fn use_gpu_engine() -> bool { + std::env::var("POLARS_DEFAULT_ENGINE") + .as_deref() + .unwrap_or("") + == "gpu" +} + pub fn get_file_prefetch_size() -> usize { std::env::var("POLARS_PREFETCH_SIZE") .map(|s| s.parse::().expect("integer")) diff --git a/crates/polars-python/src/functions/utils.rs b/crates/polars-python/src/functions/utils.rs index fd57272de44a..81b1b89d5fbb 100644 --- a/crates/polars-python/src/functions/utils.rs +++ b/crates/polars-python/src/functions/utils.rs @@ -1,7 +1,13 @@ use polars::prelude::_set_check_length; +use polars_core::config::use_gpu_engine as _use_gpu_engine; use pyo3::prelude::*; #[pyfunction] pub fn check_length(check: bool) { unsafe { _set_check_length(check) } } + +#[pyfunction] +pub fn use_gpu_engine() -> PyResult> { + Ok(Some(_use_gpu_engine())) +} diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index fd31a1e425d6..bd4a3ef099cd 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -71,6 +71,7 @@ "POLARS_TABLE_WIDTH", "POLARS_VERBOSE", "POLARS_MAX_EXPR_DEPTH", + "POLARS_DEFAULT_ENGINE", } # vars that set the rust env directly should declare themselves here as the Config @@ -140,6 +141,7 @@ class ConfigParameters(TypedDict, total=False): set_trim_decimal_zeros: bool | None set_verbose: bool | None set_expr_depth_warning: int + set_gpu_engine: bool | None class Config(contextlib.ContextDecorator): @@ -1450,3 +1452,40 @@ def set_expr_depth_warning(cls, limit: int) -> type[Config]: os.environ["POLARS_MAX_EXPR_DEPTH"] = str(limit) return cls + + @classmethod + def set_gpu_engine(cls, active: bool | None = None) -> type[Config]: + """ + Set the default engine to use the GPU. + + Parameters + ---------- + engine : bool + Whether or not to default to GPU polars on .collect() calls + + Examples + -------- + >>> pl.Config.set_gpu_engine(True) # doctest: +SKIP + >>> pl.Config.set_verbose(True) # doctest: +SKIP + >>> lf = pl.DataFrame({"v": [1, 2, 3], "v2": [4, 5, 6]}) # doctest: +SKIP + >>> lf.max().collect() # doctest: +SKIP + run PythonScanExec + run UdfExec + run StackExec + shape: (3, 2) + ┌─────┬─────┐ + │ v ┆ v2 │ + │ --- ┆ --- │ + │ i64 ┆ i64 │ + ╞═════╪═════╡ + │ 1 ┆ 4 │ + │ 2 ┆ 5 │ + │ 3 ┆ 6 │ + └─────┴─────┘ + """ + if active is None: + os.environ.pop("POLARS_DEFAULT_ENGINE", None) + else: + engine = "gpu" if active else "cpu" + os.environ["POLARS_DEFAULT_ENGINE"] = engine + return cls diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 5453daa3995c..834baef6833e 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -81,6 +81,7 @@ from polars.lazyframe.engine_config import GPUEngine from polars.lazyframe.group_by import LazyGroupBy from polars.lazyframe.in_process import InProcessQuery +from polars.polars import use_gpu_engine from polars.schema import Schema from polars.selectors import by_dtype, expand_selector @@ -2003,6 +2004,8 @@ def collect( if not (is_config_obj or engine in ("cpu", "gpu")): msg = f"Invalid engine argument {engine=}" raise ValueError(msg) + if use_gpu_engine(): + is_gpu = True if (streaming or background or new_streaming) and is_gpu: issue_warning( "GPU engine does not support streaming or background collection, " diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 381a56dd7153..30ddeae06476 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -220,6 +220,8 @@ fn polars(py: Python, m: &Bound) -> PyResult<()> { // Functions: other m.add_wrapped(wrap_pyfunction!(functions::check_length)) .unwrap(); + m.add_wrapped(wrap_pyfunction!(functions::use_gpu_engine)) + .unwrap(); #[cfg(feature = "sql")] m.add_wrapped(wrap_pyfunction!(functions::sql_expr)) diff --git a/py-polars/tests/unit/test_config.py b/py-polars/tests/unit/test_config.py index 9535f11b3825..5a1492f9942f 100644 --- a/py-polars/tests/unit/test_config.py +++ b/py-polars/tests/unit/test_config.py @@ -965,6 +965,7 @@ def test_warn_unstable(recwarn: pytest.WarningsRecorder) -> None: @pytest.mark.parametrize( ("environment_variable", "config_setting", "value", "expected"), [ + ("POLARS_DEFAULT_ENGINE", "set_gpu_engine", True, "gpu"), ("POLARS_AUTO_STRUCTIFY", "set_auto_structify", True, "1"), ("POLARS_FMT_MAX_COLS", "set_tbl_cols", 12, "12"), ("POLARS_FMT_MAX_ROWS", "set_tbl_rows", 3, "3"), From 93dba8685d28abb2699cbc236c07c779d82afe1b Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 15 Jan 2025 19:43:41 +0000 Subject: [PATCH 2/3] address review --- crates/polars-core/src/config.rs | 7 ++----- crates/polars-python/src/functions/utils.rs | 6 +++--- py-polars/polars/config.py | 13 ++++++------- py-polars/polars/lazyframe/frame.py | 4 ++-- py-polars/src/lib.rs | 2 +- py-polars/tests/unit/test_config.py | 2 +- 6 files changed, 15 insertions(+), 19 deletions(-) diff --git a/crates/polars-core/src/config.rs b/crates/polars-core/src/config.rs index 6ee193263857..15672b69ace9 100644 --- a/crates/polars-core/src/config.rs +++ b/crates/polars-core/src/config.rs @@ -33,11 +33,8 @@ pub fn verbose() -> bool { std::env::var("POLARS_VERBOSE").as_deref().unwrap_or("") == "1" } -pub fn use_gpu_engine() -> bool { - std::env::var("POLARS_DEFAULT_ENGINE") - .as_deref() - .unwrap_or("") - == "gpu" +pub fn get_default_engine() -> String { + std::env::var("POLARS_DEFAULT_ENGINE").unwrap_or_else(|_| "cpu".to_string()) } pub fn get_file_prefetch_size() -> usize { diff --git a/crates/polars-python/src/functions/utils.rs b/crates/polars-python/src/functions/utils.rs index 81b1b89d5fbb..9f7828d648b7 100644 --- a/crates/polars-python/src/functions/utils.rs +++ b/crates/polars-python/src/functions/utils.rs @@ -1,5 +1,5 @@ use polars::prelude::_set_check_length; -use polars_core::config::use_gpu_engine as _use_gpu_engine; +use polars_core::config::get_default_engine as _get_default_engine; use pyo3::prelude::*; #[pyfunction] @@ -8,6 +8,6 @@ pub fn check_length(check: bool) { } #[pyfunction] -pub fn use_gpu_engine() -> PyResult> { - Ok(Some(_use_gpu_engine())) +pub fn get_default_engine() -> PyResult> { + Ok(Some(_get_default_engine())) } diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index bd4a3ef099cd..ba5d295aa27d 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -141,7 +141,7 @@ class ConfigParameters(TypedDict, total=False): set_trim_decimal_zeros: bool | None set_verbose: bool | None set_expr_depth_warning: int - set_gpu_engine: bool | None + set_default_engine: Literal["cpu", "gpu"] | None class Config(contextlib.ContextDecorator): @@ -1454,18 +1454,18 @@ def set_expr_depth_warning(cls, limit: int) -> type[Config]: return cls @classmethod - def set_gpu_engine(cls, active: bool | None = None) -> type[Config]: + def set_default_engine(cls, engine: Literal["cpu", "gpu"] | None) -> type[Config]: """ Set the default engine to use the GPU. Parameters ---------- - engine : bool - Whether or not to default to GPU polars on .collect() calls + engine : Literal["cpu", "gpu"] + The default engine to use on all .collect() calls Examples -------- - >>> pl.Config.set_gpu_engine(True) # doctest: +SKIP + >>> pl.Config.set_default_engine("gpu") # doctest: +SKIP >>> pl.Config.set_verbose(True) # doctest: +SKIP >>> lf = pl.DataFrame({"v": [1, 2, 3], "v2": [4, 5, 6]}) # doctest: +SKIP >>> lf.max().collect() # doctest: +SKIP @@ -1483,9 +1483,8 @@ def set_gpu_engine(cls, active: bool | None = None) -> type[Config]: │ 3 ┆ 6 │ └─────┴─────┘ """ - if active is None: + if engine is None: os.environ.pop("POLARS_DEFAULT_ENGINE", None) else: - engine = "gpu" if active else "cpu" os.environ["POLARS_DEFAULT_ENGINE"] = engine return cls diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 86ebb3f02dab..4c7f866c6e8e 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -81,7 +81,7 @@ from polars.lazyframe.engine_config import GPUEngine from polars.lazyframe.group_by import LazyGroupBy from polars.lazyframe.in_process import InProcessQuery -from polars.polars import use_gpu_engine +from polars.polars import get_default_engine from polars.schema import Schema from polars.selectors import by_dtype, expand_selector @@ -2004,7 +2004,7 @@ def collect( if not (is_config_obj or engine in ("cpu", "gpu")): msg = f"Invalid engine argument {engine=}" raise ValueError(msg) - if use_gpu_engine(): + if get_default_engine() == "gpu": is_gpu = True if (streaming or background or new_streaming) and is_gpu: issue_warning( diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 30ddeae06476..5fcb6cdf3686 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -220,7 +220,7 @@ fn polars(py: Python, m: &Bound) -> PyResult<()> { // Functions: other m.add_wrapped(wrap_pyfunction!(functions::check_length)) .unwrap(); - m.add_wrapped(wrap_pyfunction!(functions::use_gpu_engine)) + m.add_wrapped(wrap_pyfunction!(functions::get_default_engine)) .unwrap(); #[cfg(feature = "sql")] diff --git a/py-polars/tests/unit/test_config.py b/py-polars/tests/unit/test_config.py index 5a1492f9942f..2b4b3910f284 100644 --- a/py-polars/tests/unit/test_config.py +++ b/py-polars/tests/unit/test_config.py @@ -965,7 +965,7 @@ def test_warn_unstable(recwarn: pytest.WarningsRecorder) -> None: @pytest.mark.parametrize( ("environment_variable", "config_setting", "value", "expected"), [ - ("POLARS_DEFAULT_ENGINE", "set_gpu_engine", True, "gpu"), + ("POLARS_DEFAULT_ENGINE", "set_default_engine", "gpu", "gpu"), ("POLARS_AUTO_STRUCTIFY", "set_auto_structify", True, "1"), ("POLARS_FMT_MAX_COLS", "set_tbl_cols", 12, "12"), ("POLARS_FMT_MAX_ROWS", "set_tbl_rows", 3, "3"), From c4f03e97d52d617b93f664750737ae844b12d73e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 15 Jan 2025 20:32:38 +0000 Subject: [PATCH 3/3] update config docs, code cov --- py-polars/docs/source/reference/config.rst | 1 + py-polars/polars/config.py | 2 +- py-polars/polars/lazyframe/frame.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/py-polars/docs/source/reference/config.rst b/py-polars/docs/source/reference/config.rst index c190bfde5d3d..38fbeecd500c 100644 --- a/py-polars/docs/source/reference/config.rst +++ b/py-polars/docs/source/reference/config.rst @@ -12,6 +12,7 @@ Config options Config.set_ascii_tables Config.set_auto_structify Config.set_decimal_separator + Config.set_default_engine Config.set_float_precision Config.set_fmt_float Config.set_fmt_str_lengths diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index ba5d295aa27d..51205684a195 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -1456,7 +1456,7 @@ def set_expr_depth_warning(cls, limit: int) -> type[Config]: @classmethod def set_default_engine(cls, engine: Literal["cpu", "gpu"] | None) -> type[Config]: """ - Set the default engine to use the GPU. + Set which engine to use by default. Parameters ---------- diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 4c7f866c6e8e..ebfb95bd7c10 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -2004,8 +2004,8 @@ def collect( if not (is_config_obj or engine in ("cpu", "gpu")): msg = f"Invalid engine argument {engine=}" raise ValueError(msg) - if get_default_engine() == "gpu": - is_gpu = True + if get_default_engine() == "gpu": # pragma: no cover + is_gpu = True # pragma: no cover if (streaming or background or new_streaming) and is_gpu: issue_warning( "GPU engine does not support streaming or background collection, "