From f3906ece5ea5db10424f042cd2ebd80eb2c4591a Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Wed, 15 Jan 2025 02:33:52 -0500 Subject: [PATCH] test: Add tests for various open issues (#20720) --- .../unit/constructors/test_constructors.py | 25 +++++++++++++++++++ .../tests/unit/datatypes/test_decimal.py | 15 +++++++++++ py-polars/tests/unit/io/test_lazy_csv.py | 9 +++++++ .../unit/streaming/test_streaming_join.py | 13 ++++++++++ py-polars/tests/unit/test_empty.py | 7 ++++++ 5 files changed, 69 insertions(+) diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index 0e169fae4fde..dbeb26484bd0 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -1182,6 +1182,31 @@ def test_from_dicts_schema_columns_do_not_match() -> None: assert_frame_equal(result, expected) +def test_from_dicts_infer_integer_types() -> None: + data = [ + { + "a": 2**7 - 1, + "b": 2**15 - 1, + "c": 2**31 - 1, + "d": 2**63 - 1, + "e": 2**127 - 1, + } + ] + result = pl.from_dicts(data).schema + # all values inferred as i64 except for values too large for i64 + expected = { + "a": pl.Int64, + "b": pl.Int64, + "c": pl.Int64, + "d": pl.Int64, + "e": pl.Int128, + } + assert result == expected + + with pytest.raises(OverflowError): + pl.from_dicts([{"too_big": 2**127}]) + + def test_from_rows_dtype() -> None: # 50 is the default inference length # 5182 diff --git a/py-polars/tests/unit/datatypes/test_decimal.py b/py-polars/tests/unit/datatypes/test_decimal.py index ec0f5eaf9be9..e3543b62cc07 100644 --- a/py-polars/tests/unit/datatypes/test_decimal.py +++ b/py-polars/tests/unit/datatypes/test_decimal.py @@ -606,3 +606,18 @@ def test_decimal_horizontal_20482() -> None: "max": [D("123.000000"), D("234.000000")], "sum": [D("246.000000"), D("468.000000")], } + + +def test_shift_over_12957() -> None: + df = pl.DataFrame( + { + "a": [1, 1, 2, 2], + "b": [D("1.1"), D("1.1"), D("2.2"), D("2.2")], + } + ) + result = df.select( + x=pl.col("b").shift(1).over("a"), + y=pl.col("a").shift(1).over("b"), + ) + assert result["x"].to_list() == [None, D("1.1"), None, D("2.2")] + assert result["y"].to_list() == [None, 1, None, 2] diff --git a/py-polars/tests/unit/io/test_lazy_csv.py b/py-polars/tests/unit/io/test_lazy_csv.py index aee85eb53a0d..a014148ea25c 100644 --- a/py-polars/tests/unit/io/test_lazy_csv.py +++ b/py-polars/tests/unit/io/test_lazy_csv.py @@ -1,5 +1,6 @@ from __future__ import annotations +import io import tempfile from collections import OrderedDict from pathlib import Path @@ -438,3 +439,11 @@ def test_scan_csv_with_column_names_nonexistent_file() -> None: # Upon collection, it should fail with pytest.raises(FileNotFoundError): result.collect() + + +def test_select_nonexistent_column() -> None: + csv = "a\n1" + f = io.StringIO(csv) + + with pytest.raises(pl.exceptions.ColumnNotFoundError): + pl.scan_csv(f).select("b").collect() diff --git a/py-polars/tests/unit/streaming/test_streaming_join.py b/py-polars/tests/unit/streaming/test_streaming_join.py index 214fcbfc01f6..9edea0ae8b3f 100644 --- a/py-polars/tests/unit/streaming/test_streaming_join.py +++ b/py-polars/tests/unit/streaming/test_streaming_join.py @@ -308,3 +308,16 @@ def test_flush_join_and_operation_19040() -> None: "B": [None, 1], "C": [None, 1], } + + +def test_full_coalesce_join_and_rename_15583() -> None: + df1 = pl.LazyFrame({"a": [1, 2, 3]}) + df2 = pl.LazyFrame({"a": [3, 4, 5]}) + + result = ( + df1.join(df2, on="a", how="full", coalesce=True) + .select(pl.all().name.map(lambda c: c.upper())) + .sort("A") + .collect(streaming=True) + ) + assert result["A"].to_list() == [1, 2, 3, 4, 5] diff --git a/py-polars/tests/unit/test_empty.py b/py-polars/tests/unit/test_empty.py index 0a5e2b4aedf4..6cedcaa59b54 100644 --- a/py-polars/tests/unit/test_empty.py +++ b/py-polars/tests/unit/test_empty.py @@ -158,3 +158,10 @@ def test_empty_input_expansion() -> None: pl.col("B").sort_by(pl.struct(pl.exclude("A", "B"))) ) ) + + +def test_empty_list_15523() -> None: + s = pl.Series("", [["a"], []], dtype=pl.List) + assert s.dtype == pl.List(pl.String) + s = pl.Series("", [[], ["a"]], dtype=pl.List) + assert s.dtype == pl.List(pl.String)