Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise NotImplementedError in to_datetime if Z (or tz component) in string #14074

Merged
merged 7 commits into from
Sep 14, 2023
15 changes: 10 additions & 5 deletions python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,10 @@ def infer_format(element: str, **kwargs) -> str:
fmt = _guess_datetime_format(element, **kwargs)

if fmt is not None:
if "%z" in fmt or "%Z" in fmt:
raise NotImplementedError(
"cuDF does not yet support timezone-aware datetimes"
)
return fmt

element_parts = element.split(".")
Expand All @@ -651,11 +655,12 @@ def infer_format(element: str, **kwargs) -> str:
raise ValueError("Unable to infer the timestamp format from the data")

if len(second_parts) > 1:
# "Z" indicates Zulu time(widely used in aviation) - Which is
# UTC timezone that currently cudf only supports. Having any other
# unsupported timezone will let the code fail below
# with a ValueError.
second_parts.remove("Z")
# We may have a non-digit, timezone-like component
# like Z, UTC-3, +01:00
if any(re.search(r"\D", part) for part in second_parts):
raise NotImplementedError(
"cuDF does not yet support timezone-aware datetimes"
)
second_part = "".join(second_parts[1:])

if len(second_part) > 1:
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/tests/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1278,12 +1278,8 @@ def test_datetime_reductions(data, op, dtype):
@pytest.mark.parametrize("dtype", DATETIME_TYPES)
def test_datetime_infer_format(data, dtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test verifies that our string-to-time conversions work properly down to nanosecond resolution. Can you add another set of tests that drops the timezone="UTC" parameter?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I'll add that testing back into this test.

sr = cudf.Series(data)
psr = pd.Series(data)

expected = psr.astype(dtype)
actual = sr.astype(dtype)

assert_eq(expected, actual)
with pytest.raises(NotImplementedError):
sr.astype(dtype)


def test_dateoffset_instance_subclass_check():
Expand Down Expand Up @@ -2158,6 +2154,12 @@ def test_format_timezone_not_implemented(code):
)


@pytest.mark.parametrize("tz", ["Z", "UTC-3", "+01:00"])
def test_no_format_timezone_not_implemented(tz):
    """Check that ``cudf.to_datetime`` rejects timezone-suffixed strings.

    No explicit ``format`` is passed, so the call has to work out the
    format on its own. Each parametrized ``tz`` suffix is appended to a
    fixed timestamp, and the call is expected to raise
    ``NotImplementedError``.
    """
    timestamp = f"2020-01-01 00:00:00{tz}"
    with pytest.raises(NotImplementedError):
        cudf.to_datetime([timestamp])


@pytest.mark.parametrize("arg", [True, False])
def test_args_not_datetime_typerror(arg):
with pytest.raises(TypeError):
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,12 @@ def test_string_astype(dtype):
data = ["True", "False", "True", "False", "False"]
elif dtype.startswith("datetime64"):
data = [
"2019-06-04T00:00:00Z",
"2019-06-04T12:12:12Z",
"2019-06-03T00:00:00Z",
"2019-05-04T00:00:00Z",
"2018-06-04T00:00:00Z",
"1922-07-21T01:02:03Z",
"2019-06-04T00:00:00",
"2019-06-04T12:12:12",
"2019-06-03T00:00:00",
"2019-05-04T00:00:00",
"2018-06-04T00:00:00",
"1922-07-21T01:02:03",
]
elif dtype == "str" or dtype == "object":
data = ["ab", "cd", "ef", "gh", "ij"]
Expand Down