From fbd553fe0a9adc31abb5e9dadb2f801d833cb3c4 Mon Sep 17 00:00:00 2001 From: PrimozGodec Date: Fri, 18 Aug 2023 12:40:44 +0200 Subject: [PATCH] CSV Import - Change datetime format parsing --- Orange/widgets/data/owcsvimport.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/Orange/widgets/data/owcsvimport.py b/Orange/widgets/data/owcsvimport.py index f49806a30e6..39ba064df71 100644 --- a/Orange/widgets/data/owcsvimport.py +++ b/Orange/widgets/data/owcsvimport.py @@ -1627,19 +1627,6 @@ def guess_data_type(col: pd.Series) -> pd.Series: ------- Data column with correct dtype """ - def parse_dates(s): - """ - This is an extremely fast approach to datetime parsing. - For large data, the same dates are often repeated. Rather than - re-parse these, we store all unique dates, parse them, and - use a lookup to convert all dates. - """ - try: - dates = {date: pd.to_datetime(date) for date in s.unique()} - except ValueError: - return None - return s.map(dates) - if pdtypes.is_numeric_dtype(col): unique_values = col.unique() if len(unique_values) <= 2 and ( @@ -1647,13 +1634,12 @@ def parse_dates(s): or len(np.setdiff1d(unique_values, [1, 2])) == 0): return col.astype("category") else: # object - # try parse as date - if None not a date - parsed_col = parse_dates(col) - if parsed_col is not None: - return parsed_col - unique_values = col.unique() - if len(unique_values) < 100 and len(unique_values) < len(col)**0.7: - return col.astype("category") + try: + return pd.to_datetime(col) + except ValueError: + unique_values = col.unique() + if len(unique_values) < 100 and len(unique_values) < len(col)**0.7: + return col.astype("category") return col