From d56950926ac3fc428b870600b6d7ff3235b48ea7 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 14:37:37 -0500 Subject: [PATCH 1/7] adjust tests to new frequency conventions and address some more downcasting warnings --- rdtools/analysis_chains.py | 11 ++++----- rdtools/availability.py | 2 +- rdtools/test/analysis_chains_test.py | 8 +++---- rdtools/test/degradation_test.py | 31 ++++++++++++++------------ rdtools/test/energy_from_power_test.py | 2 +- rdtools/test/filtering_test.py | 18 ++++++++------- 6 files changed, 39 insertions(+), 33 deletions(-) diff --git a/rdtools/analysis_chains.py b/rdtools/analysis_chains.py index 6c28b370..f889cd09 100644 --- a/rdtools/analysis_chains.py +++ b/rdtools/analysis_chains.py @@ -562,7 +562,7 @@ def _call_clearsky_filter(filter_string): warnings.warn( "ad_hoc_filter contains NaN values; setting to False (excluding)" ) - ad_hoc_filter = ad_hoc_filter.fillna(False) + ad_hoc_filter.loc[ad_hoc_filter.isnull()] = False if not filter_components.index.equals(ad_hoc_filter.index): warnings.warn( @@ -570,9 +570,8 @@ def _call_clearsky_filter(filter_string): "values will be set to True (kept). 
Align the index with the index " "of the filter_components attribute to prevent this warning" ) - ad_hoc_filter = ad_hoc_filter.reindex(filter_components.index).fillna( - True - ) + ad_hoc_filter = ad_hoc_filter.reindex(filter_components.index) + ad_hoc_filter.loc[ad_hoc_filter.isnull()] = True filter_components["ad_hoc_filter"] = ad_hoc_filter @@ -654,6 +653,7 @@ def _aggregated_filter(self, aggregated, case): "aggregated ad_hoc_filter contains NaN values; setting to False (excluding)" ) ad_hoc_filter_aggregated = ad_hoc_filter_aggregated.fillna(False) + ad_hoc_filter_aggregated.loc[ad_hoc_filter_aggregated.isnull()] = False if not filter_components_aggregated.index.equals( ad_hoc_filter_aggregated.index @@ -666,7 +666,8 @@ def _aggregated_filter(self, aggregated, case): ) ad_hoc_filter_aggregated = ad_hoc_filter_aggregated.reindex( filter_components_aggregated.index - ).fillna(True) + ) + ad_hoc_filter_aggregated.loc[ad_hoc_filter_aggregated.isnull()] = True filter_components_aggregated["ad_hoc_filter"] = ad_hoc_filter_aggregated diff --git a/rdtools/availability.py b/rdtools/availability.py index ee0e74b7..06d57780 100644 --- a/rdtools/availability.py +++ b/rdtools/availability.py @@ -269,7 +269,7 @@ def _calc_loss_subsystem(self, low_threshold, relative_sizes, subsystem_fraction = relative_sizes / relative_sizes.sum() smallest_delta = ( power_subsystem.le(low_threshold) - .replace(False, np.nan) + .replace(False, None) .multiply(subsystem_fraction) .min(axis=1) .astype(float) diff --git a/rdtools/test/analysis_chains_test.py b/rdtools/test/analysis_chains_test.py index 8f2f9156..3cea466e 100644 --- a/rdtools/test/analysis_chains_test.py +++ b/rdtools/test/analysis_chains_test.py @@ -41,7 +41,7 @@ def degradation_trend(basic_parameters, cs_input): from degradation_test import DegradationTestCase rd = -0.05 - input_freq = "H" + input_freq = "h" degradation_trend = DegradationTestCase.get_corr_energy(rd, input_freq) tz = cs_input["pvlib_location"].tz return 
degradation_trend.tz_localize(tz) @@ -56,7 +56,7 @@ def sensor_parameters(basic_parameters, degradation_trend): basic_parameters["pv"] = power basic_parameters["poa_global"] = poa_global basic_parameters["temperature_ambient"] = temperature_ambient - basic_parameters["interp_freq"] = "H" + basic_parameters["interp_freq"] = "h" return basic_parameters @@ -143,7 +143,7 @@ def test_interpolation(basic_parameters, degradation_trend): basic_parameters["temperature_cell"] = dummy_series basic_parameters["windspeed"] = dummy_series basic_parameters["power_expected"] = dummy_series - basic_parameters["interp_freq"] = "H" + basic_parameters["interp_freq"] = "h" rd_analysis = TrendAnalysis(**basic_parameters) @@ -404,7 +404,7 @@ def test_filter_ad_hoc_warnings(workflow, sensor_parameters): # warning about NaNs ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index) - ad_hoc_filter.iloc[10] = np.nan + ad_hoc_filter.iloc[10] = None # np.nan rd_analysis.filter_params["ad_hoc_filter"] = ad_hoc_filter with pytest.warns( UserWarning, match="ad_hoc_filter contains NaN values; setting to False" diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index a2e16ceb..d3b37026 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -53,20 +53,20 @@ def setUpClass(cls): # define module constants and parameters # All frequencies - cls.list_all_input_freq = ["MS", "M", "W", "D", "h", "min", "s", "Irregular_D"] + cls.list_all_input_freq = ["MS", "ME", "W", "D", "h", "min", "s", "Irregular_D"] # Allowed frequencies for degradation_ols - cls.list_ols_input_freq = ["MS", "M", "W", "D", "h", "min", "s", "Irregular_D"] + cls.list_ols_input_freq = ["MS", "ME", "W", "D", "h", "min", "s", "Irregular_D"] ''' Allowed frequencies for degradation_classical_decomposition in principle CD works on higher frequency data but that makes the tests painfully slow ''' - cls.list_CD_input_freq = ['MS', 'M', 'W', 'D'] + cls.list_CD_input_freq 
= ["MS", "ME", "W", "D"] # Allowed frequencies for degradation_year_on_year - cls.list_YOY_input_freq = ['MS', 'M', 'W', 'D', 'Irregular_D'] + cls.list_YOY_input_freq = ["MS", "ME", "W", "D", "Irregular_D"] cls.rd = -0.005 @@ -184,16 +184,19 @@ def test_usage_of_points(self): self.assertTrue((np.sum(rd_result[2]['usage_of_points'])) == 1462) -@pytest.mark.parametrize('start,end,freq', [ - ('2014-01-01', '2015-12-31', 'D'), # no leap day - ('2015-01-01', '2016-12-31', 'D'), # leap day included in index - ('2015-01-01', '2016-12-29', '7D'), # leap day in period but not in index - ('2016-06-01', '2018-05-31', 'D'), # leap year, but no leap day in period - # ('2016-02-29', '2018-02-28', 'd'), # starts on leap day (doesn't work) - ('2014-03-01', '2016-02-29', 'D'), # ends on leap day - ('2015-01-01', '2016-12-31', 'M'), # month end - ('2015-01-01', '2016-12-31', 'MS'), # month start -]) +@pytest.mark.parametrize( + "start,end,freq", + [ + ("2014-01-01", "2015-12-31", "D"), # no leap day + ("2015-01-01", "2016-12-31", "D"), # leap day included in index + ("2015-01-01", "2016-12-29", "7D"), # leap day in period but not in index + ("2016-06-01", "2018-05-31", "D"), # leap year, but no leap day in period + # ('2016-02-29', '2018-02-28', 'd'), # starts on leap day (doesn't work) + ("2014-03-01", "2016-02-29", "D"), # ends on leap day + ("2015-01-01", "2016-12-31", "ME"), # month end + ("2015-01-01", "2016-12-31", "MS"), # month start + ], +) def test_yoy_two_years_error(start, end, freq): # GH 339 times = pd.date_range(start, end, freq=freq) diff --git a/rdtools/test/energy_from_power_test.py b/rdtools/test/energy_from_power_test.py index edd6e010..eb6d1b90 100644 --- a/rdtools/test/energy_from_power_test.py +++ b/rdtools/test/energy_from_power_test.py @@ -103,7 +103,7 @@ def test_energy_from_power_single_value_with_target(): times = pd.date_range("2019-01-01", freq="15min", periods=1) power = pd.Series([100.0], index=times) expected_result = pd.Series([100.0], index=times, 
name="energy_Wh") - result = energy_from_power(power, target_frequency="H") + result = energy_from_power(power, target_frequency="h") pd.testing.assert_series_equal(result, expected_result) diff --git a/rdtools/test/filtering_test.py b/rdtools/test/filtering_test.py index 0e6297e7..586b3216 100644 --- a/rdtools/test/filtering_test.py +++ b/rdtools/test/filtering_test.py @@ -129,19 +129,22 @@ def generate_power_time_series_no_clipping(): def generate_power_time_series_irregular_intervals(): power_datetime_index = pd.Series(np.arange(1, 62)) # Add datetime index to second series - time_range_1 = pd.date_range('2016-12-02T11:00:00.000Z', - '2017-06-06T07:00:00.000Z', freq='1T') + time_range_1 = pd.date_range( + "2016-12-02T11:00:00.000Z", "2017-06-06T07:00:00.000Z", freq="1min" + ) power_datetime_index.index = pd.to_datetime(time_range_1[:61]) power_datetime_index_2 = pd.Series(np.arange(100, 200)) - time_range_2 = pd.date_range(power_datetime_index.index.max(), - '2017-06-06T07:00:00.000Z', freq='15T') + time_range_2 = pd.date_range( + power_datetime_index.index.max(), "2017-06-06T07:00:00.000Z", freq="15min" + ) power_datetime_index_2.index = pd.to_datetime(time_range_2[:100]) power_datetime_index_2 = power_datetime_index_2.iloc[1:] power_datetime_index = pd.concat([power_datetime_index, power_datetime_index_2]) power_datetime_index_3 = pd.Series(list(reversed(np.arange(100, 200)))) - time_range_3 = pd.date_range(power_datetime_index.index.max(), - '2017-06-06T07:00:00.000Z', freq='5T') + time_range_3 = pd.date_range( + power_datetime_index.index.max(), "2017-06-06T07:00:00.000Z", freq="5min" + ) power_datetime_index_3.index = pd.to_datetime(time_range_3[:100]) power_datetime_index_3 = power_datetime_index_3.iloc[1:] power_datetime_index = pd.concat([power_datetime_index, @@ -157,8 +160,7 @@ def generate_power_time_series_one_min_intervals(): power_datetime_index = pd.concat([power_datetime_index, power_datetime_index[::-1]]) # Add datetime index to second series - 
time_range = pd.date_range('2016-12-02T11:00:00.000Z', - '2017-06-06T07:00:00.000Z', freq='1T') + time_range = pd.date_range("2016-12-02T11:00:00.000Z", "2017-06-06T07:00:00.000Z", freq="1min") power_datetime_index.index = pd.to_datetime(time_range[:100]) # Note: Power is expected to be Series object with a datetime index. return power_datetime_index From 7dce7c2dca274c8008250e29c13b686ea05b5fcb Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 14:40:58 -0500 Subject: [PATCH 2/7] delete left over line --- rdtools/analysis_chains.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rdtools/analysis_chains.py b/rdtools/analysis_chains.py index f889cd09..5731912a 100644 --- a/rdtools/analysis_chains.py +++ b/rdtools/analysis_chains.py @@ -652,7 +652,6 @@ def _aggregated_filter(self, aggregated, case): warnings.warn( "aggregated ad_hoc_filter contains NaN values; setting to False (excluding)" ) - ad_hoc_filter_aggregated = ad_hoc_filter_aggregated.fillna(False) ad_hoc_filter_aggregated.loc[ad_hoc_filter_aggregated.isnull()] = False if not filter_components_aggregated.index.equals( From fb78f8513f8b2f01799ffb72ee197b3759357a30 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 15:02:31 -0500 Subject: [PATCH 3/7] fix ME frequency issue for pandas < 2.2.0 --- rdtools/test/degradation_test.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/rdtools/test/degradation_test.py b/rdtools/test/degradation_test.py index d3b37026..4e92a1f1 100644 --- a/rdtools/test/degradation_test.py +++ b/rdtools/test/degradation_test.py @@ -68,6 +68,25 @@ def setUpClass(cls): # Allowed frequencies for degradation_year_on_year cls.list_YOY_input_freq = ["MS", "ME", "W", "D", "Irregular_D"] + # ------------------------------------------------------------------------------------------------ + # Allow pandas < 2.2.0 to use 'M' as an alias for MonthEnd + # 
https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#deprecate-aliases-m-q-y-etc-in-favour-of-me-qe-ye-etc-for-offsets + # Check pandas version and set frequency alias + pandas_version = pd.__version__.split(".") + if int(pandas_version[0]) < 2 or ( + int(pandas_version[0]) == 2 and int(pandas_version[1]) < 2 + ): + for list in [ + cls.list_all_input_freq, + cls.list_ols_input_freq, + cls.list_CD_input_freq, + cls.list_YOY_input_freq, + ]: + if "ME" in list: + list.remove("ME") + list.append(pd.tseries.offsets.MonthEnd()) + # ------------------------------------------------------------------------------------------------ + cls.rd = -0.005 test_corr_energy = {} @@ -198,7 +217,13 @@ def test_usage_of_points(self): ], ) def test_yoy_two_years_error(start, end, freq): - # GH 339 + # ---------------------------------------------------------------- + # Allow pandas < 2.2.0 to use 'M' as an alias for MonthEnd + # https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#deprecate-aliases-m-q-y-etc-in-favour-of-me-qe-ye-etc-for-offsets + if freq == "ME": + freq = pd.tseries.offsets.MonthEnd() + # ---------------------------------------------------------------- + times = pd.date_range(start, end, freq=freq) series = pd.Series(1, index=times) # introduce NaN at the end to ensure that the 2 year requirement applies to From 6f9aee1e00d25e775f319a070db9eff69db9c6c2 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 15:03:18 -0500 Subject: [PATCH 4/7] use .iloc for integer indexing --- rdtools/degradation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rdtools/degradation.py b/rdtools/degradation.py index 4bd9ba57..1698b368 100644 --- a/rdtools/degradation.py +++ b/rdtools/degradation.py @@ -130,7 +130,9 @@ def degradation_classical_decomposition(energy_normalized, # Compute yearly rolling mean to isolate trend component using # moving average energy_ma = df['energy_normalized'].rolling('365d', center=True).mean() - has_full_year = 
(df['years'] >= df['years'][0] + 0.5) & (df['years'] <= df['years'][-1] - 0.5) + has_full_year = (df["years"] >= df["years"].iloc[0] + 0.5) & ( + df["years"] <= df["years"].iloc[-1] - 0.5 + ) energy_ma[~has_full_year] = np.nan df['energy_ma'] = energy_ma From a4de2146c4b10d1f1508daf8a602555fdd3c5d77 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 15:03:32 -0500 Subject: [PATCH 5/7] use pd.NA for nullable bool series --- rdtools/test/analysis_chains_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rdtools/test/analysis_chains_test.py b/rdtools/test/analysis_chains_test.py index 3cea466e..358ff8a6 100644 --- a/rdtools/test/analysis_chains_test.py +++ b/rdtools/test/analysis_chains_test.py @@ -404,7 +404,7 @@ def test_filter_ad_hoc_warnings(workflow, sensor_parameters): # warning about NaNs ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index) - ad_hoc_filter.iloc[10] = None # np.nan + ad_hoc_filter.iloc[10] = pd.NA rd_analysis.filter_params["ad_hoc_filter"] = ad_hoc_filter with pytest.warns( UserWarning, match="ad_hoc_filter contains NaN values; setting to False" @@ -455,7 +455,7 @@ def test_aggregated_filter_ad_hoc_warnings(workflow, sensor_parameters): rd_analysis_2.filter_params = {"clearsky_filter": {"model": "csi"}} daily_ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index) daily_ad_hoc_filter = daily_ad_hoc_filter.resample("1D").first().dropna(how="all") - daily_ad_hoc_filter.iloc[10] = np.nan + daily_ad_hoc_filter.iloc[10] = pd.NA rd_analysis_2.filter_params_aggregated["ad_hoc_filter"] = daily_ad_hoc_filter with pytest.warns( UserWarning, match="ad_hoc_filter contains NaN values; setting to False" From b2c2c2b2d2e4f58388cbc73e9586ca5e3072cc43 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 15:17:04 -0500 Subject: [PATCH 6/7] another try for nullable boolean series --- rdtools/test/analysis_chains_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/rdtools/test/analysis_chains_test.py b/rdtools/test/analysis_chains_test.py index 358ff8a6..a89da6b5 100644 --- a/rdtools/test/analysis_chains_test.py +++ b/rdtools/test/analysis_chains_test.py @@ -403,7 +403,7 @@ def test_filter_ad_hoc_warnings(workflow, sensor_parameters): assert components["ad_hoc_filter"].all() # warning about NaNs - ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index) + ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index, dtype="boolean") ad_hoc_filter.iloc[10] = pd.NA rd_analysis.filter_params["ad_hoc_filter"] = ad_hoc_filter with pytest.warns( @@ -454,7 +454,9 @@ def test_aggregated_filter_ad_hoc_warnings(workflow, sensor_parameters): # disable all filters outside of CSI rd_analysis_2.filter_params = {"clearsky_filter": {"model": "csi"}} daily_ad_hoc_filter = pd.Series(True, index=sensor_parameters["pv"].index) - daily_ad_hoc_filter = daily_ad_hoc_filter.resample("1D").first().dropna(how="all") + daily_ad_hoc_filter = ( + daily_ad_hoc_filter.resample("1D").first().dropna(how="all").astype("boolean") + ) daily_ad_hoc_filter.iloc[10] = pd.NA rd_analysis_2.filter_params_aggregated["ad_hoc_filter"] = daily_ad_hoc_filter with pytest.warns( From 8ff3cba6911762c62ed28527776f472a40702bf8 Mon Sep 17 00:00:00 2001 From: martin-springer Date: Wed, 18 Dec 2024 15:18:41 -0500 Subject: [PATCH 7/7] update changelog --- docs/sphinx/source/changelog/pending.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/changelog/pending.rst b/docs/sphinx/source/changelog/pending.rst index 28e56a5c..1ee493b4 100644 --- a/docs/sphinx/source/changelog/pending.rst +++ b/docs/sphinx/source/changelog/pending.rst @@ -14,6 +14,7 @@ Bug fixes * Fix `energy_from_power`` returns incorrect index for shifted hourly data (:issue:`370`, :pull:`437`) * Add warning to clearsky workflow when power_expected is passed by user (:pull:`439`) * Fix different results with Nan's and Zeros in power series 
(:issue:`313`, :pull:`442`) +* Fix pandas deprecation warnings (frequency aliases, ``fillna`` downcasting, positional Series indexing) in library code and tests (:pull:`444`) Requirements