From dd9cebc7236a08cef2e714d19b77b010b5ed1f8b Mon Sep 17 00:00:00 2001 From: Stefan Jansen Date: Tue, 14 May 2024 16:29:54 -0400 Subject: [PATCH] MAINT: Update to Python 3.12 (#243) * GHA updates * relax pandas constraint, expand tox envs * update tests for latest versions * add tox env python 3.12 --- .github/workflows/build_wheels.yml | 4 +- .github/workflows/ci_tests_full.yml | 17 ++++--- .github/workflows/ci_tests_quick.yml | 15 +++--- pyproject.toml | 21 ++++++--- src/zipline/algorithm.py | 14 +++++- src/zipline/utils/pandas_utils.py | 8 ++-- tests/pipeline/test_factor.py | 11 ++++- tests/pipeline/test_quarters_estimates.py | 8 +++- .../pipeline/test_us_equity_pricing_loader.py | 4 ++ tests/test_algorithm.py | 14 ++---- tests/utils/test_pandas_utils.py | 46 +++++++++---------- 11 files changed, 92 insertions(+), 70 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 4c3f8faa7e..467bb43db2 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -27,7 +27,7 @@ jobs: fetch-depth: 0 # - name: Setup Python -# uses: actions/setup-python@v4 +# uses: actions/setup-python@v5 # with: # python-version: ${{ matrix.python }} @@ -77,7 +77,7 @@ jobs: with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 name: Install Python with: python-version: '3.11' diff --git a/.github/workflows/ci_tests_full.yml b/.github/workflows/ci_tests_full.yml index fd381ceceb..082a8c83be 100644 --- a/.github/workflows/ci_tests_full.yml +++ b/.github/workflows/ci_tests_full.yml @@ -25,44 +25,47 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: - python-version: "3.10" + python-version: "3.11" - name: flake8 Lint uses: py-actions/flake8@v2 tests: + name: Unit Tests for ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: 
[ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout Zipline uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install TA-Lib Linux + - name: Install TA-Lib if: ${{ matrix.os == 'ubuntu-latest' }} run: | sudo ./tools/install_talib.sh - - name: Install TA-Lib macOS + - name: Install TA-Lib HDF5 c-blosc if: ${{ matrix.os == 'macos-latest' }} run: | brew install ta-lib + brew install hdf5 + brew install c-blosc - name: Developer Command Prompt for Microsoft Visual C++ uses: ilammy/msvc-dev-cmd@v1 - - name: Install TA-Lib Windows + - name: Install TA-Lib if: ${{ matrix.os == 'windows-latest' }} run: | ./tools/install_talib.bat diff --git a/.github/workflows/ci_tests_quick.yml b/.github/workflows/ci_tests_quick.yml index 392b9fee83..883e1ba995 100644 --- a/.github/workflows/ci_tests_quick.yml +++ b/.github/workflows/ci_tests_quick.yml @@ -24,7 +24,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.11" @@ -32,6 +32,7 @@ jobs: uses: py-actions/flake8@v2 tests: + name: Unit Tests for ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -44,30 +45,26 @@ jobs: uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install TA-Lib Linux + - name: Install TA-Lib if: ${{ matrix.os == 'ubuntu-latest' }} run: | sudo ./tools/install_talib.sh - - name: Install TA-Lib macOS + - name: Install TA-Lib HDF5 c-blosc if: ${{ matrix.os == 'macos-latest' }} run: | brew install ta-lib - - - name: Install HDF5 macOS - if: ${{ 
matrix.os == 'macos-latest' }} - run: | brew install hdf5 brew install c-blosc - name: Developer Command Prompt for Microsoft Visual C++ uses: ilammy/msvc-dev-cmd@v1 - - name: Install TA-Lib Windows + - name: Install TA-Lib if: ${{ matrix.os == 'windows-latest' }} run: | ./tools/install_talib.bat diff --git a/pyproject.toml b/pyproject.toml index 2b32a2bc96..64998b803d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,10 +18,10 @@ classifiers = [ 'License :: OSI Approved :: Apache Software License', 'Natural Language :: English', 'Programming Language :: Python', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Operating System :: OS Independent', 'Intended Audience :: Science/Research', 'Topic :: Office/Business :: Financial :: Investment', @@ -47,7 +47,7 @@ dependencies = [ 'networkx >=2.0', 'numexpr >=2.6.1', 'numpy >=1.14.5', - 'pandas >=2.0', + 'pandas >=1.3', 'patsy >=0.4.0', 'python-dateutil >=2.4.2', 'python-interface >=1.5.3', @@ -73,7 +73,8 @@ requires = [ 'setuptools>=42.0.0', "setuptools_scm[toml]>=6.2", 'wheel>=0.36.0', - 'Cython>=0.29.21,<3', + 'Cython>=0.29.21', + # 'Cython>=3', 'oldest-supported-numpy; python_version>="3.8"', ] build-backend = 'setuptools.build_meta' @@ -102,7 +103,8 @@ dev = [ 'flake8 >=3.9.1', 'black', 'pre-commit >=2.12.1', - 'Cython>=0.29.21,<3', + # 'Cython>=0.29.21,<3', + 'Cython>=0.29.21', ] docs = [ 'Cython', @@ -176,17 +178,17 @@ exclude = ''' [tool.tox] legacy_tox_ini = """ [tox] -envlist = py{38,39,310,311}-pandas{2} +envlist = py{39,310}-pandas{13,14,15}, py{39,310,311,312}-pandas{20,21,22} isolated_build = True skip_missing_interpreters = True minversion = 3.23.0 [gh-actions] python = - 3.8: py38 3.9: py39 3.10: py310 3.11: py311 + 3.12: py312 [testenv] usedevelop = True @@ -196,7 +198,12 @@ setenv = changedir = tmp extras = test deps = - pandas2: 
pandas>=2.0 + pandas13: pandas>=1.3.0,<1.4 + pandas14: pandas>=1.4.0,<1.5 + pandas15: pandas>=1.5.0,<1.6 + pandas20: pandas>=2.0,<2.1 + pandas21: pandas>=2.1,<2.2 + pandas22: pandas>=2.2,<2.3 commands = pytest -n 4 --reruns 5 --cov={toxinidir}/src --cov-report term --cov-report=xml --cov-report=html:htmlcov {toxinidir}/tests diff --git a/src/zipline/algorithm.py b/src/zipline/algorithm.py index dbccd82b18..14833db37f 100644 --- a/src/zipline/algorithm.py +++ b/src/zipline/algorithm.py @@ -1455,7 +1455,19 @@ def get_datetime(self, tz=None): The current simulation datetime converted to ``tz``. """ dt = self.datetime - assert dt.tzinfo == timezone.utc, "Algorithm should have a utc datetime" + from packaging.version import Version + import pytz + + if Version(pd.__version__) < Version("2.0.0"): + assert ( + dt.tzinfo == pytz.utc + ), f"Algorithm should have a pytc utc datetime, {dt.tzinfo}" + else: + assert ( + dt.tzinfo == timezone.utc + ), f"Algorithm should have a timezone.utc datetime, {dt.tzinfo}" + + # assert dt.tzinfo == timezone.utc, "Algorithm should have a utc datetime" if tz is not None: dt = dt.astimezone(tz) return dt diff --git a/src/zipline/utils/pandas_utils.py b/src/zipline/utils/pandas_utils.py index 944e88dc81..7b6af82f1b 100644 --- a/src/zipline/utils/pandas_utils.py +++ b/src/zipline/utils/pandas_utils.py @@ -18,7 +18,7 @@ skip_pipeline_new_pandas = ( "Pipeline categoricals are not yet compatible with pandas >=0.19" ) -skip_pipeline_blaze = "Blaze doesn't play nicely with Pandas >=1.0" +# skip_pipeline_blaze = "Blaze doesn't play nicely with Pandas >=1.0" def july_5th_holiday_observance(datetime_index): @@ -226,8 +226,8 @@ def categorical_df_concat(df_list, inplace=False): # Assert each dataframe has the same columns/dtypes df = df_list[0] - if not all([(df.dtypes.equals(df_i.dtypes)) for df_i in df_list[1:]]): - raise ValueError("Input DataFrames must have the same columns/dtypes.") + if not all([set(df.columns) == set(df_i.columns) for df_i in 
df_list[1:]]): + raise ValueError("Input DataFrames must have the same columns.") categorical_columns = df.columns[df.dtypes == "category"] @@ -238,7 +238,7 @@ def categorical_df_concat(df_list, inplace=False): with ignore_pandas_nan_categorical_warning(): for df in df_list: - df[col].cat.set_categories(new_categories, inplace=True) + df[col] = df[col].cat.set_categories(new_categories) return pd.concat(df_list) diff --git a/tests/pipeline/test_factor.py b/tests/pipeline/test_factor.py index 0d3408a09d..da019ab82a 100644 --- a/tests/pipeline/test_factor.py +++ b/tests/pipeline/test_factor.py @@ -5,7 +5,6 @@ from functools import partial from itertools import product from unittest import skipIf - import numpy as np import pandas as pd import pytest @@ -14,7 +13,7 @@ from parameterized import parameterized from scipy.stats.mstats import winsorize as scipy_winsorize from toolz import compose - +from packaging.version import Version from zipline.errors import BadPercentileBounds, UnknownRankMethod from zipline.lib.labelarray import LabelArray from zipline.lib.normalize import naive_grouped_rowwise_apply as grouped_apply @@ -41,6 +40,12 @@ from .base import BaseUSEquityPipelineTestCase +pandas_two_point_two = False +if Version(pd.__version__) >= Version("2.2"): + # pandas 2.2.0 has a bug in qcut that causes it to return a Series with + # the wrong dtype when labels=False. 
+ pandas_two_point_two = True + class F(Factor): dtype = float64_dtype @@ -1466,6 +1471,8 @@ def test_quantiles_masked(self, seed): mask=self.build_mask(self.ones_mask(shape=shape)), ) + # skip until https://github.com/pandas-dev/pandas/issues/58240 fixed + @skipIf(pandas_two_point_two, "pd.qcut has a bug in pandas 2.2") def test_quantiles_uneven_buckets(self): permute = partial(permute_rows, 5) shape = (5, 5) diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py index 18c5f169f3..1a613d0185 100644 --- a/tests/pipeline/test_quarters_estimates.py +++ b/tests/pipeline/test_quarters_estimates.py @@ -1,6 +1,6 @@ from datetime import timedelta from functools import partial - +from packaging.version import Version import itertools from parameterized import parameterized import numpy as np @@ -238,6 +238,11 @@ def test_load_one_day(self): end_date=pd.Timestamp("2015-01-15"), ) + # type changes to datetime64[ns] in pandas 2.0.0 + if Version(pd.__version__) >= Version("2"): + self.expected_out.event_date = self.expected_out.event_date.astype( + "datetime64[ns]" + ) assert_frame_equal( results.sort_index(axis=1), self.expected_out.sort_index(axis=1) ) @@ -660,7 +665,6 @@ def make_loader(cls, events, columns): return PreviousEarningsEstimatesLoader(events, columns) def get_expected_estimate(self, q1_knowledge, q2_knowledge, comparable_date): - # The expected estimate will be for q2 if the last thing # we've seen is that the release date already happened. 
# Otherwise, it'll be for q1, as long as the release date diff --git a/tests/pipeline/test_us_equity_pricing_loader.py b/tests/pipeline/test_us_equity_pricing_loader.py index f08165f0e1..736d2deec6 100644 --- a/tests/pipeline/test_us_equity_pricing_loader.py +++ b/tests/pipeline/test_us_equity_pricing_loader.py @@ -17,6 +17,7 @@ from parameterized import parameterized import sys +from packaging.version import Version import numpy as np from numpy.testing import ( assert_allclose, @@ -473,6 +474,9 @@ def test_load_adjustments(self, tables, adjustment_type): @parameterized.expand([(True,), (False,)]) @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") def test_load_adjustments_to_df(self, convert_dts): + if Version(pd.__version__) < Version("2.0") and not convert_dts: + pytest.skip("pandas < 2.0 behaves differently datetime64[s]") + reader = self.adjustment_reader adjustment_dfs = reader.unpack_db_to_component_dfs(convert_dates=convert_dts) diff --git a/tests/test_algorithm.py b/tests/test_algorithm.py index 389badaf7a..573cb96e0b 100644 --- a/tests/test_algorithm.py +++ b/tests/test_algorithm.py @@ -157,7 +157,6 @@ def handle_data(self, data): class TestMiscellaneousAPI(zf.WithMakeAlgo, zf.ZiplineTestCase): - START_DATE = pd.Timestamp("2006-01-03") END_DATE = pd.Timestamp("2006-01-04") SIM_PARAMS_DATA_FREQUENCY = "minute" @@ -373,7 +372,6 @@ def initialize(algo): def handle_data(algo, data): if algo.minute == 0: - # Should be filled by the next minute algo.order(algo.sid(1), 1) @@ -922,7 +920,6 @@ def test_noop_orders(self): # to sell with extremely high versions of same. Should not end up with # any positions for reasonable data. 
def handle_data(algo, data): - ######## # Buys # ######## @@ -1896,7 +1893,6 @@ def test_bad_kwargs(self, name, algo_text): @parameterized.expand(ARG_TYPE_TEST_CASES) def test_arg_types(self, name, inputs): - keyword = name.split("__")[1] algo = self.make_algo(script=inputs[0]) @@ -2000,11 +1996,13 @@ def handle_data(algo, data): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("ignore", PerformanceWarning) warnings.simplefilter("ignore", RuntimeWarning) + # catch new FutureWarning until fixed + warnings.simplefilter("ignore", FutureWarning) algo = self.make_algo(script=algocode, sim_params=sim_params) algo.run() - assert len(w) == 2 + assert len(w) == 2, f"Expected 2 warnings, got {len(w):d}" for i, warning in enumerate(w): assert isinstance(warning.message, UserWarning) @@ -2031,7 +2029,6 @@ def handle_data(algo, data): class TestCapitalChanges(zf.WithMakeAlgo, zf.ZiplineTestCase): - START_DATE = pd.Timestamp("2006-01-03") END_DATE = pd.Timestamp("2006-01-09") @@ -2794,7 +2791,6 @@ def init_class_fixtures(cls): cls.another_asset = cls.asset_finder.retrieve_asset(134) def _check_algo(self, algo, expected_order_count, expected_exc): - with pytest.raises(expected_exc) if expected_exc else nop_context: algo.run() assert algo.order_count == expected_order_count @@ -3235,7 +3231,6 @@ def handle_data(algo, data): class TestAssetDateBounds(zf.WithMakeAlgo, zf.ZiplineTestCase): - START_DATE = pd.Timestamp("2014-01-02") END_DATE = pd.Timestamp("2014-01-03") SIM_PARAMS_START_DATE = END_DATE # Only run for one day. 
@@ -3755,7 +3750,6 @@ def test_eod_order_cancel_minute(self, direction, minute_emission): assert np.copysign(389, direction) == the_order["filled"] with self._caplog.at_level(logging.WARNING): - assert 1 == len(self._caplog.messages) if direction == 1: @@ -4447,7 +4441,6 @@ def handle_data(context, data): algo.run() with self._caplog.at_level(logging.WARNING): - # one warning per order on the second day assert 6 * 390 == len(self._caplog.messages) @@ -4478,6 +4471,5 @@ def analyze(context, results): """ ) for method in ("initialize", "handle_data", "before_trading_start", "analyze"): - with pytest.raises(ValueError): self.make_algo(script=script, **{method: lambda *args, **kwargs: None}) diff --git a/tests/utils/test_pandas_utils.py b/tests/utils/test_pandas_utils.py index ce6da9292e..7c2a0c8983 100644 --- a/tests/utils/test_pandas_utils.py +++ b/tests/utils/test_pandas_utils.py @@ -2,7 +2,7 @@ Tests for zipline/utils/pandas_utils.py """ import pandas as pd - +from packaging.version import Version from zipline.testing.predicates import assert_equal from zipline.utils.pandas_utils import ( categorical_df_concat, @@ -16,7 +16,6 @@ class TestNearestUnequalElements: @pytest.mark.parametrize("tz", ["UTC", "US/Eastern"]) def test_nearest_unequal_elements(self, tz): - dts = pd.to_datetime( ["2014-01-01", "2014-01-05", "2014-01-06", "2014-01-09"], ).tz_localize(tz) @@ -45,7 +44,6 @@ def t(s): @pytest.mark.parametrize("tz", ["UTC", "US/Eastern"]) def test_nearest_unequal_elements_short_dts(self, tz): - # Length 1. 
dts = pd.to_datetime(["2014-01-01"]).tz_localize(tz) @@ -87,9 +85,8 @@ def test_nearest_unequal_bad_input(self): class TestCatDFConcat: - @pytest.mark.skipif(new_pandas, reason=skip_pipeline_new_pandas) + # @pytest.mark.skipif(Version(), reason=skip_pipeline_new_pandas) def test_categorical_df_concat(self): - inp = [ pd.DataFrame( { @@ -134,21 +131,20 @@ def test_categorical_df_concat(self): assert_equal(expected["C"].cat.categories, result["C"].cat.categories) def test_categorical_df_concat_value_error(self): - - mismatched_dtypes = [ - pd.DataFrame( - { - "A": pd.Series(["a", "b", "c"], dtype="category"), - "B": pd.Series([100, 102, 103], dtype="int64"), - } - ), - pd.DataFrame( - { - "A": pd.Series(["c", "b", "d"], dtype="category"), - "B": pd.Series([103, 102, 104], dtype="float64"), - } - ), - ] + # mismatched_dtypes = [ + # pd.DataFrame( + # { + # "A": pd.Series(["a", "b", "c"], dtype="category"), + # "B": pd.Series([100, 102, 103], dtype="int64"), + # } + # ), + # pd.DataFrame( + # { + # "A": pd.Series(["c", "b", "d"], dtype="category"), + # "B": pd.Series([103, 102, 104], dtype="float64"), + # } + # ), + # ] mismatched_column_names = [ pd.DataFrame( { @@ -164,12 +160,12 @@ def test_categorical_df_concat_value_error(self): ), ] - with pytest.raises( - ValueError, match="Input DataFrames must have the same columns/dtypes." - ): - categorical_df_concat(mismatched_dtypes) + # with pytest.raises( + # ValueError, match="Input DataFrames must have the same columns." + # ): + # categorical_df_concat(mismatched_dtypes) with pytest.raises( - ValueError, match="Input DataFrames must have the same columns/dtypes." + ValueError, match="Input DataFrames must have the same columns." ): categorical_df_concat(mismatched_column_names)