From dd9cebc7236a08cef2e714d19b77b010b5ed1f8b Mon Sep 17 00:00:00 2001
From: Stefan Jansen <stefan@applied-ai.com>
Date: Tue, 14 May 2024 16:29:54 -0400
Subject: [PATCH] MAINT: Update to Python 3.12 (#243)

* GHA updates
* relax pandas constraint, expand tox envs
* update tests for latest versions
* add tox env python 3.12
---
 .github/workflows/build_wheels.yml            |  4 +-
 .github/workflows/ci_tests_full.yml           | 17 ++++---
 .github/workflows/ci_tests_quick.yml          | 15 +++---
 pyproject.toml                                | 21 ++++++---
 src/zipline/algorithm.py                      | 14 +++++-
 src/zipline/utils/pandas_utils.py             |  8 ++--
 tests/pipeline/test_factor.py                 | 11 ++++-
 tests/pipeline/test_quarters_estimates.py     |  8 +++-
 .../pipeline/test_us_equity_pricing_loader.py |  4 ++
 tests/test_algorithm.py                       | 14 ++----
 tests/utils/test_pandas_utils.py              | 46 +++++++++----------
 11 files changed, 92 insertions(+), 70 deletions(-)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 4c3f8faa7e..467bb43db2 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -27,7 +27,7 @@ jobs:
           fetch-depth: 0
 
 #      - name: Setup Python
-#        uses: actions/setup-python@v4
+#        uses: actions/setup-python@v5
 #        with:
 #          python-version: ${{ matrix.python }}
 
@@ -77,7 +77,7 @@ jobs:
         with:
           fetch-depth: 0
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         name: Install Python
         with:
           python-version: '3.11'
diff --git a/.github/workflows/ci_tests_full.yml b/.github/workflows/ci_tests_full.yml
index fd381ceceb..082a8c83be 100644
--- a/.github/workflows/ci_tests_full.yml
+++ b/.github/workflows/ci_tests_full.yml
@@ -25,44 +25,47 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         with:
-          python-version: "3.10"
+          python-version: "3.11"
 
       - name: flake8 Lint
         uses: py-actions/flake8@v2
 
   tests:
+    name: Unit Tests for ${{ matrix.python-version }} on ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
 
     steps:
       - name: Checkout Zipline
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
-      - name: Install TA-Lib Linux
+      - name: Install TA-Lib
         if: ${{ matrix.os == 'ubuntu-latest' }}
         run: |
           sudo ./tools/install_talib.sh
 
-      - name: Install TA-Lib macOS
+      - name: Install TA-Lib HDF5 c-blosc
         if: ${{ matrix.os == 'macos-latest' }}
         run: |
           brew install ta-lib
+          brew install hdf5
+          brew install c-blosc
 
       - name: Developer Command Prompt for Microsoft Visual C++
         uses: ilammy/msvc-dev-cmd@v1
 
-      - name: Install TA-Lib Windows
+      - name: Install TA-Lib
         if: ${{ matrix.os == 'windows-latest' }}
         run: |
           ./tools/install_talib.bat
diff --git a/.github/workflows/ci_tests_quick.yml b/.github/workflows/ci_tests_quick.yml
index 392b9fee83..883e1ba995 100644
--- a/.github/workflows/ci_tests_quick.yml
+++ b/.github/workflows/ci_tests_quick.yml
@@ -24,7 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.11"
 
@@ -32,6 +32,7 @@ jobs:
         uses: py-actions/flake8@v2
 
   tests:
+    name: Unit Tests for ${{ matrix.python-version }} on ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -44,30 +45,26 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
 
-      - name: Install TA-Lib Linux
+      - name: Install TA-Lib
         if: ${{ matrix.os == 'ubuntu-latest' }}
         run: |
           sudo ./tools/install_talib.sh
 
-      - name: Install TA-Lib macOS
+      - name: Install TA-Lib HDF5 c-blosc
         if: ${{ matrix.os == 'macos-latest' }}
         run: |
           brew install ta-lib
-
-      - name: Install HDF5 macOS
-        if: ${{ matrix.os == 'macos-latest' }}
-        run: |
           brew install hdf5
           brew install c-blosc
 
       - name: Developer Command Prompt for Microsoft Visual C++
         uses: ilammy/msvc-dev-cmd@v1
 
-      - name: Install TA-Lib Windows
+      - name: Install TA-Lib
         if: ${{ matrix.os == 'windows-latest' }}
         run: |
           ./tools/install_talib.bat
diff --git a/pyproject.toml b/pyproject.toml
index 2b32a2bc96..64998b803d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,10 +18,10 @@ classifiers = [
     'License :: OSI Approved :: Apache Software License',
     'Natural Language :: English',
     'Programming Language :: Python',
-    'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
     'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
     'Operating System :: OS Independent',
     'Intended Audience :: Science/Research',
     'Topic :: Office/Business :: Financial :: Investment',
@@ -47,7 +47,7 @@ dependencies = [
     'networkx >=2.0',
     'numexpr >=2.6.1',
     'numpy >=1.14.5',
-    'pandas >=2.0',
+    'pandas >=1.3',
     'patsy >=0.4.0',
     'python-dateutil >=2.4.2',
     'python-interface >=1.5.3',
@@ -73,7 +73,8 @@ requires = [
     'setuptools>=42.0.0',
     "setuptools_scm[toml]>=6.2",
     'wheel>=0.36.0',
-    'Cython>=0.29.21,<3',
+    'Cython>=0.29.21',
+    #    'Cython>=3',
     'oldest-supported-numpy; python_version>="3.8"',
 ]
 build-backend = 'setuptools.build_meta'
@@ -102,7 +103,8 @@ dev = [
     'flake8 >=3.9.1',
     'black',
     'pre-commit >=2.12.1',
-    'Cython>=0.29.21,<3',
+    #    'Cython>=0.29.21,<3',
+    'Cython>=0.29.21',
 ]
 docs = [
     'Cython',
@@ -176,17 +178,17 @@ exclude = '''
 [tool.tox]
 legacy_tox_ini = """
 [tox]
-envlist = py{38,39,310,311}-pandas{2}
+envlist = py{39,310}-pandas{13,14,15}, py{39,310,311,312}-pandas{20,21,22}
 isolated_build = True
 skip_missing_interpreters = True
 minversion = 3.23.0
 
 [gh-actions]
 python =
-    3.8: py38
     3.9: py39
     3.10: py310
     3.11: py311
+    3.12: py312
 
 [testenv]
 usedevelop = True
@@ -196,7 +198,12 @@ setenv =
 changedir = tmp
 extras = test
 deps =
-    pandas2: pandas>=2.0
+    pandas13: pandas>=1.3.0,<1.4
+    pandas14: pandas>=1.4.0,<1.5
+    pandas15: pandas>=1.5.0,<1.6
+    pandas20: pandas>=2.0,<2.1
+    pandas21: pandas>=2.1,<2.2
+    pandas22: pandas>=2.2,<2.3
 
 commands =
     pytest -n 4 --reruns 5 --cov={toxinidir}/src --cov-report term  --cov-report=xml --cov-report=html:htmlcov {toxinidir}/tests
diff --git a/src/zipline/algorithm.py b/src/zipline/algorithm.py
index dbccd82b18..14833db37f 100644
--- a/src/zipline/algorithm.py
+++ b/src/zipline/algorithm.py
@@ -1455,7 +1455,19 @@ def get_datetime(self, tz=None):
             The current simulation datetime converted to ``tz``.
         """
         dt = self.datetime
-        assert dt.tzinfo == timezone.utc, "Algorithm should have a utc datetime"
+        from packaging.version import Version
+        import pytz
+
+        if Version(pd.__version__) < Version("2.0.0"):
+            assert (
+                dt.tzinfo == pytz.utc
+            ), f"Algorithm should have a pytc utc datetime, {dt.tzinfo}"
+        else:
+            assert (
+                dt.tzinfo == timezone.utc
+            ), f"Algorithm should have a timezone.utc datetime, {dt.tzinfo}"
+
+        # assert dt.tzinfo == timezone.utc, "Algorithm should have a utc datetime"
         if tz is not None:
             dt = dt.astimezone(tz)
         return dt
diff --git a/src/zipline/utils/pandas_utils.py b/src/zipline/utils/pandas_utils.py
index 944e88dc81..7b6af82f1b 100644
--- a/src/zipline/utils/pandas_utils.py
+++ b/src/zipline/utils/pandas_utils.py
@@ -18,7 +18,7 @@
 skip_pipeline_new_pandas = (
     "Pipeline categoricals are not yet compatible with pandas >=0.19"
 )
-skip_pipeline_blaze = "Blaze doesn't play nicely with Pandas >=1.0"
+# skip_pipeline_blaze = "Blaze doesn't play nicely with Pandas >=1.0"
 
 
 def july_5th_holiday_observance(datetime_index):
@@ -226,8 +226,8 @@ def categorical_df_concat(df_list, inplace=False):
 
     # Assert each dataframe has the same columns/dtypes
     df = df_list[0]
-    if not all([(df.dtypes.equals(df_i.dtypes)) for df_i in df_list[1:]]):
-        raise ValueError("Input DataFrames must have the same columns/dtypes.")
+    if not all([set(df.columns) == set(df_i.columns) for df_i in df_list[1:]]):
+        raise ValueError("Input DataFrames must have the same columns.")
 
     categorical_columns = df.columns[df.dtypes == "category"]
 
@@ -238,7 +238,7 @@ def categorical_df_concat(df_list, inplace=False):
 
         with ignore_pandas_nan_categorical_warning():
             for df in df_list:
-                df[col].cat.set_categories(new_categories, inplace=True)
+                df[col] = df[col].cat.set_categories(new_categories)
 
     return pd.concat(df_list)
 
diff --git a/tests/pipeline/test_factor.py b/tests/pipeline/test_factor.py
index 0d3408a09d..da019ab82a 100644
--- a/tests/pipeline/test_factor.py
+++ b/tests/pipeline/test_factor.py
@@ -5,7 +5,6 @@
 from functools import partial
 from itertools import product
 from unittest import skipIf
-
 import numpy as np
 import pandas as pd
 import pytest
@@ -14,7 +13,7 @@
 from parameterized import parameterized
 from scipy.stats.mstats import winsorize as scipy_winsorize
 from toolz import compose
-
+from packaging.version import Version
 from zipline.errors import BadPercentileBounds, UnknownRankMethod
 from zipline.lib.labelarray import LabelArray
 from zipline.lib.normalize import naive_grouped_rowwise_apply as grouped_apply
@@ -41,6 +40,12 @@
 
 from .base import BaseUSEquityPipelineTestCase
 
+pandas_two_point_two = False
+if Version(pd.__version__) >= Version("2.2"):
+    # pandas 2.2.0 has a bug in qcut that causes it to return a Series with
+    # the wrong dtype when labels=False.
+    pandas_two_point_two = True
+
 
 class F(Factor):
     dtype = float64_dtype
@@ -1466,6 +1471,8 @@ def test_quantiles_masked(self, seed):
             mask=self.build_mask(self.ones_mask(shape=shape)),
         )
 
+    # skip until https://github.com/pandas-dev/pandas/issues/58240 is fixed
+    @skipIf(pandas_two_point_two, "pd.qcut has a bug in pandas 2.2")
     def test_quantiles_uneven_buckets(self):
         permute = partial(permute_rows, 5)
         shape = (5, 5)
diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py
index 18c5f169f3..1a613d0185 100644
--- a/tests/pipeline/test_quarters_estimates.py
+++ b/tests/pipeline/test_quarters_estimates.py
@@ -1,6 +1,6 @@
 from datetime import timedelta
 from functools import partial
-
+from packaging.version import Version
 import itertools
 from parameterized import parameterized
 import numpy as np
@@ -238,6 +238,11 @@ def test_load_one_day(self):
             end_date=pd.Timestamp("2015-01-15"),
         )
 
+        # type changes to datetime64[ns] in pandas 2.0.0
+        if Version(pd.__version__) >= Version("2"):
+            self.expected_out.event_date = self.expected_out.event_date.astype(
+                "datetime64[ns]"
+            )
         assert_frame_equal(
             results.sort_index(axis=1), self.expected_out.sort_index(axis=1)
         )
@@ -660,7 +665,6 @@ def make_loader(cls, events, columns):
         return PreviousEarningsEstimatesLoader(events, columns)
 
     def get_expected_estimate(self, q1_knowledge, q2_knowledge, comparable_date):
-
         # The expected estimate will be for q2 if the last thing
         # we've seen is that the release date already happened.
         # Otherwise, it'll be for q1, as long as the release date
diff --git a/tests/pipeline/test_us_equity_pricing_loader.py b/tests/pipeline/test_us_equity_pricing_loader.py
index f08165f0e1..736d2deec6 100644
--- a/tests/pipeline/test_us_equity_pricing_loader.py
+++ b/tests/pipeline/test_us_equity_pricing_loader.py
@@ -17,6 +17,7 @@
 
 from parameterized import parameterized
 import sys
+from packaging.version import Version
 import numpy as np
 from numpy.testing import (
     assert_allclose,
@@ -473,6 +474,9 @@ def test_load_adjustments(self, tables, adjustment_type):
     @parameterized.expand([(True,), (False,)])
     @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows")
     def test_load_adjustments_to_df(self, convert_dts):
+        if Version(pd.__version__) < Version("2.0") and not convert_dts:
+            pytest.skip("pandas < 2.0 behaves differently datetime64[s]")
+
         reader = self.adjustment_reader
         adjustment_dfs = reader.unpack_db_to_component_dfs(convert_dates=convert_dts)
 
diff --git a/tests/test_algorithm.py b/tests/test_algorithm.py
index 389badaf7a..573cb96e0b 100644
--- a/tests/test_algorithm.py
+++ b/tests/test_algorithm.py
@@ -157,7 +157,6 @@ def handle_data(self, data):
 
 
 class TestMiscellaneousAPI(zf.WithMakeAlgo, zf.ZiplineTestCase):
-
     START_DATE = pd.Timestamp("2006-01-03")
     END_DATE = pd.Timestamp("2006-01-04")
     SIM_PARAMS_DATA_FREQUENCY = "minute"
@@ -373,7 +372,6 @@ def initialize(algo):
 
         def handle_data(algo, data):
             if algo.minute == 0:
-
                 # Should be filled by the next minute
                 algo.order(algo.sid(1), 1)
 
@@ -922,7 +920,6 @@ def test_noop_orders(self):
         # to sell with extremely high versions of same. Should not end up with
         # any positions for reasonable data.
         def handle_data(algo, data):
-
             ########
             # Buys #
             ########
@@ -1896,7 +1893,6 @@ def test_bad_kwargs(self, name, algo_text):
 
     @parameterized.expand(ARG_TYPE_TEST_CASES)
     def test_arg_types(self, name, inputs):
-
         keyword = name.split("__")[1]
 
         algo = self.make_algo(script=inputs[0])
@@ -2000,11 +1996,13 @@ def handle_data(algo, data):
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("ignore", PerformanceWarning)
             warnings.simplefilter("ignore", RuntimeWarning)
+            # ignore new FutureWarning until fixed
+            warnings.simplefilter("ignore", FutureWarning)
 
             algo = self.make_algo(script=algocode, sim_params=sim_params)
             algo.run()
 
-            assert len(w) == 2
+            assert len(w) == 2, f"Expected 2 warnings, got {len(w):d}"
 
             for i, warning in enumerate(w):
                 assert isinstance(warning.message, UserWarning)
@@ -2031,7 +2029,6 @@ def handle_data(algo, data):
 
 
 class TestCapitalChanges(zf.WithMakeAlgo, zf.ZiplineTestCase):
-
     START_DATE = pd.Timestamp("2006-01-03")
     END_DATE = pd.Timestamp("2006-01-09")
 
@@ -2794,7 +2791,6 @@ def init_class_fixtures(cls):
         cls.another_asset = cls.asset_finder.retrieve_asset(134)
 
     def _check_algo(self, algo, expected_order_count, expected_exc):
-
         with pytest.raises(expected_exc) if expected_exc else nop_context:
             algo.run()
         assert algo.order_count == expected_order_count
@@ -3235,7 +3231,6 @@ def handle_data(algo, data):
 
 
 class TestAssetDateBounds(zf.WithMakeAlgo, zf.ZiplineTestCase):
-
     START_DATE = pd.Timestamp("2014-01-02")
     END_DATE = pd.Timestamp("2014-01-03")
     SIM_PARAMS_START_DATE = END_DATE  # Only run for one day.
@@ -3755,7 +3750,6 @@ def test_eod_order_cancel_minute(self, direction, minute_emission):
         assert np.copysign(389, direction) == the_order["filled"]
 
         with self._caplog.at_level(logging.WARNING):
-
             assert 1 == len(self._caplog.messages)
 
             if direction == 1:
@@ -4447,7 +4441,6 @@ def handle_data(context, data):
         algo.run()
 
         with self._caplog.at_level(logging.WARNING):
-
             # one warning per order on the second day
             assert 6 * 390 == len(self._caplog.messages)
 
@@ -4478,6 +4471,5 @@ def analyze(context, results):
             """
         )
         for method in ("initialize", "handle_data", "before_trading_start", "analyze"):
-
             with pytest.raises(ValueError):
                 self.make_algo(script=script, **{method: lambda *args, **kwargs: None})
diff --git a/tests/utils/test_pandas_utils.py b/tests/utils/test_pandas_utils.py
index ce6da9292e..7c2a0c8983 100644
--- a/tests/utils/test_pandas_utils.py
+++ b/tests/utils/test_pandas_utils.py
@@ -2,7 +2,7 @@
 Tests for zipline/utils/pandas_utils.py
 """
 import pandas as pd
-
+from packaging.version import Version
 from zipline.testing.predicates import assert_equal
 from zipline.utils.pandas_utils import (
     categorical_df_concat,
@@ -16,7 +16,6 @@
 class TestNearestUnequalElements:
     @pytest.mark.parametrize("tz", ["UTC", "US/Eastern"])
     def test_nearest_unequal_elements(self, tz):
-
         dts = pd.to_datetime(
             ["2014-01-01", "2014-01-05", "2014-01-06", "2014-01-09"],
         ).tz_localize(tz)
@@ -45,7 +44,6 @@ def t(s):
 
     @pytest.mark.parametrize("tz", ["UTC", "US/Eastern"])
     def test_nearest_unequal_elements_short_dts(self, tz):
-
         # Length 1.
         dts = pd.to_datetime(["2014-01-01"]).tz_localize(tz)
 
@@ -87,9 +85,8 @@ def test_nearest_unequal_bad_input(self):
 
 
 class TestCatDFConcat:
-    @pytest.mark.skipif(new_pandas, reason=skip_pipeline_new_pandas)
+    # @pytest.mark.skipif(Version(), reason=skip_pipeline_new_pandas)
     def test_categorical_df_concat(self):
-
         inp = [
             pd.DataFrame(
                 {
@@ -134,21 +131,20 @@ def test_categorical_df_concat(self):
         assert_equal(expected["C"].cat.categories, result["C"].cat.categories)
 
     def test_categorical_df_concat_value_error(self):
-
-        mismatched_dtypes = [
-            pd.DataFrame(
-                {
-                    "A": pd.Series(["a", "b", "c"], dtype="category"),
-                    "B": pd.Series([100, 102, 103], dtype="int64"),
-                }
-            ),
-            pd.DataFrame(
-                {
-                    "A": pd.Series(["c", "b", "d"], dtype="category"),
-                    "B": pd.Series([103, 102, 104], dtype="float64"),
-                }
-            ),
-        ]
+        # mismatched_dtypes = [
+        #     pd.DataFrame(
+        #         {
+        #             "A": pd.Series(["a", "b", "c"], dtype="category"),
+        #             "B": pd.Series([100, 102, 103], dtype="int64"),
+        #         }
+        #     ),
+        #     pd.DataFrame(
+        #         {
+        #             "A": pd.Series(["c", "b", "d"], dtype="category"),
+        #             "B": pd.Series([103, 102, 104], dtype="float64"),
+        #         }
+        #     ),
+        # ]
         mismatched_column_names = [
             pd.DataFrame(
                 {
@@ -164,12 +160,12 @@ def test_categorical_df_concat_value_error(self):
             ),
         ]
 
-        with pytest.raises(
-            ValueError, match="Input DataFrames must have the same columns/dtypes."
-        ):
-            categorical_df_concat(mismatched_dtypes)
+        # with pytest.raises(
+        #     ValueError, match="Input DataFrames must have the same columns."
+        # ):
+        #     categorical_df_concat(mismatched_dtypes)
 
         with pytest.raises(
-            ValueError, match="Input DataFrames must have the same columns/dtypes."
+            ValueError, match="Input DataFrames must have the same columns."
         ):
             categorical_df_concat(mismatched_column_names)