[CI] Add pyupgrade pre-commit hook to auto-upgrade Python syntax

"A tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language." https://github.com/asottile/pyupgrade
apache · Oct 13, 2024 · c853c65
1 parent c6d7969
commit c853c65
Show file tree

Hide file tree

Showing 13 changed files with 48 additions and 56 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,6 +10,12 @@ repos:
     hooks:
       - id: identity
       - id: check-hooks-apply
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.18.0
+    hooks:
+      - id: pyupgrade
+        args: [--py36-plus]
+        exclude: ^python/sedona/sql/dataframe_api\.py$
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 24.10.0
     hooks:

diff --git a/python/sedona/core/jvm/config.py b/python/sedona/core/jvm/config.py
@@ -28,7 +28,7 @@
 
 from sedona.utils.decorators import classproperty
 
-string_types = (type(b""), type(""))
+string_types = (bytes, str)
 
 
 def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
@@ -190,7 +190,7 @@ def get_spark_java_config(
         try:
             used_jar_files = java_spark_conf.get(value)
         except Py4JJavaError:
-            error_message = "Didn't find the value of {} from SparkConf".format(value)
+            error_message = f"Didn't find the value of {value} from SparkConf"
             logging.info(error_message)
 
         return used_jar_files, error_message

diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py
@@ -315,7 +315,7 @@ def _create_default_fill_color_(cls, gdf, plot_col):
         :return: fill_color string for pydeck map
         """
         plot_max = gdf[plot_col].max()
-        return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
+        return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"
 
     @classmethod
     def _create_coord_column_(cls, gdf, geometry_col, add_points=False):

diff --git a/python/sedona/raster/meta.py b/python/sedona/raster/meta.py
@@ -104,8 +104,8 @@ def _do_change_pixel_anchor(self, from_anchor: PixelAnchor, to_anchor: PixelAnch
 
     def __repr__(self):
         return (
-            "[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
-            + "  {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
+            f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+            + f"  {self.skew_y} {self.scale_y} {self.ip_y}\n"
             + "   0  0  1 ]"
         )
 

diff --git a/python/sedona/raster/raster_serde.py b/python/sedona/raster/raster_serde.py
@@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
             width, height, bands_meta, affine_trans, crs_wkt, awt_raster
         )
     else:
-        raise ValueError("unsupported raster_type: {}".format(raster_type))
+        raise ValueError(f"unsupported raster_type: {raster_type}")
 
 
 def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
@@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
         elif data_type == DataBuffer.TYPE_DOUBLE:
             np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
         else:
-            raise ValueError("unknown data_type {}".format(data_type))
+            raise ValueError(f"unknown data_type {data_type}")
 
         banks.append(np_array)
 

diff --git a/python/sedona/utils/decorators.py b/python/sedona/utils/decorators.py
@@ -20,7 +20,7 @@
 T = TypeVar("T")
 
 
-class classproperty(object):
+class classproperty:
 
     def __init__(self, f):
         self.f = f

diff --git a/python/sedona/utils/geometry_serde.py b/python/sedona/utils/geometry_serde.py
@@ -42,9 +42,7 @@ def find_geos_c_dll():
                     ".dll"
                 ):
                     return os.path.join(lib_dirpath, filename)
-        raise RuntimeError(
-            "geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
-        )
+        raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")
 
     if shapely.__version__.startswith("2."):
         if sys.platform != "win32":

diff --git a/python/sedona/utils/geometry_serde_general.py b/python/sedona/utils/geometry_serde_general.py
@@ -82,7 +82,7 @@ def type_of(geom) -> int:
         elif geom._ndim == 3:
             return CoordinateType.XYZ
         else:
-            raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
+            raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")
 
     @staticmethod
     def bytes_per_coord(coord_type: int) -> int:
@@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
     elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
         geom = deserialize_geometry_collection(geom_buffer)
     else:
-        raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
+        raise ValueError(f"Unsupported geometry type ID: {geom_type}")
     return geom, geom_buffer.ints_offset
 
 
@@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
         geom_type = GeometryTypeID.MULTIPOLYGON
         total_size = 12
     else:
-        raise ValueError("Invalid empty geometry collection object: {}".format(geom))
+        raise ValueError(f"Invalid empty geometry collection object: {geom}")
     return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)
 
 

diff --git a/python/sedona/utils/meta.py b/python/sedona/utils/meta.py
@@ -79,7 +79,7 @@ def register(self, meth):
 
             if parm.annotation is inspect.Parameter.empty:
                 raise InvalidParametersException(
-                    "Argument {} must be annotated with a type".format(name)
+                    f"Argument {name} must be annotated with a type"
                 )
             if parm.default is not inspect.Parameter.empty:
                 self._methods[tuple(types)] = meth

diff --git a/python/setup.py b/python/setup.py
@@ -21,7 +21,7 @@
 
 from sedona import version
 
-with open("README.md", "r") as fh:
+with open("README.md") as fh:
     long_description = fh.read()
 
 extension_args = {}

diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -165,24 +165,20 @@ def test_spatial_join_query_flat_to_df(self):
         right_geometries = self.__row_to_list(right_geometries_raw)
 
         # Ignore the ordering of these
-        assert set(geom[0] for geom in left_geometries) == set(
-            [
-                "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
-                "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
-                "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-            ]
-        )
-        assert set(geom[0] for geom in right_geometries) == set(
-            [
-                "POINT (-3 5)",
-                "POINT (11 5)",
-                "POINT (4 3)",
-                "POINT (-1 -1)",
-                "POINT (-4 -5)",
-            ]
-        )
+        assert {geom[0] for geom in left_geometries} == {
+            "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
+            "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
+            "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+        }
+        assert {geom[0] for geom in right_geometries} == {
+            "POINT (-3 5)",
+            "POINT (11 5)",
+            "POINT (4 3)",
+            "POINT (-1 -1)",
+            "POINT (-4 -5)",
+        }
 
     def test_range_query_flat_to_df(self):
         poi_point_rdd = WktReader.readToGeometryRDD(

diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
@@ -1911,14 +1911,10 @@ def test_st_collect_on_array_type(self):
         )
 
         # then result should be as expected
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -1944,14 +1940,10 @@ def test_st_collect_on_multiple_columns(self):
         )
 
         # then result should be calculated
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -1980,7 +1972,7 @@ def test_st_reverse(self):
         }
         for input_geom, expected_geom in test_cases.items():
             reversed_geometry = self.spark.sql(
-                "select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
             )
             assert reversed_geometry.take(1)[0][0] == expected_geom
 
@@ -2078,7 +2070,7 @@ def test_st_force2d(self):
 
         for input_geom, expected_geom in tests1.items():
             geom_2d = self.spark.sql(
-                "select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
             )
             assert geom_2d.take(1)[0][0] == expected_geom
 
@@ -2102,7 +2094,7 @@ def test_st_buildarea(self):
 
         for input_geom, expected_geom in tests.items():
             areal_geom = self.spark.sql(
-                "select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
             )
             assert areal_geom.take(1)[0][0] == expected_geom
 
@@ -2162,7 +2154,7 @@ def test_st_s2_cell_ids(self):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
+                f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)
@@ -2190,7 +2182,7 @@ def test_st_h3_cell_ids(self):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
+                f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)

diff --git a/python/tests/stats/test_dbscan.py b/python/tests/stats/test_dbscan.py
@@ -92,7 +92,7 @@ def get_actual_results(
 
         clusters = {
             frozenset([y[0] for y in clusters_members if y[1] == x])
-            for x in set([y[1] for y in clusters_members])
+            for x in {y[1] for y in clusters_members}
         }
 
         return clusters