Skip to content

Commit

Permalink
[CI] Add pre-commit hook pyupgrade to auto upgrade Python syntax
Browse files (browse the repository at this point in the history)
"A tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language."

https://github.com/asottile/pyupgrade
  • Loading branch information
jbampton committed Oct 13, 2024
1 parent c6d7969 commit c853c65
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 56 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ repos:
hooks:
- id: identity
- id: check-hooks-apply
- repo: https://github.com/asottile/pyupgrade
rev: v3.18.0
hooks:
- id: pyupgrade
args: [--py36-plus]
exclude: ^python/sedona/sql/dataframe_api\.py$
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.10.0
hooks:
Expand Down
4 changes: 2 additions & 2 deletions python/sedona/core/jvm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from sedona.utils.decorators import classproperty

string_types = (type(b""), type(""))
string_types = (bytes, str)


def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
Expand Down Expand Up @@ -190,7 +190,7 @@ def get_spark_java_config(
try:
used_jar_files = java_spark_conf.get(value)
except Py4JJavaError:
error_message = "Didn't find the value of {} from SparkConf".format(value)
error_message = f"Didn't find the value of {value} from SparkConf"
logging.info(error_message)

return used_jar_files, error_message
Expand Down
2 changes: 1 addition & 1 deletion python/sedona/maps/SedonaPyDeck.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def _create_default_fill_color_(cls, gdf, plot_col):
:return: fill_color string for pydeck map
"""
plot_max = gdf[plot_col].max()
return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"

@classmethod
def _create_coord_column_(cls, gdf, geometry_col, add_points=False):
Expand Down
4 changes: 2 additions & 2 deletions python/sedona/raster/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def _do_change_pixel_anchor(self, from_anchor: PixelAnchor, to_anchor: PixelAnch

def __repr__(self):
return (
"[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
+ " {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+ f" {self.skew_y} {self.scale_y} {self.ip_y}\n"
+ " 0 0 1 ]"
)

Expand Down
4 changes: 2 additions & 2 deletions python/sedona/raster/raster_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
width, height, bands_meta, affine_trans, crs_wkt, awt_raster
)
else:
raise ValueError("unsupported raster_type: {}".format(raster_type))
raise ValueError(f"unsupported raster_type: {raster_type}")


def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
Expand Down Expand Up @@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
elif data_type == DataBuffer.TYPE_DOUBLE:
np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
else:
raise ValueError("unknown data_type {}".format(data_type))
raise ValueError(f"unknown data_type {data_type}")

banks.append(np_array)

Expand Down
2 changes: 1 addition & 1 deletion python/sedona/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
T = TypeVar("T")


class classproperty(object):
class classproperty:

def __init__(self, f):
self.f = f
Expand Down
4 changes: 1 addition & 3 deletions python/sedona/utils/geometry_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ def find_geos_c_dll():
".dll"
):
return os.path.join(lib_dirpath, filename)
raise RuntimeError(
"geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
)
raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")

if shapely.__version__.startswith("2."):
if sys.platform != "win32":
Expand Down
6 changes: 3 additions & 3 deletions python/sedona/utils/geometry_serde_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def type_of(geom) -> int:
elif geom._ndim == 3:
return CoordinateType.XYZ
else:
raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")

@staticmethod
def bytes_per_coord(coord_type: int) -> int:
Expand Down Expand Up @@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
geom = deserialize_geometry_collection(geom_buffer)
else:
raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
raise ValueError(f"Unsupported geometry type ID: {geom_type}")
return geom, geom_buffer.ints_offset


Expand Down Expand Up @@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
geom_type = GeometryTypeID.MULTIPOLYGON
total_size = 12
else:
raise ValueError("Invalid empty geometry collection object: {}".format(geom))
raise ValueError(f"Invalid empty geometry collection object: {geom}")
return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)


Expand Down
2 changes: 1 addition & 1 deletion python/sedona/utils/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def register(self, meth):

if parm.annotation is inspect.Parameter.empty:
raise InvalidParametersException(
"Argument {} must be annotated with a type".format(name)
f"Argument {name} must be annotated with a type"
)
if parm.default is not inspect.Parameter.empty:
self._methods[tuple(types)] = meth
Expand Down
2 changes: 1 addition & 1 deletion python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from sedona import version

with open("README.md", "r") as fh:
with open("README.md") as fh:
long_description = fh.read()

extension_args = {}
Expand Down
32 changes: 14 additions & 18 deletions python/tests/core/test_avoiding_python_jvm_serde_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,20 @@ def test_spatial_join_query_flat_to_df(self):
right_geometries = self.__row_to_list(right_geometries_raw)

# Ignore the ordering of these
assert set(geom[0] for geom in left_geometries) == set(
[
"POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
"POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
"POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
]
)
assert set(geom[0] for geom in right_geometries) == set(
[
"POINT (-3 5)",
"POINT (11 5)",
"POINT (4 3)",
"POINT (-1 -1)",
"POINT (-4 -5)",
]
)
assert {geom[0] for geom in left_geometries} == {
"POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
"POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
"POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
"POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
}
assert {geom[0] for geom in right_geometries} == {
"POINT (-3 5)",
"POINT (11 5)",
"POINT (4 3)",
"POINT (-1 -1)",
"POINT (-4 -5)",
}

def test_range_query_flat_to_df(self):
poi_point_rdd = WktReader.readToGeometryRDD(
Expand Down
34 changes: 13 additions & 21 deletions python/tests/sql/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -1911,14 +1911,10 @@ def test_st_collect_on_array_type(self):
)

# then result should be as expected
assert set(
[
el[0]
for el in geometry_df_collected.selectExpr(
"ST_AsText(collected)"
).collect()
]
) == {
assert {
el[0]
for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
} == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
Expand All @@ -1944,14 +1940,10 @@ def test_st_collect_on_multiple_columns(self):
)

# then result should be calculated
assert set(
[
el[0]
for el in geometry_df_collected.selectExpr(
"ST_AsText(collected)"
).collect()
]
) == {
assert {
el[0]
for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
} == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
Expand Down Expand Up @@ -1980,7 +1972,7 @@ def test_st_reverse(self):
}
for input_geom, expected_geom in test_cases.items():
reversed_geometry = self.spark.sql(
"select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
)
assert reversed_geometry.take(1)[0][0] == expected_geom

Expand Down Expand Up @@ -2078,7 +2070,7 @@ def test_st_force2d(self):

for input_geom, expected_geom in tests1.items():
geom_2d = self.spark.sql(
"select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
)
assert geom_2d.take(1)[0][0] == expected_geom

Expand All @@ -2102,7 +2094,7 @@ def test_st_buildarea(self):

for input_geom, expected_geom in tests.items():
areal_geom = self.spark.sql(
"select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
)
assert areal_geom.take(1)[0][0] == expected_geom

Expand Down Expand Up @@ -2162,7 +2154,7 @@ def test_st_s2_cell_ids(self):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
"select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
Expand Down Expand Up @@ -2190,7 +2182,7 @@ def test_st_h3_cell_ids(self):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
"select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
Expand Down
2 changes: 1 addition & 1 deletion python/tests/stats/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_actual_results(

clusters = {
frozenset([y[0] for y in clusters_members if y[1] == x])
for x in set([y[1] for y in clusters_members])
for x in {y[1] for y in clusters_members}
}

return clusters
Expand Down

0 comments on commit c853c65

Please sign in to comment.