Skip to content

Commit

Permalink
Perf: Use ravel, not flatten, for numpy to pyarrow (#512)
Browse files (browse the repository at this point in the history)
  • Loading branch information
kylebarron authored May 10, 2024
1 parent 7fe0f99 commit b2ca192
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 17 deletions.
2 changes: 1 addition & 1 deletion lonboard/_geoarrow/_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def _convert_box2d_to_geoarrow_polygon_array(
geom_offsets = np.arange(0, len(ring_offsets), dtype=np.int32)

# Construct the final PolygonArray
coords = pa.FixedSizeListArray.from_arrays(coords.flatten("C"), 2)
coords = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), 2)
ring_array = pa.ListArray.from_arrays(ring_offsets, coords)
polygon_array = pa.ListArray.from_arrays(geom_offsets, ring_array)
return polygon_array
Expand Down
12 changes: 6 additions & 6 deletions lonboard/_geoarrow/extension_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def construct_geometry_array(
extension_metadata["ARROW:extension:metadata"] = json.dumps({"crs": crs_str})

if geom_type == GeometryType.POINT:
parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
extension_metadata["ARROW:extension:name"] = "geoarrow.point"
field = pa.field(
field_name,
Expand All @@ -318,7 +318,7 @@ def construct_geometry_array(
elif geom_type == GeometryType.LINESTRING:
assert len(offsets) == 1, "Expected one offsets array"
(geom_offsets,) = offsets
_parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
_parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr)
extension_metadata["ARROW:extension:name"] = "geoarrow.linestring"
field = pa.field(
Expand All @@ -332,7 +332,7 @@ def construct_geometry_array(
elif geom_type == GeometryType.POLYGON:
assert len(offsets) == 2, "Expected two offsets arrays"
ring_offsets, geom_offsets = offsets
_parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
_parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1)
extension_metadata["ARROW:extension:name"] = "geoarrow.polygon"
Expand All @@ -347,7 +347,7 @@ def construct_geometry_array(
elif geom_type == GeometryType.MULTIPOINT:
assert len(offsets) == 1, "Expected one offsets array"
(geom_offsets,) = offsets
_parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
_parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr)
extension_metadata["ARROW:extension:name"] = "geoarrow.multipoint"
field = pa.field(
Expand All @@ -361,7 +361,7 @@ def construct_geometry_array(
elif geom_type == GeometryType.MULTILINESTRING:
assert len(offsets) == 2, "Expected two offsets arrays"
ring_offsets, geom_offsets = offsets
_parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
_parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr1)
extension_metadata["ARROW:extension:name"] = "geoarrow.multilinestring"
Expand All @@ -376,7 +376,7 @@ def construct_geometry_array(
elif geom_type == GeometryType.MULTIPOLYGON:
assert len(offsets) == 3, "Expected three offsets arrays"
ring_offsets, polygon_offsets, geom_offsets = offsets
_parr = pa.FixedSizeListArray.from_arrays(coords.flatten(), len(dims))
_parr = pa.FixedSizeListArray.from_arrays(coords.ravel("C"), len(dims))
_parr1 = pa.ListArray.from_arrays(pa.array(ring_offsets), _parr)
_parr2 = pa.ListArray.from_arrays(pa.array(polygon_offsets), _parr1)
parr = pa.ListArray.from_arrays(pa.array(geom_offsets), _parr2)
Expand Down
4 changes: 2 additions & 2 deletions lonboard/_geoarrow/ops/coord_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@ def _transpose_coords(arr: Union[pa.FixedSizeListArray, pa.StructArray]):
if arr.type.num_fields == 2:
x = arr.field("x").to_numpy()
y = arr.field("y").to_numpy()
coords = np.column_stack([x, y]).flatten("C")
coords = np.column_stack([x, y]).ravel("C")
return pa.FixedSizeListArray.from_arrays(coords, 2)

if arr.type.num_fields == 3:
x = arr.field("x").to_numpy()
y = arr.field("y").to_numpy()
z = arr.field("z").to_numpy()
coords = np.column_stack([x, y, z]).flatten("C")
coords = np.column_stack([x, y, z]).ravel("C")
return pa.FixedSizeListArray.from_arrays(coords, 3)

raise ValueError(f"Expected struct with 2 or 3 fields, got {arr.type.num_fields}")
Expand Down
4 changes: 1 addition & 3 deletions lonboard/_geoarrow/ops/reproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,7 @@ def _reproject_coords(arr: pa.FixedSizeListArray, transformer: Transformer):
raise ValueError(f"Unexpected list size {list_size}")

coord_field = pa.list_(pa.field(dims, pa.float64()), len(dims))
return pa.FixedSizeListArray.from_arrays(
output_np_arr.flatten("C"), type=coord_field
)
return pa.FixedSizeListArray.from_arrays(output_np_arr.ravel("C"), type=coord_field)


def _reproject_chunk_nest_0(arr: pa.ListArray, transformer: Transformer):
Expand Down
10 changes: 5 additions & 5 deletions lonboard/traits.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def validate(
info="Color array must have 3 or 4 as its second dimension.",
)

return pa.FixedSizeListArray.from_arrays(value.flatten("C"), list_size)
return pa.FixedSizeListArray.from_arrays(value.ravel("C"), list_size)

# Check for Arrow PyCapsule Interface
# https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand Down Expand Up @@ -502,7 +502,7 @@ def validate(
info="Point array to have 2 or 3 as its second dimension",
)

return pa.FixedSizeListArray.from_arrays(value.flatten("C"), list_size)
return pa.FixedSizeListArray.from_arrays(value.ravel("C"), list_size)

if isinstance(value, (pa.ChunkedArray, pa.Array)):
if not pa.types.is_fixed_size_list(value.type):
Expand Down Expand Up @@ -681,7 +681,7 @@ def validate(self, obj, value) -> Union[float, pa.ChunkedArray, pa.DoubleArray]:
),
)

return pa.FixedSizeListArray.from_arrays(value.flatten("C"), filter_size)
return pa.FixedSizeListArray.from_arrays(value.ravel("C"), filter_size)

# Check for Arrow PyCapsule Interface
# https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand Down Expand Up @@ -800,7 +800,7 @@ def validate(
)
value = value.astype(np.float32)

return pa.FixedSizeListArray.from_arrays(value.flatten("C"), 3)
return pa.FixedSizeListArray.from_arrays(value.ravel("C"), 3)

# Check for Arrow PyCapsule Interface
# https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand Down Expand Up @@ -932,7 +932,7 @@ def validate(
if np.issubdtype(value.dtype, np.float64):
value = value.astype(np.float32)

return pa.FixedSizeListArray.from_arrays(value.flatten("C"), list_size)
return pa.FixedSizeListArray.from_arrays(value.ravel("C"), list_size)

# Check for Arrow PyCapsule Interface
# https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand Down

0 comments on commit b2ca192

Please sign in to comment.