diff --git a/Dockerfile b/Dockerfile
index 4d1ce30..5209f74 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -70,6 +70,7 @@ RUN apt-get update --quiet \
apt-get install -y --quiet --no-install-recommends \
"build-essential" \
"gcc" \
+ "libpq-dev" \
&& rm -rf /var/lib/apt/lists/*
COPY --from=uv /uv /usr/local/bin/uv
COPY pyproject.toml uv.lock README.md /_lock/
@@ -103,6 +104,12 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
SSL_CERT_FILE=/etc/ssl/certs/ca-certificates.crt \
REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt \
CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+RUN apt-get update --quiet \
+ && DEBIAN_FRONTEND=noninteractive \
+ apt-get install -y --quiet --no-install-recommends \
+ "postgresql-client" \
+ && DEBIAN_FRONTEND=noninteractive apt-get upgrade -y \
+ && rm -rf /var/lib/apt/lists/*
# Copy Python deps from build to runtime
COPY --from=build /opt/python /opt/python
WORKDIR /data
diff --git a/README.md b/README.md
index df1cae0..087705c 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@
-
+
- Parse and normalize a GeoJSON area of interest, using pure Python.
+ Parse and normalize a GeoJSON area of interest, using PostGIS.
@@ -54,25 +54,16 @@
- **Flexible geometry input**:
- Polygon
- MultiPolygons
+ - GeometryCollection
- Feature
- FeatureCollection
- Handle multigeometries with an optional merge to single polygon, or split into
featcol of individual polygons.
-- Handle geometries nested inside GeometryCollection*.
+- Handle geometries nested inside GeometryCollection.
- Remove any z-dimension coordinates.
- Warn user if CRS is provided, in a coordinate system other than EPSG:4326.
- **Normalised output**: FeatureCollection containing Polygon geoms.
-> [!WARNING]
-> *We typically advise against using the GeometryCollection type, and support
-> in this library may not be fully functional.
->
-> However sometimes geometries may need to be returned wrapped in
-> GeometryCollection, for example due to idiosyncrasies of PostGIS.
->
-> In this scenario, we support stripping out the first geometry from inside
-> each GeometryCollection object (that may be nested in a FeatureCollection).
-
## Capturing The Warnings
If the GeoJSON has an invalid CRS, or coordinates seem off, a warning
@@ -104,3 +95,15 @@ if recorded_warnings:
# do stuff with warning
logger.warning(f"A warning was encountered: {warning.message}")
```
+
+## History
+
+- Initially I tried to write a pure-Python implementation of this, with no
+  dependencies.
+- I underestimated how much work that is! It might be possible to reverse
+  engineer C++ GEOS or georust/geos, but it's more hassle than it's worth.
+- As all of the target install candidates for this package use a db driver
+  anyway, I thought it wisest (and most time-efficient) to use the PostGIS
+  GEOS implementation (specifically for the unary_union and convex_hull
+  algorithms); see the usage sketch below.
+- An additional advantage is the potential to port this to PGLite when the
+  PostGIS extension is available, meaning AOI processing could run entirely
+  in the browser.
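+
+A minimal usage sketch of the new PostGIS-backed API. The connection string
+below points at the local `compose.yml` database and is illustrative only; an
+existing psycopg `Connection` can be passed instead of a string:
+
+```python
+from geojson_aoi.parser import parse_aoi
+
+# Illustrative connection string (matches the compose.yml dev database)
+db = "postgresql://aoi:dummycipassword@localhost:5439/aoi"
+
+aoi = {
+    "type": "Polygon",
+    "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]],
+}
+
+# Returns a normalized FeatureCollection of Polygon features
+featcol = parse_aoi(db, aoi, merge=True)
+```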
diff --git a/compose.yml b/compose.yml
index 9f9f75c..91fcca9 100644
--- a/compose.yml
+++ b/compose.yml
@@ -15,8 +15,12 @@
# along with geojson-aoi-parser. If not, see <https://www.gnu.org/licenses/>.
#
+networks:
+ net:
+ name: aoi-parser
+
services:
- aoi-parser:
+ parser:
image: "ghcr.io/hotosm/geojson-aoi-parser:${TAG_OVERRIDE:-ci}"
build:
target: ci
@@ -26,6 +30,29 @@ services:
- ./geojson_aoi:/opt/python/lib/python3.10/site-packages/geojson_aoi
# Mount local tests
- ./tests:/data/tests
- network_mode: none
- restart: "unless-stopped"
+ depends_on:
+ db:
+ condition: service_healthy
+ networks:
+ - net
+ restart: "no"
command: "pytest"
+
+ db:
+ image: "postgis/postgis:17-3.5-alpine"
+ container_name: aoi-parser-db
+ environment:
+ - POSTGRES_USER=aoi
+ - POSTGRES_PASSWORD=dummycipassword
+ - POSTGRES_DB=aoi
+ ports:
+ - "5439:5432"
+ networks:
+ - net
+ restart: "unless-stopped"
+ healthcheck:
+      test: pg_isready -U aoi -d aoi
+ start_period: 5s
+ interval: 10s
+ timeout: 5s
+ retries: 3
diff --git a/geojson_aoi/merge.py b/geojson_aoi/merge.py
deleted file mode 100644
index 7af4a52..0000000
--- a/geojson_aoi/merge.py
+++ /dev/null
@@ -1,182 +0,0 @@
-"""Functions for Polygon merging."""
-
-from itertools import chain
-
-from geojson_aoi.types import FeatureCollection, PointCoords, PolygonCoords
-
-
-def merge_polygons(featcol: FeatureCollection) -> FeatureCollection:
- """Merge multiple Polygons or MultiPolygons into a single Polygon.
-
- It is used to create a single polygon boundary.
-
- Automatically determine whether to use union (for overlapping polygons)
- or convex hull (for disjoint polygons).
-
- As a result of the processing, any Feature properties will be lost.
-
- Args:
- featcol (FeatureCollection): a FeatureCollection containing geometries.
-
- Returns:
- FeatureCollection: a FeatureCollection of a single Polygon.
- """
- if not featcol.get("features"):
- raise ValueError("FeatureCollection must contain at least one feature")
-
- polygons = []
- for feature in featcol.get("features", []):
- geom = feature["geometry"]
- if geom["type"] == "Polygon":
- polygons.append([_remove_holes(geom["coordinates"])])
- elif geom["type"] == "MultiPolygon":
- for polygon in geom["coordinates"]:
- polygons.append([_remove_holes(polygon)])
-
- polygons = [_ensure_right_hand_rule(polygon[0]) for polygon in polygons]
-
- if all(
- _polygons_disjoint(p1[0], p2[0])
- for i, p1 in enumerate(polygons)
- for p2 in polygons[i + 1 :]
- ):
- merged_coordinates = _create_convex_hull(list(chain.from_iterable(polygons)))
- else:
- merged_coordinates = _create_unary_union(polygons)
-
- return {
- "type": "FeatureCollection",
- "features": [
- {
- "type": "Feature",
- "geometry": {"type": "Polygon", "coordinates": [merged_coordinates]},
- "properties": {},
- }
- ],
- }
-
-
-def _ensure_right_hand_rule(
- coordinates: PolygonCoords,
-) -> PolygonCoords:
- """Ensure the outer ring follows the right-hand rule (clockwise)."""
-
- def is_clockwise(ring: list[PointCoords]) -> bool:
- """Check coords are in clockwise direction."""
- return (
- sum(
- (ring[i][0] - ring[i - 1][0]) * (ring[i][1] + ring[i - 1][1])
- for i in range(len(ring))
- )
- > 0
- )
-
- # Validate input
- if not isinstance(coordinates[0], list) or not all(
- isinstance(pt, list) and len(pt) == 2 for pt in coordinates[0]
- ):
- raise ValueError(
- "Invalid input: coordinates[0] must be a list "
- f"of [x, y] points. Got: {coordinates[0]}"
- )
-
- # Ensure the first ring is the exterior ring and follows clockwise direction
- if not is_clockwise(coordinates[0]):
- coordinates[0] = coordinates[0][::-1]
-
- # Ensure any holes follow counter-clockwise direction
- for i in range(1, len(coordinates)):
- if is_clockwise(coordinates[i]):
- coordinates[i] = coordinates[i][::-1]
-
- return coordinates
-
-
-def _remove_holes(polygon: list) -> list:
- """Remove holes from a polygon by keeping only the exterior ring.
-
- Args:
- polygon: A list of coordinate rings, where the first is the exterior
- and subsequent ones are interior holes.
-
- Returns:
- list: A list containing only the exterior ring coordinates.
- """
- if not polygon:
- return [] # Return an empty list if the polygon is empty
- return polygon[0] # Only return the exterior ring
-
-
-def _create_convex_hull(points: list[PointCoords]) -> list[PointCoords]:
- """Create a convex hull from a list of polygons.
-
- This essentially draws a boundary around the outside of the polygons.
-
- Most appropriate when the boundaries are not touching (disjoint).
- """
-
- def cross(o: PointCoords, a: PointCoords, b: PointCoords) -> float:
- return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])
-
- points = sorted(set(points))
- if len(points) <= 1:
- return points
-
- lower, upper = [], []
- for p in points:
- while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0:
- lower.pop()
- lower.append(p)
- for p in reversed(points):
- while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
- upper.pop()
- upper.append(p)
-
- return lower[:-1] + upper[:-1]
-
-
-def _polygons_disjoint(poly1: list[list[float]], poly2: list[list[float]]) -> bool:
- """Check if two polygons are disjoint.
-
- Test bounding boxes and edge intersections.
- """
-
- def bounding_box(polygon: list[list[float]]) -> tuple:
- xs, ys = zip(*polygon, strict=False)
- return min(xs), min(ys), max(xs), max(ys)
-
- def bounding_boxes_overlap(bb1: tuple, bb2: tuple) -> bool:
- return not (
- bb1[2] < bb2[0] or bb2[2] < bb1[0] or bb1[3] < bb2[1] or bb2[3] < bb1[1]
- )
-
- bb1, bb2 = bounding_box(poly1), bounding_box(poly2)
- if not bounding_boxes_overlap(bb1, bb2):
- return True
-
- def line_segments_intersect(p1, p2, q1, q2) -> bool:
- def ccw(a, b, c):
- return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])
-
- return ccw(p1, q1, q2) != ccw(p2, q1, q2) and ccw(p1, p2, q1) != ccw(p1, p2, q2)
-
- for i in range(len(poly1)):
- p1, p2 = poly1[i], poly1[(i + 1) % len(poly1)]
- for j in range(len(poly2)):
- q1, q2 = poly2[j], poly2[(j + 1) % len(poly2)]
- if line_segments_intersect(p1, p2, q1, q2):
- return False
-
- return True
-
-
-def _create_unary_union(polygons: list[list[list[float]]]) -> list[list[list[float]]]:
- """Create a unary union from a list of polygons.
-
- This merges the polygons by their boundaries exactly.
- Most appropriate when the boundaries are touching (not disjoint).
- """
- # Pure Python union implementation is non-trivial, so this is simplified:
- # Merge all coordinates into a single outer ring.
- all_points = chain.from_iterable(polygon[0] for polygon in polygons)
- return [list(set(all_points))]
diff --git a/geojson_aoi/normalize.py b/geojson_aoi/normalize.py
deleted file mode 100644
index 0f8a477..0000000
--- a/geojson_aoi/normalize.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""Functions to normalize a GeoJSON to FeatureCollection."""
-
-from geojson_aoi.types import (
- Feature,
- FeatureCollection,
- Geometry,
- PolygonCoords,
- Properties,
-)
-
-
-def normalize_featcol(featcol: FeatureCollection) -> FeatureCollection:
- """Normalize a FeatureCollection into a standardised format.
-
- The final FeatureCollection will only contain:
- - Polygon
- - LineString
- - Point
-
- Processed:
- - MultiPolygons will be divided out into individual polygons.
- - GeometryCollections wrappers will be stripped out.
- - Removes any z-dimension coordinates, e.g. [43, 32, 0.0]
-
- Args:
- featcol: A parsed FeatureCollection.
-
- Returns:
- FeatureCollection: A normalized FeatureCollection.
- """
- for feat in featcol.get("features", []):
- geom = feat.get("geometry")
- if not geom or "type" not in geom:
- continue # Skip invalid features
-
- # Strip out GeometryCollection wrappers
- if (
- geom.get("type") == "GeometryCollection"
- and len(geom.get("geometries", [])) == 1
- ):
- feat["geometry"] = geom.get("geometries")[0]
-
- # Remove any z-dimension coordinates
- coords = geom.get("coordinates")
- if coords:
- geom["coordinates"] = _remove_z_dimension(coords)
-
- # Convert MultiPolygon type --> individual Polygons
- return _multigeom_to_singlegeom(featcol)
-
-
-def _remove_z_dimension(coords: PolygonCoords) -> PolygonCoords:
- """Recursively remove the Z dimension from coordinates."""
- if isinstance(coords[0], (list, tuple)):
- # If the first element is a list, recurse into each sub-list
- return [_remove_z_dimension(sub_coord) for sub_coord in coords]
- else:
- # If the first element is not a list, it's a coordinate pair (x, y, z)
- return coords[:2] # Return only [x, y]
-
-
-def _multigeom_to_singlegeom(featcol: FeatureCollection) -> FeatureCollection:
- """Converts any Multi(xxx) geometry types to list of individual geometries.
-
- Args:
- featcol : A GeoJSON FeatureCollection of geometries.
-
- Returns:
- FeatureCollection: A GeoJSON FeatureCollection containing
- single geometry types only: Polygon, LineString, Point.
- """
-
- def split_multigeom(geom: Geometry, properties: Properties) -> list[Feature]:
- """Splits multi-geometries into individual geometries."""
- geom_type = geom["type"]
- coordinates = geom["coordinates"]
-
- # Handle MultiPolygon correctly
- if geom_type == "MultiPolygon":
- return [
- {
- "type": "Feature",
- "geometry": {"type": "Polygon", "coordinates": polygon},
- "properties": properties,
- }
- for polygon in coordinates
- ]
-
- # Handle other MultiXXX types
- return [
- {
- "type": "Feature",
- "geometry": {"type": geom_type[5:], "coordinates": coord},
- "properties": properties,
- }
- for coord in coordinates
- ]
-
- final_features = []
-
- for feature in featcol.get("features", []):
- properties = feature.get("properties", {})
- geom = feature.get("geometry")
- if not geom or "type" not in geom:
- continue
-
- if geom["type"].startswith("Multi"):
- # Handle all MultiXXX types
- final_features.extend(split_multigeom(geom, properties))
- else:
- # Handle single geometry types
- final_features.append(feature)
-
- return {"type": "FeatureCollection", "features": final_features}
diff --git a/geojson_aoi/parser.py b/geojson_aoi/parser.py
index fd03535..dd5cf0a 100644
--- a/geojson_aoi/parser.py
+++ b/geojson_aoi/parser.py
@@ -1,3 +1,20 @@
+# Copyright (c) Humanitarian OpenStreetMap Team
+# This file is part of geojson-aoi-parser.
+#
+# geojson-aoi-parser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# geojson-aoi-parser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with geojson-aoi-parser. If not, see <https://www.gnu.org/licenses/>.
+#
+
"""Parse various AOI GeoJSON formats and normalize."""
import json
@@ -5,9 +22,10 @@
import warnings
from pathlib import Path
-from geojson_aoi.merge import merge_polygons
-from geojson_aoi.normalize import normalize_featcol
-from geojson_aoi.types import FeatureCollection
+from psycopg import Connection
+
+from geojson_aoi.postgis import PostGis
+from geojson_aoi.types import Feature, FeatureCollection, GeoJSON
AllowedInputTypes = [
"Polygon",
@@ -20,20 +38,20 @@
log = logging.getLogger(__name__)
-def check_crs(featcol: FeatureCollection) -> None:
+def check_crs(geojson: GeoJSON) -> None:
"""Warn the user if an invalid CRS is detected.
Also does a rough check for one geometry, to determine if the
coordinates are within the 90/180 degree range.
Args:
- featcol (FeatureCollection): a FeatureCollection.
+ geojson (GeoJSON): a GeoJSON.
Returns:
- FeatureCollection: a FeatureCollection.
+ None
"""
- def is_valid_crs(crs_name):
+ def is_valid_crs(crs_name: str) -> bool:
valid_crs_list = [
"urn:ogc:def:crs:OGC:1.3:CRS84",
"urn:ogc:def:crs:EPSG::4326",
@@ -41,78 +59,66 @@ def is_valid_crs(crs_name):
]
return crs_name in valid_crs_list
- def is_valid_coordinate(coord):
- if coord is None:
- return False
- return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90
-
- if "crs" in featcol:
- crs = featcol.get("crs", {}).get("properties", {}).get("name")
- if not is_valid_crs(crs):
- warning_msg = (
- "Unsupported coordinate system, it is recommended to use a "
- "GeoJSON file in WGS84(EPSG 4326) standard."
- )
- log.warning(warning_msg)
- warnings.warn(UserWarning(warning_msg), stacklevel=2)
-
- features = featcol.get("features", [])
- coordinates = (
- features[-1].get("geometry", {}).get("coordinates", []) if features else []
- )
+ def is_valid_coordinate(coord: list[float]) -> bool:
+        return len(coord) >= 2 and -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90
- first_coordinate = None
- if coordinates:
- while isinstance(coordinates, list):
- first_coordinate = coordinates
- coordinates = coordinates[0]
-
- if not is_valid_coordinate(first_coordinate):
+ crs = geojson.get("crs", {}).get("properties", {}).get("name")
+ if crs and not is_valid_crs(crs):
warning_msg = (
- "The coordinates within the GeoJSON file are not valid. "
- "Is the file empty?"
+ "Unsupported coordinate system. Use WGS84 (EPSG 4326) for best results."
)
log.warning(warning_msg)
warnings.warn(UserWarning(warning_msg), stacklevel=2)
+    geom = geojson.get("geometry") or (geojson.get("features") or [{}])[-1].get(
+        "geometry", {}
+    )
+ coordinates = geom.get("coordinates", [])
+
+    # Drill down into nested coordinates to find the first coordinate pair
+    while (
+        isinstance(coordinates, list)
+        and coordinates
+        and isinstance(coordinates[0], list)
+    ):
+        coordinates = coordinates[0]
-def geojson_to_featcol(geojson_obj: dict) -> FeatureCollection:
- """Enforce GeoJSON is wrapped in FeatureCollection.
+ if not is_valid_coordinate(coordinates):
+ warning_msg = "Invalid coordinates in GeoJSON. Ensure the file is not empty."
+ log.warning(warning_msg)
+ warnings.warn(UserWarning(warning_msg), stacklevel=2)
- The type check is done directly from the GeoJSON to allow parsing
- from different upstream libraries (e.g. geojson_pydantic).
+
+def strip_featcol(geojson_obj: GeoJSON | Feature | FeatureCollection) -> list[GeoJSON]:
+ """Remove FeatureCollection and Feature wrapping.
Args:
- geojson_obj (dict): a parsed geojson, to wrap in a FeatureCollection.
+ geojson_obj (dict): a parsed geojson.
Returns:
- FeatureCollection: a FeatureCollection.
+ list[GeoJSON]: a list of geometries.
"""
+    # FIXME possibly add logic to retain any existing properties?
+
+ if geojson_obj.get("crs"):
+ # Warn the user if invalid CRS detected
+ check_crs(geojson_obj)
+
geojson_type = geojson_obj.get("type")
- geojson_crs = geojson_obj.get("crs")
if geojson_type == "FeatureCollection":
- log.debug("Already in FeatureCollection format, reparsing")
- features = geojson_obj.get("features", [])
+ geoms = [feature["geometry"] for feature in geojson_obj.get("features", [])]
elif geojson_type == "Feature":
- log.debug("Converting Feature to FeatureCollection")
- features = [geojson_obj]
+ geoms = [geojson_obj.get("geometry")]
else:
- log.debug("Converting Geometry to FeatureCollection")
- features = [{"type": "Feature", "geometry": geojson_obj, "properties": {}}]
+ geoms = [geojson_obj]
- featcol = {"type": "FeatureCollection", "features": features}
- if geojson_crs:
- featcol["crs"] = geojson_crs
- return featcol
+ return geoms
def parse_aoi(
- geojson_raw: str | bytes | dict, merge: bool = False
+ db: str | Connection, geojson_raw: str | bytes | dict, merge: bool = False
) -> FeatureCollection:
"""Parse a GeoJSON file or data struc into a normalized FeatureCollection.
Args:
+ db (str | Connection): Existing db connection, or connection string.
geojson_raw (str | bytes | dict): GeoJSON file path, JSON string, dict,
or file bytes.
merge (bool): If any nested Polygons / MultiPolygon should be merged.
@@ -143,14 +149,9 @@ def parse_aoi(
if geojson_parsed["type"] not in AllowedInputTypes:
raise ValueError(f"The GeoJSON type must be one of: {AllowedInputTypes}")
- # Convert to FeatureCollection
- featcol = geojson_to_featcol(geojson_parsed)
- if not featcol.get("features", []):
- raise ValueError("Failed parsing geojson")
-
- # Warn the user if invalid CRS detected
- check_crs(featcol)
+ # Extract from FeatureCollection
+ geoms = strip_featcol(geojson_parsed)
- if not merge:
- return normalize_featcol(featcol)
- return merge_polygons(normalize_featcol(featcol))
+    with PostGis(db, geoms, merge) as result:
+        return result.featcol
diff --git a/geojson_aoi/postgis.py b/geojson_aoi/postgis.py
new file mode 100644
index 0000000..be303da
--- /dev/null
+++ b/geojson_aoi/postgis.py
@@ -0,0 +1,169 @@
+# Copyright (c) Humanitarian OpenStreetMap Team
+# This file is part of geojson-aoi-parser.
+#
+# geojson-aoi-parser is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# geojson-aoi-parser is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with geojson-aoi-parser. If not, see <https://www.gnu.org/licenses/>.
+#
+"""Wrapper around PostGIS geometry functions."""
+
+import json
+import logging
+from uuid import uuid4
+
+from psycopg import Connection, connect
+
+from geojson_aoi.types import GeoJSON
+
+log = logging.getLogger(__name__)
+
+
+class Normalize:
+ """Normalise the geometry.
+
+ - Strip z-dimension (force 2D).
+ - Remove geoms from GeometryCollection.
+ - Multi geometries to single geometries.
+ """
+
+ @staticmethod
+ def init_table(table_id: str) -> str:
+ """Create the table for geometry processing."""
+ return f"""
+ CREATE TEMP TABLE "{table_id}" (
+ id SERIAL PRIMARY KEY,
+ geometry GEOMETRY(Polygon, 4326)
+ );
+ """
+
+ @staticmethod
+ def insert(geoms: list[GeoJSON], table_id: str) -> str:
+ """Insert geometries into db, normalising where possible."""
+ values = []
+ for geom in geoms:
+            # ST_Force2D strips z-coordinates
+ val = (
+ "ST_Force2D(ST_SetSRID("
+ f"ST_GeomFromGeoJSON('{json.dumps(geom)}'), 4326))"
+ )
+
+ # ST_CollectionExtract converts any GeometryCollections
+ # into MultiXXX geoms
+ if geom.get("type") == "GeometryCollection":
+ val = f"ST_CollectionExtract({val})"
+
+ # ST_Dump extracts all MultiXXX geoms to single geom equivalents
+ # TODO ST_Dump (complex, as it returns multiple geometries!)
+
+ # ST_ForcePolygonCW forces clockwise orientation for
+ # their exterior ring
+ if geom.get("type") == "Polygon" or geom.get("type") == "MultiPolygon":
+ val = f"ST_ForcePolygonCW({val})"
+
+ values.append(val)
+
+ value_string = ", ".join(values)
+ return f"""
+ INSERT INTO "{table_id}" (geometry)
+ VALUES {value_string};
+ """
+
+
+class Merge:
+ """Merge polygons.
+
+ - MultiPolygon to a single Polygon.
+ - Remove interior rings from all polygons (holes).
+
+ Automatically determine whether to use union (for overlapping polygons)
+ or convex hull (for disjoint polygons).
+ """
+
+ pass
+ # ST_UnaryUnion
+ # ST_ConvexHull
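+
+    # Rough sketch of the SQL this class could emit, following the pattern of
+    # Normalize (illustrative only, not yet wired into PostGis.__enter__; the
+    # final implementation may differ):
+    #
+    #   SELECT CASE
+    #       -- touching/overlapping polygons: keep the unioned exterior ring
+    #       WHEN ST_NumGeometries(merged.geom) = 1
+    #           THEN ST_MakePolygon(ST_ExteriorRing(ST_GeometryN(merged.geom, 1)))
+    #       -- disjoint polygons: wrap them all in a convex hull
+    #       ELSE ST_ConvexHull(merged.geom)
+    #   END
+    #   FROM (
+    #       SELECT ST_UnaryUnion(ST_Collect(geometry)) AS geom
+    #       FROM "{table_id}"
+    #   ) AS merged;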
+
+
+class PostGis:
+ """A synchronous database connection.
+
+ Typically called standalone.
+ Can reuse an existing upstream connection.
+ """
+
+ def __init__(self, db: str | Connection, geoms: list[GeoJSON], merge: bool = False):
+ """Initialise variables and compose classes."""
+ self.table_id = uuid4().hex
+ self.geoms = geoms
+ self.db = db
+ self.featcol = None
+
+ self.normalize = Normalize()
+ if merge:
+ self.merge = Merge()
+
+ def __enter__(self) -> "PostGis":
+ """Initialise the database via context manager."""
+ self.create_connection()
+ with self.connection.cursor() as cur:
+ cur.execute(self.normalize.init_table(self.table_id))
+ cur.execute(self.normalize.insert(self.geoms, self.table_id))
+ # if self.merge:
+ # cur.execute(self.merge.unary_union(self.geoms, self.table_id))
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ """Execute the SQL and optionally close the db connection."""
+ self.close_connection()
+
+ def create_connection(self) -> None:
+ """Get a new database connection."""
+ # Create new connection
+ if isinstance(self.db, str):
+ self.connection = connect(self.db)
+ self.is_new_connection = True
+ # Reuse existing connection
+ elif isinstance(self.db, Connection):
+ self.connection = self.db
+ self.is_new_connection = False
+ # Else, error
+ else:
+ msg = (
+ "The `db` variable is not a valid string or "
+ "existing psycopg connection."
+ )
+ log.error(msg)
+ raise ValueError(msg)
+
+ def close_connection(self) -> None:
+ """Close the database connection."""
+ if not self.connection:
+ return
+
+ # Execute all commands in a transaction before closing
+ try:
+ self.connection.commit()
+ except Exception as e:
+ log.error(e)
+ log.error("Error committing psycopg transaction to db")
+ finally:
+ # Only close the connection if it was newly created
+ if self.is_new_connection:
+ self.connection.close()
+
+
+class PostGisAsync:
+ """An asynchronous database connection.
+
+ Typically called from an async web server.
+ Can reuse an existing upstream connection.
+ """
diff --git a/geojson_aoi/types.py b/geojson_aoi/types.py
index bd49c50..8face2c 100644
--- a/geojson_aoi/types.py
+++ b/geojson_aoi/types.py
@@ -2,15 +2,7 @@
from typing import Any
-# Coordinates
-Coordinate = float | int
-PointCoords = tuple[Coordinate, Coordinate]
-PolygonCoords = list[list[PointCoords]]
-
-# GeoJSON
-Geometry = dict[str, Any]
-Properties = dict[str, Any]
-
-# Features
+# FIXME these should be improved
+GeoJSON = dict[str, Any]
Feature = dict[str, Any]
FeatureCollection = dict[str, Any]
diff --git a/mkdocs.yml b/mkdocs.yml
index d88ef65..5c2c569 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,5 +1,5 @@
site_name: geojson-aoi-parser
-site_description: Parse and normalize a GeoJSON area of interest, using pure Python.
+site_description: Parse and normalize a GeoJSON area of interest, using PostGIS.
# strict: true
site_url: "https://www.hotosm.org"
diff --git a/pyproject.toml b/pyproject.toml
index 852ae0b..5831b10 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "geojson-aoi-parser"
version = "0.2.0"
-description = "Parse and normalize a GeoJSON area of interest, using pure Python."
+description = "Parse and normalize a GeoJSON area of interest, using PostGIS."
authors = [
{name = "Sam Woodcock", email = "sam.woodcock@hotosm.org"},
]
@@ -17,7 +17,9 @@ classifiers = [
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
-dependencies = []
+dependencies = [
+ "psycopg>=3.1",
+]
[dependency-groups]
test = [
diff --git a/tests/conftest.py b/tests/conftest.py
index b1abfdd..973ce2b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,6 +3,12 @@
import pytest
+@pytest.fixture(scope="session")
+def db():
+ """Database URI."""
+ return "postgresql://aoi:dummycipassword@db:5432/aoi"
+
+
@pytest.fixture
def polygon_geojson():
"""Polygon."""
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 7b9663e..beb243b 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -17,38 +17,38 @@ def is_featcol_nested_polygon(geojson) -> bool:
return False
-def test_polygon(polygon_geojson):
+def test_polygon(db, polygon_geojson):
"""A single Polygon."""
- result = parse_aoi(polygon_geojson)
+ result = parse_aoi(db, polygon_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_polygon_with_holes(polygon_holes_geojson):
+def test_polygon_with_holes(db, polygon_holes_geojson):
"""A single Polygon with holes, should remain unchanged."""
- result = parse_aoi(polygon_holes_geojson)
+ result = parse_aoi(db, polygon_holes_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
# We have three rings inside polygon (1 exterior, 2 interior)
assert len(result["features"][0]["geometry"]["coordinates"]) == 3
-def test_polygon_merge_with_holes(polygon_holes_geojson):
+def test_polygon_merge_with_holes(db, polygon_holes_geojson):
"""A single Polygon with holes, where the holes should be removed."""
- result = parse_aoi(polygon_holes_geojson, merge=True)
+ result = parse_aoi(db, polygon_holes_geojson, merge=True)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
# As we specify 'merge', only the exterior ring should be remaining
assert len(result["features"][0]["geometry"]["coordinates"]) == 1
-def test_z_dimension_polygon(polygon_geojson):
+def test_z_dimension_polygon(db, polygon_geojson):
"""A single Polygon, with z-dimension coord stripped out."""
geojson_data = {
"type": "Polygon",
"coordinates": [[[0, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0], [0, 0, 0]]],
}
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
assert result == {
@@ -63,21 +63,21 @@ def test_z_dimension_polygon(polygon_geojson):
}
-def test_feature(feature_geojson):
+def test_feature(db, feature_geojson):
"""A Polygon nested in a Feature."""
- result = parse_aoi(feature_geojson)
+ result = parse_aoi(db, feature_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_feature_collection(featcol_geojson):
+def test_feature_collection(db, featcol_geojson):
"""A Polygon nested in a Feature, inside a FeatureCollection."""
- result = parse_aoi(featcol_geojson)
+ result = parse_aoi(db, featcol_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_feature_collection_multiple_geoms(feature_geojson):
+def test_feature_collection_multiple_geoms(db, feature_geojson):
"""Multiple Polygon nested in Features, inside a FeatureCollection.
Intentionally no merging in this test.
@@ -86,12 +86,12 @@ def test_feature_collection_multiple_geoms(feature_geojson):
"type": "FeatureCollection",
"features": [feature_geojson, feature_geojson, feature_geojson],
}
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 3
-def test_nested_geometrycollection(geomcol_geojson):
+def test_nested_geometrycollection(db, geomcol_geojson):
"""A GeometryCollection nested inside a FeatureCollection."""
geojson_data = {
"type": "FeatureCollection",
@@ -103,12 +103,12 @@ def test_nested_geometrycollection(geomcol_geojson):
}
],
}
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_multiple_nested_geometrycollection(geomcol_geojson):
+def test_multiple_nested_geometrycollection(db, geomcol_geojson):
"""Multiple GeometryCollection nested inside a FeatureCollection."""
geojson_data = {
"type": "FeatureCollection",
@@ -125,7 +125,7 @@ def test_multiple_nested_geometrycollection(geomcol_geojson):
},
],
}
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 2
@@ -138,12 +138,12 @@ def test_multiple_nested_geometrycollection(geomcol_geojson):
# "geometries": [polygon_geojson, polygon_geojson, polygon_geojson],
# }
-# result = parse_aoi(geojson_data)
+# result = parse_aoi(db, geojson_data)
# assert is_featcol_nested_polygon(result)
# assert len(result["features"]) == 3
-def test_featcol_merge_multiple_polygons():
+def test_featcol_merge_multiple_polygons(db):
"""Merge multiple polygons inside a FeatureCollection."""
geojson_data = {
"type": "FeatureCollection",
@@ -166,12 +166,12 @@ def test_featcol_merge_multiple_polygons():
},
],
}
- result = parse_aoi(geojson_data, merge=True)
+ result = parse_aoi(db, geojson_data, merge=True)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_featcol_no_merge_polygons():
+def test_featcol_no_merge_polygons(db):
"""Do not merge multiple polygons inside a FeatureCollection."""
geojson_data = {
"type": "FeatureCollection",
@@ -194,14 +194,14 @@ def test_featcol_no_merge_polygons():
},
],
}
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 2
-def test_merge_multipolygon(multipolygon_geojson):
+def test_merge_multipolygon(db, multipolygon_geojson):
"""Merge multiple polygons inside a MultiPolygon."""
- result = parse_aoi(multipolygon_geojson, merge=True)
+ result = parse_aoi(db, multipolygon_geojson, merge=True)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
@@ -211,45 +211,45 @@ def test_merge_multipolygon(multipolygon_geojson):
# assert False
-def test_multipolygon_no_merge(multipolygon_geojson):
+def test_multipolygon_no_merge(db, multipolygon_geojson):
"""Do not merge multiple polygons inside a MultiPolygon."""
- result = parse_aoi(multipolygon_geojson)
+ result = parse_aoi(db, multipolygon_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 3
-def test_multipolygon_with_holes(multipolygon_holes_geojson):
+def test_multipolygon_with_holes(db, multipolygon_holes_geojson):
"""MultiPolygon --> Polygon, with holes remaining."""
# FIXME this should not remove the holes from the polygon geom
# FIXME Instead the polygon should simply be extracted from the MultiPolygon
# FIXME (we only remove holes if merge=True)
- result = parse_aoi(multipolygon_holes_geojson)
+ result = parse_aoi(db, multipolygon_holes_geojson)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 3
-def test_multipolygon_with_holes_merged(multipolygon_holes_geojson):
+def test_multipolygon_with_holes_merged(db, multipolygon_holes_geojson):
"""Merge multipolygon, including holes."""
- result = parse_aoi(multipolygon_holes_geojson, merge=True)
+ result = parse_aoi(db, multipolygon_holes_geojson, merge=True)
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_invalid_input():
+def test_invalid_input(db):
"""Invalud input for parse_aoi function."""
with pytest.raises(
ValueError, match="GeoJSON input must be a valid dict, str, or bytes"
):
- parse_aoi(123)
+ parse_aoi(db, 123)
with pytest.raises(ValueError, match="Provided GeoJSON is empty"):
- parse_aoi("{}")
+ parse_aoi(db, "{}")
with pytest.raises(ValueError, match="The GeoJSON type must be one of:"):
- parse_aoi({"type": "Point"})
+ parse_aoi(db, {"type": "Point"})
-def test_file_input(tmp_path):
+def test_file_input(db, tmp_path):
"""GeoJSON file input for parse_aoi function."""
geojson_file = tmp_path / "test.geojson"
geojson_data = {
@@ -267,12 +267,12 @@ def test_file_input(tmp_path):
}
geojson_file.write_text(json.dumps(geojson_data))
- result = parse_aoi(str(geojson_file))
+ result = parse_aoi(db, str(geojson_file))
assert is_featcol_nested_polygon(result)
assert len(result["features"]) == 1
-def test_no_warnings_valid_crs():
+def test_no_warnings_valid_crs(db):
"""Test including a valid CRS."""
geojson_data = {
"type": "FeatureCollection",
@@ -293,7 +293,7 @@ def test_no_warnings_valid_crs():
}
with warnings.catch_warnings(record=True) as recorded_warnings:
- result = parse_aoi(geojson_data)
+ result = parse_aoi(db, geojson_data)
if recorded_warnings:
raise AssertionError(
f"Warnings should not be raised here: {recorded_warnings[0].message}"
@@ -303,7 +303,7 @@ def test_no_warnings_valid_crs():
assert len(result["features"]) == 1
-def test_warnings_raised_invalid_crs():
+def test_warnings_raised_invalid_crs(db):
"""Test including an invalid CRS, raising warnings."""
geojson_data = {
"type": "FeatureCollection",
@@ -320,10 +320,10 @@ def test_warnings_raised_invalid_crs():
"crs": {"type": "name", "properties": {"name": "invalid!!"}},
}
with pytest.warns(UserWarning):
- parse_aoi(geojson_data)
+ parse_aoi(db, geojson_data)
-def test_warnings_raised_invalid_coords():
+def test_warnings_raised_invalid_coords(db):
"""Test including an invalid coordinates, raising warnings."""
geojson_data = {
"type": "FeatureCollection",
@@ -340,4 +340,4 @@ def test_warnings_raised_invalid_coords():
"crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:EPSG::4326"}},
}
with pytest.warns(UserWarning):
- parse_aoi(geojson_data)
+ parse_aoi(db, geojson_data)
diff --git a/uv.lock b/uv.lock
index 7911b69..d7f3110 100644
--- a/uv.lock
+++ b/uv.lock
@@ -114,6 +114,9 @@ wheels = [
name = "geojson-aoi-parser"
version = "0.2.0"
source = { editable = "." }
+dependencies = [
+ { name = "psycopg" },
+]
[package.dev-dependencies]
docs = [
@@ -128,6 +131,7 @@ test = [
]
[package.metadata]
+requires-dist = [{ name = "psycopg", specifier = ">=3.1" }]
[package.metadata.requires-dev]
docs = [
@@ -478,6 +482,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]
+[[package]]
+name = "psycopg"
+version = "3.2.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+ { name = "tzdata", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/ad/7ce016ae63e231575df0498d2395d15f005f05e32d3a2d439038e1bd0851/psycopg-3.2.3.tar.gz", hash = "sha256:a5764f67c27bec8bfac85764d23c534af2c27b893550377e37ce59c12aac47a2", size = 155550 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/ce/21/534b8f5bd9734b7a2fcd3a16b1ee82ef6cad81a4796e95ebf4e0c6a24119/psycopg-3.2.3-py3-none-any.whl", hash = "sha256:644d3973fe26908c73d4be746074f6e5224b03c1101d302d9a53bf565ad64907", size = 197934 },
+]
+
[[package]]
name = "pygments"
version = "2.19.1"
@@ -735,6 +752,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 },
]
+[[package]]
+name = "typing-extensions"
+version = "4.12.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
+]
+
+[[package]]
+name = "tzdata"
+version = "2024.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 },
+]
+
[[package]]
name = "urllib3"
version = "2.3.0"