diff --git a/script.py b/script.py
new file mode 100644
index 0000000..aa6faac
--- /dev/null
+++ b/script.py
@@ -0,0 +1,57 @@
+import pandas as pd
+import geopandas as gpd
+import sqlite3
+from shapely.ops import unary_union
+from map_app.views import get_wbid_from_point
+from data_processing.gpkg_utils import blob_to_geometry, get_table_crs
+from data_processing.file_paths import file_paths
+from data_processing.graph_utils import get_upstream_ids
+import multiprocessing as mp
+
+
+def get_upstream_geometry(upstream_ids):
+    """Fetch the divide polygons for the given IDs and dissolve them into one geometry."""
+    geopackage = file_paths.conus_hydrofabric()
+    # Parameterized IN clause avoids the manual tuple-string cleanup
+    placeholders = ",".join("?" * len(upstream_ids))
+    sql_query = f"SELECT id, geom FROM divides WHERE id IN ({placeholders})"
+
+    with sqlite3.connect(geopackage) as con:
+        result = con.execute(sql_query, list(upstream_ids)).fetchall()
+
+    # Decode each WKB blob once, dropping any rows that fail to parse
+    decoded = (blob_to_geometry(r[1]) for r in result)
+    geometry_list = [g for g in decoded if g is not None]
+    return unary_union(geometry_list)
+
+
+def process_station(row):
+    """Map a station's lat/long to its waterbody, then merge all upstream divide geometry."""
+    coords = {"lat": row["lat"], "lng": row["long"]}
+    try:
+        wbid = get_wbid_from_point(coords)
+        upstream_ids = get_upstream_ids(wbid)
+        return get_upstream_geometry(upstream_ids)
+    except Exception:
+        # Stations that fall outside the hydrofabric get no geometry
+        return None
+
+
+if __name__ == "__main__":
+    NWIS_STATIONS = pd.read_csv("NWIS_Lat_longs.csv")
+    # filtered_stations = NWIS_STATIONS[NWIS_STATIONS["state"] == "Utah"].copy()
+    filtered_stations = NWIS_STATIONS.copy()
+    print(filtered_stations.head())
+
+    crs = get_table_crs(file_paths.conus_hydrofabric(), "divides")
+    print(crs)
+
+    # Process stations in parallel; each worker does its own geopackage lookups
+    with mp.Pool(processes=mp.cpu_count()) as pool:
+        geometries = pool.map(process_station, [row for _, row in filtered_stations.iterrows()])
+
+    filtered_stations["geometry"] = geometries
+
+    gdf = gpd.GeoDataFrame(filtered_stations, geometry="geometry", crs=crs)
+    # Output name reflects the original Utah-only filter, now commented out above
+    gdf.to_parquet("utah_stations.parquet")
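Not part of the diff itself: a minimal sketch of how the output could be inspected after a run, assuming the script completed and wrote `utah_stations.parquet` to the working directory.

```python
import geopandas as gpd

# Load the stations along with their merged upstream geometries
gdf = gpd.read_parquet("utah_stations.parquet")

# Stations whose lookup failed carry a null geometry (process_station returned None)
print(f"{gdf.geometry.isna().sum()} of {len(gdf)} stations have no upstream geometry")
print(gdf.head())
```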