From f815067f03ef547436aaaa24c35724588deaefc7 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Tue, 31 Oct 2023 11:56:29 -0700 Subject: [PATCH 1/5] add ability to load fewer polygons, start trying to fix colorbar --- src/sweets/_missing_data.py | 58 ++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/src/sweets/_missing_data.py b/src/sweets/_missing_data.py index 2d471a5..afca03d 100644 --- a/src/sweets/_missing_data.py +++ b/src/sweets/_missing_data.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd from dolphin._types import Filename +from matplotlib.colors import BoundaryNorm, ListedColormap from osgeo import gdal from shapely import geometry, intersection_all, union_all, wkt from tqdm.contrib.concurrent import thread_map @@ -24,7 +25,7 @@ def get_geodataframe( - gslc_files: Iterable[Filename], max_workers: int = 5 + gslc_files: Iterable[Filename], max_workers: int = 5, one_per_burst: bool = True ) -> gpd.GeoDataFrame: """Get a GeoDataFrame of the CSLC footprints. @@ -34,9 +35,26 @@ def get_geodataframe( List of CSLC files. max_workers : int Number of threads to use. + one_per_burst : bool, default=True + If True, only keep one footprint per burst ID. """ gslc_files = list(gslc_files) # make sure generator doesn't deplete after first run - polygons = thread_map(get_cslc_polygon, gslc_files, max_workers=max_workers) + if one_per_burst: + from dolphin.opera_utils import group_by_burst + + burst_to_file_list = group_by_burst(gslc_files) + slc_files = [file_list[0] for file_list in burst_to_file_list.values()] + unique_polygons = thread_map( + get_cslc_polygon, slc_files, max_workers=max_workers + ) + assert len(unique_polygons) == len(burst_to_file_list) + # Repeat the polygons for each burst + polygons: list[geometry.Polygon] = [] + for burst_id, p in zip(burst_to_file_list, unique_polygons): + for _ in range(len(burst_to_file_list[burst_id])): + polygons.append(p) + else: + polygons = thread_map(get_cslc_polygon, gslc_files, max_workers=max_workers) gdf = gpd.GeoDataFrame(geometry=polygons, crs="EPSG:4326") gdf["count"] = 1 @@ -68,13 +86,19 @@ def get_cslc_polygon( def get_common_dates( - *, gslc_files: Optional[Sequence[Filename]] = None, gdf=None + *, + gslc_files: Optional[Sequence[Filename]] = None, + gdf=None, + max_workers: int = 5, + one_per_burst: bool = True, ) -> list[str]: """Get the date common to all GSLCs.""" if gdf is None: if gslc_files is None: raise ValueError("Need `gdf` or `gslc_files`") - gdf = get_geodataframe(gslc_files) + gdf = get_geodataframe( + gslc_files, max_workers=max_workers, one_per_burst=one_per_burst + ) grouped_by_burst = _get_per_burst_df(gdf) common_dates = list( @@ -109,27 +133,47 @@ def plot_count_per_burst( *, gdf: Optional[gpd.GeoDataFrame] = None, gslc_files: Optional[Sequence[Filename]] = None, + one_per_burst: bool = True, ax: Optional[plt.Axes] = None, ) -> None: """Plot the number of GSLC files found per burst.""" if gdf is None: if gslc_files is None: raise ValueError("Need `gdf` or `gslc_files`") - gdf = get_geodataframe(gslc_files) + gdf = get_geodataframe(gslc_files, one_per_burst=one_per_burst) gdf_grouped = _get_per_burst_df(gdf) if ax is None: fig, ax = plt.subplots(ncols=1) + + # Make a unique colormap for the specific count values + unique_counts = np.unique(gdf_grouped["count"]) + # cmap = ListedColormap(plt.cm.tab20.colors[: len(unique_counts)]) + # norm = BoundaryNorm(unique_counts, cmap.N + 1) + + cmap = ListedColormap(plt.cm.tab10(np.linspace(0, 1, len(unique_counts)))) + # norm = BoundaryNorm(unique_counts, cmap.N + 1) + boundaries = np.concatenate([[unique_counts[0] - 1], unique_counts + 0.5]) + print(unique_counts, boundaries) + norm = BoundaryNorm(boundaries, cmap.N) + kwds = dict( column="count", - legend=True, - cmap="tab10", + legend=False, + cmap=cmap, + norm=norm, legend_kwds={"label": "Count", "orientation": "horizontal"}, linewidth=0.8, edgecolor="0.8", ) gdf_grouped.plot(ax=ax, **kwds) + cbar = plt.colorbar( + plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation="horizontal" + ) + cbar.set_label("Count") + cbar.set_ticks(unique_counts) + cbar.set_ticklabels(unique_counts) return gdf_grouped From bd16e6c200b1ae0142b47a80d37468811f0f0475 Mon Sep 17 00:00:00 2001 From: Scott Staniewicz Date: Wed, 1 Nov 2023 14:52:45 -0700 Subject: [PATCH 2/5] remove commented code --- src/sweets/_missing_data.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/sweets/_missing_data.py b/src/sweets/_missing_data.py index afca03d..3bcb2e7 100644 --- a/src/sweets/_missing_data.py +++ b/src/sweets/_missing_data.py @@ -148,13 +148,9 @@ def plot_count_per_burst( # Make a unique colormap for the specific count values unique_counts = np.unique(gdf_grouped["count"]) - # cmap = ListedColormap(plt.cm.tab20.colors[: len(unique_counts)]) - # norm = BoundaryNorm(unique_counts, cmap.N + 1) cmap = ListedColormap(plt.cm.tab10(np.linspace(0, 1, len(unique_counts)))) - # norm = BoundaryNorm(unique_counts, cmap.N + 1) boundaries = np.concatenate([[unique_counts[0] - 1], unique_counts + 0.5]) - print(unique_counts, boundaries) norm = BoundaryNorm(boundaries, cmap.N) kwds = dict( @@ -162,7 +158,6 @@ def plot_count_per_burst( legend=False, cmap=cmap, norm=norm, - legend_kwds={"label": "Count", "orientation": "horizontal"}, linewidth=0.8, edgecolor="0.8", ) From c246ecfb04c72ad140d3ea04e8adcf3750f20eb7 Mon Sep 17 00:00:00 2001 From: Emre Havazli Date: Thu, 2 Nov 2023 12:38:43 -0700 Subject: [PATCH 3/5] change slc files list from str to Path object --- src/sweets/_missing_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sweets/_missing_data.py b/src/sweets/_missing_data.py index 3bcb2e7..f7ecd9b 100644 --- a/src/sweets/_missing_data.py +++ b/src/sweets/_missing_data.py @@ -58,7 +58,7 @@ def get_geodataframe( gdf = gpd.GeoDataFrame(geometry=polygons, crs="EPSG:4326") gdf["count"] = 1 - gdf["filename"] = [p.stem for p in gslc_files] + gdf["filename"] = [Path(p).stem for p in gslc_files] gdf["date"] = pd.to_datetime(gdf.filename.str.split("_").str[3]) gdf["burst_id"] = gdf.filename.str[:15] return gdf From 41801972b89db5ef2ef4dc81b6281387097661ea Mon Sep 17 00:00:00 2001 From: ehavazli Date: Fri, 3 Nov 2023 17:19:31 -0700 Subject: [PATCH 4/5] set colorbar ticks --- src/sweets/_missing_data.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sweets/_missing_data.py b/src/sweets/_missing_data.py index f7ecd9b..be8f03d 100644 --- a/src/sweets/_missing_data.py +++ b/src/sweets/_missing_data.py @@ -150,7 +150,7 @@ def plot_count_per_burst( unique_counts = np.unique(gdf_grouped["count"]) cmap = ListedColormap(plt.cm.tab10(np.linspace(0, 1, len(unique_counts)))) - boundaries = np.concatenate([[unique_counts[0] - 1], unique_counts + 0.5]) + boundaries = np.concatenate([[unique_counts[0] - 1], unique_counts + 1]) norm = BoundaryNorm(boundaries, cmap.N) kwds = dict( @@ -167,8 +167,10 @@ def plot_count_per_burst( plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation="horizontal" ) cbar.set_label("Count") - cbar.set_ticks(unique_counts) + cbar_ticks = [(boundaries[i] + boundaries[i + 1]) / 2 for i in range(len(boundaries) - 1)] + cbar.set_ticks(cbar_ticks) cbar.set_ticklabels(unique_counts) + return gdf_grouped From da8d233e1e4cadef744990dc3dabbc5777f9902d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 4 Nov 2023 00:49:36 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/sweets/_missing_data.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sweets/_missing_data.py b/src/sweets/_missing_data.py index be8f03d..167e1c7 100644 --- a/src/sweets/_missing_data.py +++ b/src/sweets/_missing_data.py @@ -167,7 +167,9 @@ def plot_count_per_burst( plt.cm.ScalarMappable(norm=norm, cmap=cmap), ax=ax, orientation="horizontal" ) cbar.set_label("Count") - cbar_ticks = [(boundaries[i] + boundaries[i + 1]) / 2 for i in range(len(boundaries) - 1)] + cbar_ticks = [ + (boundaries[i] + boundaries[i + 1]) / 2 for i in range(len(boundaries) - 1) + ] cbar.set_ticks(cbar_ticks) cbar.set_ticklabels(unique_counts)