diff --git a/ecoscope/analysis/__init__.py b/ecoscope/analysis/__init__.py index 46ca082e..5ea814b7 100644 --- a/ecoscope/analysis/__init__.py +++ b/ecoscope/analysis/__init__.py @@ -1,6 +1,7 @@ from ecoscope.analysis import UD, astronomy, seasons from ecoscope.analysis.ecograph import Ecograph, get_feature_gdf from ecoscope.analysis.percentile import get_percentile_area +from ecoscope.analysis.recurse import get_recursions from ecoscope.analysis.speed import SpeedDataFrame __all__ = [ @@ -11,4 +12,5 @@ "get_feature_gdf", "get_percentile_area", "seasons", + "get_recursions", ] diff --git a/ecoscope/analysis/ecograph.py b/ecoscope/analysis/ecograph.py index c05a9baf..882b4f3c 100644 --- a/ecoscope/analysis/ecograph.py +++ b/ecoscope/analysis/ecograph.py @@ -7,7 +7,6 @@ import pandas as pd import rasterio import sklearn.base -from affine import Affine from shapely.geometry import shape from skimage.draw import line @@ -58,19 +57,7 @@ def __init__(self, trajectory, resolution=15, radius=2, cutoff=None, tortuosity_ eastings = np.array([geom.iloc[i].coords.xy[0] for i in range(len(geom))]).flatten() northings = np.array([geom.iloc[i].coords.xy[1] for i in range(len(geom))]).flatten() - self.xmin = floor(np.min(eastings)) - self.resolution - self.ymin = floor(np.min(northings)) - self.resolution - self.xmax = ceil(np.max(eastings)) + self.resolution - self.ymax = ceil(np.max(northings)) + self.resolution - - self.xmax += self.resolution - ((self.xmax - self.xmin) % self.resolution) - self.ymax += self.resolution - ((self.ymax - self.ymin) % self.resolution) - - self.transform = Affine(self.resolution, 0.00, self.xmin, 0.00, -self.resolution, self.ymax) - self.inverse_transform = ~self.transform - - self.n_rows = int((self.xmax - self.xmin) // self.resolution) - self.n_cols = int((self.ymax - self.ymin) // self.resolution) + self.grid = ecoscope.base.Grid(eastings, northings, self.resolution) def compute(df): subject_name = df.name @@ -143,7 +130,7 @@ def to_geotiff(self, feature, output_path, individual="all", interpolation=None, band_count=1, ) raster_profile.raster_extent = ecoscope.io.raster.RasterExtent( - x_min=self.xmin, x_max=self.xmax, y_min=self.ymin, y_max=self.ymax + x_min=self.grid.xmin, x_max=self.grid.xmax, y_min=self.grid.ymin, y_max=self.grid.ymax ) ecoscope.io.raster.RasterPy.write( ndarray=feature_ndarray, @@ -160,8 +147,8 @@ def _get_ecograph(self, trajectory_gdf, individual_name, radius, cutoff, tortuos lines = [list(geom.iloc[i + j].coords) for j in range(tortuosity_length)] p1, p2, p3, p4 = lines[0][0], lines[0][1], lines[1][1], lines[1][0] pixel1, pixel2 = ( - self.inverse_transform * p1, - self.inverse_transform * p2, + self.grid.inverse_transform * p1, + self.grid.inverse_transform * p2, ) row1, row2 = floor(pixel1[0]), floor(pixel2[0]) @@ -296,9 +283,9 @@ def _get_feature_mosaic(self, feature, interpolation=None): features = [] for individual in self.graphs.keys(): features.append(self._get_feature_map(self, feature, individual, interpolation)) - mosaic = np.full((self.n_cols, self.n_rows), np.nan) - for i in range(self.n_cols): - for j in range(self.n_rows): + mosaic = np.full((self.grid.n_cols, self.grid.n_rows), np.nan) + for i in range(self.grid.n_cols): + for j in range(self.grid.n_rows): values = [] for feature_map in features: grid_val = feature_map[i][j] @@ -319,7 +306,7 @@ def _get_feature_map(self, feature, individual, interpolation): @staticmethod def _get_regular_feature_map(self, feature, individual): - feature_ndarray = np.full((self.n_cols, self.n_rows), np.nan) + feature_ndarray = np.full((self.grid.n_cols, self.grid.n_rows), np.nan) for node in self.graphs[individual].nodes(): feature_ndarray[node[1]][node[0]] = (self.graphs[individual]).nodes[node][feature] return feature_ndarray @@ -333,7 +320,7 @@ def _get_interpolated_feature_map(self, feature, individual, interpolation): for i in range(len(geom)): line1 = list(geom.iloc[i].coords) p1, p2 = line1[0], line1[1] - pixel1, pixel2 = self.inverse_transform * p1, self.inverse_transform * p2 + pixel1, pixel2 = self.grid.inverse_transform * p1, self.grid.inverse_transform * p2 row1, row2 = floor(pixel1[0]), floor(pixel2[0]) col1, col2 = ceil(pixel1[1]), ceil(pixel2[1]) diff --git a/ecoscope/analysis/recurse.py b/ecoscope/analysis/recurse.py new file mode 100644 index 00000000..10425ecb --- /dev/null +++ b/ecoscope/analysis/recurse.py @@ -0,0 +1,49 @@ +from math import ceil, floor + +import numpy as np + +import ecoscope + + +def get_consecutive_items_number(idxs): + gaps = [[s, e] for s, e in zip(idxs, idxs[1:]) if s + 1 < e] + edges = iter(idxs[:1] + sum(gaps, []) + idxs[-1:]) + return len(list(zip(edges, edges))) + + +def get_recursions(relocations, resolution): + relocations = relocations.reset_index(drop=True) + if not relocations["fixtime"].is_monotonic: + relocations.sort_values("fixtime", inplace=True) + + diameter = ceil(resolution) * 2 + utm_crs = relocations.estimate_utm_crs() + relocations.to_crs(utm_crs, inplace=True) + + geom = relocations["geometry"] + eastings = np.array([geom.iloc[i].x for i in range(len(geom))]).flatten() + northings = np.array([geom.iloc[i].y for i in range(len(geom))]).flatten() + + grid = ecoscope.base.Grid(eastings, northings, diameter) + + grid_cells_dict = {} + for i in range(len(geom)): + point = geom.iloc[i] + grid_cell = grid.inverse_transform * (point.x, point.y) + row, col = floor(grid_cell[0]), ceil(grid_cell[1]) + if (col, row) in grid_cells_dict: + grid_cells_dict[(col, row)].append(i) + else: + grid_cells_dict[(col, row)] = [i] + + for grid_cell in grid_cells_dict: + grid_cells_dict[grid_cell] = get_consecutive_items_number(grid_cells_dict[grid_cell]) + + recursion_values = [] + for i in range(len(geom)): + point = geom.iloc[i] + grid_cell = grid.inverse_transform * (point.x, point.y) + row, col = floor(grid_cell[0]), ceil(grid_cell[1]) + recursion_values.append(grid_cells_dict[(col, row)]) + + return recursion_values diff --git a/ecoscope/base/__init__.py b/ecoscope/base/__init__.py index f3814329..10d58dfb 100644 --- a/ecoscope/base/__init__.py +++ b/ecoscope/base/__init__.py @@ -7,6 +7,7 @@ ) from ecoscope.base.base import EcoDataFrame, Relocations, Trajectory from ecoscope.base.utils import ( + Grid, cachedproperty, create_meshgrid, groupby_intervals, @@ -24,4 +25,5 @@ "cachedproperty", "create_meshgrid", "groupby_intervals", + "Grid", ] diff --git a/ecoscope/base/utils.py b/ecoscope/base/utils.py index 03bb6f01..5a7550b6 100644 --- a/ecoscope/base/utils.py +++ b/ecoscope/base/utils.py @@ -1,6 +1,9 @@ +from math import ceil, floor + import geopandas as gpd import numpy as np import pandas as pd +from affine import Affine from shapely.geometry import box @@ -212,3 +215,65 @@ def create_modis_interval_index(start, intervals, overlap=pd.Timedelta(0), close left = pd.DatetimeIndex(left) return pd.IntervalIndex.from_arrays(left=left, right=left + pd.Timedelta(days=16), closed=closed) + + +class Grid: + """ + A class that creates a grid covering a list of UTM points + + Parameters + ---------- + eastings : list + UTM easting coordinates + northings : list + UTM northing coordinates + resolution : float + The width/length of a grid cell in meters + """ + + def __init__(self, eastings, northings, resolution): + self._xmin = floor(np.min(eastings)) - resolution + self._ymin = floor(np.min(northings)) - resolution + self._xmax = ceil(np.max(eastings)) + resolution + self._ymax = ceil(np.max(northings)) + resolution + + self._xmax += resolution - ((self._xmax - self._xmin) % resolution) + self._ymax += resolution - ((self._ymax - self._ymin) % resolution) + + self._transform = Affine(resolution, 0.00, self._xmin, 0.00, -resolution, self._ymax) + self._inverse_transform = ~self._transform + + self._n_rows = int((self._xmax - self._xmin) // resolution) + self._n_cols = int((self._ymax - self._ymin) // resolution) + + @property + def xmin(self): + return self._xmin + + @property + def ymin(self): + return self._ymin + + @property + def xmax(self): + return self._xmax + + @property + def ymax(self): + return self._ymax + + @property + def transform(self): + return self._transform + + @property + def inverse_transform(self): + return self._inverse_transform + + @property + def n_rows(self): + return self._n_rows + + @property + def n_cols(self): + return self._n_cols diff --git a/notebooks/03. Home Range & Movescape/Recurse.ipynb b/notebooks/03. Home Range & Movescape/Recurse.ipynb new file mode 100644 index 00000000..b45f1011 --- /dev/null +++ b/notebooks/03. Home Range & Movescape/Recurse.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cdebfa2d-f384-4dd0-ad67-d94501f3d39c", + "metadata": { + "tags": [] + }, + "source": [ + "# Recurse" + ] + }, + { + "cell_type": "markdown", + "id": "0c82797c-10b4-4a4f-b951-93ae5fc1f456", + "metadata": {}, + "source": [ + "The Recurse R package (C. Bracis et al., Ecography, 2018 : https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.03618) can be used to analyze animal trajectory data to look for returns to a previously visited area, i.e. revisitations. These revisits could be interesting ecologically for a number of reasons. For example, they could be used to identify nesting or denning sites or important resource locations such as water points.\n", + "\n", + "The original method from the paper works by taking a circle of a user-specified radius and moving it along a set a relocations points. At each point, the number of trajectory segments entering and exiting the circle is counted to determine the number of revisitation. Therefore, each movement trajectory location has one visit for the piece of the trajectory centered on it plus additional visits that could occur before or after. \n", + "\n", + "Our implementation of the algorithm proposed by Bracis et al. is a little bit different than the one described in their paper. Indeed, we are counting the number of trajectory segments inside a square instead of a circle for each relocation points. The length of the sides of the square (in meters) must be specified by the user through the `resolution` parameter. Our method gives almost identical results to the one described in the paper, and is faster to execute due to our usage of affine transforms. Figure 1 shows the difference between our implementation and the one available in the R recurse package.\n", + "\n", + "
\n", + "\n", + "
Figure 1. Difference between the Recurse R package and Ecoscope for calculating the number of revisits. \n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "84b6f476-8b7a-417c-8ffe-64ac1bf9ad22", + "metadata": { + "tags": [] + }, + "source": [ + "## Ecoscope" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0130a1d-cd00-4abb-8489-641bdb3c96ed", + "metadata": {}, + "outputs": [], + "source": [ + "ECOSCOPE_RAW = \"https://raw.githubusercontent.com/wildlife-dynamics/ecoscope/master\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6492302f-958a-45cc-af06-58b1e71c7e82", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "from shapely.geometry import Point\n", + "from sklearn.cluster import KMeans\n", + "\n", + "import ecoscope\n", + "\n", + "ecoscope.init()" + ] + }, + { + "cell_type": "markdown", + "id": "9a938316-79a5-404e-a6ec-ec9a4199eec6", + "metadata": { + "tags": [] + }, + "source": [ + "## Google Drive Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e115bb-1fed-4a81-945e-a01a4d375da4", + "metadata": {}, + "outputs": [], + "source": [ + "output_dir = \"Ecoscope-Outputs\"\n", + "\n", + "if \"google.colab\" in sys.modules:\n", + " from google.colab import drive\n", + "\n", + " drive.mount(\"/content/drive/\", force_remount=True)\n", + " output_dir = os.path.join(\"/content/drive/MyDrive/\", output_dir)\n", + "\n", + "os.makedirs(output_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "42e4add5-df93-48aa-a095-3ff957182d98", + "metadata": {}, + "source": [ + "## Create Relocations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc36504f-0e28-4ec5-8ee3-d72171da1315", + "metadata": {}, + "outputs": [], + "source": [ + "ecoscope.io.download_file(\n", + " f\"{ECOSCOPE_RAW}/tests/sample_data/vector/movbank_data.csv\",\n", + " os.path.join(output_dir, \"movbank_data.csv\"),\n", + ")\n", + "\n", + "df = pd.read_csv(os.path.join(output_dir, \"movbank_data.csv\"), index_col=0)\n", + "gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[\"location-long\"], df[\"location-lat\"]), crs=4326)\n", + "\n", + "relocs = ecoscope.base.Relocations.from_gdf(gdf, groupby_col=\"individual-local-identifier\", time_col=\"timestamp\")\n", + "relocs_salif = relocs[relocs.groupby_col == \"Salif Keita\"].copy()" + ] + }, + { + "cell_type": "markdown", + "id": "de415fc3-0e72-4c06-87ea-4779fabac1d2", + "metadata": {}, + "source": [ + "## Calculating and visualizing revisits" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3828886c-eb52-47fc-a7e6-174fa0980caf", + "metadata": {}, + "outputs": [], + "source": [ + "relocs_salif[\"revisits\"] = ecoscope.analysis.get_recursions(relocs_salif, resolution=400)\n", + "relocs_salif.explore(column=\"revisits\", cmap=\"viridis\")" + ] + }, + { + "cell_type": "markdown", + "id": "71c74241-2b72-418d-883a-85aecbdf05f0", + "metadata": {}, + "source": [ + "## Clustering revisits" + ] + }, + { + "cell_type": "markdown", + "id": "e7547f4b-2e62-42cf-9125-72d644d389dd", + "metadata": {}, + "source": [ + "If important sites are not known a priori, they can be identified from the recursion analysis using clustering. For example, one might want to identify nests, dens, water holes, foraging patches, roosts, haulouts, etc. in a systematic way rather than using visual identification methods, which may not be feasible for large amounts of data. The identification of these sites may be the end goal itself, or a preliminary step in a larger analysis.\n", + "\n", + "As an example, here we use K-Means clustering to cluster the (x,y) coordinates of the top 20% of locations by number of revisists. Please note that many other clustering methods are also available in the sklearn package : https://scikit-learn.org/stable/modules/clustering.html. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7da6d9a-5aa7-4d07-b8df-be9497f34aaf", + "metadata": {}, + "outputs": [], + "source": [ + "relocs_top20 = relocs_salif[relocs_salif[\"revisits\"] >= np.percentile(relocs_salif[\"revisits\"], 80)].copy()\n", + "relocs_top20.explore(column=\"revisits\", cmap=\"viridis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d0794308-5248-4f99-bfe0-51ac50889e9d", + "metadata": {}, + "outputs": [], + "source": [ + "utm_crs = relocs_top20.estimate_utm_crs()\n", + "relocs_top20.to_crs(utm_crs, inplace=True)\n", + "relocs_top20[\"utm_east\"] = [relocs_top20[\"geometry\"].iloc[i].x for i in range(len(relocs_top20))]\n", + "relocs_top20[\"utm_north\"] = [relocs_top20[\"geometry\"].iloc[i].y for i in range(len(relocs_top20))]\n", + "\n", + "kmeans = KMeans(n_clusters=6)\n", + "relocs_top20[\"cluster\"] = kmeans.fit_predict(relocs_top20[[\"utm_east\", \"utm_north\"]])\n", + "relocs_top20.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3434174-e9d0-418b-9e98-bddb539fb7a1", + "metadata": {}, + "outputs": [], + "source": [ + "cluster_idx = np.unique(relocs_top20[\"cluster\"])\n", + "cluster_centers = [Point(cluster_center) for cluster_center in list(kmeans.cluster_centers_)]\n", + "cluster_radius = [\n", + " np.max(relocs_top20[relocs_top20.cluster == idx].geometry.distance(cluster))\n", + " for idx, cluster in zip(cluster_idx, cluster_centers)\n", + "]\n", + "cluster_gdfs = [gpd.GeoDataFrame(geometry=[cluster], crs=utm_crs) for cluster in cluster_centers]\n", + "for i in range(6):\n", + " cluster_gdfs[i][\"geometry\"] = cluster_gdfs[i].buffer(cluster_radius[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb05cb3c-f97e-4ded-a0e8-32fccc053929", + "metadata": {}, + "outputs": [], + "source": [ + "m = ecoscope.mapping.EcoMap(width=800, height=600)\n", + "m.zoom_to_gdf(relocs_top20)\n", + "for cluster_gdf in cluster_gdfs:\n", + " m.add_gdf(cluster_gdf)\n", + "m" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/test_output/salif_revisits.feather b/tests/test_output/salif_revisits.feather new file mode 100644 index 00000000..9c87ff3a Binary files /dev/null and b/tests/test_output/salif_revisits.feather differ diff --git a/tests/test_recurse.py b/tests/test_recurse.py new file mode 100644 index 00000000..d649dfb0 --- /dev/null +++ b/tests/test_recurse.py @@ -0,0 +1,24 @@ +import geopandas as gpd +import geopandas.testing + +import ecoscope + + +def test_recurse(movbank_relocations): + # apply relocation coordinate filter to movbank data + pnts_filter = ecoscope.base.RelocsCoordinateFilter( + min_x=-5, + max_x=1, + min_y=12, + max_y=18, + filter_point_coords=[[180, 90], [0, 0]], + ) + movbank_relocations.apply_reloc_filter(pnts_filter, inplace=True) + movbank_relocations.remove_filtered(inplace=True) + + revisits = movbank_relocations.copy() + revisits["revisits"] = ecoscope.analysis.get_recursions(movbank_relocations, resolution=400) + + expected_revisits = gpd.read_feather("tests/test_output/salif_revisits.feather") + + gpd.testing.assert_geodataframe_equal(revisits, expected_revisits, check_less_precise=True)