From 8f31e83c610bd670dff93b00af01f7e4d11957ca Mon Sep 17 00:00:00 2001 From: Ivan Moskalenko Date: Sun, 7 Jan 2024 17:39:53 +0300 Subject: [PATCH 1/4] New mapping system --- aero_vloc/__init__.py | 3 +- aero_vloc/feature_matchers/feature_matcher.py | 6 +- .../feature_matchers/lightglue/lightglue.py | 7 +- .../feature_matchers/superglue/superglue.py | 7 +- aero_vloc/geo_referencers/geo_referencer.py | 2 +- .../geo_referencers/google_maps_referencer.py | 20 ++- .../geo_referencers/linear_referencer.py | 27 +++- .../homography_estimator.py | 4 +- .../index_searchers/sequential_searcher.py | 2 +- aero_vloc/localization_pipeline.py | 14 +- aero_vloc/maps/__init__.py | 14 ++ .../{primitives/map.py => maps/base_map.py} | 59 ++++++-- aero_vloc/maps/map.py | 130 ++++++++++++++++++ aero_vloc/primitives/__init__.py | 1 - aero_vloc/primitives/map_tile.py | 43 +++++- aero_vloc/primitives/uav_image.py | 7 + aero_vloc/retrieval_system.py | 15 +- aero_vloc/utils.py | 21 ++- aero_vloc/vpr_systems/anyloc/anyloc.py | 9 +- aero_vloc/vpr_systems/cosplace/cosplace.py | 11 +- .../vpr_systems/eigenplaces/eigenplaces.py | 11 +- aero_vloc/vpr_systems/mixvpr/mixvpr.py | 11 +- aero_vloc/vpr_systems/netvlad/netvlad.py | 14 +- aero_vloc/vpr_systems/salad/salad.py | 10 +- aero_vloc/vpr_systems/vpr_system.py | 8 +- 25 files changed, 337 insertions(+), 119 deletions(-) create mode 100644 aero_vloc/maps/__init__.py rename aero_vloc/{primitives/map.py => maps/base_map.py} (64%) create mode 100644 aero_vloc/maps/map.py diff --git a/aero_vloc/__init__.py b/aero_vloc/__init__.py index fa8dc73..ce6dbe8 100644 --- a/aero_vloc/__init__.py +++ b/aero_vloc/__init__.py @@ -17,8 +17,9 @@ from aero_vloc.index_searchers import FaissSearcher, SequentialSearcher from aero_vloc.localization_pipeline import LocalizationPipeline from aero_vloc.map_downloader import MapDownloader +from aero_vloc.maps import Map from aero_vloc.metrics import reference_recall, retrieval_recall -from aero_vloc.primitives import Map, UAVSeq +from aero_vloc.primitives import UAVSeq from aero_vloc.retrieval_system import RetrievalSystem from aero_vloc.utils import visualize_matches from aero_vloc.vpr_systems import AnyLoc, CosPlace, EigenPlaces, MixVPR, NetVLAD, SALAD diff --git a/aero_vloc/feature_matchers/feature_matcher.py b/aero_vloc/feature_matchers/feature_matcher.py index 5c1b3d7..3be7556 100644 --- a/aero_vloc/feature_matchers/feature_matcher.py +++ b/aero_vloc/feature_matchers/feature_matcher.py @@ -11,10 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch from abc import ABC, abstractmethod -from pathlib import Path class FeatureMatcher(ABC): @@ -28,10 +28,10 @@ def __init__(self, resize: int, gpu_index: int = 0): print('Running inference on device "{}"'.format(self.device)) @abstractmethod - def get_feature(self, image_path: Path): + def get_feature(self, image: np.ndarray): """ Gets features of RGB image given - :param image_path: Path to the image for which features should be calculated + :param image: The image for which features should be calculated in OpenCV format :return: Features for image """ pass diff --git a/aero_vloc/feature_matchers/lightglue/lightglue.py b/aero_vloc/feature_matchers/lightglue/lightglue.py index 257c0d4..a908095 100644 --- a/aero_vloc/feature_matchers/lightglue/lightglue.py +++ b/aero_vloc/feature_matchers/lightglue/lightglue.py @@ -14,7 +14,6 @@ import numpy as np import torch -from pathlib import Path from tqdm import tqdm from aero_vloc.feature_detectors import SuperPoint @@ -22,7 +21,7 @@ from aero_vloc.feature_matchers.lightglue.model.lightglue_matcher import ( LightGlueMatcher, ) -from aero_vloc.utils import load_image_for_sp +from aero_vloc.utils import transform_image_for_sp class LightGlue(FeatureMatcher): @@ -42,8 +41,8 @@ def __init__(self, resize: int = 800, gpu_index: int = 0): LightGlueMatcher(features="superpoint").eval().to(self.device) ) - def get_feature(self, image_path: Path): - img = load_image_for_sp(image_path, self.resize).to(self.device) + def get_feature(self, image: np.ndarray): + img = transform_image_for_sp(image, self.resize).to(self.device) shape = img.shape[-2:][::-1] with torch.no_grad(): feats = self.super_point({"image": img}) diff --git a/aero_vloc/feature_matchers/superglue/superglue.py b/aero_vloc/feature_matchers/superglue/superglue.py index 5161ec8..7b0b516 100644 --- a/aero_vloc/feature_matchers/superglue/superglue.py +++ b/aero_vloc/feature_matchers/superglue/superglue.py @@ -46,7 +46,6 @@ import numpy as np import torch -from pathlib import Path from tqdm import tqdm from aero_vloc.feature_detectors import SuperPoint @@ -54,7 +53,7 @@ from aero_vloc.feature_matchers.superglue.model.superglue_matcher import ( SuperGlueMatcher, ) -from aero_vloc.utils import load_image_for_sp +from aero_vloc.utils import transform_image_for_sp class SuperGlue(FeatureMatcher): @@ -75,8 +74,8 @@ def __init__(self, path_to_sg_weights, resize=800, gpu_index: int = 0): SuperGlueMatcher(path_to_sg_weights).eval().to(self.device) ) - def get_feature(self, image_path: Path): - inp = load_image_for_sp(image_path, self.resize).to(self.device) + def get_feature(self, image: np.ndarray): + inp = transform_image_for_sp(image, self.resize).to(self.device) shape = inp.shape[2:] with torch.no_grad(): features = self.super_point({"image": inp}) diff --git a/aero_vloc/geo_referencers/geo_referencer.py b/aero_vloc/geo_referencers/geo_referencer.py index 579f8e7..00cd8ab 100644 --- a/aero_vloc/geo_referencers/geo_referencer.py +++ b/aero_vloc/geo_referencers/geo_referencer.py @@ -20,7 +20,7 @@ class GeoReferencer(ABC): @abstractmethod def get_lat_lon( - self, map_tile: MapTile, pixel: Tuple[int, int], resize: int + self, map_tile: MapTile, pixel: Tuple[int, int], resize: int = None ) -> Tuple[float, float]: """ Finds geographic coordinates of a given pixel on a satellite image diff --git a/aero_vloc/geo_referencers/google_maps_referencer.py b/aero_vloc/geo_referencers/google_maps_referencer.py index c16c87f..99545c0 100644 --- a/aero_vloc/geo_referencers/google_maps_referencer.py +++ b/aero_vloc/geo_referencers/google_maps_referencer.py @@ -1,11 +1,22 @@ -import cv2 +# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. import math from typing import Tuple from aero_vloc.geo_referencers.geo_referencer import GeoReferencer from aero_vloc.primitives import MapTile -from aero_vloc.utils import get_new_size class GoogleMapsReferencer(GeoReferencer): @@ -41,12 +52,13 @@ def __world_to_lat_lon(self, x, y): return lat, lon def get_lat_lon( - self, map_tile: MapTile, pixel: Tuple[int, int], resize: int + self, map_tile: MapTile, pixel: Tuple[int, int], resize: int = None ) -> Tuple[float, float]: top_left_x, top_left_y = self.__lat_lon_to_world( map_tile.top_left_lat, map_tile.top_left_lon ) - + if resize is None: + resize = max(map_tile.image.shape[:2]) desired_x = top_left_x + (self.map_size * abs(pixel[0]) / resize) / self.scale desired_y = top_left_y + (self.map_size * abs(pixel[1]) / resize) / self.scale diff --git a/aero_vloc/geo_referencers/linear_referencer.py b/aero_vloc/geo_referencers/linear_referencer.py index 8548dd2..68befb6 100644 --- a/aero_vloc/geo_referencers/linear_referencer.py +++ b/aero_vloc/geo_referencers/linear_referencer.py @@ -1,5 +1,16 @@ -import cv2 - +# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from typing import Tuple from aero_vloc.geo_referencers.geo_referencer import GeoReferencer @@ -9,15 +20,17 @@ class LinearReferencer(GeoReferencer): def get_lat_lon( - self, map_tile: MapTile, pixel: Tuple[int, int], resize: int + self, map_tile: MapTile, pixel: Tuple[int, int], resize: int = None ) -> Tuple[float, float]: - map_image = cv2.imread(str(map_tile.path)) - h_new, w_new = get_new_size(*map_image.shape[:2], resize) + map_image = map_tile.image + height, width = map_image.shape[:2] + if resize is not None: + height, width = get_new_size(height, width, resize) - lat = map_tile.top_left_lat + (abs(pixel[1]) / h_new) * ( + lat = map_tile.top_left_lat + (abs(pixel[1]) / height) * ( map_tile.bottom_right_lat - map_tile.top_left_lat ) - lon = map_tile.top_left_lon + (abs(pixel[0]) / w_new) * ( + lon = map_tile.top_left_lon + (abs(pixel[0]) / width) * ( map_tile.bottom_right_lon - map_tile.top_left_lon ) return lat, lon diff --git a/aero_vloc/homography_estimator/homography_estimator.py b/aero_vloc/homography_estimator/homography_estimator.py index b757d55..7b0fd83 100644 --- a/aero_vloc/homography_estimator/homography_estimator.py +++ b/aero_vloc/homography_estimator/homography_estimator.py @@ -44,9 +44,7 @@ def __call__( if len(matched_kpts_reference) < 4: print("Not enough points for homography") return None - h_new, w_new = get_new_size( - *cv2.imread(str(query_image.path)).shape[:2], resize_param - ) + h_new, w_new = get_new_size(*query_image.image.shape[:2], resize_param) M, mask = cv2.findHomography( matched_kpts_query, matched_kpts_reference, cv2.RANSAC, 5.0 ) diff --git a/aero_vloc/index_searchers/sequential_searcher.py b/aero_vloc/index_searchers/sequential_searcher.py index 094e506..48f1a82 100644 --- a/aero_vloc/index_searchers/sequential_searcher.py +++ b/aero_vloc/index_searchers/sequential_searcher.py @@ -16,7 +16,7 @@ import numpy as np from aero_vloc.index_searchers.index_searcher import IndexSearcher -from aero_vloc.primitives import Map +from aero_vloc.maps import Map class SequentialSearcher(IndexSearcher): diff --git a/aero_vloc/localization_pipeline.py b/aero_vloc/localization_pipeline.py index b950712..e3bf643 100644 --- a/aero_vloc/localization_pipeline.py +++ b/aero_vloc/localization_pipeline.py @@ -13,7 +13,6 @@ # limitations under the License. from typing import Optional, Tuple -from aero_vloc.geo_referencers import GeoReferencer from aero_vloc.homography_estimator import HomographyEstimator from aero_vloc.primitives import UAVSeq from aero_vloc.retrieval_system import RetrievalSystem @@ -21,18 +20,15 @@ class LocalizationPipeline: """ - Allows to create a localizator based on the retrieval system, - homography estimator and one of the georeference methods. + Allows to create a localizator based on the retrieval system and homography estimator. """ def __init__( self, retrieval_system: RetrievalSystem, - geo_referencer: GeoReferencer, homography_estimator: HomographyEstimator, ): self.retrieval_system = retrieval_system - self.geo_referencer = geo_referencer self.homography_estimator = homography_estimator def __call__( @@ -41,8 +37,7 @@ def __call__( k_closest: int, ) -> list[Optional[Tuple[float, float]]]: """ - Calculates UAV locations using the retrieval system, - homography estimator and one of the georeference methods. + Calculates UAV locations using the retrieval system and homography estimator. :param query_seq: The sequence of images for which locations should be calculated :param k_closest: Specifies how many predictions for each query the global localization should make. @@ -72,7 +67,10 @@ def __call__( if estimator_result is None: localization_results.append(None) continue - latitude, longitude = self.geo_referencer.get_lat_lon( + ( + latitude, + longitude, + ) = self.retrieval_system.sat_map.geo_referencer.get_lat_lon( chosen_sat_image, estimator_result, self.retrieval_system.feature_matcher.resize, diff --git a/aero_vloc/maps/__init__.py b/aero_vloc/maps/__init__.py new file mode 100644 index 0000000..995e995 --- /dev/null +++ b/aero_vloc/maps/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from aero_vloc.maps.map import Map diff --git a/aero_vloc/primitives/map.py b/aero_vloc/maps/base_map.py similarity index 64% rename from aero_vloc/primitives/map.py rename to aero_vloc/maps/base_map.py index e8af38f..123528a 100644 --- a/aero_vloc/primitives/map.py +++ b/aero_vloc/maps/base_map.py @@ -18,10 +18,10 @@ from aero_vloc.primitives.map_tile import MapTile -class Map: +class BaseMap: """ - The class represents the satellite map required for UAV localization. - It is assumed that the map is divided into tiles. + The class represents the base satellite map required for UAV localization. + It is assumed that the map is divided into non-overlapping tiles. """ def __init__(self, path_to_metadata: Path): @@ -47,23 +47,48 @@ def __init__(self, path_to_metadata: Path): bottom_right_lon, ) = line.split() map_tile = MapTile( - map_folder / filename, + [[map_folder / filename]], float(top_left_lat), float(top_left_lon), float(bottom_right_lat), float(bottom_right_lon), ) tiles.append(map_tile) + self.tiles = tiles + height, width = self.shape + tile_height, tile_width = self.tile_shape + self.pixel_shape = height * tile_height, width * tile_width - self.width = None - for i, tile in enumerate(tiles[1:]): - if tile.top_left_lat != tiles[i].top_left_lat: - self.width = i + 1 + @property + def shape(self) -> tuple[int, int]: + """ + :return: Number of tiles by height and by width + """ + width = None + for i, tile in enumerate(self.tiles[1:]): + if tile.top_left_lat != self.tiles[i].top_left_lat: + width = i + 1 break - if self.width is None: - self.width = len(tiles) - self.height = int(len(tiles) / self.width) - self.tiles = tiles + if width is None: + width = len(self.tiles) + height = int(len(self.tiles) / width) + return height, width + + @property + def tile_shape(self) -> tuple[int, int]: + """ + :return: Height and width of tiles in the map + """ + tile_img = self.tiles[0].image + tile_height, tile_width = tile_img.shape[:2] + return tile_height, tile_width + + @property + def tiles_2d(self) -> np.ndarray: + """ + :return: Reshaped map based on the number of tiles in height and width + """ + return np.array(self.tiles).reshape(self.shape) def __iter__(self): for map_tile in self.tiles: @@ -72,6 +97,9 @@ def __iter__(self): def __getitem__(self, key): return self.tiles[key] + def __len__(self): + return len(self.tiles) + def get_neighboring_tiles(self, query_index: int) -> list[int]: """ Returns the indexes of neighboring tiles @@ -79,7 +107,8 @@ def get_neighboring_tiles(self, query_index: int) -> list[int]: :param query_index: Index of the tile for which you need to find neighbors :return: Neighboring tile indices """ - x, y = query_index % self.width, query_index // self.width + height, width = self.shape + x, y = query_index % width, query_index // width potential_neighbors = [ (x - 1, y - 1), (x, y - 1), @@ -93,8 +122,8 @@ def get_neighboring_tiles(self, query_index: int) -> list[int]: result_neighbors = [] for x, y in potential_neighbors: - if (0 <= x < self.width) and (0 <= y < self.height): - result_neighbors.append(self.width * y + x) + if (0 <= x < width) and (0 <= y < height): + result_neighbors.append(width * y + x) return result_neighbors def are_neighbors(self, index_1: int, index_2: int) -> bool: diff --git a/aero_vloc/maps/map.py b/aero_vloc/maps/map.py new file mode 100644 index 0000000..24ac0f1 --- /dev/null +++ b/aero_vloc/maps/map.py @@ -0,0 +1,130 @@ +# Copyright (c) 2023, Ivan Moskalenko, Anastasiia Kornilova +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from pathlib import Path + +from aero_vloc.geo_referencers import GeoReferencer +from aero_vloc.primitives.map_tile import MapTile +from aero_vloc.maps.base_map import BaseMap + + +class Map(BaseMap): + """ + The class represents the satellite map required for UAV localization. + Based on the BaseMap class, it allows + to specify an arbitrary level of overlap and zoom level. + """ + + def __init__( + self, + path_to_metadata: Path, + zoom: float, + overlap_level: float, + geo_referencer: GeoReferencer, + ): + """ + Reads map from metadata file. + File format -- sequence of lines, each line is a single tile. + + The format of a line is as follows: + `filename top_left_lat top_left_lon bottom_right_lat bottom_right_lon` + + :param path_to_metadata: Path to the metadata file + :param zoom: Zoom Level. For example, a level equal + to 0.5 means that the coverage area is doubled. + :param geo_referencer: Georeference model of the map + """ + assert zoom > 0 + assert 0 <= overlap_level < 1 + super().__init__(path_to_metadata) + self.geo_referencer = geo_referencer + + old_tile_h, old_tile_w = self.tile_shape + map_pixel_height, map_pixel_width = self.pixel_shape + new_tile_h, new_tile_w = int(old_tile_h // zoom), int(old_tile_w // zoom) + + # Generating of the new tiles + tiles = [] + for new_top_left_y in range( + 0, map_pixel_height - new_tile_h + 1, int(new_tile_h * (1 - overlap_level)) + ): + for new_top_left_x in range( + 0, + map_pixel_width - new_tile_w + 1, + int(new_tile_w * (1 - overlap_level)), + ): + # Finding the tiles that should participate in the creation of new ones + top_left_index_x, top_left_index_y = ( + new_top_left_x // old_tile_w, + new_top_left_y // old_tile_h, + ) + new_bottom_right_x, new_bottom_right_y = ( + new_top_left_x + new_tile_w - 1, + new_top_left_y + new_tile_h - 1, + ) + bottom_right_index_x, bottom_right_index_y = ( + new_bottom_right_x // old_tile_w, + new_bottom_right_y // old_tile_h, + ) + involved_tiles = self.tiles_2d[ + top_left_index_y : bottom_right_index_y + 1, + top_left_index_x : bottom_right_index_x + 1, + ] + + # Finding the global pixel coordinates of the top left involved tile + old_top_left_x, old_top_left_y = ( + top_left_index_x * old_tile_w, + top_left_index_y * old_tile_h, + ) + # Finding the coordinates of a tile in the involved tiles coordinate system + top_left_local_x, top_left_local_y = ( + new_top_left_x - old_top_left_x, + new_top_left_y - old_top_left_y, + ) + bottom_right_local_x, bottom_right_local_y = ( + new_bottom_right_x - old_top_left_x, + new_bottom_right_y - old_top_left_y, + ) + + top_left_lat, top_left_lon = self.geo_referencer.get_lat_lon( + involved_tiles[0, 0], (top_left_local_x, top_left_local_y) + ) + # We also need to find the coordinates of the bottom right corner + # in the bottom right involved tile coordinate system for georeferencing + bottom_right_lat, bottom_right_lon = self.geo_referencer.get_lat_lon( + involved_tiles[-1, -1], + ( + new_bottom_right_x - bottom_right_index_x * old_tile_w, + new_bottom_right_y - bottom_right_index_y * old_tile_h, + ), + ) + + paths_to_tiles = [ + [tile.paths[0][0] for tile in line] for line in involved_tiles + ] + tiles.append( + MapTile( + paths_to_tiles, + top_left_lat, + top_left_lon, + bottom_right_lat, + bottom_right_lon, + ( + top_left_local_x, + top_left_local_y, + bottom_right_local_x, + bottom_right_local_y, + ), + ) + ) + self.tiles = tiles diff --git a/aero_vloc/primitives/__init__.py b/aero_vloc/primitives/__init__.py index d850d4e..4382b24 100644 --- a/aero_vloc/primitives/__init__.py +++ b/aero_vloc/primitives/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from aero_vloc.primitives.map import Map from aero_vloc.primitives.map_tile import MapTile from aero_vloc.primitives.uav_image import UAVImage from aero_vloc.primitives.uav_seq import UAVSeq diff --git a/aero_vloc/primitives/map_tile.py b/aero_vloc/primitives/map_tile.py index 64c562a..b2dcce9 100644 --- a/aero_vloc/primitives/map_tile.py +++ b/aero_vloc/primitives/map_tile.py @@ -11,24 +11,61 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cv2 +import numpy as np + from pathlib import Path class MapTile: """ - The class represents one satellite map tile with specified coordinates + The class represents one satellite map tile with specified coordinates. + Generally consists of several image files. """ def __init__( self, - path: Path, + paths: list[list[Path]], top_left_lat: float, top_left_lon: float, bottom_right_lat: float, bottom_right_lon: float, + region_of_interest: tuple[int, int, int, int] = None, ): - self.path = path + """ + :param paths: 2D list of paths to the image files + according to their actual location + :param top_left_lat: Top left latitude of the tile + :param top_left_lon: Top left longitude of the tile + :param bottom_right_lat: Bottom right latitude of the tile + :param bottom_right_lon: Bottom right longitude of the tile + :param region_of_interest: Region of the interest of the united image + in the (top left X, top left Y, + bottom right X, bottom right Y) format + If None, no crop is applied + """ + self.paths = paths self.top_left_lat = top_left_lat self.top_left_lon = top_left_lon self.bottom_right_lat = bottom_right_lat self.bottom_right_lon = bottom_right_lon + self.region_of_interest = region_of_interest + + @property + def image(self) -> np.ndarray: + horizontal_lines = [] + for horizontal_line in self.paths: + images = [cv2.imread(str(img)) for img in horizontal_line] + horizontal_lines.append(np.hstack(images)) + result = np.vstack(horizontal_lines) + if self.region_of_interest is not None: + ( + top_left_x, + top_left_y, + bottom_right_x, + bottom_right_y, + ) = self.region_of_interest + result = result[ + top_left_y : bottom_right_y + 1, top_left_x : bottom_right_x + 1 + ] + return result diff --git a/aero_vloc/primitives/uav_image.py b/aero_vloc/primitives/uav_image.py index ee48ffc..4da0b95 100644 --- a/aero_vloc/primitives/uav_image.py +++ b/aero_vloc/primitives/uav_image.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import cv2 +import numpy as np + from pathlib import Path @@ -23,3 +26,7 @@ def __init__(self, path: Path, gt_latitude: float, gt_longitude: float): self.path = path self.gt_latitude = gt_latitude self.gt_longitude = gt_longitude + + @property + def image(self) -> np.ndarray: + return cv2.imread(str(self.path)) diff --git a/aero_vloc/retrieval_system.py b/aero_vloc/retrieval_system.py index 1222ee2..7c9c791 100644 --- a/aero_vloc/retrieval_system.py +++ b/aero_vloc/retrieval_system.py @@ -18,7 +18,8 @@ from aero_vloc.feature_matchers import FeatureMatcher from aero_vloc.index_searchers import IndexSearcher -from aero_vloc.primitives import UAVImage, Map +from aero_vloc.maps import Map +from aero_vloc.primitives import UAVImage from aero_vloc.vpr_systems import VPRSystem @@ -41,15 +42,15 @@ def __init__( self.index = index_searcher self.global_descs = [] - for image in tqdm( + for tile in tqdm( sat_map, desc="Calculating of global descriptors for source DB" ): - self.global_descs.append(self.vpr_system.get_image_descriptor(image.path)) + self.global_descs.append(self.vpr_system.get_image_descriptor(tile.image)) self.index.create(np.asarray(self.global_descs)) local_features = [] - for image in tqdm(sat_map, desc="Calculating of local features for source DB"): - local_features.append(self.feature_matcher.get_feature(image.path)) + for tile in tqdm(sat_map, desc="Calculating of local features for source DB"): + local_features.append(self.feature_matcher.get_feature(tile.image)) self.source_local_features = np.asarray(local_features) def __call__( @@ -71,14 +72,14 @@ def __call__( list of matched reference keypoints for every query -- reference pair (optional) """ query_global_desc = np.expand_dims( - self.vpr_system.get_image_descriptor(query_image.path), axis=0 + self.vpr_system.get_image_descriptor(query_image.image), axis=0 ) global_predictions = self.index.search(query_global_desc, vpr_k_closest) if feature_matcher_k_closest is None: return global_predictions, None, None - query_local_features = self.feature_matcher.get_feature(query_image.path) + query_local_features = self.feature_matcher.get_feature(query_image.image) filtered_db_features = self.source_local_features[global_predictions] ( local_predictions, diff --git a/aero_vloc/utils.py b/aero_vloc/utils.py index 58a28ac..65088f1 100644 --- a/aero_vloc/utils.py +++ b/aero_vloc/utils.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import cv2 +import numpy as np import torch import torchvision -from pathlib import Path from PIL import Image from torchvision.transforms import InterpolationMode from typing import Tuple @@ -30,11 +30,12 @@ def get_new_size(height: int, width: int, resize: int): return height_new, width_new -def transform_image( - image: Image, +def transform_image_for_vpr( + image: np.ndarray, resize: int | Tuple[int, int], interpolation: InterpolationMode = InterpolationMode.BILINEAR, ): + image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB)) if isinstance(resize, int): h_new, w_new = get_new_size(image.height, image.width, resize) else: @@ -52,8 +53,8 @@ def transform_image( return transformed_image -def load_image_for_sp(image_path: Path, resize: int): - grayim = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE) +def transform_image_for_sp(image: np.ndarray, resize: int): + grayim = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) h, w = grayim.shape[:2] h_new, w_new = get_new_size(h, w, resize) grayim = cv2.resize(grayim, (w_new, h_new), interpolation=cv2.INTER_AREA) @@ -63,13 +64,11 @@ def load_image_for_sp(image_path: Path, resize: int): def visualize_matches( matched_kpts_query, matched_kpts_reference, sat_image, drone_image, resize ): - drone_image = cv2.imread(str(drone_image.path)) - h_new, w_new = get_new_size(*drone_image.shape[:2], resize) - drone_image = cv2.resize(drone_image, (w_new, h_new)) + h_new, w_new = get_new_size(*drone_image.image.shape[:2], resize) + drone_image = cv2.resize(drone_image.image, (w_new, h_new)) - sat_image = cv2.imread(str(sat_image.path)) - h_new, w_new = get_new_size(*sat_image.shape[:2], resize) - sat_image = cv2.resize(sat_image, (w_new, h_new)) + h_new, w_new = get_new_size(*sat_image.image.shape[:2], resize) + sat_image = cv2.resize(sat_image.image, (w_new, h_new)) matches = [cv2.DMatch(i, i, 1) for i in range(len(matched_kpts_query))] matched_kpts_query = [cv2.KeyPoint(x, y, 1) for x, y in matched_kpts_query] diff --git a/aero_vloc/vpr_systems/anyloc/anyloc.py b/aero_vloc/vpr_systems/anyloc/anyloc.py index 4611d8b..e703f6e 100644 --- a/aero_vloc/vpr_systems/anyloc/anyloc.py +++ b/aero_vloc/vpr_systems/anyloc/anyloc.py @@ -11,14 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch import torchvision from pathlib import Path -from PIL import Image from torchvision import transforms as tvf -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.vpr_system import VPRSystem from aero_vloc.vpr_systems.anyloc.models import DinoV2ExtractFeatures, VLAD @@ -43,9 +43,8 @@ def __init__(self, c_centers_file: Path, resize: int = 800, gpu_index: int = 0): self.vlad = VLAD(num_clusters=32, desc_dim=None, c_centers_path=c_centers_file) self.vlad.fit() - def get_image_descriptor(self, image_path: Path): - image = Image.open(image_path).convert("RGB") - image = transform_image( + def get_image_descriptor(self, image: np.ndarray): + image = transform_image_for_vpr( image, self.resize, torchvision.transforms.InterpolationMode.BICUBIC ).to(self.device) _, h, w = image.shape diff --git a/aero_vloc/vpr_systems/cosplace/cosplace.py b/aero_vloc/vpr_systems/cosplace/cosplace.py index 5cfce23..d539872 100644 --- a/aero_vloc/vpr_systems/cosplace/cosplace.py +++ b/aero_vloc/vpr_systems/cosplace/cosplace.py @@ -11,12 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch -from pathlib import Path -from PIL import Image - -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.vpr_system import VPRSystem @@ -51,9 +49,8 @@ def __init__( ) self.model.eval().to(self.device) - def get_image_descriptor(self, image_path: Path): - image = Image.open(image_path).convert("RGB") - image = transform_image(image, self.resize)[None, :].to(self.device) + def get_image_descriptor(self, image: np.ndarray): + image = transform_image_for_vpr(image, self.resize)[None, :].to(self.device) with torch.no_grad(): descriptor = self.model(image) descriptor = descriptor.cpu().numpy()[0] diff --git a/aero_vloc/vpr_systems/eigenplaces/eigenplaces.py b/aero_vloc/vpr_systems/eigenplaces/eigenplaces.py index b442942..4de8256 100644 --- a/aero_vloc/vpr_systems/eigenplaces/eigenplaces.py +++ b/aero_vloc/vpr_systems/eigenplaces/eigenplaces.py @@ -11,12 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch -from pathlib import Path -from PIL import Image - -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.vpr_system import VPRSystem @@ -51,9 +49,8 @@ def __init__( ) self.model.eval().to(self.device) - def get_image_descriptor(self, image_path: Path): - image = Image.open(image_path).convert("RGB") - image = transform_image(image, self.resize)[None, :].to(self.device) + def get_image_descriptor(self, image: np.ndarray): + image = transform_image_for_vpr(image, self.resize)[None, :].to(self.device) with torch.no_grad(): descriptor = self.model(image) descriptor = descriptor.cpu().numpy()[0] diff --git a/aero_vloc/vpr_systems/mixvpr/mixvpr.py b/aero_vloc/vpr_systems/mixvpr/mixvpr.py index 258f49d..01db2fb 100644 --- a/aero_vloc/vpr_systems/mixvpr/mixvpr.py +++ b/aero_vloc/vpr_systems/mixvpr/mixvpr.py @@ -11,13 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch import torchvision -from pathlib import Path -from PIL import Image - -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.vpr_system import VPRSystem from aero_vloc.vpr_systems.mixvpr.model.mixvpr_model import VPRModel @@ -53,10 +51,9 @@ def __init__(self, ckpt_path, gpu_index: int = 0): self.model.eval().to(self.device) print(f"Loaded model from {ckpt_path} successfully!") - def get_image_descriptor(self, image_path: Path): + def get_image_descriptor(self, image: np.ndarray): # Note that images must be resized to 320x320 - image = Image.open(image_path).convert("RGB") - image = transform_image( + image = transform_image_for_vpr( image, (320, 320), torchvision.transforms.InterpolationMode.BICUBIC )[None, :].to(self.device) with torch.no_grad(): diff --git a/aero_vloc/vpr_systems/netvlad/netvlad.py b/aero_vloc/vpr_systems/netvlad/netvlad.py index a4d675b..1ebfeaf 100644 --- a/aero_vloc/vpr_systems/netvlad/netvlad.py +++ b/aero_vloc/vpr_systems/netvlad/netvlad.py @@ -11,12 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch -from pathlib import Path -from PIL import Image - -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.netvlad.model.models_generic import ( get_backend, get_model, @@ -56,12 +54,8 @@ def __init__(self, path_to_weights: str, resize: int = 800, gpu_index: int = 0): self.model = self.model.to(self.device) self.model.eval() - def get_image_descriptor( - self, - image_path: Path, - ): - image = Image.open(image_path).convert("RGB") - image = transform_image(image, self.resize)[None, :].to(self.device) + def get_image_descriptor(self, image: np.ndarray): + image = transform_image_for_vpr(image, self.resize)[None, :].to(self.device) with torch.no_grad(): image_encoding = self.model.encoder(image) diff --git a/aero_vloc/vpr_systems/salad/salad.py b/aero_vloc/vpr_systems/salad/salad.py index 3f41318..b77dae1 100644 --- a/aero_vloc/vpr_systems/salad/salad.py +++ b/aero_vloc/vpr_systems/salad/salad.py @@ -11,13 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch -from pathlib import Path -from PIL import Image from torchvision import transforms as tvf -from aero_vloc.utils import transform_image +from aero_vloc.utils import transform_image_for_vpr from aero_vloc.vpr_systems.vpr_system import VPRSystem @@ -40,9 +39,8 @@ def __init__( self.model = torch.hub.load("serizba/salad", "dinov2_salad") self.model.eval().to(self.device) - def get_image_descriptor(self, image_path: Path): - image = Image.open(image_path).convert("RGB") - image = transform_image(image, self.resize).to(self.device) + def get_image_descriptor(self, image: np.ndarray): + image = transform_image_for_vpr(image, self.resize).to(self.device) _, h, w = image.shape h_new, w_new = (h // 14) * 14, (w // 14) * 14 img_cropped = tvf.CenterCrop((h_new, w_new))(image)[None, ...] diff --git a/aero_vloc/vpr_systems/vpr_system.py b/aero_vloc/vpr_systems/vpr_system.py index 9c9c88a..dd7753a 100644 --- a/aero_vloc/vpr_systems/vpr_system.py +++ b/aero_vloc/vpr_systems/vpr_system.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import torch +import numpy as np from abc import ABC, abstractmethod -from pathlib import Path class VPRSystem(ABC): @@ -26,10 +26,10 @@ def __init__(self, gpu_index: int = 0): print('Running inference on device "{}"'.format(self.device)) @abstractmethod - def get_image_descriptor(self, image_path: Path): + def get_image_descriptor(self, image: np.ndarray): """ Gets the descriptor of the image given - :param image_path: - :return: + :param image: Image in the OpenCV format + :return: Descriptor of the image """ pass From 33ebd56e2572bc561b711134d485afcc66f493a2 Mon Sep 17 00:00:00 2001 From: Ivan Moskalenko Date: Sun, 7 Jan 2024 17:41:54 +0300 Subject: [PATCH 2/4] Remove overlap level in downloader --- aero_vloc/map_downloader.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/aero_vloc/map_downloader.py b/aero_vloc/map_downloader.py index 88d09be..c78446c 100644 --- a/aero_vloc/map_downloader.py +++ b/aero_vloc/map_downloader.py @@ -31,7 +31,6 @@ def __init__( south_east_lat: float, south_east_lon: float, zoom: int, - overlap_level: float, api_key: str, folder_to_save: Path, ): @@ -41,7 +40,6 @@ def __init__( :param south_east_lat: Latitude of the southeast point of the map :param south_east_lon: Longitude of the southeast point of the map :param zoom: Zoom level of the map - :param overlap_level: Shows how much neighboring images overlap each other. Float between 0 and 1 :param api_key: API key for Google Maps API :param folder_to_save: Path to save map """ @@ -50,7 +48,6 @@ def __init__( self.south_east_lat = south_east_lat self.south_east_lon = south_east_lon self.zoom = zoom - self.overlap_level = overlap_level self.api_key = api_key self.folder_to_save = folder_to_save @@ -174,9 +171,9 @@ def download_map(self): f"{filename} {top_left_lat} {top_left_lon} {bottom_right_lat} {bottom_right_lon}\n" ) - lon = lon + (lon_step * (1 - self.overlap_level)) + lon = lon + lon_step index += 1 lat_step = self.__get_lat_step(lat, lon) - lat = lat + (lat_step * (1 - self.overlap_level)) + lat = lat + lat_step metadata_file.close() From 8715feb7e4bb19705148843c8f4e99ea6eea1a48 Mon Sep 17 00:00:00 2001 From: Ivan Moskalenko Date: Sun, 7 Jan 2024 17:46:06 +0300 Subject: [PATCH 3/4] Updated example --- example.ipynb | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/example.ipynb b/example.ipynb index 3ad74c6..0ff8c22 100644 --- a/example.ipynb +++ b/example.ipynb @@ -44,7 +44,9 @@ "metadata": {}, "outputs": [], "source": [ - "satellite_map = avl.Map(Path(\"test_map/map_metadata.txt\"))\n", + "linear_referencer = avl.LinearReferencer()\n", + "\n", + "satellite_map = avl.Map(Path(\"test_map/map_metadata.txt\"), zoom=1.5, overlap_level=0.25, geo_referencer=linear_referencer)\n", "drone_images = avl.UAVSeq(Path(\"test_queries/queries.txt\"))" ] }, @@ -62,13 +64,12 @@ "outputs": [], "source": [ "eigen_places = avl.EigenPlaces('ResNet101', fc_output_dim=2048)\n", - "super_glue = avl.SuperGlue(\"superglue_outdoor.pth\", resize=800)\n", + "super_glue = avl.SuperGlue(\"weights/superglue_outdoor.pth\", resize=800)\n", "faiss_searcher = avl.FaissSearcher()\n", "retrieval_system = avl.RetrievalSystem(eigen_places, satellite_map, super_glue, faiss_searcher)\n", "\n", "homography_estimator = avl.HomographyEstimator()\n", - "gmaps_referencer = avl.GoogleMapsReferencer(zoom=17)\n", - "localization_pipeline = avl.LocalizationPipeline(retrieval_system, gmaps_referencer, homography_estimator)" + "localization_pipeline = avl.LocalizationPipeline(retrieval_system, homography_estimator)" ] }, { @@ -105,7 +106,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.6" } }, "nbformat": 4, From d142031e12c4cda3ef05c2c3da09aaa5d0895209 Mon Sep 17 00:00:00 2001 From: Ivan Moskalenko Date: Sun, 7 Jan 2024 21:50:56 +0300 Subject: [PATCH 4/4] Optimizations --- aero_vloc/geo_referencers/linear_referencer.py | 3 +-- aero_vloc/maps/base_map.py | 11 +---------- aero_vloc/maps/map.py | 5 +++-- aero_vloc/primitives/map_tile.py | 9 +++++++++ 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aero_vloc/geo_referencers/linear_referencer.py b/aero_vloc/geo_referencers/linear_referencer.py index 68befb6..b3389ff 100644 --- a/aero_vloc/geo_referencers/linear_referencer.py +++ b/aero_vloc/geo_referencers/linear_referencer.py @@ -22,8 +22,7 @@ class LinearReferencer(GeoReferencer): def get_lat_lon( self, map_tile: MapTile, pixel: Tuple[int, int], resize: int = None ) -> Tuple[float, float]: - map_image = map_tile.image - height, width = map_image.shape[:2] + height, width = map_tile.shape if resize is not None: height, width = get_new_size(height, width, resize) diff --git a/aero_vloc/maps/base_map.py b/aero_vloc/maps/base_map.py index 123528a..4665643 100644 --- a/aero_vloc/maps/base_map.py +++ b/aero_vloc/maps/base_map.py @@ -56,7 +56,7 @@ def __init__(self, path_to_metadata: Path): tiles.append(map_tile) self.tiles = tiles height, width = self.shape - tile_height, tile_width = self.tile_shape + tile_height, tile_width = self.tiles[0].shape self.pixel_shape = height * tile_height, width * tile_width @property @@ -74,15 +74,6 @@ def shape(self) -> tuple[int, int]: height = int(len(self.tiles) / width) return height, width - @property - def tile_shape(self) -> tuple[int, int]: - """ - :return: Height and width of tiles in the map - """ - tile_img = self.tiles[0].image - tile_height, tile_width = tile_img.shape[:2] - return tile_height, tile_width - @property def tiles_2d(self) -> np.ndarray: """ diff --git a/aero_vloc/maps/map.py b/aero_vloc/maps/map.py index 24ac0f1..15274c1 100644 --- a/aero_vloc/maps/map.py +++ b/aero_vloc/maps/map.py @@ -49,9 +49,10 @@ def __init__( super().__init__(path_to_metadata) self.geo_referencer = geo_referencer - old_tile_h, old_tile_w = self.tile_shape + old_tile_h, old_tile_w = self.tiles[0].shape map_pixel_height, map_pixel_width = self.pixel_shape new_tile_h, new_tile_w = int(old_tile_h // zoom), int(old_tile_w // zoom) + tiles_2d = self.tiles_2d # Generating of the new tiles tiles = [] @@ -76,7 +77,7 @@ def __init__( new_bottom_right_x // old_tile_w, new_bottom_right_y // old_tile_h, ) - involved_tiles = self.tiles_2d[ + involved_tiles = tiles_2d[ top_left_index_y : bottom_right_index_y + 1, top_left_index_x : bottom_right_index_x + 1, ] diff --git a/aero_vloc/primitives/map_tile.py b/aero_vloc/primitives/map_tile.py index b2dcce9..d1bc9cf 100644 --- a/aero_vloc/primitives/map_tile.py +++ b/aero_vloc/primitives/map_tile.py @@ -14,6 +14,7 @@ import cv2 import numpy as np +from functools import cached_property from pathlib import Path @@ -69,3 +70,11 @@ def image(self) -> np.ndarray: top_left_y : bottom_right_y + 1, top_left_x : bottom_right_x + 1 ] return result + + @cached_property + def shape(self) -> tuple[int, int]: + """ + :return: Height and width of the tile + """ + height, width = self.image.shape[:2] + return height, width