forked from weecology/DeepTreeAttention
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mine.py
51 lines (43 loc) · 1.59 KB
/
mine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import glob
import geopandas as gpd
import numpy as np
import os
import pandas as pd
import rasterio as rio
from src import patches
from src import neon_paths
from src.data import read_config
from src.start_cluster import start
from distributed import wait
# Load project settings; the sensor-pool glob patterns and the HSI output
# directory used below are read from this config.
config = read_config("config.yml")

# Keep only the shapefiles whose path contains "OSBS", then shuffle them so
# they are processed in random order.
shapefiles = glob.glob("/orange/idtrees-collab/draped/*.shp")
shapefiles = [x for x in shapefiles if "OSBS" in x]
np.random.shuffle(shapefiles)

# Expand the configured glob patterns once, up front, so the lookup inside
# the loop does not have to re-scan the filesystem per shapefile.
rgb_pool = glob.glob(config["rgb_sensor_pool"], recursive=True)
HSI_pool = glob.glob(config["HSI_sensor_pool"], recursive=True)

# NOTE(review): presumably starts a dask cluster with 50 CPUs and returns its
# client -- confirm in src.start_cluster.
client = start(cpus=50)
futures = []
for i in shapefiles:
    shp = gpd.read_file(i)
    basename = os.path.splitext(os.path.basename(i))[0]

    # Get 1000 random trees. DataFrame.sample raises ValueError when the
    # shapefile holds fewer rows than requested; such shapefiles are skipped.
    # Only ValueError is caught so that unrelated failures (and Ctrl-C) are
    # no longer silently swallowed.
    try:
        shp = shp.sample(n=1000)
    except ValueError:
        continue

    # NOTE(review): looks like this resolves the hyperspectral tile covering
    # the sampled bounds and writes a .tif under HSI_tif_dir -- confirm in
    # src.neon_paths.
    hsi_path = neon_paths.lookup_and_convert(
        bounds=shp.total_bounds,
        rgb_pool=rgb_pool,
        hyperspectral_pool=HSI_pool,
        savedir=config["HSI_tif_dir"],
    )

    # Submit one crop task per sampled row; the output name combines the
    # shapefile basename with the row index.
    for index, row in shp.iterrows():
        future = client.submit(
            patches.crop,
            bounds=row["geometry"].bounds,
            sensor_path=hsi_path,
            savedir="/orange/idtrees-collab/mining/",
            basename="{}_{}".format(basename, index),
        )
        futures.append(future)

# Block until every submitted crop task has finished.
wait(futures)
def remove(x):
    """Delete the raster at path ``x`` if it contains any non-finite value.

    Args:
        x: Path to a raster file that rasterio can open.

    Returns:
        None. The file is removed from disk when any value read from it is
        NaN or +/-inf; otherwise it is left in place.
    """
    # Use a context manager so the dataset handle is closed before the file
    # is (possibly) deleted, rather than being left open.
    with rio.open(x) as src:
        data = src.read()

    if not np.isfinite(data).all():
        os.remove(x)
# Validate every crop in parallel: remove() deletes any image that contains
# non-finite values.
images = glob.glob("/orange/idtrees-collab/mining/*.tif")
futures = client.map(remove, images)
wait(futures)

# Re-list the directory so the manifest only names the images that survived
# validation, then write it out as a one-column CSV.
images = glob.glob("/orange/idtrees-collab/mining/*.tif")
mining = pd.DataFrame(data={"image_path": images})
mining.to_csv(path_or_buf="/orange/idtrees-collab/mining/mining.csv")