-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataprep.py
71 lines (48 loc) · 2.17 KB
/
dataprep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import shutil
import random
# Source dataset (one sub-folder per tile) and the root that receives the split.
dataset_path = "/teamspace/studios/this_studio/Aerial-Segmentation/Semantic segmentation dataset"
new_dataset_path = "/teamspace/studios/this_studio/Aerial-Segmentation"

# Split roots and the images/masks folder inside each of them.
train_path = os.path.join(new_dataset_path, "train")
val_path = os.path.join(new_dataset_path, "val")
train_image_path = os.path.join(train_path, "images")
train_mask_path = os.path.join(train_path, "masks")
val_image_path = os.path.join(val_path, "images")
val_mask_path = os.path.join(val_path, "masks")

# Create the whole output tree up front; existing folders are left alone.
for _directory in (
    train_path,
    val_path,
    train_image_path,
    val_image_path,
    train_mask_path,
    val_mask_path,
):
    os.makedirs(_directory, exist_ok=True)

# Every directory directly under the dataset root is treated as one tile.
tile_folders = [
    entry
    for entry in os.listdir(dataset_path)
    if os.path.isdir(os.path.join(dataset_path, entry))
]

# Per tile, the first n_train_images shuffled images go to the training split.
n_train_images = 8
# NOTE(review): n_val_images is not referenced by the visible split logic,
# which sends every remaining image to validation — confirm intent.
n_val_images = 1
def copy(train_status):
    """Copy the current tile's images and matching masks into one split.

    Reads module-level state prepared by the per-tile loop: ``tile_folder``,
    ``images_path``, ``masks_path`` and ``train_images`` / ``val_images``,
    plus the destination folders ``train_image_path``, ``train_mask_path``,
    ``val_image_path`` and ``val_mask_path``.

    Each copied file is prefixed with ``<tile_folder>_``. The mask for an
    image is the file in ``masks_path`` with the same stem and a ``.png``
    extension.

    Args:
        train_status: Truthy to copy ``train_images`` into the training
            folders; falsy to copy ``val_images`` into the validation ones.

    Raises:
        OSError: Propagated from ``shutil.copy``, e.g. ``FileNotFoundError``
            when an image or its mask does not exist.
    """
    if train_status:
        images = train_images
        path_image = train_image_path
        path_mask = train_mask_path
    else:
        images = val_images
        path_image = val_image_path
        path_mask = val_mask_path

    for image in images:
        tile_image_name = f'{tile_folder}_{image}'
        shutil.copy(
            os.path.join(images_path, image),
            os.path.join(path_image, tile_image_name),
        )

        # splitext strips only the final extension, so a stem that itself
        # contains dots ("part.001.jpg") still maps to "part.001.png".
        mask_name = os.path.splitext(image)[0] + '.png'
        tile_mask_name = f'{tile_folder}_{mask_name}'
        shutil.copy(
            os.path.join(masks_path, mask_name),
            os.path.join(path_mask, tile_mask_name),
        )
# Split every tile: shuffle its images, send the first n_train_images to the
# training split and all remaining ones to validation. Masks are looked up by
# file name inside copy(), so they need no listing or shuffling of their own.
for tile_folder in tile_folders:
    images_path = os.path.join(dataset_path, tile_folder, 'images')
    masks_path = os.path.join(dataset_path, tile_folder, 'masks')

    images = os.listdir(images_path)
    random.shuffle(images)

    # copy() reads train_images / val_images (and the two paths above) as
    # module-level state, so they must be set before each call.
    train_images = images[:n_train_images]
    val_images = images[n_train_images:]
    copy(train_status=True)
    copy(train_status=False)

# Destructive: the source dataset folder is deleted once all tiles are copied.
# This runs only after the loop finishes; an exception above leaves it intact.
shutil.rmtree(dataset_path)
print(f"Data organization and split completed successfully. Total Training Files is {len(os.listdir(train_image_path))} and Validation Files is {len(os.listdir(val_image_path))}")