-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdata_preparer.py
158 lines (124 loc) · 7.1 KB
/
data_preparer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import enum
from os import listdir
from typing import List, Dict, Optional, Iterable
from utils.data_land_marker import LandMarker
# Separator that merge_paths() places between path fragments.
FILE_SEPARATOR = '/' # For Linux and MacOS
# File-name suffix of emotion label files; EmotionFrameSet.LABEL_EXTENSION holds the same value.
LABEL_POSTFIX = '_emotion.txt'
def sorted_dir_content(dir_path: str):
    """Return the names of the entries in `dir_path`, sorted ascending.

    :param dir_path: directory to list.
    :return: list of entry names (not full paths) in sorted order.
    """
    entries = listdir(dir_path)
    entries.sort()
    return entries
def merge_paths(*paths: str):
    """Join path fragments with FILE_SEPARATOR into a single path string.

    After joining, every '//' found in one left-to-right pass is replaced by
    '/', so a fragment that already ends with a slash does not produce a
    doubled separator.

    :param paths: path fragments, in order.
    :return: the merged path.
    """
    joined = FILE_SEPARATOR.join(paths)
    return joined.replace('//', '/')
def list_to_csv_line(lst: Iterable):
    """Render the items of `lst` as one comma-separated line.

    Each item is converted with str(); no quoting or escaping is applied.

    :param lst: items that make up the row.
    :return: the row text, terminated by a newline.
    """
    cells = [str(item) for item in lst]
    return ','.join(cells) + '\n'
class EmotionLabels(enum.Enum):
    """Known emotion labels.

    Each member's value is a dict with two keys:
    'code' -- the label text that code_to_name() compares against, and
    'name' -- the readable label name.
    """

    neutral = {'code': '0.0000000e+00', 'name': 'neutral'}
    anger = {'code': '1.0000000e+00', 'name': 'anger'}
    contempt = {'code': '2.0000000e+00', 'name': 'contempt'}
    disgust = {'code': '3.0000000e+00', 'name': 'disgust'}
    fear = {'code': '4.0000000e+00', 'name': 'fear'}
    happy = {'code': '5.0000000e+00', 'name': 'happy'}
    sadness = {'code': '6.0000000e+00', 'name': 'sadness'}
    surprise = {'code': '7.0000000e+00', 'name': 'surprise'}

    @staticmethod
    def code_to_name(input_label_code: str) -> str:
        """Translate a label code into the matching label name.

        :param input_label_code: code text; must equal a member's 'code' exactly.
        :return: the 'name' of the first member whose 'code' matches.
        :raises IndexError: if no member has the given code.
        """
        names = [
            member.value['name']
            for member in EmotionLabels
            if member.value['code'] == input_label_code
        ]
        # Plain indexing keeps the IndexError for unknown codes.
        return names[0]
class EmotionFrameSet:
    """The images and the optional emotion label found in one directory.

    The directory is listed once on construction. `img_paths` holds the paths
    of all '.png' files in sorted file-name order; `emotion_label` holds the
    decoded label name, or None when the directory has no label file.
    """

    IMG_EXTENSION = '.png'
    LABEL_EXTENSION = '_emotion.txt'

    class LabeledImage:
        """An image path together with the label assigned to that image."""

        def __init__(self, img_path: str, img_label: str):
            self.img_path = img_path
            self.img_label = img_label

        def __str__(self):
            return str({"IMG_PATH": self.img_path, "IMG_LABEL": self.img_label})

    @staticmethod
    def print_labeled_images(labeled_images: List[LabeledImage]) -> None:
        """Print every labeled image, prefixed with its index in the list."""
        for i, lbl_img in enumerate(labeled_images):
            print(f'[{i}]:', lbl_img)

    def __init__(self, emotion_frame_set_dir_path: str):
        """List the directory and derive image paths and emotion label from it.

        :param emotion_frame_set_dir_path: directory holding the frames and,
            optionally, one label file.
        """
        files_in_dir = sorted_dir_content(dir_path=emotion_frame_set_dir_path)
        self.img_paths = self.calc_img_paths(dir_path=emotion_frame_set_dir_path, files_in_dir=files_in_dir)
        self.emotion_label = self.calc_emotion_label(dir_path=emotion_frame_set_dir_path, files_in_dir=files_in_dir)

    @staticmethod
    def calc_emotion_label(dir_path: str, files_in_dir: List[str]) -> Optional[str]:
        """Read and decode the emotion label stored in the directory.

        :param dir_path: directory the file names belong to.
        :param files_in_dir: names of the files inside `dir_path`.
        :return: the label name, or None when no file name ends with
            LABEL_EXTENSION.
        :raises IndexError: if the label file content is not a known code
            (raised by EmotionLabels.code_to_name).
        """
        possible_label_files = [file for file in files_in_dir if file.endswith(EmotionFrameSet.LABEL_EXTENSION)]
        if not possible_label_files:  # Check if emotion label does not exist.
            return None
        # Only the first matching file is consulted.
        label_path = merge_paths(dir_path, possible_label_files[0])
        # Context manager so the handle is closed right after reading.
        with open(label_path, 'r') as label_file:
            encoded_label = label_file.read().strip()
        return EmotionLabels.code_to_name(input_label_code=encoded_label)

    @staticmethod
    def calc_img_paths(dir_path: str, files_in_dir: List[str]) -> List:
        """Return the full paths of all files ending with IMG_EXTENSION.

        :param dir_path: directory the file names belong to.
        :param files_in_dir: names of the files inside `dir_path`.
        :return: merged paths, in the order of `files_in_dir`.
        """
        img_names = [img for img in files_in_dir if img.endswith(EmotionFrameSet.IMG_EXTENSION)]
        return [merge_paths(dir_path, img_name) for img_name in img_names]

    # Regular instance method: it reads self.img_paths and self.emotion_label,
    # so it must not be a staticmethod (which made `obj.method()` fail).
    def calc_img_paths_with_labels(self) -> List[Dict]:
        """Pair the first image with 'neutral' and the last with the set's label.

        :return: two dicts with the keys 'path' and 'label'.
        :raises IndexError: if the frame set has no images.
        """
        return [
            {'path': self.img_paths[0], 'label': EmotionLabels.neutral.name},
            {'path': self.img_paths[-1], 'label': self.emotion_label}
        ]
class PreProcessor:
    """Turn a dataset directory tree into a flat list of labeled images.

    The tree is traversed as <dataset_parent_dir>/<person>/<emotion_folder>;
    every emotion folder becomes one EmotionFrameSet. Frame sets without an
    emotion label or without any image are dropped. Each remaining set yields
    two samples: its first image labeled 'neutral' and its last image labeled
    with the set's emotion.

    Signature annotations are quoted so they are not evaluated while the class
    body is being executed.
    """

    def __init__(self, dataset_parent_dir: str):
        """:param dataset_parent_dir: root directory containing one folder per person."""
        self.__dataset_parent_dir = dataset_parent_dir

    def preprocess(self) -> 'List[EmotionFrameSet.LabeledImage]':
        """Collect, filter and convert the frame sets.

        :return: two labeled images per usable frame set, in traversal order.
        """
        emotion_frame_sets = self.__collect_emotion_frame_sets(dataset_parent_dir=self.__dataset_parent_dir)
        filtered_emotion_frame_sets = self.__filter_emotion_frame_sets(emotion_frame_sets=emotion_frame_sets)
        labeled_images = self.__emotion_frame_sets_to_labeled_images(frame_sets=filtered_emotion_frame_sets)
        return labeled_images

    @staticmethod
    def __collect_emotion_frame_sets(dataset_parent_dir: str) -> 'List[EmotionFrameSet]':
        """Build one EmotionFrameSet per <person>/<emotion_folder> directory.

        Entries are visited in sorted name order on both levels.
        """
        emotion_frame_sets: List[EmotionFrameSet] = []
        for person in sorted_dir_content(dataset_parent_dir):
            person_subject_dir = merge_paths(dataset_parent_dir, person)
            for emotion_folder in sorted_dir_content(person_subject_dir):
                path = merge_paths(person_subject_dir, emotion_folder)
                emotion_frame_sets.append(EmotionFrameSet(emotion_frame_set_dir_path=path))
        return emotion_frame_sets

    @staticmethod
    def __filter_emotion_frame_sets(emotion_frame_sets: 'List[EmotionFrameSet]') -> 'List[EmotionFrameSet]':
        """Keep the usable frame sets and trim each to its first and last image.

        A frame set is usable when it has an emotion label and at least one
        image. The kept sets are modified in place: their `img_paths` becomes
        [first, last] (the same path twice for a single-image set).
        """
        filtered_emotion_frame_sets: List[EmotionFrameSet] = []
        for emotion_frame_set in emotion_frame_sets:
            all_img_paths = emotion_frame_set.img_paths
            # Without a label or without images no sample can be produced;
            # skipping also avoids an IndexError on an empty image list.
            if emotion_frame_set.emotion_label is None or not all_img_paths:
                continue
            emotion_frame_set.img_paths = [all_img_paths[0], all_img_paths[-1]]  # Get first and last image.
            filtered_emotion_frame_sets.append(emotion_frame_set)
        return filtered_emotion_frame_sets

    @staticmethod
    def __emotion_frame_sets_to_labeled_images(frame_sets: 'List[EmotionFrameSet]') -> 'List[EmotionFrameSet.LabeledImage]':
        """Convert trimmed frame sets into labeled images.

        Expects `img_paths` to hold exactly [first, last], as produced by the
        filter step: index 0 is labeled 'neutral', index 1 gets the set's label.
        """
        labeled_images: List[EmotionFrameSet.LabeledImage] = []
        for a_set in frame_sets:
            neutral_img = EmotionFrameSet.LabeledImage(a_set.img_paths[0], img_label=EmotionLabels.neutral.name)
            emotion_img = EmotionFrameSet.LabeledImage(a_set.img_paths[1], img_label=a_set.emotion_label)
            labeled_images.extend((neutral_img, emotion_img))
        return labeled_images
class DatasetBuilder:
    """Write labeled images to a CSV file, one row of landmark values per image.

    Every row consists of the values returned by the land marker for the image
    followed by the image's label; the optional header names the value columns
    X1..Xn followed by the class column.
    """

    def __init__(self, labeled_images: List[EmotionFrameSet.LabeledImage], class_col: str, land_marker: LandMarker):
        """
        :param labeled_images: images to export; the first one is used to size the header.
        :param class_col: header name of the label column.
        :param land_marker: object providing img_path_to_landmarks(img_path=...).
        :raises IndexError: if `labeled_images` is empty.
        """
        self.land_marker = land_marker
        self.labeled_images = labeled_images
        first_image = labeled_images[0]
        self.header = self.create_header(class_col=class_col, dummy_labeled_image=first_image)

    def create_header(self, class_col: str, dummy_labeled_image: EmotionFrameSet.LabeledImage):
        """Build the CSV header line from the landmarks of a sample image.

        :param class_col: name of the last column.
        :param dummy_labeled_image: image used only to learn the number of value columns.
        :return: header line 'X1,...,Xn,<class_col>' terminated by a newline.
        """
        detections = self.land_marker.img_path_to_landmarks(img_path=dummy_labeled_image.img_path)
        # Only the first element of the result is used -- presumably the first
        # detected face; confirm against LandMarker.
        sample_points = detections[0]
        columns = ['X%d' % position for position in range(1, len(sample_points) + 1)]
        columns.append(class_col)
        return list_to_csv_line(columns)

    def build(self, target: str, write_header: bool = True):
        """Write the dataset to `target`, overwriting any existing file.

        :param target: path of the CSV file to create.
        :param write_header: whether the header line is written first.
        """
        marker = self.land_marker
        total = len(self.labeled_images)
        print('[INFO]', 'Dataset is building..')
        with open(file=target, mode='w') as out_file:
            if write_header:
                out_file.write(self.header)
            for done, image in enumerate(self.labeled_images, start=1):
                points = marker.img_path_to_landmarks(img_path=image.img_path)[0]
                # Concatenation with a 1-tuple assumes `points` is a tuple.
                row = points + (image.img_label,)
                out_file.write(list_to_csv_line(row))
                print('[INFO]', 'Written Face Instance Progress: %d/%d' % (done, total))
        print('\nAll instances are successfully written to file: \"%s\"' % target)
def run_data_preparer(land_marker: LandMarker, dataset_images_dir: str, dataset_csv: str):
    """Preprocess the image directory tree and write the dataset as CSV.

    :param land_marker: land marker handed to the DatasetBuilder.
    :param dataset_images_dir: root directory of the image dataset.
    :param dataset_csv: path of the CSV file to write.
    """
    # Step 1: gather the labeled images from the directory tree.
    pre_processor = PreProcessor(dataset_parent_dir=dataset_images_dir)
    labeled_images = pre_processor.preprocess()

    # Step 2: write them out with 'emotion' as the class column.
    builder = DatasetBuilder(labeled_images, class_col='emotion', land_marker=land_marker)
    builder.build(target=dataset_csv)