forked from njcronin/UDCT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_h5_dataset.py
123 lines (92 loc) · 4.44 KB
/
create_h5_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/python
import numpy as np
import h5py
import cv2
import os
import sys
def get_file_list(data_path):
    """
    Collect all png images of a directory and verify that they share one shape.

    Arguments:
        data_path (string)  Path to the directory holding the images. A trailing
                            path separator is optional.
    Returns:
        file_list (list of strings)  Paths of all png images, sorted by file name
        dimensions (3 x integer)     Dimensions of the images: height, width, channels
        flag (boolean)               Is true, iff the images are grayscale
    Raises:
        ValueError  If the directory contains no png image, if an image cannot
                    be decoded, or if two images have different dimensions.
    """
    # os.listdir returns names in arbitrary order; sorting makes the sample
    # order reproducible. os.path.join keeps the path valid whether or not
    # data_path ends with a separator.
    file_list = [
        os.path.join(data_path, element)
        for element in sorted(os.listdir(data_path))
        if element.endswith(".png")
    ]
    if not file_list:
        raise ValueError('No png images found in directory: ' + str(data_path))

    # Compare the shape of every image against the first one
    dimensions = None
    for fname in file_list:
        image = cv2.imread(fname, cv2.IMREAD_UNCHANGED)
        if image is None:
            # cv2.imread signals a decoding failure by returning None
            raise ValueError('The following image could not be read: ' + fname)
        if dimensions is None:
            dimensions = image.shape
        elif image.shape != dimensions:
            raise ValueError('The following two images have different dimensions. Please make sure all images in this directory have the same size \n\r ' +
                             file_list[0] + '\n\r' +
                             fname)

    # A 2-D shape means a single data channel: add the channel count as 3rd value
    flag = False
    if len(dimensions) == 2:
        dimensions = np.array([dimensions[0], dimensions[1], 1])
        flag = True

    return file_list, dimensions, flag
def _load_image_stack(data_path):
    """Read all png images of data_path into one uint8 array of shape (N, H, W, C)."""
    files, dimensions, is_gray = get_file_list(data_path)
    # IMREAD_GRAYSCALE yields one channel and IMREAD_COLOR always yields three,
    # so the buffer is sized from the read mode rather than from the raw file
    # shape (a png with an alpha channel would otherwise not fit the buffer).
    num_channel = 1 if is_gray else 3
    stack = np.zeros([len(files), dimensions[0], dimensions[1], num_channel],
                     dtype=np.uint8)
    for idx, fname in enumerate(files):
        if is_gray:  # This means, the images are gray scale
            stack[idx, :, :, 0] = np.array(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        else:
            # OpenCV decodes to BGR; flipping the channel axis stores RGB
            stack[idx, :, :, :] = np.flip(np.array(cv2.imread(fname, cv2.IMREAD_COLOR)), 2)
    return stack


def _write_image_group(h5_file, group_name, stack, label):
    """Store stack and its sample/channel counts in a new group of h5_file."""
    group = h5_file.create_group(group_name)
    group.create_dataset(name='num_samples', data=stack.shape[0])
    group.create_dataset(name='num_channel', data=stack.shape[3])
    print(label, group.create_dataset(name='data', data=stack, dtype=np.uint8))


def main():
    """
    Convert two directories of png images into one hdf5 dataset file.

    Reads three command line arguments: the directory of the genuine/raw
    images, the directory of the synthetic images and the output hdf5
    filename. The genuine images are stored in group 'A', the synthetic
    images in group 'B'; each group holds 'num_samples', 'num_channel' and
    the uint8 image array 'data'.

    Returns:
        -1 if the number of command line arguments is wrong, otherwise None.
    """
    # Check if the right amount of arguments has been given to the program
    if len(sys.argv[1:]) != 3:
        print('This script recuires three arguments in order to work:')
        print('1: Path to directory containing the genuine/raw images (only png images in directory are used!)')
        print('2: Path to directory containing the synthetic images (only png images in directory are used!)')
        print('3: Output hdf5 filename')
        print(' ')
        print('Example: python create_h5_dataset.py ./Data/Example/Genuine/ ./Data/Example/Synthetic/ ./Data/Example/example_dataset.h5')
        print(' ')
        print('Script aborted')
        return -1

    # get the addresses
    raw_path, syn_path, filename = sys.argv[1:4]

    # Load both image sets before touching the output file, so that invalid
    # input does not truncate an existing file or leave an empty one behind.
    data_A = _load_image_stack(raw_path)
    data_B = _load_image_stack(syn_path)

    # The context manager closes the hdf5 file even if writing fails
    with h5py.File(filename, "w") as f:
        _write_image_group(f, 'A', data_A, 'Genuine dataset: ')
        _write_image_group(f, 'B', data_B, 'Synthetic dataset: ')
if __name__ == "__main__":
    # Forward main()'s return value as the process exit status, so that the
    # -1 returned on a usage error is visible to the calling shell
    # (a return value of None exits with status 0).
    sys.exit(main())