-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataset.py
68 lines (52 loc) · 2.14 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import pickle
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tqdm import tqdm
import paths
# File-name suffixes treated as images, listed in lower- and upper-case
# form because the match against file names is case-sensitive.
extensions = [
    '.jpg', '.JPG',
    '.jpeg', '.JPEG',
    '.png', '.PNG',
]
def get_file_list(root_dir):
    """Recursively collect the image files found under ``root_dir``.

    A file is kept when its name ends with one of the suffixes in the
    module-level ``extensions`` list. A plain substring test would also
    accept names such as ``photo.jpg.txt`` or ``notes.pngs``, so the check
    is anchored to the end of the name.

    Args:
        root_dir: Directory tree to walk with ``os.walk``.

    Returns:
        Sorted list of matching paths, each joined onto the directory in
        which the file was found.
    """
    # str.endswith accepts a tuple of suffixes; build it once, outside the loops.
    suffixes = tuple(extensions)
    file_list = []
    for root, _dirs, filenames in os.walk(root_dir):
        # One progress bar per visited directory.
        for filename in tqdm(filenames):
            if filename.endswith(suffixes):
                file_list.append(os.path.join(root, filename))
    return sorted(file_list)
def get_stored_features():
    """Load the pickled file names and feature list from disk.

    Reads ``paths.filenames_path`` and ``paths.features_path``. Each file is
    opened in a ``with`` block so its handle is closed as soon as the
    payload has been unpickled.

    Returns:
        Tuple ``(stored_filenames, stored_feature_list)`` exactly as
        unpickled.
    """
    # NOTE(review): unpickling can execute arbitrary code; only point these
    # paths at files this project produced itself.
    with open(paths.filenames_path, 'rb') as filenames_file:
        stored_filenames = pickle.load(filenames_file)
    with open(paths.features_path, 'rb') as features_file:
        stored_feature_list = pickle.load(features_file)
    return stored_filenames, stored_feature_list
def visualize_features():
    """Show a 2-D t-SNE map of the stored features, drawn with thumbnails.

    Pipeline: load the stored file names and features, compress the
    features with PCA, embed them in two dimensions with t-SNE (cosine
    metric), standardize the embedding, load every image at 45x45 with its
    pixel values divided by 255, and pass everything to
    ``visualize_scatter_with_images``.

    Raises:
        ValueError: If no features are stored.
    """
    filenames, features = get_stored_features()
    if len(features) == 0:
        raise ValueError('no stored features to visualize')

    # PCA rejects n_components larger than min(n_samples, n_features), so
    # cap the target of 100 for small datasets or short feature vectors.
    # Assumes `features` is a 2-D array-like of shape
    # (n_samples, n_features) - TODO confirm.
    num_feature_dimensions = min(100, len(features), len(features[0]))
    pca = PCA(n_components=num_feature_dimensions)
    pca.fit(features)
    feature_list_compressed = pca.transform(features)

    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter`; confirm against the installed version before upgrading.
    tsne = TSNE(n_components=2, verbose=1, n_iter=4000, metric='cosine', init='pca')
    tsne_results = tsne.fit_transform(feature_list_compressed)
    tsne_results = StandardScaler().fit_transform(tsne_results)

    # `size` is both the thumbnail resolution and the figsize passed on to
    # the plotting helper.
    size = (45, 45)
    imgs = [img_to_array(load_img(path, target_size=size)) / 255 for path in filenames]
    visualize_scatter_with_images(tsne_results, imgs=imgs, size=size, zoom=0.7)
def visualize_scatter_with_images(data, imgs, size=(28, 28), zoom=1):
    """Draw each image at its 2-D coordinate on one axis-free figure.

    Args:
        data: Sequence of ``(x, y)`` pairs, matched to ``imgs`` by position.
        imgs: Images to place; each one is wrapped in an ``OffsetImage``.
        size: Forwarded to ``plt.subplots`` as ``figsize``.
        zoom: Forwarded to ``OffsetImage`` as its zoom factor.
    """
    _figure, axes = plt.subplots(figsize=size)

    for (x, y), picture in tqdm(zip(data, imgs)):
        thumbnail = OffsetImage(picture, zoom=zoom)
        box = AnnotationBbox(thumbnail, (x, y), xycoords='data', frameon=False)
        axes.add_artist(box)

    # Artists added through add_artist do not extend the data limits, so
    # register the points explicitly before autoscaling.
    axes.update_datalim(data)
    axes.autoscale()
    axes.axis('off')

    plt.tight_layout(pad=1.2)
    plt.show()
# Running this module as a script renders the t-SNE thumbnail map of the
# stored features; importing it has no side effects.
if __name__ == "__main__":
    visualize_features()