-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocessFolderForModelTraining.py
executable file
·370 lines (298 loc) · 14.6 KB
/
processFolderForModelTraining.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
"""
Each image in a experimental folder is processed.
For DAPI & O4 channels
If markerFile is present, it 'processMarkers' and adds to 'd' list object.
At the end it looks for distances between markers and centroids and compares the automated results with the manually
annotated results.
annotations.json
if export flag is set, then cell images are exported to 'output' folder for model training
"""
import tkinter as tk
from tkinter import ttk
import tkinter.font as tkFont
# import tkinter.filedialog as fileDialog
from tkinter.ttk import Progressbar
import random
import numpy as np
# import os
import json
from matplotlib import pyplot as plt
# start JVM for compatibility with VSI files
# print('JAVA_HOME =', os.environ['JAVA_HOME'])
import javabridge
import bioformats
from settings import Settings
from singleCompositeImage import singleCompositeImage
from commonFunctions import *
javabridge.start_vm(class_path=bioformats.JARS)
settings = Settings()
class Application(tk.Frame):
def __init__(self, master=None):
"""Setup tk application."""
super().__init__(master)
self.progress = None
self.console = None
self.console_scrollbar = None
self.root = None
self.master = master
self.master.title("Process folder for Model Training")
self.pack()
self.main_container = tk.Frame(master)
self.main_container.pack(side="top", fill="both", expand=True)
self.top_frame = tk.Frame(self.main_container)
self.bottom_frame = tk.Frame(self.main_container, background="grey")
self.top_frame.pack(side="top", fill="x", expand=False)
self.bottom_frame.pack(side="bottom", fill="both", expand=True)
self.create_widgets()
def create_widgets(self):
"""Creates widgets on initial window."""
print("Creating widgets...")
n1 = tk.Label(self.top_frame,
text="""Experiment Name""",
font=tkFont.Font(family="Calibri", size=14))
l13 = tk.Label(self.top_frame,
text="""Enable debug?""",
anchor='e',
font=tkFont.Font(family="Calibri", size=12))
n1.grid(row=0, column=0, sticky='w', pady=2)
l13.grid(row=1, column=2, sticky='w', pady=2)
self.name = tk.StringVar()
experiments = list(settings.experiments)
print(experiments)
self.name.set(settings.defaults["name"])
combo = ttk.Combobox(self.top_frame, values=experiments,
width=80, textvariable=self.name,
font=tkFont.Font(family="Calibri", size=14))
# debug mode?
debug = tk.BooleanVar()
debug.set(settings.defaults["debug"])
e12 = tk.Checkbutton(self.top_frame, text='', variable=debug,
onvalue=True, offvalue=False,
anchor='w')
combo.grid(row=0, column=1, sticky='w', pady=2)
e12.grid(row=1, column=3, columnspan=3, sticky='w', pady=2)
# start button
button2 = tk.Button(self.bottom_frame,
text="Start",
command=lambda: self.start_analysis(experiment=self.name.get(),
debug=debug.get()),
font=tkFont.Font(family="Calibri", size=12))
button2.pack(side="top")
# define output console area
tk.Label(self.bottom_frame,
text="""Output""",
justify=tk.LEFT,
anchor='n',
font=tkFont.Font(family="Calibri", size=12)).pack(side='left', fill='y')
# add scroll bar
self.console_scrollbar = tk.Scrollbar(self.bottom_frame, orient=tk.VERTICAL)
self.console_scrollbar.pack(side="right", fill='y')
# add text console widget
self.console = tk.Text(self.bottom_frame,
yscrollcommand=self.console_scrollbar,
font=tkFont.Font(family="Calibri", size=12))
self.console.pack(side="top", fill="both", expand=True)
self.console.bind("<Key>", lambda e: "break")
# assign scroll bar to console yview
self.console_scrollbar.config(command=self.console.yview)
# add progress bar
self.progress = Progressbar(self.bottom_frame,
length=200, orient=tk.HORIZONTAL, mode='determinate')
self.progress.pack(side="bottom", fill="y", expand=True)
def start_analysis(self, experiment, export: bool = True, debug: bool = False):
""" Run analysis - process folder for presence of count markers"""
# retrieve analysis settings
root = settings.experiments[experiment]['root']
pattern = settings.experiments[experiment]['pattern']
dapi_ch = settings.experiments[experiment]['dapi_ch']
dapi_gamma = settings.experiments[experiment].get('dapi_gamma', 1.0)
o4_ch = settings.experiments[experiment]['o4_ch']
o4_gamma = settings.experiments[experiment].get('o4_gamma', 1.0)
scalefactor = settings.experiments[experiment].get('scalefactor', 1.0)
# which marker was used in cell counter? set marker_index, set 1 if not present.
# might be better to prompt
marker_index = settings.experiments[experiment].get('marker_index', 1)
# create empty results folder
create_empty_folder(os.path.join(root, 'keras'))
# fix unicode filenames
fix_unicode_filenames(experiment)
# enumerate all image files using pattern
files = find(pattern, root)
# start process
self.console.insert("end", f"\nFound {len(files)} matching '{pattern}' in '{root}'")
self.console.insert("end", "\n***************************")
self.console.insert("end", "\nStarting to analyze images")
# select file sample
if debug:
# select two files at random
files = list(files[i] for i in random.sample(list(range(len(files))), 5))
# list to store all cell images
imageSet = []
# list to store distances
d = []
# rest progress bar
self.progress["value"] = 0
self.progress.update()
# calculate increment for progress bar
fileNumber = len(files)
currentFileNumber = 0
progress_increment = 100 / fileNumber
for file in files:
# increment currentFileNumber
currentFileNumber += 1
# increment progress bar
self.progress["value"] += progress_increment
self.progress.update()
path = file['path']
imgFile = file['name']
try:
sCI = singleCompositeImage(path=path,
imgFile=imgFile,
dapi_ch=dapi_ch,
dapi_gamma=dapi_gamma,
o4_ch=o4_ch,
o4_gamma=o4_gamma,
scalefactor=scalefactor)
# process image to count DAPI+ nuclei
sCI.processDAPI(threshold_method='th2')
# process each cell within the image
sCI.processCells()
# append image to imageSet list
imageSet.append(sCI)
""" If marker_index has been set find associated markerFile, i.e. XML data file. """
if marker_index != 0:
markerFile = []
if pattern == '*MMStack.ome*.tif':
markerFile = findNewestMarkerFile(path)
elif pattern == '*Composite*.tif':
markerFile = findMatchingMarkerFile(path, imgFile)
else:
markerFile = findMatchingMarkerFile(path, imgFile)
if markerFile:
sCI.processMarkers(markerFile['name'], marker_index, debug)
if sCI.markers_XY is not None:
for i in range(sCI.markers_XY.shape[0]):
d.append(sCI.fd[sCI.NN[i], i])
if debug:
sCI.reportResults()
except:
self.console.insert("end", f"\nFailed on path '{path}'. Image: {imgFile}")
raise
self.console.update()
""" If there are successfully processed markers plot the distribution of distances. """
if d:
self.console.insert("end", f"\nPlotting density of distances between {len(d)} markers and their nearest "
f"neighbor nuclei.")
import pandas as pd
pd.DataFrame(d).plot(kind='density')
plt.show()
from statistics import mean, stdev
self.console.insert("end", f"\nMean of marker-to-cell distances: {mean(d)}")
self.console.insert("end", f"\nStDev of marker-to-cell distances: {stdev(d)}")
import scipy.stats as st
self.console.insert("end", st.t.interval(0.95, len(d) - 1, loc=np.mean(d), scale=st.sem(d)))
self.console.insert("end", "\n***************************")
self.console.insert("end", f"\nNumber of images loaded in imageSet object: {len(imageSet)}")
self.console.insert("end", "\n***************************")
self.console.insert("end", "\nExporting data")
annotations = []
"""
Export annotations.json file and individual cell images
Output annotations.json file - appropriate to be stored in root folder
List of dictionaries
{
"cell": FILENAME "o4pos.0.tif",
"path": ORIGINAL IMAGE PATH "data\\_6.17.16 Experiment 23 (E23) - CHPG differentiation Rep1 (4 days) - (from Scope folder)\\Original & composite ICC images + cell counter files\\1) 230 uM - new=done\\field_1",
"imgFile": ORIGINAL IMAGE FILENAME "field 1_MMStack.ome.tif",
"markerFile": XML MARKER FILE "newCellCounter_composite.xml",
"cellIndex": INT REFERRING TO CELL 4,
"centroid": [X,Y coordinates] [
1061.7179487179487,
70.58974358974359
],
"classification": set by processFolderForModelTraining.py 0 = O4-, 1 = O4+ 1,
"annotation": set by manualAnnotation.py -1 BAD, 0 O4-, 1 O4+
}
"""
"""
Export cell_indices.json file
Not sure the point of cell_indices.json - disabled for now
# filename = 'cell_indices.json'
# try:
# with open(filename, 'r') as f:
# indices = json.load(f)
# self.console.insert("end", f"\nLoading... {indices}")
# o4neg_cell_index = int(indices['o4neg_cell_index'])
# o4pos_cell_index = int(indices['o4pos_cell_index'])
# except:
"""
cell_index = 0
# reset progress bar
self.progress["value"] = 0
self.progress.update()
# For each image, iterate over each cell, update the annotations list object and export image for model training
for sCI in imageSet:
# increment progress bar
self.progress["value"] += progress_increment
self.progress.update()
# retrieve and count each classification
# values, counts = np.unique(sCI.centroids_classification, return_counts=True)
# self.console.insert("end", values)
# self.console.insert("end", counts) # sometimes only two values returned. which causes next code to fail
# total_cellImages += counts[1]
# if len(counts) == 3:
# total_cellImages += counts[2]
if sCI.centroids_classification is None:
sCI.centroids_classification = np.zeros(sCI.centroids.shape[0]) # set all to O4-
for i in range(len(sCI.cells)):
if isinstance(sCI.cells[i], int):
# not a valid cell, skipping...
continue
# Are there markers classified for this image?
if sCI.centroids_classification[i] == -1:
filename = 'unknown.{}.tif'.format(str(cell_index))
cell_index += 1
elif int(sCI.centroids_classification[i]) == 0:
filename = 'o4neg.{}.tif'.format(str(cell_index))
cell_index += 1
elif int(sCI.centroids_classification[i]) == 1:
filename = 'o4pos.{}.tif'.format(str(cell_index))
cell_index += 1
else:
raise
annotations.append({
'cell': filename,
'path': sCI.path,
'imgFile': sCI.imgFile,
'markerFile': sCI.markerFile,
'cellIndex': i,
'centroid': list(sCI.centroids[i, :]),
'classification': int(sCI.centroids_classification[i])
})
if export:
sCI.saveCellImg(i, os.path.join(os.path.join(root, 'keras'), filename))
self.console.insert("end", f"\nTotal cells: {cell_index}")
# self.console.insert("end", f"\nNew Total O4+: {o4pos_cell_index}. New Total O4-: {o4neg_cell_index}.")
filename = fullPath(root, 'annotations.json')
with open(filename, 'w') as f:
json.dump(annotations, f)
"""
Disabled for now.
if export:
export cell_indices.json file
filename = os.path.join(os.path.join(settings.kerasFolder, 'output'), 'cell_indices.json')
indices = {
'o4pos_cell_index': o4pos_cell_index,
'o4neg_cell_index': o4neg_cell_index
}
with open(filename, 'w') as f:
json.dump(indices, f)
"""
self.console.insert("end", "\nAll Done.")
# Starts application.
root = tk.Tk()
root.geometry('+100+100')
root.resizable(width=False, height=False)
app = Application(master=root)
app.mainloop()
javabridge.kill_vm()