forked from Yaroslavus/manticore_3.0
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmanticore_preprocessing.py
425 lines (376 loc) · 22 KB
/
manticore_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 20 23:45:35 2020
@author: yaroslav
"""
import manticore_tools as tools
from manticore_tools import system_exit as system_exit # Предлагаю делать так только с очевидними функциями
import os
EVENT_FILTER = 0
TOTAL_DICT_OF_DAYS_FILE = ".total_dict_of_days.txt"
# =============================================================================
#
# =============================================================================
#TODO: move this function to the top of the module, i.e. define it here first.
def fill_the_summary_files(start_time):
    """Fill the final summary files with events named (tail).sum.

    Reads TOTAL_DICT_OF_DAYS_FILE, which lists, for every day directory,
    the number of tails followed by one "tail min_event max_event" record
    per tail.  For every day it then builds the per-tail summary files
    (via create_summary_file_for_tail) and finally merges the produced
    .list files into one global file per day (via merge_list_files).

    Each tail.sum file ends up containing full information about every
    event of the two minutes corresponding to that tail: number,
    amplitudes of every BSM, the time of the event in every BSM, and the
    trigger- and ignore-status of every channel in every BSM.

    Parameters
    ----------
    start_time : reference time passed through to tools.time_check and
        tools.syprogressbar for progress reporting.
    """
    print("Start fill_the_summary_files...")
    dict_of_days = {}
    with open(TOTAL_DICT_OF_DAYS_FILE, "r") as total_dict_of_days_file:
        day = total_dict_of_days_file.readline()
        while day:
            day = tools.check_and_cut_the_tail(day)
            number_of_tails = int(total_dict_of_days_file.readline())
            # BUGFIX: this dict must be created once per day, not once per
            # tail record -- previously only the last tail of each day
            # survived the loop below.
            dict_of_tails_of_the_day = {}
            for _ in range(number_of_tails):
                current_tail_record = total_dict_of_days_file.readline().split()
                dict_of_tails_of_the_day[current_tail_record[0]] = [
                    int(current_tail_record[1]), int(current_tail_record[2])]
            dict_of_days[day] = dict_of_tails_of_the_day
            day = total_dict_of_days_file.readline()
    print("The summary files of events are fillng by data...")
    day_counter = 0
    for day_directory, tail_dict in dict_of_days.items():
        # BUGFIX: both parallel lists are rebuilt for every day.
        # Previously list_of_tails kept growing across days while
        # tail_max_min_list was reset, so the two lists went out of sync
        # (wrong ranges / IndexError) from the second day on.
        list_of_tails = []
        tail_max_min_list = []  # TODO more correct name: tail_min_max_list
        # The BSM list depends only on the day directory, so it is
        # computed once per day instead of once per tail as before.
        list_of_BSM = tools.directory_objects_parser(
            day_directory, tools.BSM_REGULAR_PATTERN).split()
        print("list_of_BSM=", list_of_BSM)
        for tail_index, (tail, max_min_list) in enumerate(tail_dict.items()):
            list_of_tails.append(tail)
            tail_max_min_list.append(max_min_list)
            print("{}:{} list_of_tails={}".format(day_counter, tail_index, list_of_tails))
            print("{}:{} tail_max_min_list={}".format(day_counter, tail_index, tail_max_min_list))
        tails_counter = 0
        # BUGFIX: the loop variable was `i`, which clobbered the outer
        # day counter of the same name.
        for k in range(len(list_of_tails)):
            print("The {} is analizyng...".format(list_of_tails[k]))
            tails_counter += 1
            create_summary_file_for_tail(list_of_tails[k], tail_max_min_list[k],
                                         start_time, list_of_BSM, day_directory,
                                         tails_counter, list_of_tails)
        print("The summary files for {} have been created".format(day_directory))
        print(tools.time_check(start_time))
        print("Merging .list files into one...")
        merge_list_files(day_directory)
        print("Global .list file has been created.")
        day_counter += 1
    print(tools.time_check(start_time))
    print("End fill_the_summary_files...")
# =============================================================================
#
# =============================================================================
# TODO: an identical function exists in decoding! It would be sensible to move it from both places into tools.
def set_of_days(files_list):
    """Collect the full day-directory paths of every day present in files_list.

    Each entry is first cleaned via tools.check_and_cut_the_tail, then
    its last 18 characters (presumably the file-name part -- TODO confirm
    against the data tree layout) are dropped, leaving the day directory.
    If only one day was preprocessed, the set contains just that day.
    """
    days_set = {tools.check_and_cut_the_tail(entry)[:-18] for entry in files_list}
    print("Set of days have been created.")
    return days_set
# =============================================================================
#
# =============================================================================
#TODO
def set_of_tails(files_list, day):
    """Collect the tails of all files preprocessed in the directory of *day*.

    A file belongs to *day* when its path minus the last 19 characters
    equals the day path; the tail is taken from position 60 onward of the
    cleaned path.  NOTE(review): both offsets are hard-coded and assume a
    fixed absolute-path layout -- verify against the data tree.

    For example, if only files xxx.001 were preprocessed, the set contains
    the single item "001"; for a whole day it contains every tail from
    '001' up to the last one.
    """
    tails_set = {tools.check_and_cut_the_tail(entry)[60:]
                 for entry in files_list
                 if entry[:-19] == day}
    print("Set of tails have been created.")
    return tails_set
# =============================================================================
#
# =============================================================================
def list_of_tail_files(day_directory, list_of_BSM, tail):
    """Return one data-file path per BSM directory for the given tail.

    For every BSM under day_directory, the first file whose name matches
    tools.TAIL_FILE_REGULAR_PATTERN followed by the tail is selected.
    """
    print("Start list_of_tail_files...")
    pattern = tools.TAIL_FILE_REGULAR_PATTERN + tail
    tail_files = []
    for BSM in list_of_BSM:
        bsm_directory = "{}{}/".format(day_directory, BSM)
        first_match = tools.directory_objects_parser(bsm_directory, pattern).split()[0]
        tail_files.append(bsm_directory + first_match)
    print("End list_of_tail_files...")
    return tail_files
# =============================================================================
#
# =============================================================================
def clean_the_matrix_of_USER_NUMBER_cluster_events(day_directory, tail, matrix_of_events, min_event_number_in_tail, clean_status = 0):
    """Drop events in which no more than EVENT_FILTER clusters fired.

    While filtering, writes one "tail\\tevent_number\\tcoins" line per
    surviving event to a per-tail .list file whose suffix is selected by
    clean_status: 0 -> '_clean.list', 1 -> '_static.list',
    2 -> '_dynamic.list'.

    Returns
    -------
    tuple
        (filtered matrix of events, parallel list of absolute event
        numbers of the surviving events).

    Raises
    ------
    ValueError
        For any clean_status other than 0, 1 or 2.  (BUGFIX: previously
        an unknown status left the output path unbound and crashed later
        with UnboundLocalError.)
    """
    suffix_by_status = {0: '_clean.list', 1: '_static.list', 2: '_dynamic.list'}
    if clean_status not in suffix_by_status:
        raise ValueError("clean_status must be 0, 1 or 2, got {}".format(clean_status))
    out_tail_path = day_directory + tail + suffix_by_status[clean_status]
    empty_event = [''] * 22
    event_numbers_parallel_list = []
    new_matrix_of_events = [[''] * 22 for _ in range(len(matrix_of_events))]
    # Renamed the file handle so it no longer shadows the path variable.
    with open(out_tail_path, 'w+') as out_tail_file:
        for i, event in enumerate(matrix_of_events):
            not_empty_cell_counter = sum(1 for cell in event if cell != "")
            if not_empty_cell_counter > EVENT_FILTER:
                new_matrix_of_events[i] = event
                out_tail_file.write("{}\t{}\t{}\n".format(
                    tail, min_event_number_in_tail + i, not_empty_cell_counter))
                event_numbers_parallel_list.append(min_event_number_in_tail + i)
    return [value for value in new_matrix_of_events if value != empty_event], event_numbers_parallel_list
# =============================================================================
#
# =============================================================================
def print_statistics_for_matrix_of_events(matrix_of_events, stat_file):
    """Print and save the coincidence statistics for one tail of data.

    The matrix has the format M[event number][BSM], where each non-empty
    cell is the record string for that BSM in that event.  For every
    possible coincidence count (0 through 22 BSMs) the number of events
    with exactly that many non-empty cells is printed to stdout and
    written to *stat_file*.
    """
    coincidence_counts = [0] * 23
    for event_row in matrix_of_events:
        fired = sum(1 for cell in event_row if cell != '')
        coincidence_counts[fired] += 1
    with open(stat_file, 'w+') as out:
        for n_coins, n_events in enumerate(coincidence_counts):
            line = "coins: {}\tevents: {}\n".format(n_coins, n_events)
            print(line)
            out.write(line)
# =============================================================================
#
# =============================================================================
# TODO: passing `tail` to this function only for the progress-bar label seems like poor design.
def fill_the_matrix_of_events(matrix_of_events, tail_files, tail, tail_max_min_list, start_time, clean_status = 0):
    """Fill matrix_of_events[event_index][maroc_number] from binary amplitude files.

    For every path in tail_files, the corresponding temporary amplitude
    file (suffix selected by clean_status: 0 -> '.amp', 1 -> '.asp',
    2 -> '.adp') is read in fixed-size chunks; each chunk is one event
    record for one BSM.  The decoded record string
    "maroc_number\\ttime_string\\tamplitudes..." is stored at row
    (num_event - tail_max_min_list[0]) -- i.e. events are indexed relative
    to the tail's minimum event number -- and column maroc_number.

    tail and start_time are used only for the progress bar.
    Returns the (mutated in place) matrix_of_events.
    """
    print("\n\n\nStart fill_the_matrix_of_events...")
    chunk_size = 282
    tail_files_counter = 0
    for tail_file in tail_files:
        # Select the per-BSM temp file and its record size for this variant.
        # NOTE(review): only the '.amp' records are 281 bytes (one status
        # byte per channel instead of two) -- confirm against the writer.
        if clean_status == 0:
            print("Tail file {} clean amplitudes collecting...".format(tail_file))
            tail_file = tools.make_BSM_file_temp(tail_file) + '.amp'
            chunk_size = 281
        elif clean_status == 1:
            print("Tail file {} static amplitudes collecting...".format(tail_file))
            tail_file = tools.make_BSM_file_temp(tail_file) + '.asp'
            chunk_size = 282
        elif clean_status == 2:
            print("Tail file {} dynamic amplitudes collecting...".format(tail_file))
            tail_file = tools.make_BSM_file_temp(tail_file) + '.adp'
            chunk_size = 282
#        try:
        # Missing files are silently skipped; the progress bar still advances.
        if os.path.isfile(tail_file):
            with open(tail_file, 'rb') as tail_file:  # handle shadows the path string
                chunk = tail_file.read(chunk_size)
                chunk_counter = 0
                while chunk:
#                    try:
                    # Header: bytes 0-11 hold (?, ?, event number); bytes
                    # 12-19 the packed time; bytes 20-21 the MAROC number.
                    head_array = tools.unpacked_from_bytes('hhii', chunk[:12])
                    num_event = head_array[2]
                    maroc_number = tools.unpacked_from_bytes('h', chunk[20:22])[0]
                    time_array = tools.unpacked_from_bytes('hhhh', chunk[12:20])
                    # Bit-field decode of the event time spread over three
                    # 16-bit words: ns (in 10 ns units), microseconds,
                    # milliseconds, seconds, minutes, hours.
                    ns = (time_array[0] & 0x7f)*10
                    mks = (time_array[0] & 0xff80) >> 7
                    mks |= (time_array[1] & 1) << 9
                    mls = (time_array[1] & 0x7fe) >> 1
                    s = (time_array[1] & 0xf800) >> 11
                    s |= (time_array[2] & 1) << 5
                    m = (time_array[2] & 0x7e) >> 1
                    h = (time_array[2] & 0xf80) >> 7
                    time_string = "{}:{}:{}.{}.{}.{}".format(h, m, s, mls, mks, ns)
                    # Payload: 32 groups of float amplitude plus one ('fB')
                    # or two ('fBB') status bytes, between the 24-byte head
                    # and a 4-byte trailer.
                    if clean_status == 0:
                        result_array = tools.unpacked_from_bytes('fB'*32, chunk[24:-4])
                    elif clean_status in (1, 2):
                        result_array = tools.unpacked_from_bytes('fBB'*32, chunk[24:-4])
                    result_string_ampls = '\t'.join([str(x) for x in result_array])
                    matrix_of_events[num_event - tail_max_min_list[0]][maroc_number] =\
                        "{}\t{}\t{}".format(
                            maroc_number,
                            time_string,
                            result_string_ampls)
#                    except Exception:
#                        print("{} Chunk number {} in file {} is seems to be corrupted!".format(
#                            "AMPLITUDE FILE CHUNK RECORDING ERROR!",
#                            chunk_counter,
#                            tail_file))
                    chunk_counter += 1
                    chunk = tail_file.read(chunk_size)
        tail_files_counter += 1
        tools.syprogressbar(
            tail_files_counter,
            len(tail_files),
            u'\u24BB',
            "tail files {} amplitudes collecting".format(tail),
            start_time)
#        except Exception:
#            print("{} File {} is seems to be not existed!".format(
#                "AMPLITUDE FILE EXISTING ERROR!",
#                tail_file))
    print("\n\nEnd fill_the_matrix_of_events...")
    return matrix_of_events
# =============================================================================
#
# =============================================================================
# TODO ATTENTION this function does not work correctly for one/two file
def _print_deleted_fraction(before, after):
    """Report the percentage of events removed by the USER_NUMBER cleaning.

    BUGFIX: guards against an empty matrix, which previously raised
    ZeroDivisionError."""
    if before:
        print("DELETED {:.3f}% events".format((before - after) / before * 100))
    else:
        print("DELETED 0.000% events (matrix was empty before cleaning)")


def _write_out_file(out_path, matrix, event_numbers, tail, day_directory):
    """Write one .out file: a header line per event, followed by the 22
    per-BSM record strings and a blank separator line."""
    with open(out_path, 'w+') as out_tail_file:
        for event_index, event in enumerate(matrix):
            out_tail_file.write(
                "Event_number\t{}\tin_tail_files\t{}\tfor_the\t{}\n".format(
                    event_numbers[event_index],
                    tail, day_directory))
            for bsm_record in event:
                out_tail_file.write("{}\n".format(bsm_record))
            out_tail_file.write('\n')


# TODO ATTENTION this function does not work correctly for one/two file
def create_summary_file_for_tail(tail, tail_max_min_list, start_time,
                                 list_of_BSM, day_directory,
                                 tails_counter, list_of_tails):
    """Create the .out, .list and .stat files for one tail of one day.

    For each of the three amplitude variants -- static pedestals cleaning,
    dynamic pedestals cleaning, and clean amplitudes -- this function:
      1. fills an empty events matrix from the per-BSM amplitude files,
      2. removes events with too few cluster coincidences (which also
         produces the per-tail .list file),
      3. writes the surviving events to a per-tail .out file,
      4. writes the coincidence statistics to a per-tail .stat file.

    tail_max_min_list is [min_event_number, max_event_number] for this
    tail; tails_counter and list_of_tails only drive the progress bar.
    The previous implementation triplicated every step by hand; the
    variant loop below removes that duplication without changing the
    produced files.
    """
    print("\n\n\nStart create_summary_file_for_tail...")
    min_event_number_in_tail = tail_max_min_list[0]
    max_event_number_in_tail = tail_max_min_list[1]
    n_events = max_event_number_in_tail - min_event_number_in_tail + 1
    print("\nFiles list for tail {} from {} are creating...".format(tail, day_directory))
    tail_files = list_of_tail_files(day_directory, list_of_BSM, tail)
    print("tail_files=", tail_files)
    # clean_status codes understood by fill_the_matrix_of_events and
    # clean_the_matrix_of_USER_NUMBER_cluster_events.
    status_by_name = {"clean": 0, "static": 1, "dynamic": 2}
    label_by_name = {"clean": "clean amplitudes",
                     "static": "static pedestals cleaning",
                     "dynamic": "dynamic pedestals cleaning"}
    matrices = {}
    for name in ("static", "dynamic", "clean"):
        # One 22-column row (one cell per BSM) per event number in the tail.
        print("Event matrix with {} for tail {} from {} are creating...".format(
            label_by_name[name], tail, day_directory))
        matrices[name] = fill_the_matrix_of_events(
            [[''] * 22 for _ in range(n_events)],
            tail_files, tail, tail_max_min_list, start_time,
            status_by_name[name])
    print("\nMatrix for tail {} from {} are cleaning for less then USER_NUMBER-coincidences events...".format(tail, day_directory))
    cleaned = {}
    event_numbers = {}
    for name in ("static", "dynamic", "clean"):
        before_cleaning = len(matrices[name])
        cleaned[name], event_numbers[name] = \
            clean_the_matrix_of_USER_NUMBER_cluster_events(
                day_directory, tail, matrices[name],
                min_event_number_in_tail, status_by_name[name])
        _print_deleted_fraction(before_cleaning, len(cleaned[name]))
    for name in ("static", "dynamic", "clean"):
        print("Out file for {} amplitudes {} tail from {} are filling for user_number-coins...".format(
            name.capitalize(), tail, day_directory))
        _write_out_file(day_directory + tail + '_' + name + '.out',
                        cleaned[name], event_numbers[name], tail, day_directory)
    tools.syprogressbar(
        tails_counter,
        len(list_of_tails),
        u'\u24C9',
        "creating summary files for tails",
        start_time)
    for name in ("static", "dynamic", "clean"):
        print("Statistics for {} amplitudes for tail {} from {} are calculating...".format(
            name, tail, day_directory))
        print_statistics_for_matrix_of_events(
            matrices[name], day_directory + tail + '_' + name + '.stat')
    print("\n\n\nEnd create_summary_file_for_tail...")
# =============================================================================
#
# =============================================================================
def merge_list_files(day_directory):
    """Merge the per-tail .list files of a day into three summary files.

    Every file found by tools.LIST_FILE_PATTERN whose name contains
    "static", "dynamic" or "clean" is appended to the corresponding
    {static,dynamic,clean}_events_list.txt in day_directory, each of
    which starts with a "Tail\\tEvent_number\\tCoins" header line.

    The previous implementation repeated the copy loop three times and
    read each input via readlines() only to write it back line by line;
    a keyword->handle map and a single read remove that duplication.
    """
    list_files = tools.directory_objects_parser(day_directory, tools.LIST_FILE_PATTERN).split()
    with open(day_directory + "/static_events_list.txt", "w+") as static_out_file, \
         open(day_directory + "/dynamic_events_list.txt", "w+") as dynamic_out_file, \
         open(day_directory + "/clean_events_list.txt", "w+") as clean_out_file:
        out_by_keyword = {"static": static_out_file,
                          "dynamic": dynamic_out_file,
                          "clean": clean_out_file}
        for out_file in out_by_keyword.values():
            out_file.write("Tail\tEvent_number\tCoins\n")
        for file_name in list_files:
            # As before, a name matching several keywords is copied to
            # every matching output.
            for keyword, out_file in out_by_keyword.items():
                if keyword in file_name:
                    with open(day_directory + "/" + file_name, "r") as in_file:
                        out_file.write(in_file.read())
# =============================================================================
#
# =============================================================================