Skip to content

Commit

Permalink
gps proximity data operational, minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
PJDude committed Jan 6, 2025
1 parent 5c6b5d3 commit 8653b2a
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 43 deletions.
63 changes: 22 additions & 41 deletions src/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,9 +740,11 @@ def my_hash_combo(file,hash_size):
curr_res = result_dict[index_tuple]=[0,0,None,None]

try:
#file = None
file = image_open(fullpath)

if use_size_pixels:

width = file.width
height = file.height

Expand All @@ -761,13 +763,14 @@ def my_hash_combo(file,hash_size):
try:
with open(fullpath, 'rb') as image_file:
curr_res[2] = self_get_gps_data(exifread_process_file(image_file)) #brak danych rowniez cache'ujemy
#curr_res[2]=gps_data
except Exception as e_gps:
print('images_processing_in_thread error:',e_gps)
#continue

continue

if not file:
file = image_open(fullpath)

if file.mode != 'RGBA':
file = file.convert("RGBA")

Expand Down Expand Up @@ -869,8 +872,14 @@ def images_processing(self,operation_mode,hash_size,all_rotations,image_min_size
self_scan_results_image_to_gps[(dev,inode,mtime)] = val
#print('setting_1:',dev,inode,mtime,val)

images_quantity_cache_read+=1
size_from_cache += size

continue

images_quantity_need_to_calculate += 1
size_to_calculate += size

images_processing_threads_source[thread_index][(pathnr,path,file_name,mtime,ctime,dev,inode,size)]=self_get_full_path_to_scan(pathnr,path,file_name)
thread_index = (thread_index+1) % max_threads

Expand Down Expand Up @@ -899,7 +908,7 @@ def images_processing(self,operation_mode,hash_size,all_rotations,image_min_size
self.info_total = images_quantity_cache_read + images_quantity_need_to_calculate
self.sum_size = size_from_cache + size_to_calculate

self.info = self.info_line = 'gathering images gps data ...' if gps_mode else 'Images hashing ...'
self.info = self.info_line = 'Gathering images gps data ...' if gps_mode else 'Images hashing ...'

for i in range(max_threads):
images_processing_threads[i].start()
Expand Down Expand Up @@ -986,6 +995,7 @@ def similarity_clustering(self,hash_size,distance,all_rotations):
t0=perf_counter()
self.log.info(f'start DBSCAN')
labels = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1,metric='manhattan',algorithm='kd_tree').fit(pool).labels_

t1=perf_counter()
self.log.info(f'DBSCAN end. Time:{t1-t0}')

Expand Down Expand Up @@ -1040,7 +1050,7 @@ def similarity_clustering(self,hash_size,distance,all_rotations):

def gps_clustering(self,distance):
from sklearn.cluster import DBSCAN
from numpy import array as numpy_array
from numpy import array as numpy_array, radians

self.scanned_paths=self.paths_to_scan.copy()

Expand All @@ -1050,35 +1060,30 @@ def gps_clustering(self,distance):
self.info_line = self.info = 'Preparing data pool ...'
#pathnr,path,file_name,ctime,dev,inode,size,

#self_scan_results_images_add( (path_nr,subpath,entry.name,st_mtime_ns,st_ctime_ns,st_dev,st_ino,size) )
#self_scan_results_image_to_gps[stat_res.st_dev,stat_res.st_ino] = gps_data

self_scan_results_images = self.scan_results_images
#print(f'{self_scan_results_images=}')
self_scan_results_image_to_gps = self.scan_results_image_to_gps
#print(f'{self_scan_results_image_to_gps=}')

for (path_nr,subpath,name,mtime,ctime,dev,ino,size) in sorted(self.scan_results_images, key=lambda x :[6],reverse = True) :
dict_key = (dev,ino,mtime)
if dict_key in self_scan_results_image_to_gps:
pool.append( numpy_array(self_scan_results_image_to_gps[dict_key] ) )
pool.append( radians(numpy_array(self_scan_results_image_to_gps[dict_key] ) ) )
keys.append( (path_nr,subpath,name,ctime,dev,ino,size) )

self_files_of_images_groups = self.files_of_images_groups = {}

if pool:
de_norm_distance = 0.0000001*(10**distance)+0.00000000001
#epsilon_km = 0.5 # Maksymalna odległość między punktami w kilometrach
#epsilon = epsilon_km / 6371.0 # Zamiana na radiany (6371 km = promień Ziemi)
epsilon = (0.001 + distance) / 6371.0 # Zamiana na radiany (6371 km = promień Ziemi)

if pool:
self.info_line = self.info = 'Clustering ...'

t0=perf_counter()
self.log.info(f'start DBSCAN')
#labels = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1,metric='euclidean',algorithm='auto').fit(pool).labels_
labels = DBSCAN(eps=de_norm_distance, min_samples=2,n_jobs=-1,metric='manhattan',algorithm='kd_tree').fit(pool).labels_
labels = DBSCAN(eps=epsilon, min_samples=2,n_jobs=-1,metric='haversine',algorithm='ball_tree').fit(pool).labels_
t1=perf_counter()
self.log.info(f'DBSCAN end. Time:{t1-t0}')

#with rotation variants
groups_dict = defaultdict(set)

self.info_line = self.info = 'Separating groups ...'
Expand All @@ -1093,34 +1098,10 @@ def gps_clustering(self,distance):
##############################################
groups_sorted_by_quantity = [ label for label,number in sorted(groups_sorted_by_quantity_dict.items(),key=lambda x : x[1], reverse=True) ]

#kazdy plik tylko raz
self.info_line = self.info = 'Pruning "multiple rotations" data ...'

#files_already_in_group=set()
#files_already_in_group_add = files_already_in_group.add

pruned_groups_dict = defaultdict(set)
for label in groups_sorted_by_quantity:
#print(f'{label=}',type(label))
for key in groups_dict[label]:
#print(f' {key=}')

(pathnr,path,file_name,ctime,dev,inode,size) = key
#file_key = (dev,inode)
#key_without_rotation = (pathnr,path,file_name,ctime,dev,inode,size)

pruned_groups_dict[label].add(key)
#if file_key not in files_already_in_group:
#files_already_in_group_add(file_key)
#else:
#print('pruning file',path,file_name,rotation)

##############################################

groups_digits=len(str(len(pruned_groups_dict)))
groups_digits=len(str(len(groups_dict)))

relabel_nr=0
for label,keys in sorted(pruned_groups_dict.items(), key = lambda x : max([y[6] for y in x[1]]),reverse=True ):
for label,keys in sorted(groups_dict.items(), key = lambda x : max([y[6] for y in x[1]]),reverse=True ):
if len(keys)>1:
self_files_of_images_groups[f'G{str(relabel_nr).zfill(groups_digits)}'] = keys
relabel_nr+=1
Expand Down
43 changes: 41 additions & 2 deletions src/dude.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,8 @@ def __init__(self,cwd,paths_to_add=None,exclude=None,exclude_regexp=None,norun=N
self_main_unbind_class('Treeview', '<Double-Button-1>')

self_main_bind_class('Treeview','<KeyPress>', self.key_press )
self_main_bind_class('Treeview','<KeyRelease>', self.key_release )

self_main_bind_class('Treeview','<ButtonPress-3>', self.context_menu_show)

self.groups_tree=Treeview(frame_groups,takefocus=True,selectmode='none',show=('tree','headings') )
Expand Down Expand Up @@ -1873,7 +1875,7 @@ def get_settings_dialog(self):

def lang_change(self,event):
self.cfg.set(CFG_lang,self.lang_var.get())
self.get_info_dialog_on_main().show(STR('Language Changed'),STR('Restart required.') )
self.get_info_dialog_on_settings().show(STR('Language Changed'),STR('Restart required.') )

info_dialog_on_main_created = False
@restore_status_line
Expand All @@ -1888,6 +1890,19 @@ def get_info_dialog_on_main(self):

return self.info_dialog_on_main

info_dialog_on_settings_created = False
@restore_status_line
@block
def get_info_dialog_on_settings(self):
    """Return the settings-level info dialog, building it on first use.

    The dialog is created once from ``self.settings_dialog.widget`` and then
    cached on the instance; ``info_dialog_on_settings_created`` records
    whether that has already happened.
    """
    # Fast path: the dialog was already built by an earlier call.
    if self.info_dialog_on_settings_created:
        return self.info_dialog_on_settings

    self.status(STR("Creating dialog ..."))

    self.info_dialog_on_settings = LabelDialog(
        self.settings_dialog.widget,
        self.main_icon_tuple,
        self.bg_color,
        pre_show=self.pre_show,
        post_close=self.post_close,
    )

    # Flip the flag only after construction succeeded, so a failed build
    # is retried on the next call.
    self.info_dialog_on_settings_created = True

    return self.info_dialog_on_settings

text_ask_dialog_created = False
@restore_status_line
@block
Expand Down Expand Up @@ -2711,6 +2726,26 @@ def goto_first_last_dir_entry(self,index):
self.folder_tree_see(next_item)
self.folder_tree.update()


@catched
def key_release(self,event):
    """Scroll children of the focused row into view after Page Down is released.

    Args:
        event: key event; ``event.widget`` is the tree that received the
            key and ``event.keysym`` is the symbolic key name.

    Only the ``Next`` keysym (Tk's name for Page Down) is handled. When the
    focused item has children, the third child is made visible, or the last
    child if there are fewer than three. Any other key is ignored.
    """
    try:
        tree, key = event.widget, event.keysym

        # Compare for equality: ``key in ("Next")`` is a substring test on
        # the string "Next", so it also matched "N", "e", "x", "t" and "".
        if key != "Next":
            return

        children = tree.get_children(tree.focus())

        if len(children) >= 3:
            tree.see(children[2])
        elif children:
            tree.see(children[-1])
    except Exception:
        # Deliberate best effort: a failed scroll adjustment must never
        # break key handling, so errors are swallowed here.
        pass

@catched
def key_press(self,event):
if not self.block_processing_stack:
Expand Down Expand Up @@ -3997,7 +4032,7 @@ def scan(self):
self_progress_dialog_on_scan_lab[4].configure(image='',text='')


self_progress_dialog_on_scan.widget.title(STR('Images hashing'))
self_progress_dialog_on_scan.widget.title(STR('Images hashing') if operation_mode == MODE_SIMILARITY else STR('GPS data extraction') )

self_status(STR('Starting Images hashing ...'))

Expand All @@ -4013,6 +4048,7 @@ def scan(self):
fnumber_dude_core_info_counter_images = fnumber(dude_core.info_counter_images)

aborted=False

while ih_thread_is_alive():
anything_changed=False

Expand All @@ -4039,6 +4075,9 @@ def scan(self):
self_progress_dialog_on_scan_lab[3].configure(image='',text=STR('Aborted'))
self_progress_dialog_on_scan.abort_button.configure(state='disabled',text='',image='')

#if anything_changed:
# self_progress_dialog_on_scan_area_main_update = self_progress_dialog_on_scan.area_main.update

self_progress_dialog_on_scan_lab[0].configure(image=self_get_hg_ico(),text='')

self_status(dude_core.info)
Expand Down
2 changes: 2 additions & 0 deletions src/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ class LANGUAGES:
"pl":'Przerwano'},
'Images hashing':{
"pl":'Hashowanie obrazów'},
'GPS data extraction':{
"pl":'Wydobywanie danych GPS'},
'Starting Images hashing ...':{
"pl":'Rozpoczęcie hashowania obrazów ...'},
'Data clustering':{
Expand Down

0 comments on commit 8653b2a

Please sign in to comment.