Skip to content

Commit

Permalink
New dict and making merging a little easier.
Browse files: browse the repository at this point in the history
  • Loading branch information
AG committed Apr 22, 2024
1 parent d208d80 commit 5716097
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 2 deletions.
Binary file modified backup/dictionary.pkl
Binary file not shown.
2 changes: 1 addition & 1 deletion backup/processing_progress.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-19885937,18351999999
+20392261,18823999999
Binary file modified dictionary.msgpack
Binary file not shown.
10 changes: 9 additions & 1 deletion lib/merge_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,23 @@ def finish_merge():

# Delete all files in training/processed_batches
shutil.rmtree('training/processed_batches', ignore_errors=True)
shutil.rmtree('training/copy_of_batches_being_processed_in_this_round', ignore_errors=True)
shutil.copy('training/dictionary.pkl', 'training/batches')

def main():
# If training/batches has more than one file, run the function with the first two files
os.makedirs('training/copy_of_batches_being_processed_in_this_round', exist_ok=True)
shutil.rmtree('training/batches_to_process', ignore_errors=True)
os.makedirs('training/batches_to_process', exist_ok=True)
threads = []

for file in os.listdir('training/batches'):
thread = threading.Thread(target=perform_file_operation, args=(f'training/batches/{file}', f'training/batches_to_process/{file}', 'copy'))
thread = threading.Thread(target=perform_file_operation, args=(f'training/batches/{file}', f'training/copy_of_batches_being_processed_in_this_round/{file}', 'move'))
threads.append(thread)
thread.start()

for file in os.listdir('training/copy_of_batches_being_processed_in_this_round'):
thread = threading.Thread(target=perform_file_operation, args=(f'training/copy_of_batches_being_processed_in_this_round/{file}', f'training/batches_to_process/{file}', 'copy'))
threads.append(thread)
thread.start()

Expand Down
Binary file modified tokens.msgpack
Binary file not shown.

0 comments on commit 5716097

Please sign in to comment.