Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code to Speed Up Image Generation Using Parallelization #16

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion codes/ecg-image-generator/environment_droplet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,7 @@ dependencies:
- seaborn==0.12.2
- validators==0.18.2
- spacy==3.0.8
- https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz
- https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz
- tqdm
- pyyaml
- qrcode==7.4.2
6 changes: 5 additions & 1 deletion codes/ecg-image-generator/gen_ecg_image_from_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,11 @@ def run_single_file(args):
else:
json_dict = None
if(args.fully_random):
hw_text = random.choice((True,False))
if not args.hw_text:
hw_text = False
else:
hw_text = random.choice((True,False))

wrinkles = random.choice((True,False))
augment = random.choice((True,False))
else:
Expand Down
26 changes: 26 additions & 0 deletions codes/ecg-image-generator/gen_ecg_images_from_data_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from helper_functions import find_records
from gen_ecg_image_from_data import run_single_file
import warnings
import time

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore")
Expand Down Expand Up @@ -92,7 +93,12 @@ def run(args):
i = 0
full_header_files, full_recording_files = find_records(args.input_directory, original_output_dir)

# If fully_random is True, then set hw_text to True
if args.fully_random:
args.hw_text = True

for full_header_file, full_recording_file in zip(full_header_files, full_recording_files):
print(f"{i}/{len(full_header_files)}")
filename = full_recording_file
header = full_header_file
args.input_file = os.path.join(args.input_directory, filename)
Expand All @@ -109,7 +115,27 @@ def run(args):
break

if __name__=='__main__':
    # Parse the command line up front: invalid arguments then fail before any
    # work is done, and the timing log below can use the parsed values instead
    # of assuming that "-i <dir> -o <dir>" are the first four argv entries.
    args = get_parser().parse_args(sys.argv[1:])
    input_label = args.input_directory
    output_label = args.output_directory

    start_time = time.perf_counter()

    # Run from the directory that contains this script.
    script_path = os.path.join(os.getcwd(), sys.argv[0])
    os.chdir(os.path.dirname(script_path))

    run(args)

    # Wall-clock duration of the whole batch run, in seconds.
    execution_time = time.perf_counter() - start_time

    # The timing log lives next to the script (the current working directory
    # after the chdir above) and is appended to, one line per run.
    output_file = os.path.join(os.getcwd(), "execution_time.txt")
    with open(output_file, "a") as f:
        f.write(f"Execution time for {input_label} to {output_label}: {execution_time} seconds\n")

    print(f"Execution time: {execution_time} seconds")
    print(f"Execution time written to {output_file}")
151 changes: 151 additions & 0 deletions codes/ecg-image-generator/gen_ecg_images_from_data_batch_parallel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import os, sys, argparse
import random
import csv
from helper_functions import find_records
from gen_ecg_image_from_data import run_single_file
import warnings
from multiprocessing import Pool
import time
from tqdm import tqdm

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
warnings.filterwarnings("ignore")

def get_parser():
    """Build the command-line parser for the parallel batch image generator.

    The parsed namespace is passed to ``run`` and, per record, on to
    ``run_single_file``.

    Returns:
        argparse.ArgumentParser: parser with ``-i/--input_directory`` and
        ``-o/--output_directory`` required and every other option optional.
    """

    def positive_int(value):
        # Reject 0, negative and non-numeric worker counts at parse time
        # instead of letting multiprocessing.Pool fail later with ValueError.
        try:
            number = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid positive int value: {value!r}")
        if number < 1:
            raise argparse.ArgumentTypeError(f"must be at least 1, got {number}")
        return number

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_directory', type=str, required=True)
    parser.add_argument('-o', '--output_directory', type=str, required=True)
    parser.add_argument('-se', '--seed', type=int, required=False, default=-1)
    parser.add_argument('--num_leads', type=str, default='twelve')
    parser.add_argument('--max_num_images', type=int, default=-1)
    parser.add_argument('--config_file', type=str, default='config.yaml')

    parser.add_argument('-r', '--resolution', type=int, required=False, default=200)
    parser.add_argument('--pad_inches', type=int, required=False, default=0)
    parser.add_argument('-ph', '--print_header', action="store_true", default=False)
    parser.add_argument('--num_columns', type=int, default=-1)
    parser.add_argument('--full_mode', type=str, default='II')
    parser.add_argument('--mask_unplotted_samples', action="store_true", default=False)
    parser.add_argument('--add_qr_code', action="store_true", default=False)

    parser.add_argument('-l', '--link', type=str, required=False, default='')
    parser.add_argument('-n', '--num_words', type=int, required=False, default=5)
    parser.add_argument('--x_offset', dest='x_offset', type=int, default=30)
    parser.add_argument('--y_offset', dest='y_offset', type=int, default=30)
    parser.add_argument('--hws', dest='handwriting_size_factor', type=float, default=0.2)

    parser.add_argument('-ca', '--crease_angle', type=int, default=90)
    parser.add_argument('-nv', '--num_creases_vertically', type=int, default=10)
    parser.add_argument('-nh', '--num_creases_horizontally', type=int, default=10)

    parser.add_argument('-rot', '--rotate', type=int, default=0)
    parser.add_argument('-noise', '--noise', type=int, default=50)
    parser.add_argument('-c', '--crop', type=float, default=0.01)
    parser.add_argument('-t', '--temperature', type=int, default=40000)

    parser.add_argument('--random_resolution', action="store_true", default=False)
    parser.add_argument('--random_padding', action="store_true", default=False)
    parser.add_argument('--random_grid_color', action="store_true", default=False)
    parser.add_argument('--standard_grid_color', type=int, default=5)
    parser.add_argument('--calibration_pulse', type=float, default=1)
    parser.add_argument('--random_grid_present', type=float, default=1)
    parser.add_argument('--random_print_header', type=float, default=0)
    parser.add_argument('--random_bw', type=float, default=0)
    # NOTE: store_false with default=True -- passing the flag sets the value to False.
    parser.add_argument('--remove_lead_names', action="store_false", default=True)
    parser.add_argument('--lead_name_bbox', action="store_true", default=False)
    # A bare "--store_config" yields 1; omitting it yields 0.
    parser.add_argument('--store_config', type=int, nargs='?', const=1, default=0)

    parser.add_argument('--deterministic_offset', action="store_true", default=False)
    parser.add_argument('--deterministic_num_words', action="store_true", default=False)
    parser.add_argument('--deterministic_hw_size', action="store_true", default=False)

    parser.add_argument('--deterministic_angle', action="store_true", default=False)
    parser.add_argument('--deterministic_vertical', action="store_true", default=False)
    parser.add_argument('--deterministic_horizontal', action="store_true", default=False)

    parser.add_argument('--deterministic_rot', action="store_true", default=False)
    parser.add_argument('--deterministic_noise', action="store_true", default=False)
    parser.add_argument('--deterministic_crop', action="store_true", default=False)
    parser.add_argument('--deterministic_temp', action="store_true", default=False)

    parser.add_argument('--fully_random', action='store_true', default=False)
    parser.add_argument('--hw_text', action='store_true', default=False)
    parser.add_argument('--wrinkles', action='store_true', default=False)
    parser.add_argument('--augment', action='store_true', default=False)
    parser.add_argument('--lead_bbox', action='store_true', default=False)
    parser.add_argument('--store_mask', action='store_true', default=False)
    parser.add_argument('--cpu_count', type=positive_int, default=os.cpu_count(),
                        help='number of worker processes (default: all available CPU cores)')

    return parser

def run_single_file_wrapper(args_tuple):
    """Fill in the per-record arguments and process one record.

    Takes a single tuple so it can be used directly with ``Pool.imap_unordered``.

    Args:
        args_tuple: ``(args, full_header_file, full_recording_file)`` where
            ``args`` is the parsed namespace (it must already carry
            ``input_directory`` and ``original_output_dir``) and the two file
            names are joined onto ``args.input_directory``.

    Returns:
        Whatever ``run_single_file(args)`` returns.

    Note:
        ``args`` is modified in place (``input_file``, ``header_file``,
        ``start_index``, ``output_directory`` and ``encoding`` are set).
    """
    args, full_header_file, full_recording_file = args_tuple

    args.input_file = os.path.join(args.input_directory, full_recording_file)
    args.header_file = os.path.join(args.input_directory, full_header_file)
    args.start_index = -1

    # Reproduce the record's sub-folder layout under the output root.
    # os.path.dirname understands the platform's path separators, unlike a
    # manual split on '/'.
    args.output_directory = os.path.join(args.original_output_dir,
                                         os.path.dirname(full_header_file))

    # Recording file name without its folder and extension.
    args.encoding = os.path.basename(os.path.splitext(full_recording_file)[0])

    return run_single_file(args)

def run(args):
    """Process every record found in the input directory using a worker pool.

    Args:
        args: parsed command-line namespace from ``get_parser``. It is modified
            in place: ``input_directory`` is made absolute, ``original_output_dir``
            is set to the absolute output root, and ``hw_text`` is forced to False.

    Raises:
        Exception: if ``args.input_directory`` is not an existing directory.
    """
    random.seed(args.seed)

    # Relative paths are resolved against the current working directory.
    if not os.path.isabs(args.input_directory):
        args.input_directory = os.path.normpath(os.path.join(os.getcwd(), args.input_directory))
    if not os.path.isabs(args.output_directory):
        args.original_output_dir = os.path.normpath(os.path.join(os.getcwd(), args.output_directory))
    else:
        args.original_output_dir = args.output_directory

    # isdir() is already False for a path that does not exist.
    if not os.path.isdir(args.input_directory):
        raise Exception("The input directory does not exist, Please re-check the input arguments!")

    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(args.original_output_dir, exist_ok=True)

    full_header_files, full_recording_files = find_records(args.input_directory, args.original_output_dir)

    # Ensure this argument is always False for this script otherwise it will crash.
    # Tell the user instead of dropping the flag silently.
    if args.hw_text:
        print("--hw_text is not supported by the parallel script and will be ignored.", file=sys.stderr)
    args.hw_text = False

    # NOTE(review): this function never reads args.max_num_images; confirm
    # whether the limit is meant to be enforced for the parallel run.

    # One task per (header, recording) pair.
    args_list = [(args, header_file, recording_file)
                 for header_file, recording_file in zip(full_header_files, full_recording_files)]

    # Pool size comes from --cpu_count.
    with Pool(processes=args.cpu_count) as pool:
        # Drain the iterator so every task runs; tqdm shows a progress bar
        # that advances as tasks finish (in completion order).
        for _ in tqdm(pool.imap_unordered(run_single_file_wrapper, args_list), total=len(args_list)):
            pass

if __name__=='__main__':
    # Parse the command line up front: invalid arguments then fail before any
    # work is done, and the timing log below can use the parsed values instead
    # of assuming that "-i <dir> -o <dir>" are the first four argv entries.
    args = get_parser().parse_args(sys.argv[1:])
    input_label = args.input_directory
    output_label = args.output_directory

    start_time = time.perf_counter()

    # Run from the directory that contains this script.
    script_path = os.path.join(os.getcwd(), sys.argv[0])
    os.chdir(os.path.dirname(script_path))

    run(args)

    # Wall-clock duration of the whole parallel run, in seconds.
    execution_time = time.perf_counter() - start_time

    # The timing log lives next to the script (the current working directory
    # after the chdir above) and is appended to, one line per run.
    output_file = os.path.join(os.getcwd(), "execution_time.txt")
    with open(output_file, "a") as f:
        f.write(f"Execution time for {input_label} to {output_label}: {execution_time} seconds\n")

    print(f"Execution time: {execution_time} seconds")
    print(f"Execution time written to {output_file}")
39 changes: 39 additions & 0 deletions codes/ecg-image-generator/image_gen_experiment_instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# NOTES BEFORE RUNNING:
- Replace `<input_path>` and `<output_path>` below with the input and output directories, respectively.
- **To compare the two scripts as fairly as possible, comment out the two statements `if args.fully_random:` and `args.hw_text = True` in the `run` function of `gen_ecg_images_from_data_batch.py` (lines 97 and 98 at the time of writing) to prevent handwritten text!**
- To change the number of worker processes used by the new script, add `--cpu_count <your_number>` to its command; otherwise all available CPU cores will be used.


# Command for the old script
```bash
python codes/ecg-image-generator/gen_ecg_images_from_data_batch.py \
-i <input_path> \
-o <output_path> \
--store_config 1 \
--lead_name_bbox \
--lead_bbox \
--augment \
--hw_text \
-rot 20 \
--random_grid_color \
--fully_random \
--mask_unplotted_samples \
--print_header
```

# Command for the new script
```bash
python codes/ecg-image-generator/gen_ecg_images_from_data_batch_parallel.py \
-i <input_path> \
-o <output_path> \
--store_config 1 \
--lead_name_bbox \
--lead_bbox \
--augment \
-rot 20 \
--random_grid_color \
--fully_random \
--mask_unplotted_samples \
--print_header
```
4 changes: 3 additions & 1 deletion codes/ecg-image-generator/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ numpy==1.26.4
pandas==2.2.2
wfdb==4.1.2
pyyaml
qrcode==7.4.2
qrcode==7.4.2
tqdm
https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz