alphanumericslab · amaank123456 · Jul 16, 2024 · Jul 16, 2024 · Jul 17, 2024
diff --git a/codes/ecg-image-generator/environment_droplet.yml b/codes/ecg-image-generator/environment_droplet.yml
@@ -24,4 +24,7 @@ dependencies:
       - seaborn==0.12.2
       - validators==0.18.2
       - spacy==3.0.8
-      - https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.4.0/en_core_sci_sm-0.4.0.tar.gz
+      - https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz
+      - tqdm
+      - pyyaml
+      - qrcode==7.4.2
diff --git a/codes/ecg-image-generator/gen_ecg_image_from_data.py b/codes/ecg-image-generator/gen_ecg_image_from_data.py
@@ -140,7 +140,11 @@ def run_single_file(args):
             else:
                 json_dict = None
             if(args.fully_random):
-                hw_text = random.choice((True,False))
+                if not args.hw_text:
+                    hw_text = False
+                else:
+                    hw_text = random.choice((True,False))
+
                 wrinkles = random.choice((True,False))
                 augment = random.choice((True,False))
             else:

diff --git a/codes/ecg-image-generator/gen_ecg_images_from_data_batch.py b/codes/ecg-image-generator/gen_ecg_images_from_data_batch.py
@@ -4,6 +4,7 @@
 from helper_functions import find_records
 from gen_ecg_image_from_data import run_single_file
 import warnings
+import time
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
 warnings.filterwarnings("ignore")
@@ -92,7 +93,12 @@ def run(args):
         i = 0
         full_header_files, full_recording_files = find_records(args.input_directory, original_output_dir)
 
+        # If fully_random is True, then set hw_text to True
+        if args.fully_random:
+            args.hw_text = True
+
         for full_header_file, full_recording_file in zip(full_header_files, full_recording_files):
+            print(f"{i}/{len(full_header_files)}")
             filename = full_recording_file
             header = full_header_file
             args.input_file = os.path.join(args.input_directory, filename)
@@ -109,7 +115,27 @@ def run(args):
                 break
 
 if __name__=='__main__':
+    start_time = time.time()
     path = os.path.join(os.getcwd(), sys.argv[0])
     parentPath = os.path.dirname(path)
     os.chdir(parentPath)
-    run(get_parser().parse_args(sys.argv[1:]))
+    cli_args = get_parser().parse_args(sys.argv[1:])
+    # Read the directories from the parsed args (before run() mutates them), not from fixed sys.argv slots.
+    input_dir, output_dir = cli_args.input_directory, cli_args.output_directory
+    run(cli_args)
+
+    end_time = time.time()
+
+    # Calculate the execution time
+    execution_time = end_time - start_time
+
+    # Create the output file path in the current working directory
+    output_file = os.path.join(os.getcwd(), "execution_time.txt")
+
+    # Write the execution time to the file
+    with open(output_file, "a") as f:
+        f.write(f"Execution time for {input_dir} to  {output_dir}: {execution_time} seconds")
+        f.write("\n")
+
+    print(f"Execution time: {execution_time} seconds")
+    print(f"Execution time written to {output_file}")
diff --git a/codes/ecg-image-generator/gen_ecg_images_from_data_batch_parallel.py b/codes/ecg-image-generator/gen_ecg_images_from_data_batch_parallel.py
@@ -0,0 +1,151 @@
+import os, sys, argparse
+import random
+import csv
+from helper_functions import find_records
+from gen_ecg_image_from_data import run_single_file
+import warnings
+from multiprocessing import Pool
+import time
+from tqdm import tqdm
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
+warnings.filterwarnings("ignore")
+
+def get_parser():
+    """Build the argument parser for the parallel batch image generator."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("-i", "--input_directory", type=str, required=True)
+    cli.add_argument("-o", "--output_directory", type=str, required=True)
+    cli.add_argument("-se", "--seed", type=int, default=-1)
+    cli.add_argument("--num_leads", type=str, default="twelve")
+    cli.add_argument("--max_num_images", type=int, default=-1)
+    cli.add_argument("--config_file", type=str, default="config.yaml")
+    # Resolution, padding, header printing, column layout, masking and QR-code flags
+    cli.add_argument("-r", "--resolution", type=int, default=200)
+    cli.add_argument("--pad_inches", type=int, default=0)
+    cli.add_argument("-ph", "--print_header", action="store_true")
+    cli.add_argument("--num_columns", type=int, default=-1)
+    cli.add_argument("--full_mode", type=str, default="II")
+    cli.add_argument("--mask_unplotted_samples", action="store_true")
+    cli.add_argument("--add_qr_code", action="store_true")
+    # Link, word count, x/y offsets and handwriting size factor
+    cli.add_argument("-l", "--link", type=str, default="")
+    cli.add_argument("-n", "--num_words", type=int, default=5)
+    cli.add_argument("--x_offset", type=int, default=30)
+    cli.add_argument("--y_offset", type=int, default=30)
+    cli.add_argument("--hws", dest="handwriting_size_factor", type=float, default=0.2)
+    # Crease angle and crease counts
+    cli.add_argument("-ca", "--crease_angle", type=int, default=90)
+    cli.add_argument("-nv", "--num_creases_vertically", type=int, default=10)
+    cli.add_argument("-nh", "--num_creases_horizontally", type=int, default=10)
+    # Rotation, noise, crop and temperature values
+    cli.add_argument("-rot", "--rotate", type=int, default=0)
+    cli.add_argument("-noise", "--noise", type=int, default=50)
+    cli.add_argument("-c", "--crop", type=float, default=0.01)
+    cli.add_argument("-t", "--temperature", type=int, default=40000)
+    # Randomisation switches, grid/calibration values, lead-name and config flags
+    cli.add_argument("--random_resolution", action="store_true")
+    cli.add_argument("--random_padding", action="store_true")
+    cli.add_argument("--random_grid_color", action="store_true")
+    cli.add_argument("--standard_grid_color", type=int, default=5)
+    cli.add_argument("--calibration_pulse", type=float, default=1)
+    cli.add_argument("--random_grid_present", type=float, default=1)
+    cli.add_argument("--random_print_header", type=float, default=0)
+    cli.add_argument("--random_bw", type=float, default=0)
+    cli.add_argument("--remove_lead_names", action="store_false", default=True)
+    cli.add_argument("--lead_name_bbox", action="store_true")
+    cli.add_argument("--store_config", type=int, nargs="?", const=1, default=0)
+    # Deterministic switches: offset, word count, handwriting size
+    cli.add_argument("--deterministic_offset", action="store_true")
+    cli.add_argument("--deterministic_num_words", action="store_true")
+    cli.add_argument("--deterministic_hw_size", action="store_true")
+    # Deterministic switches: angle, vertical, horizontal
+    cli.add_argument("--deterministic_angle", action="store_true")
+    cli.add_argument("--deterministic_vertical", action="store_true")
+    cli.add_argument("--deterministic_horizontal", action="store_true")
+    # Deterministic switches: rotation, noise, crop, temperature
+    cli.add_argument("--deterministic_rot", action="store_true")
+    cli.add_argument("--deterministic_noise", action="store_true")
+    cli.add_argument("--deterministic_crop", action="store_true")
+    cli.add_argument("--deterministic_temp", action="store_true")
+    # Feature toggles, plus the number of worker processes handed to the pool
+    cli.add_argument("--fully_random", action="store_true")
+    cli.add_argument("--hw_text", action="store_true")
+    cli.add_argument("--wrinkles", action="store_true")
+    cli.add_argument("--augment", action="store_true")
+    cli.add_argument("--lead_bbox", action="store_true")
+    cli.add_argument("--store_mask", action="store_true")
+    cli.add_argument("--cpu_count", type=int, default=os.cpu_count())
+    return cli
+
+def run_single_file_wrapper(args_tuple):
+    """Fill in the per-record fields on args for one work item and run it."""
+    args, header_path, recording_path = args_tuple
+
+    # Per-record inputs, both joined onto the input directory.
+    args.input_file = os.path.join(args.input_directory, recording_path)
+    args.header_file = os.path.join(args.input_directory, header_path)
+    args.start_index = -1
+
+    # Everything before the header's last '/' becomes the sub-folder under the output root.
+    subfolders = header_path.rpartition('/')[0]
+    args.output_directory = os.path.join(args.original_output_dir, subfolders)
+
+    # The encoding is the recording's file name without directory or extension.
+    args.encoding = os.path.basename(os.path.splitext(recording_path)[0])
+    return run_single_file(args)
+
+def run(args):
+    """Generate images for all records under args.input_directory with a worker pool."""
+    # NOTE(review): this seeds the parent process only; confirm how workers should be seeded.
+    random.seed(args.seed)
+    # Relative paths are resolved against the current working directory.
+    if not os.path.isabs(args.input_directory):
+        args.input_directory = os.path.normpath(os.path.join(os.getcwd(), args.input_directory))
+    if not os.path.isabs(args.output_directory):
+        args.original_output_dir = os.path.normpath(os.path.join(os.getcwd(), args.output_directory))
+    else:
+        args.original_output_dir = args.output_directory
+
+    # isdir() is False for a missing path, so it also covers the existence check.
+    if not os.path.isdir(args.input_directory):
+        raise Exception("The input directory does not exist, Please re-check the input arguments!")
+    os.makedirs(args.original_output_dir, exist_ok=True)
+
+    full_header_files, full_recording_files = find_records(args.input_directory, args.original_output_dir)
+
+    # Ensure this argument is always False for this script otherwise it will crash
+    args.hw_text = False
+    # NOTE(review): args.max_num_images is parsed by get_parser() but never read here.
+    # One work item per record; zip pairs each header with its recording.
+    args_list = [(args, hdr, rec) for hdr, rec in zip(full_header_files, full_recording_files)]
+    # Results arrive in completion order and are drained only to advance the progress bar.
+    with Pool(processes=args.cpu_count) as pool:
+        for _ in tqdm(pool.imap_unordered(run_single_file_wrapper, args_list), total=len(args_list)):
+            pass
+
+if __name__=='__main__':
+    start_time = time.time()
+    path = os.path.join(os.getcwd(), sys.argv[0])
+    parentPath = os.path.dirname(path)
+    os.chdir(parentPath)
+    cli_args = get_parser().parse_args(sys.argv[1:])
+    # Read the directories from the parsed args (before run() mutates them), not from fixed sys.argv slots.
+    input_dir, output_dir = cli_args.input_directory, cli_args.output_directory
+    run(cli_args)
+
+    end_time = time.time()
+
+    # Calculate the execution time
+    execution_time = end_time - start_time
+
+    # Create the output file path in the current working directory
+    output_file = os.path.join(os.getcwd(), "execution_time.txt")
+
+    # Write the execution time to the file
+    with open(output_file, "a") as f:
+        f.write(f"Execution time for {input_dir} to  {output_dir}: {execution_time} seconds")
+        f.write("\n")
+
+    print(f"Execution time: {execution_time} seconds")
+    print(f"Execution time written to {output_file}")
diff --git a/codes/ecg-image-generator/image_gen_experiment_instructions.md b/codes/ecg-image-generator/image_gen_experiment_instructions.md
@@ -0,0 +1,39 @@
+# NOTES BEFORE RUNNING: 
+- Replace `<input_path>` and `<output_path>` below with the input and output directories, respectively.
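+- **To compare the two scripts as fairly as possible, comment out lines 97 and 98 in `gen_ecg_images_from_data_batch.py` (the `if args.fully_random:` block that sets `args.hw_text = True`) and drop `--hw_text` from the old-script command, so that neither script adds handwritten text.**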
+- To change the number of worker processes, add `--cpu_count <your_number>` to the new-script command; otherwise all available CPU
+  cores are used.
+
+
+# Command for the old script
+```bash
+python codes/ecg-image-generator/gen_ecg_images_from_data_batch.py \
+    -i <input_path> \
+    -o <output_path> \
+    --store_config 1 \
+    --lead_name_bbox \
+    --lead_bbox \
+    --augment \
+    --hw_text \
+    -rot 20 \
+    --random_grid_color \
+    --fully_random \
+    --mask_unplotted_samples \
+    --print_header
+```
+
+# Command for the new script
+```bash
+python codes/ecg-image-generator/gen_ecg_images_from_data_batch_parallel.py \
+    -i <input_path> \
+    -o <output_path> \
+    --store_config 1 \
+    --lead_name_bbox \
+    --lead_bbox \
+    --augment \
+    -rot 20 \
+    --random_grid_color \
+    --fully_random \
+    --mask_unplotted_samples \
+    --print_header
+```
diff --git a/codes/ecg-image-generator/requirements.txt b/codes/ecg-image-generator/requirements.txt
@@ -20,4 +20,6 @@ numpy==1.26.4
 pandas==2.2.2
 wfdb==4.1.2
 pyyaml
-qrcode==7.4.2
+qrcode==7.4.2
+tqdm
+https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz