diff --git a/.talismanrc b/.talismanrc index 8e2e2fd..aa41ce4 100644 --- a/.talismanrc +++ b/.talismanrc @@ -1,8 +1,4 @@ fileignoreconfig: - - filename: tests/test_main.py - checksum: cc564a13bd73b557d0ef095252a2f1a9fd166a4a70d68d805ee9fd706fc28b32 - - filename: tests/test_utils.py - checksum: 27c2987e02ca5255d6711a82e4b8921ea6585bc4ac06a893f250fe12b4b236a5 - - filename: poetry.lock - checksum: 4fe8810d9b7b0faa6e9aabea51871fde73b004dc93ebdba8462ec31cac3e22a8 + - filename: process_dcm/utils.py + checksum: 78f84c4f196df78c9e514bc1b179468a8f16ebd90bbcf66f54330eb659fdd125 version: "" diff --git a/process_dcm/utils.py b/process_dcm/utils.py index 6941d78..e5c3d14 100644 --- a/process_dcm/utils.py +++ b/process_dcm/utils.py @@ -27,6 +27,15 @@ warnings.filterwarnings("ignore", category=UserWarning, message="A value of type *") +class DcmO: + """Class to handle DCM obj and its original file path.""" + + def __init__(self, dcm_obj: FileDataset, filepath: str) -> None: + """Initialize DcmO with a DICOM object and its file path.""" + self.dcm_obj = dcm_obj + self.filepath = filepath + + def do_date(date_str: str, input_format: str, output_format: str) -> str: """Convert DCM datetime strings to metadata.json string format.""" if "." not in date_str: @@ -319,6 +328,29 @@ def parse_datetime(dt_str: str) -> datetime: return grouped_dcms +def process_dcm_images(dcm_objs: list, output_dir: str, image_format: str, mapping: str, keep: str) -> tuple[str, str]: + """Processes DICOM images and saves them to a directory.""" + os.makedirs(output_dir, exist_ok=True) + + for dcmO in dcm_objs: + # process images + arr = dcmO.pixel_array + + if dcmO.NumberOfFrames == 1: + arr = np.expand_dims(arr, axis=0) + + for i in range(dcmO.NumberOfFrames): + out_img = os.path.join(output_dir, f"{dcmO.Modality.code}-{dcmO.AccessionNumber}_{i}.{image_format}") + while os.path.exists(out_img): + dcmO.AccessionNumber += 1 # increase group_id + out_img = os.path.join(output_dir, f"{dcmO.Modality.code}-{dcmO.AccessionNumber}_{i}.{image_format}") + + array = cv2.normalize(arr[i], None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) # type: ignore #AWSS + image = Image.fromarray(array) + image.save(out_img) + return process_dcm_meta(dcm_objs=dcm_objs, output_dir=output_dir, mapping=mapping, keep=keep) + + def process_dcm( input_dir: str | Path, image_format: str = "png", @@ -351,9 +383,14 @@ def process_dcm( tuple[str, str]: A tuple containing the new patient key and the original patient key. """ # Load DICOM files from input directory - dcm_objs = [dcmread(os.path.join(input_dir, f)) for f in os.listdir(input_dir) if f.endswith(".dcm")] - dcm_objs.sort(key=lambda x: x.Modality) - patient_id = dcm_objs[0].PatientID + dcm_objs = [ + DcmO(dcmread(os.path.join(input_dir, f)), os.path.join(input_dir, f)) + for f in os.listdir(input_dir) + if f.endswith(".dcm") + ] + + dcm_objs.sort(key=lambda dcmO: dcmO.dcm_obj.Modality) + patient_id = dcm_objs[0].dcm_obj.PatientID keep_patient_key = "p" in keep org_output_dir = output_dir @@ -389,17 +426,22 @@ def process_dcm( dcms = [] # using AccessionNumber to emulate group_id - for dcm in dcm_objs: + for dcmO in dcm_objs: # update modality - if not update_modality(dcm): + if not update_modality(dcmO.dcm_obj): continue # Ignore any other modalities + if dcmO.dcm_obj.Modality == ImageModality.UNKNOWN: + typer.secho( + f"\nWARN: Unknown modality for {dcmO.filepath}", + fg=typer.colors.RED, + ) - dcm.AccessionNumber = 0 + dcmO.dcm_obj.AccessionNumber = 0 - if not dcm.get("NumberOfFrames"): - dcm.NumberOfFrames = 1 + if not dcmO.dcm_obj.get("NumberOfFrames"): + dcmO.dcm_obj.NumberOfFrames = 1 - dcms.append(dcm) + dcms.append(dcmO.dcm_obj) if group: # Group DICOM files by AcquisitionDateTime @@ -417,52 +459,14 @@ def process_dcm( f"\nWARN: unknown AcquisitionDateTime, results in {group_dir} are not reliable", fg=typer.colors.RED ) - os.makedirs(group_dir, exist_ok=True) - - for dcm in group_dcms: - # process images - arr = dcm.pixel_array - - if dcm.NumberOfFrames == 1: - arr = np.expand_dims(arr, axis=0) - - for i in range(dcm.NumberOfFrames): - out_img = os.path.join(group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}") - if os.path.exists(out_img): - dcm.AccessionNumber += 1 # increase group_id - out_img = os.path.join( - group_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}" - ) - - array = cv2.normalize(arr[i], None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) # type: ignore #AWSS - image = Image.fromarray(array) - image.save - image.save(out_img) - - # Process metadata for the grouped DCMs - new, old = process_dcm_meta(dcm_objs=group_dcms, output_dir=group_dir, mapping=mapping, keep=keep) + new, old = process_dcm_images( + dcm_objs=group_dcms, output_dir=group_dir, image_format=image_format, mapping=mapping, keep=keep + ) else: - os.makedirs(output_dir, exist_ok=True) - - for dcm in dcms: - # process images - arr = dcm.pixel_array - - if dcm.NumberOfFrames == 1: - arr = np.expand_dims(arr, axis=0) - - for i in range(dcm.NumberOfFrames): - out_img = os.path.join(output_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}") - if os.path.exists(out_img): - dcm.AccessionNumber += 1 # increase group_id - out_img = os.path.join(output_dir, f"{dcm.Modality.code}-{dcm.AccessionNumber}_{i}.{image_format}") - - array = cv2.normalize(arr[i], None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) # type: ignore #AWSS - image = Image.fromarray(array) - image.save(out_img) - - new, old = process_dcm_meta(dcm_objs=dcms, output_dir=output_dir, mapping=mapping, keep=keep) + new, old = process_dcm_images( + dcm_objs=dcms, output_dir=output_dir, image_format=image_format, mapping=mapping, keep=keep + ) return new, old diff --git a/tests/test_main.py b/tests/test_main.py index c16629e..1addcd0 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -222,6 +222,21 @@ def test_process_acquisition_datetime(): assert result == ("0558756784", "20241113-093410") +# def test_process_many(): +# with TemporaryDirectory() as tmpdirname: +# output_dir = Path(tmpdirname) +# task_data = ("20220823_R/", str(output_dir)) +# image_format = "png" +# overwrite = True +# verbose = True +# keep = "" +# mapping = "" +# group = False +# tol = 2 +# result = process_task(task_data, image_format, overwrite, verbose, keep, mapping, group, tol) +# assert result == ("0558756784", "20241113-093410") + + # def test_process_taskL(): # with TemporaryDirectory() as tmpdirname: # output_dir = Path(tmpdirname)