From 58b5beeffe03f9df8f857d92fbb0d38f26c2779a Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 21:11:30 +0530 Subject: [PATCH 1/6] Update ImageExtractor.py --- modules/png-extraction/ImageExtractor.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/modules/png-extraction/ImageExtractor.py b/modules/png-extraction/ImageExtractor.py index b4dcfe8..c5b62bd 100644 --- a/modules/png-extraction/ImageExtractor.py +++ b/modules/png-extraction/ImageExtractor.py @@ -39,9 +39,7 @@ def initialize_config_and_execute(config_values): print_images = bool(configs['PrintImages']) print_only_common_headers = bool(configs['CommonHeadersOnly']) - global public_headers_bool - public_headers_bool = bool(configs['PublicHeadersOnly']) - global SpecificHeadersOnly + PublicHeadersOnly = bool(configs['PublicHeadersOnly']) SpecificHeadersOnly = bool(configs['SpecificHeadersOnly']) depth = int(configs['Depth']) processes = int(configs['UseProcesses']) # how many processes to use. @@ -96,12 +94,12 @@ def initialize_config_and_execute(config_values): logging.info("------- Values Initialization DONE -------") final_res = execute(pickle_file, dicom_home, output_directory, print_images, print_only_common_headers, depth, processes, flattened_to_level, email, send_email, no_splits, is16Bit, png_destination, - failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start) + failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start,SpecificHeadersOnly,PublicHeadersOnly) return final_res # Function for getting tuple for field,val pairs -def get_tuples(plan, outlist = None, key = ""): +def get_tuples(plan,SpecificHeadersOnly,PublicHeadersOnly, outlist = None, key = ""): if len(key)>0: key = key + "_" if not outlist: @@ -135,7 +133,7 @@ def get_tuples(plan, outlist = None, key = ""): if plan[i] not in headers: headers.append(plan[i]) else: - if (public_headers_bool): + if (PublicHeadersOnly): for aa in plan.dir(): headers.append(plan[aa]) else: @@ -154,7 +152,7 @@ def get_tuples(plan, outlist = None, key = ""): if type(value) is dicom.sequence.Sequence: for nn, ss in enumerate(list(value)): newkey = "_".join([key,("%d"%nn),name]) if len(key) else "_".join([("%d"%nn),name]) - candidate = get_tuples(ss,outlist=None,key=newkey) + candidate = get_tuples(ss,SpecificHeadersOnly,PublicHeadersOnly,outlist=None,key=newkey) # if extracted tuples are too big condense to a string if len(candidate)>2000: outlist.append((newkey,str(candidate))) @@ -177,7 +175,7 @@ def get_tuples(plan, outlist = None, key = ""): def extract_headers(f_list_elem): - nn,ff = f_list_elem # unpack enumerated list + nn,ff,SpecificHeadersOnly,PublicHeadersOnly = f_list_elem # unpack enumerated list plan = dicom.dcmread(ff, force=True) # reads in dicom file # checks if this file has an image c=True @@ -185,7 +183,7 @@ def extract_headers(f_list_elem): check = plan.pixel_array # throws error if dicom file has no image except: c = False - kv = get_tuples(plan) # gets tuple for field,val pairs for this file. function defined above + kv = get_tuples(plan,SpecificHeadersOnly,PublicHeadersOnly) # gets tuple for field,val pairs for this file. function defined above # dicom images should not have more than 300 dicom tags if len(kv)>300: logging.debug(str(len(kv)) + " dicom tags produced by " + ff) @@ -354,7 +352,7 @@ def fix_mismatch(with_VRs=['PN', 'DS', 'IS', 'LO', 'OB']): def execute(pickle_file, dicom_home, output_directory, print_images, print_only_common_headers, depth, processes, flattened_to_level, email, send_email, no_splits, is16Bit, png_destination, - failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start): + failed, maps_directory, meta_directory, LOG_FILENAME, metadata_col_freq_threshold, t_start,SpecificHeadersOnly,PublicHeadersOnly): err = None fix_mismatch() if processes == 0.5: # use half the cores to avoid high ram usage @@ -421,7 +419,8 @@ def execute(pickle_file, dicom_home, output_directory, print_images, print_only_ with Pool(core_count) as p: # we send here print_only_public_headers bool value - res = p.imap_unordered(extract_headers, enumerate(chunk)) + chunks_list=[tups + (SpecificHeadersOnly,PublicHeadersOnly,) for tups in enumerate(chunk)] + res = p.imap_unordered(extract_headers, chunks_list) for i,e in enumerate(res): headerlist.append(e) data = pd.DataFrame(headerlist) From b267e4fd923d52d4d06df7dc07880fd06cdc24b8 Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 21:31:46 +0530 Subject: [PATCH 2/6] Update test_png_extraction.py --- tests/unit/test_png_extraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_png_extraction.py b/tests/unit/test_png_extraction.py index b94f543..0a6b291 100644 --- a/tests/unit/test_png_extraction.py +++ b/tests/unit/test_png_extraction.py @@ -99,14 +99,14 @@ def test_no_image(self): Test for invalid image """ headers = ImageExtractor.extract_headers( - self.invalid_test_dcm_file) + self.invalid_test_dcm_file,False,True) assert headers['has_pix_array'] is False def test_valid_image(self): """ Test for a valid image """ - headers = ImageExtractor.extract_headers(self.valid_test_dcm_file) + headers = ImageExtractor.extract_headers(self.valid_test_dcm_file,False,True) assert headers['has_pix_array'] is True # TODO large dcm files @@ -125,7 +125,7 @@ def test_correct_output(self): Verifies first key """ first_key = self.test_valid_plan.dir()[0] - tuple_list = ImageExtractor.get_tuples(self.test_valid_plan) + tuple_list = ImageExtractor.get_tuples(self.test_valid_plan,Fasle,True) assert tuple_list[0][0] == first_key # TODO hasattr error From 099fc8d044c507aac64ebe4e575665611e0bb7dd Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 21:38:57 +0530 Subject: [PATCH 3/6] Update test_png_extraction.py --- tests/unit/test_png_extraction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_png_extraction.py b/tests/unit/test_png_extraction.py index 0a6b291..16aedbd 100644 --- a/tests/unit/test_png_extraction.py +++ b/tests/unit/test_png_extraction.py @@ -90,23 +90,23 @@ class TestExtractHeaders: Test ImageExtractor.extract_headers """ valid_test_dcm_file = 0, str( - pytest.data_dir / 'png-extraction' / 'input' / 'test-img.dcm') + pytest.data_dir / 'png-extraction' / 'input' / 'test-img.dcm'), False , True invalid_test_dcm_file = 0, str( - pytest.data_dir / 'png-extraction' / 'input' / 'no-img.dcm') + pytest.data_dir / 'png-extraction' / 'input' / 'no-img.dcm'), False, True def test_no_image(self): """ Test for invalid image """ headers = ImageExtractor.extract_headers( - self.invalid_test_dcm_file,False,True) + self.invalid_test_dcm_file) assert headers['has_pix_array'] is False def test_valid_image(self): """ Test for a valid image """ - headers = ImageExtractor.extract_headers(self.valid_test_dcm_file,False,True) + headers = ImageExtractor.extract_headers(self.valid_test_dcm_file) assert headers['has_pix_array'] is True # TODO large dcm files @@ -125,7 +125,7 @@ def test_correct_output(self): Verifies first key """ first_key = self.test_valid_plan.dir()[0] - tuple_list = ImageExtractor.get_tuples(self.test_valid_plan,Fasle,True) + tuple_list = ImageExtractor.get_tuples(self.test_valid_plan,False,True) assert tuple_list[0][0] == first_key # TODO hasattr error From ed390f793eb845a634d1bedf79a17036fda18d01 Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 21:46:46 +0530 Subject: [PATCH 4/6] Update test_e2e_png_extraction.py --- tests/integration/test_e2e_png_extraction.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_e2e_png_extraction.py b/tests/integration/test_e2e_png_extraction.py index 1a71538..2e8ea42 100644 --- a/tests/integration/test_e2e_png_extraction.py +++ b/tests/integration/test_e2e_png_extraction.py @@ -66,6 +66,8 @@ def generate_kwargs(self, out_dir: PurePath, **kwargs): 'LOG_FILENAME': str(out_dir / 'ImageExtractor.out'), 'metadata_col_freq_threshold': 0.1, 't_start': time.time() + 'SpecificHeadersOnly': False + 'PublicHeadersOnly' : True } kwargs_dict.update(**kwargs) return kwargs_dict From abfde2556adaa8d69748f9ac38be5017953b846b Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 22:09:27 +0530 Subject: [PATCH 5/6] Update test_e2e_png_extraction.py --- tests/integration/test_e2e_png_extraction.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_e2e_png_extraction.py b/tests/integration/test_e2e_png_extraction.py index 2e8ea42..ec901be 100644 --- a/tests/integration/test_e2e_png_extraction.py +++ b/tests/integration/test_e2e_png_extraction.py @@ -65,8 +65,8 @@ def generate_kwargs(self, out_dir: PurePath, **kwargs): 'meta_directory': str(out_dir / 'meta') + '/', 'LOG_FILENAME': str(out_dir / 'ImageExtractor.out'), 'metadata_col_freq_threshold': 0.1, - 't_start': time.time() - 'SpecificHeadersOnly': False + 't_start': time.time(), + 'SpecificHeadersOnly': False, 'PublicHeadersOnly' : True } kwargs_dict.update(**kwargs) @@ -144,7 +144,9 @@ def generate_config(self, **kwargs): "FlattenedToLevel": "patient", "is16Bit": True, "SendEmail": False, - "YourEmail": "test@test.test" + "YourEmail": "test@test.test", + 'SpecificHeadersOnly' : False, + 'PublicHeadersOnly': True } config.update(**kwargs) return config From 91a05b39782501f1a9566be565214e609b738bce Mon Sep 17 00:00:00 2001 From: pavan kumar bellam <60264606+Pavan-Bellam@users.noreply.github.com> Date: Tue, 29 Mar 2022 22:10:41 +0530 Subject: [PATCH 6/6] Update test_png_extraction.py --- tests/unit/test_png_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_png_extraction.py b/tests/unit/test_png_extraction.py index 16aedbd..8299329 100644 --- a/tests/unit/test_png_extraction.py +++ b/tests/unit/test_png_extraction.py @@ -146,7 +146,7 @@ def setup_method(self): Test Setup """ header_list = [ImageExtractor.extract_headers( - (0, self.test_dcm_file))] + (0, self.test_dcm_file,False,True))] self.file_data = pd.DataFrame(header_list) self.index = 0 self.invalid_file_data = pd.DataFrame([