diff --git a/util/mdai2dcm.py b/util/mdai2dcm.py
new file mode 100644
index 00000000..52c7db93
--- /dev/null
+++ b/util/mdai2dcm.py
@@ -0,0 +1,314 @@
+import argparse
+import distutils.spawn
+import os
+import pandas as pd
+import numpy as np
+import mdai
+import logging
+import pydicom
+import tqdm
+import itk
+import cv2
+import copy
+import glob
+import subprocess
+import glob
+
+# logging setup
+
+logging.basicConfig()
+
+# By default the root logger is set to WARNING and all loggers you define
+# inherit that value. Here we set the root logger to NOTSET. This logging
+# level is automatically inherited by all existing and new sub-loggers
+# that do not set a less verbose level.
+logging.root.setLevel(logging.NOTSET)
+
+# The following line sets the root logger level as well.
+# It's equivalent to both previous statements combined:
+logging.basicConfig(level=logging.NOTSET)
+
+logger = logging.getLogger('dcmqi.mdai2dcm')
+logger.setLevel(logging.ERROR)
+
+# Helper functions
+dcmqi_template = {
+ "ContentCreatorName": "",
+ "ClinicalTrialSeriesID": "Session1",
+ "ClinicalTrialTimePointID": "1",
+ "SeriesDescription": "Segmentation",
+ "SeriesNumber": "300",
+ "InstanceNumber": "1",
+ "BodyPartExamined": "CHEST",
+ "segmentAttributes": [
+
+ ],
+ "ContentLabel": "SEGMENTATION",
+ "ContentDescription": "Image segmentation",
+ "ClinicalTrialCoordinatingCenterName": "dcmqi"
+}
+
+def makeHash(text, length=6):
+ from base64 import b64encode
+ from hashlib import sha1
+ return b64encode(sha1(str.encode(text)).digest()).decode('ascii')[:length]
+
+segment_template = {
+ "labelID": "",
+ "SegmentDescription": "",
+ "SegmentAlgorithmType": "MANUAL",
+ "SegmentedPropertyCategoryCodeSequence": {
+ "CodeValue": "49755003",
+ "CodingSchemeDesignator": "SCT",
+ "CodeMeaning": "Morphologically Abnormal Structure"
+ },
+ "SegmentedPropertyTypeCodeSequence": {
+ "CodeValue": "",
+ "CodingSchemeDesignator": "99RICORD",
+ "CodeMeaning": ""
+ },
+ "recommendedDisplayRGBValue": [
+ 177,
+ 122,
+ 101
+ ]
+ }
+
+
+def convertToNIfTI(series_dir, reconstruction_dir):
+# if nii file exist return it
+ logger.debug("Saving reconstruction to ", reconstruction_dir)
+ reconstruction = reconstruction_dir+'/ct_image.nii'
+ if os.path.exists(reconstruction):
+ return reconstruction
+ # else create one with dcm2niix
+ cmd = ['dcm2niix', '-o', reconstruction_dir, '-f', 'ct_image', series_dir]
+ subprocess.run(cmd, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
+
+ return reconstruction
+
+def convertToSEG2(input_dicom_dir, seg_dir):
+
+ logger.debug("Saving DICOM SEG to "+seg_dir)
+ json_files = glob.glob(seg_dir+"/*.json")
+
+ for label_json in json_files:
+
+ creator = os.path.split(label_json)[1].split('-')[0]
+ label_dcm = os.path.join(seg_dir,creator+".dcm")
+
+ labels = glob.glob(seg_dir+'/'+creator+'*.nii')
+ labels.sort()
+
+ cmd = ['itkimage2segimage','--inputDICOMDirectory',input_dicom_dir,
+ '--inputImageList',','.join(labels),"--inputMetadata",
+ label_json, "--outputDICOM", label_dcm, "--skip"]
+ logger.debug('Running SEG conversion with:\n'+' '.join(cmd))
+ result = subprocess.run(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
+ logger.debug(result.stderr)
+ logger.debug(result.stdout)
+
+
+def main():
+
+ parser = argparse.ArgumentParser(
+ usage="%(prog)s --inputDICOM
--inputJSON --outputDirectory \n\n"
+ "Warning: This is an experimenta script in development!\n"
+ "The intent of this helper script is to enable conversion"
+ "of the MD.ai annotations into appropriate standard DICOM objects.\n")
+ parser.add_argument(
+ '--inputDICOM',
+ dest="inputDICOM",
+ default="/Users/fedorov/github/mdai2seg/mydata/mdai_rsna_project_G9qOEdR0_images_2020-10-27-080731",
+ metavar='Input directory with the DICOM images being annotated',
+ help="Directory with the input DICOM images. It is expected that"
+ " the content of this directory is organized into the following"
+ "hierarchy: //.dcm")
+ #,
+ #required=True)
+
+ parser.add_argument(
+ '--inputJSON',
+ dest="inputJSON",
+ default="/Users/fedorov/github/mdai2seg/mydata/mdai_rsna_project_G9qOEdR0_annotations_labelgroup_all_2020-10-27-080732.json",
+ metavar='Input MD.ai annotations in the native JSON representation',
+ help="MD.ai nnotations stored in MD.ai format.")
+ #,
+ #required=True)
+
+ parser.add_argument(
+ '--outputDirectory',
+ dest="outputDirectory",
+ default="/Users/fedorov/Downloads/test_seg2",
+ metavar='Output directory to store the resulting DICOM files',
+ help="Directory to store resulting converted DICOM objects.")
+ #,
+ #required=True)
+
+ parser.add_argument(
+ '--includedLabelGroups',
+ dest="includedLabelGroups",
+ default="\"Team Emily\",\"Team Scott\"",
+ metavar='Label groups to consider in conversion',
+ help="Only the specified label groups will be considered.")
+ #,
+ #required=True)
+
+ args = parser.parse_args()
+
+ if not os.path.exists(args.outputDirectory):
+ os.mkdir(args.outputDirectory)
+
+ results = mdai.common_utils.json_to_dataframe(args.inputJSON)
+ annotations_df = results['annotations']
+ logger.info(f"{annotations_df.shape[0]} annotations loaded")
+
+ all_series_uids = annotations_df['SeriesInstanceUID'].unique()
+ # TODO: Why there are NaNs?
+ all_series_uids = all_series_uids[~pd.isnull(all_series_uids)]
+
+ progress_bar = tqdm.tqdm(total=len(all_series_uids))
+
+ for this_series_uid in all_series_uids:
+ #['1.2.826.0.1.3680043.10.474.419639.300423266679936330916265249312']: # ['1.2.826.0.1.3680043.10.474.440808.1993']: #all_series_uids: # ['1.2.826.0.1.3680043.10.474.2969551981555819856670502082591727602']:
+ logger.debug(f"Processing {this_series_uid}")
+
+ # filter our irrelevant rows
+ this_series_annotations = annotations_df[annotations_df["SeriesInstanceUID"] == this_series_uid]
+ this_series_annotations = this_series_annotations[this_series_annotations['annotationMode'] == 'freeform']
+ this_series_annotations = this_series_annotations[this_series_annotations['groupName'].isin(['Team Emily', 'Team Scott'])]
+
+ if this_series_annotations.shape[0] == 0:
+ continue
+
+ logger.debug('Creators of freeform annotations for series '+this_series_uid+" are "+str(this_series_annotations['createdById'].unique()))
+
+ one_row = this_series_annotations.iloc[0]
+
+ series_uid = one_row["SeriesInstanceUID"]
+ study_path = os.path.join(args.inputDICOM,one_row["StudyInstanceUID"])
+ segmentations_path = os.path.join(args.outputDirectory,series_uid+"_SEG")
+ reconstruction_path = os.path.join(args.outputDirectory,series_uid+"_Reconstruction")
+
+ if not os.path.exists(segmentations_path):
+ os.mkdir(segmentations_path)
+ if not os.path.exists(reconstruction_path):
+ os.mkdir(reconstruction_path)
+
+ series_path = os.path.join(study_path,one_row["SeriesInstanceUID"])
+ path_to_instance = os.path.join(series_path,one_row["SOPInstanceUID"]+".dcm")
+ instance_dcm = pydicom.dcmread(path_to_instance)
+
+ ct_image_file_name = convertToNIfTI(series_path, reconstruction_path) # run dcm2niix to do this conversion
+ PixelType = itk.ctype("signed short")
+ image_volume = itk.imread(ct_image_file_name, PixelType)
+ image_volume_array = itk.array_view_from_image(image_volume)
+
+ label_images_per_creator = {}
+ label_json_per_creator = {}
+ creators = this_series_annotations['createdById'].unique()
+
+ for creator in creators:
+ label_images_per_creator[creator] = []
+ label_json_per_creator[creator] = copy.deepcopy(dcmqi_template)
+ label_json_per_creator[creator]['ContentCreatorName'] = creator
+ label_json_per_creator[creator]['segmentAttributes'] = []
+
+ for index, row in this_series_annotations.iterrows():
+ if row['annotationMode'] != 'freeform':
+ progress_bar.update(1)
+ continue
+ if row['data'] == None or row['data']['vertices'] == None:
+ progress_bar.update(1)
+ continue
+
+ instance_uid = row['SOPInstanceUID']
+ creator = row['createdById']
+
+ path_to_instance = os.path.join(args.inputDICOM,row["StudyInstanceUID"],row["SeriesInstanceUID"],row["SOPInstanceUID"]+".dcm")
+
+ instance_dcm = pydicom.dcmread(path_to_instance)
+ slice_origin_physical_point = instance_dcm.ImagePositionPatient
+ slice_origin_index = image_volume.TransformPhysicalPointToIndex(slice_origin_physical_point)
+ z = slice_origin_index[2]
+
+ # make a separate image from single slice
+ extractor = itk.ExtractImageFilter.New(image_volume)
+ region = image_volume.GetLargestPossibleRegion()
+ size = region.GetSize()
+ size[2] = 1
+ slice_origin_index[0] = 0
+ slice_origin_index[1] = 0
+ region.SetIndex(slice_origin_index)
+ region.SetSize(size)
+ extractor.SetExtractionRegion(region)
+ extractor.SetInput(image_volume)
+
+ extractor.Update()
+ label_slice_image = extractor.GetOutput()
+ label_slice_array = itk.array_from_image(label_slice_image)
+ label_slice_array.fill(0)
+
+ vertices = np.array(row['data']['vertices']).reshape((-1, 2))
+ slice_label_mask = label_slice_array[0,:,:]
+ cv2.fillPoly(slice_label_mask, np.int32([vertices]), 1)
+ label_slice_array[0,:,:]=np.flipud(slice_label_mask)
+
+ new_image = itk.image_from_array(label_slice_array)
+ new_image.SetDirection(label_slice_image.GetDirection())
+ new_image.SetOrigin(label_slice_image.TransformIndexToPhysicalPoint(slice_origin_index))
+ new_image.SetSpacing(label_slice_image.GetSpacing())
+
+ #logger.debug("added annotation for creator "+creator)
+
+ label_images_per_creator[creator].append(new_image)
+
+ this_segment_template = copy.deepcopy(segment_template)
+ this_segment_template['labelID'] = 1
+ this_segment_template['SegmentDescription'] = row['labelName']
+ this_segment_template['recommendedDisplayRGBValue'] = [int(row['color'][1:3],16), int(row['color'][3:5], 16), int(row['color'][5:7],16)]
+ this_segment_template['SegmentedPropertyTypeCodeSequence']['CodeValue'] = makeHash(row['labelName'])
+ this_segment_template['SegmentedPropertyTypeCodeSequence']['CodeMeaning'] = row['labelName']
+ label_json_per_creator[creator]['segmentAttributes'].append([this_segment_template])
+
+ for creator in creators:
+
+ logger.debug('Creator '+creator+' has '+str(len(label_images_per_creator[creator]))+' segmentations and '+str(len(label_json_per_creator[creator]['segmentAttributes']))+' seg attrs')
+ logger.debug('Processing labels for '+creator)
+ for idx, label_itk_image in enumerate(label_images_per_creator[creator]):
+ output_file_name = creator+"-"+('%03d' % idx)
+ itk.imwrite(label_itk_image, os.path.join(segmentations_path,output_file_name+".nii"))
+
+ import json
+ output_json_name = creator+"-metadata.json"
+ with open(os.path.join(segmentations_path,output_json_name), "w") as json_file:
+ json_file.write(json.dumps(label_json_per_creator[creator], indent=2))
+
+ convertToSEG2(series_path, segmentations_path)
+
+ if len(glob.glob(segmentations_path+"/*dcm")) == 0:
+ logger.error(f"SEG conversion failed for {segmentations_path}!")
+
+ progress_bar.update(1)
+
+ logger.debug("Done")
+
+ return
+
+if __name__ == "__main__":
+ exeFound = {}
+ for exe in ['dcm2niix', 'itkimage2segimage']:
+ if distutils.spawn.find_executable(exe) is None:
+ exeFound[exe] = False
+ else:
+ exeFound[exe] = True
+ if not(exeFound['itkimage2segimage']) or not (exeFound['dcm2niix']):
+ logger.error(
+ "Dependency converter(s) not found in the path.")
+ logger.error(
+ "dcmqi (https://github.com/qiicr/dcmqi) and dcm2niix (https://github.com/rordenlab/dcm2niix/releases)")
+ logger.error(
+ "need to be installed and available in the PATH for using this converter script.")
+ sys.exit()
+
+ main()
\ No newline at end of file