Skip to content

Commit

Permalink
Merge pull request #326 from Pavan-Bellam/dev
Browse files Browse the repository at this point in the history
adding mega_workflow
  • Loading branch information
pradeeban authored Jul 28, 2022
2 parents 6c83b39 + a378b50 commit 7142e62
Show file tree
Hide file tree
Showing 10 changed files with 242 additions and 83 deletions.
1 change: 1 addition & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ if [ "$PIP" = false ] ; then
echo "Installing pip"
sudo yum install python3-pip
pip install -r requirements.txt
pip install -i https://test.pypi.org/simple/ HITI-anon-internal
wget https://repo.anaconda.com/archive/Anaconda3-2020.11-Linux-x86_64.sh
sh Anaconda3-2020.11-Linux-x86_64.sh -u
source ~/.bashrc
Expand Down
30 changes: 25 additions & 5 deletions modules/workflows/Nextflow_workflows/README.MD
Original file line number Diff line number Diff line change
@@ -1,14 +1,34 @@
# cold_png_suvpar:
cold_png_suvpar is a workflow that executes Cold_extraction, Png_extration, Suvpar sequentially.
# Mega-workflow:
Mega-workflow chains Cold_extraction, Png_extraction, Suvpar, DicomAnonymization and MetaDataAnonymization together. It offers 8 different workflows. The user selects one by setting the `workflow` parameter in nextflow.config to the number listed below:

* Cold_extraction->Png_extraction->Suvpar->DicomAnonymization->MetaDataAnonymization = 1

* Cold_extraction->Png_extraction->Suvpar = 2

* Cold_extraction->Png_extraction->DicomAnonymization = 3

* Cold_extraction->Png_extraction->MetaDataAnonymization = 4

* Png_extraction->Suvpar->DicomAnonymization->MetaDataAnonymization = 5

* Png_extraction->Suvpar = 6

* Png_extraction->DicomAnonymization = 7

* Png_extraction->MetaDataAnonymization = 8



Steps to use this workflow:
1. Modify nextflow.config.
2. Run the command ``` nextflow run cold_png_suvpar.nf ```
1. ```pip install -r requirements.txt```
2. Modify nextflow.config. DICOMHome and depth parameters are only needed for workflows 5,6,7,8.
3. Run the command ``` nextflow run mega_workflow.nf ```

Make sure that nextflow is installed. If you have run ./install then nextflow would be installed in you computer if not run the following commands:
Make sure that Nextflow is installed. If you have run ./install.sh, Nextflow is already installed on your computer; if not, run the following commands:
```
wget -qO- https://get.nextflow.io | bash
```
```
sudo mv nextflow /usr/local/bin
```
If HITI_anon_internal is not installed, run ``` pip install -i https://test.pypi.org/simple/ HITI-anon-internal ```
70 changes: 0 additions & 70 deletions modules/workflows/Nextflow_workflows/cold_png_suvpar.nf

This file was deleted.

108 changes: 108 additions & 0 deletions modules/workflows/Nextflow_workflows/mega_workflow.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Directory containing this workflow script; used below to locate the helper scripts.
pd=projectDir

/*
 * makedir
 *
 * Creates <OutputDirectory>/workflow_results and one sub-directory per stage
 * that the selected workflow (params.workflow, 1-8) runs, then prints the
 * folder depth of params.FilePath (the number of "/" separators) on stdout.
 * The printed depth is emitted on `makedir_out`.
 */
process makedir {
    output:
    stdout into makedir_out

    script:
    """
    #!/usr/bin/python3
    import os

    workflow = int("$params.workflow")
    results_root = os.path.join("$params.OutputDirectory", "workflow_results")

    # Stage directory -> workflows that run the stage writing into it.
    # These sets mirror the "when:" guards of the matching processes, so a
    # stage never starts without its output directory.
    # The "resuts" spelling is kept on purpose: the other processes in this
    # file reference exactly these paths.
    stage_dirs = [
        ("cold_extraction_results", (1, 2, 3, 4)),
        ("suvpar_resuts", (1, 2, 5, 6)),
        ("DicomAnon_resuts", (1, 3, 5, 7)),
        ("metaAnon_resuts", (1, 4, 5, 8)),
    ]

    os.makedirs(results_root, exist_ok=True)
    # Every workflow runs PNG extraction, so its directory is always needed.
    os.makedirs(os.path.join(results_root, "png_extraction_results"), exist_ok=True)
    for name, workflows in stage_dirs:
        if workflow in workflows:
            os.makedirs(os.path.join(results_root, name), exist_ok=True)

    # Depth of the storage pattern = number of "/" separators in FilePath.
    depth = len(str("$params.FilePath").split("/")) - 1
    print(depth)
    """
}

/*
 * cold_extraction
 *
 * Runs Modules/cold_extraction.py with the query/retrieve settings from
 * nextflow.config, storing results under
 * <OutputDirectory>/workflow_results/cold_extraction_results.
 * Only executed for workflows 1-4; the value received from `makedir_out`
 * is forwarded unchanged on `cold_extraction_out`.
 */
process cold_extraction {
    input:
    val dept from makedir_out

    output:
    val dept into cold_extraction_out

    when:
    params.workflow == 1 || params.workflow == 2 || params.workflow == 3 || params.workflow == 4

    script:
    """
    python3 ${pd}/Modules/cold_extraction.py \\
        --StorageFolder ${params.OutputDirectory}/workflow_results/cold_extraction_results \\
        --FilePath ${params.FilePath} \\
        --CsvFile ${params.CsvFile} \\
        --NumberOfQueryAttributes ${params.NumberOfQueryAttributes} \\
        --FirstAttr ${params.FirstAttr} \\
        --FirstIndex ${params.FirstIndex} \\
        --SecondAttr ${params.SecondAttr} \\
        --SecondIndex ${params.SecondIndex} \\
        --ThirdAttr ${params.ThirdAttr} \\
        --ThirdIndex ${params.ThirdIndex} \\
        --DateFormat ${params.DateFormat} \\
        --SendEmail ${params.SendEmail} \\
        --YourEmail ${params.YourEmail} \\
        --DCM4CHEBin ${params.DCM4CHEBin} \\
        --SrcAet ${params.SrcAet} \\
        --QueryAet ${params.QueryAet} \\
        --DestAet ${params.DestAet} \\
        --NightlyOnly ${params.NightlyOnly} \\
        --StartHour ${params.StartHour} \\
        --EndHour ${params.EndHour} \\
        --NifflerID ${params.NifflerID} \\
        --MaxNifflerProcesses ${params.MaxNifflerProcesses}
    """
}

/*
 * PNG extraction (runs in every workflow).
 *
 * Workflows 1-4: the DICOM files come from the cold-extraction stage, so the
 * extractor reads <OutputDirectory>/workflow_results/cold_extraction_results
 * and uses the depth computed by `makedir` from params.FilePath.
 *
 * Workflows 5-8: cold extraction is skipped, so the extractor reads the
 * user-supplied params.DICOMHome with params.depth (both set in
 * nextflow.config). The value from `makedir_out` is only used to make the
 * process wait for the output directories to exist.
 *
 * Both variants write to <OutputDirectory>/workflow_results/png_extraction_results
 * and emit a value on `png_ext_out` to trigger the downstream stages.
 */
if (params.workflow == 1 || params.workflow == 2 || params.workflow == 3 || params.workflow == 4) {

    process png_extraction {
        input:
        val depth from cold_extraction_out

        output:
        val depth into png_ext_out

        script:
        // `depth` originates from makedir's stdout and carries a trailing newline.
        def folderDepth = depth.toString().trim()
        """
        python3 ${pd}/Modules/ImageExtractor_nextflow.py \\
            --DICOMHome ${params.OutputDirectory}/workflow_results/cold_extraction_results \\
            --OutputDirectory ${params.OutputDirectory}/workflow_results/png_extraction_results \\
            --SplitIntoChunks ${params.SplitIntoChunks} \\
            --PrintImages ${params.PrintImages} \\
            --CommonHeadersOnly ${params.CommonHeadersOnly} \\
            --UseProcesses ${params.UseProcesses} \\
            --FlattenedToLevel ${params.FlattenedToLevel} \\
            --is16Bit ${params.is16Bit} \\
            --SendEmail ${params.SendEmail} \\
            --YourEmail ${params.YourEmail} \\
            --PublicHeadersOnly ${params.PublicHeadersOnly} \\
            --Depth ${folderDepth}
        """
    }
}
else {

    process png_extraction2 {
        input:
        val depth from makedir_out

        output:
        val depth into png_ext_out

        script:
        """
        python3 ${pd}/Modules/ImageExtractor_nextflow.py \\
            --DICOMHome ${params.DICOMHome} \\
            --OutputDirectory ${params.OutputDirectory}/workflow_results/png_extraction_results \\
            --SplitIntoChunks ${params.SplitIntoChunks} \\
            --PrintImages ${params.PrintImages} \\
            --CommonHeadersOnly ${params.CommonHeadersOnly} \\
            --UseProcesses ${params.UseProcesses} \\
            --FlattenedToLevel ${params.FlattenedToLevel} \\
            --is16Bit ${params.is16Bit} \\
            --SendEmail ${params.SendEmail} \\
            --YourEmail ${params.YourEmail} \\
            --PublicHeadersOnly ${params.PublicHeadersOnly} \\
            --Depth ${params.depth}
        """
    }
}


/*
 * suvpar
 *
 * Runs Modules/suvpar.py on the metadata.csv written by PNG extraction and
 * stores the result as suvpar_resuts/output.csv, using the feature-set file
 * named by params.Featureset_File_for_suvpar.
 * Executed for workflows 1, 2, 5 and 6; the value from `png_ext_out` only
 * serves as the trigger.
 */
process suvpar {
    input:
    val depth from png_ext_out

    when:
    params.workflow == 1 || params.workflow == 2 || params.workflow == 5 || params.workflow == 6

    script:
    def resultsRoot = "${params.OutputDirectory}/workflow_results"
    """
    python3 ${pd}/Modules/suvpar.py \\
        --InputFile ${resultsRoot}/png_extraction_results/metadata.csv \\
        --OutputFile ${resultsRoot}/suvpar_resuts/output.csv \\
        --FeaturesetFile ${params.Featureset_File_for_suvpar}
    """
}
/*
 * dicomAnon
 *
 * Runs dicom-anonymization/DicomAnonymizer2.py (two directories above this
 * workflow) with a source DICOM directory and the DicomAnon_resuts output
 * directory as positional arguments.
 * Executed for workflows 1, 3, 5 and 7; the value from `png_ext_out` only
 * serves as the trigger.
 */
process dicomAnon {
    input:
    val depth from png_ext_out

    when:
    params.workflow == 1 || params.workflow == 3 || params.workflow == 5 || params.workflow == 7

    script:
    // Workflows 1 and 3 anonymize what cold extraction retrieved. Workflows 5
    // and 7 never run cold extraction (its results directory is not even
    // created), so they must read the user-supplied params.DICOMHome instead.
    def ranColdExtraction = (params.workflow == 1 || params.workflow == 3)
    def dicomSource = ranColdExtraction ?
        "${params.OutputDirectory}/workflow_results/cold_extraction_results" :
        "${params.DICOMHome}"
    """
    python3 ${pd}/../../dicom-anonymization/DicomAnonymizer2.py \\
        ${dicomSource} \\
        ${params.OutputDirectory}/workflow_results/DicomAnon_resuts
    """
}

/*
 * meta_anon
 *
 * Runs metadata_anonymization.py on the metadata.csv written by PNG
 * extraction and stores the anonymized table as metaAnon_resuts/output.csv.
 * Executed for workflows 1, 4, 5 and 8; the value from `png_ext_out` only
 * serves as the trigger.
 *
 * NOTE(review): the script is looked up under Modules/ - confirm that this is
 * where metadata_anonymization.py actually lives in the repository.
 */
process meta_anon {
    input:
    val depth from png_ext_out

    when:
    params.workflow == 1 || params.workflow == 4 || params.workflow == 5 || params.workflow == 8

    script:
    def resultsRoot = "${params.OutputDirectory}/workflow_results"
    """
    python3 ${pd}/Modules/metadata_anonymization.py \\
        ${resultsRoot}/png_extraction_results/metadata.csv \\
        ${resultsRoot}/metaAnon_resuts/output.csv
    """
}

20 changes: 12 additions & 8 deletions modules/workflows/Nextflow_workflows/nextflow.config
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
params{
DCM4CHEBin = "/home/kmit/pavan/dcm4che-5.22.5/bin"
SrcAet = "[email protected]:4243"
QueryAet = "niffler:10244"
DestAet = "niffler"
workflow = 1

DCM4CHEBin = "/path/to/DCM4CHE/bin"
SrcAet = "SOURCEAET@IP:PORT"
QueryAet = "QUERYAET:PORT"
DestAet = "DESTAET"
NightlyOnly = false
StartHour = 19
EndHour = 7
NifflerID = 1
MaxNifflerProcesses = 1

FilePath ="{00100020}/{0020000D}/{0020000E}/{00080018}.dcm"
CsvFile ="/home/kmit/pavan/Niffler/modules/cold-extraction/unit_test_read_csv.csv"
CsvFile ="path/to/csvfile"
NumberOfQueryAttributes = 2
FirstAttr = "PatientID"
FirstIndex = 0
Expand All @@ -22,7 +24,9 @@ params{
SendEmail =true
YourEmail ="[email protected]"

OutputDirectory= "/home/kmit/pavan"
OutputDirectory= "path/to/outputDirectory"
DICOMHome="path/to/dicomfiles"
depth=4
SplitIntoChunks= 1
PrintImages=true
CommonHeadersOnly=false
Expand All @@ -33,5 +37,5 @@ params{
SpecificHeadersOnly=false
Featureset_File_for_png_extraction=""

Featureset_File_for_suvpar="/home/kmit/pavan/Niffler/modules/suvpar/featureset.txt"
}
Featureset_File_for_suvpar="/path/to/featureset"
}
5 changes: 5 additions & 0 deletions modules/workflows/Nextflow_workflows/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sqlalchemy
pillow
pycryptodomex
tqdm
pyparsing
91 changes: 91 additions & 0 deletions modules/workflows/Nextflow_workflows/src/metadata_anonymization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from HITI_anon_internal.Anon import EmoryAnon
import sys
import pandas as pd

# Columns removed from the anonymized output when present in the input.
_COLUMNS_TO_DROP = (
    'PatientID',
    'AccessionNumber',
    'StudyDate_modified',
    'InstitutionAddress',
    'InstitutionName',
    'OperatorsName',
    'OtherPatientIDs',
    'PatientAddress',
    'PatientBirthDate',
    'PatientName',
    'ReferringPhysicianName',
    'StationName',
    'ContentDate',
    'DeviceSerialNumber',
    'InstitutionalDepartmentName',
    'SeriesDate',
    'AcquisitionDate',
    'DateOfLastDetectorCalibration',
    'DetectorID',
    'PerformedProcedureStepID',
    'PerformedProcedureStepStartDate',
    'RequestingPhysician',
    'SOPClassUID',
    'SOPInstanceUID',
    'SeriesInstanceUID',
    'StudyInstanceUID',
    'FrameOfReferenceUID',
    'InstanceCreatorUID',
    'IrradiationEventUID',
    'file',
    'AcquisitionDateTime',
    'DateOfSecondaryCapture',
    'EthnicGroup',
    'InstanceCreationDate',
    'InstanceCreationTime',
    'InstanceNumber',
    'IssuerOfPatientID',
    'PatientBirthTime',
    'PatientComments',
    'PatientSex',
    'PerformedProcedureStepStartTime',
    'StudyID',
    'StudyTime',
    'TimeOfSecondaryCapture',
)


def anonymization(metadata, Anon):
    """Return an anonymized copy of a DICOM metadata table.

    Rows whose ``AccessionNumber`` is not exactly 16 characters long are
    discarded. The remaining rows receive ``empi_anon``, ``acc_anon`` and
    ``study_date_anon`` columns produced by ``Anon``, after which every
    column listed in ``_COLUMNS_TO_DROP`` that exists in the table is
    removed.

    Side effects:
        * ``metadata`` itself is modified: ``AccessionNumber`` is converted
          to ``str``, ``PatientID`` to ``int`` and a ``StudyDate_modified``
          datetime column is added.
        * ``metadata_orig_and_anon.csv`` is written to the current working
          directory. It is written *before* the columns are dropped, so it
          contains both the original and the anonymized identifiers.
        * ``Anon.save_keys()`` is called.

    Args:
        metadata: ``pandas.DataFrame`` with at least the columns
            ``AccessionNumber``, ``PatientID`` and ``StudyDate``
            (``StudyDate`` in ``%Y%m%d`` form).
        Anon: anonymizer object providing ``col_norm``, ``IDanon``,
            ``TScol`` and ``save_keys`` (presumably a
            ``HITI_anon_internal`` ``EmoryAnon`` instance - confirm
            against the caller).

    Returns:
        A new ``pandas.DataFrame`` with the anonymized columns added and
        the identifying columns removed.
    """
    metadata['AccessionNumber'] = metadata.AccessionNumber.map(str)
    metadata['PatientID'] = metadata.PatientID.map(int)
    metadata['StudyDate_modified'] = pd.to_datetime(metadata['StudyDate'], format='%Y%m%d')

    # Take an explicit copy of the filtered rows: assigning into a bare
    # ``.loc`` slice triggers SettingWithCopyWarning and may not stick.
    has_full_accession = metadata.AccessionNumber.str.len() == 16
    metadata = metadata.loc[has_full_accession].copy()
    metadata.reset_index(inplace=True, drop=True)

    metadata['StudyDate'] = metadata.StudyDate.astype(str)

    metadata = Anon.col_norm(metadata)
    metadata['empi_anon'] = Anon.IDanon(metadata['PatientID'], data_type='empi')
    metadata['acc_anon'] = Anon.IDanon(metadata['AccessionNumber'], data_type='rad_acc')
    metadata['study_date_anon'] = Anon.TScol(metadata['PatientID'], metadata['StudyDate_modified'])
    metadata.to_csv('metadata_orig_and_anon.csv')

    # Only drop the columns this particular table actually has.
    present = [col for col in _COLUMNS_TO_DROP if col in metadata.columns]
    clean_data = metadata.drop(columns=present)
    Anon.save_keys()
    return clean_data


def main(argv=None):
    """Anonymize a metadata CSV from the command line.

    Usage: ``metadata_anonymization.py <metadata.csv> <output.csv>``

    Reads the CSV named by the first argument, passes it through
    ``anonymization`` and writes the result, without the index, to the path
    named by the second argument.

    Args:
        argv: Full argument vector including the program name. Defaults to
            ``sys.argv``.

    Raises:
        SystemExit: with a usage message when fewer than two paths are given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else 'metadata_anonymization.py'
        sys.exit(f"usage: {program} <metadata.csv> <output.csv>")

    # NOTE(review): both paths are placeholders and must be replaced with the
    # real key directory and whitelist before this script can run.
    anon = EmoryAnon('/path/to/anon_keys', '/path/to/whitelist')
    anon.load_recentMasterKey()

    metadata = pd.read_csv(argv[1], low_memory=False)
    clean_data = anonymization(metadata, anon)
    clean_data.to_csv(argv[2], index=False)


if __name__ == "__main__":
    main()

0 comments on commit 7142e62

Please sign in to comment.