-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #326 from Pavan-Bellam/dev
adding mega_workflow
- Loading branch information
Showing
10 changed files
with
242 additions
and
83 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,34 @@ | ||
# cold_png_suvpar: | ||
cold_png_suvpar is a workflow that executes Cold_extraction, Png_extration, Suvpar sequentially. | ||
# Mega-workflow: | ||
Mega-workflow chains Cold_extraction, Png_extraction, Suvpar, DicomAnonymization and MetaDataAnonymization together. It provides 8 different workflows; select one by setting the `workflow` parameter in nextflow.config to the number shown next to it below: | ||
|
||
* Cold_extraction->Png_extraction->Suvpar->DicomAnonymization->MetaDataAnonymization = 1 | ||
|
||
* Cold_extraction->Png_extraction->Suvpar = 2 | ||
|
||
* Cold_extraction->Png_extraction->DicomAnonymization = 3 | ||
|
||
* Cold_extraction->Png_extraction->MetaDataAnonymization = 4 | ||
|
||
* Png_extraction->Suvpar->DicomAnonymization->MetaDataAnonymization = 5 | ||
|
||
* Png_extraction->Suvpar = 6 | ||
|
||
* Png_extraction->DicomAnonymization = 7 | ||
|
||
* Png_extraction->MetaDataAnonymization = 8 | ||
|
||
|
||
|
||
Steps to use this workflow: | ||
1. Modify nextflow.config. | ||
2. Run the command ``` nextflow run cold_png_suvpar.nf ``` | ||
1. ```pip install -r requirements.txt``` | ||
2. Modify nextflow.config. DICOMHome and depth parameters are only needed for workflows 5,6,7,8. | ||
3. Run the command ``` nextflow run cold_png_suvpar.nf ``` | ||
|
||
Make sure that nextflow is installed. If you have run ./install then nextflow would be installed in you computer if not run the following commands: | ||
Make sure that nextflow is installed. If you have run ./install.sh, nextflow is already installed on your computer; if not, run the following commands: | ||
``` | ||
wget -qO- https://get.nextflow.io | bash | ||
``` | ||
``` | ||
sudo mv nextflow /usr/local/bin | ||
``` | ||
If HITI_anon_internal is not installed, run ``` pip install -i https://test.pypi.org/simple/ HITI-anon-internal ``` |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
pd=projectDir | ||
|
||
process makedir{
    // Creates the result directories required by the selected workflow under
    // params.OutputDirectory/workflow_results and prints, on stdout, the folder
    // depth implied by params.FilePath (number of "/" separators).
    output:
        stdout into makedir_out
    script:
    """
    #!/usr/bin/python3
    import os

    workflow = $params.workflow
    results_root = "$params.OutputDirectory/workflow_results"

    # (sub-directory, workflows whose chain contains the stage writing to it).
    # The workflow lists mirror the "when:" guards of the corresponding
    # processes; None means the directory is needed by every workflow.
    stage_dirs = [
        ("cold_extraction_results", (1, 2, 3, 4)),
        ("png_extraction_results", None),
        ("suvpar_resuts", (1, 2, 5, 6)),
        ("DicomAnon_resuts", (1, 3, 5, 7)),
        ("metaAnon_resuts", (1, 4, 5, 8)),
    ]

    # exist_ok=True makes re-runs on an existing output tree harmless.
    os.makedirs(results_root, exist_ok=True)
    for name, workflows in stage_dirs:
        if workflows is None or workflow in workflows:
            os.makedirs(os.path.join(results_root, name), exist_ok=True)

    file_path = str("$params.FilePath")
    depth = len(file_path.split("/")) - 1
    print(depth)
    """
}
|
||
process cold_extraction{
    // Runs Modules/cold_extraction.py with the query, PACS and scheduling
    // parameters from nextflow.config, storing into
    // workflow_results/cold_extraction_results. Only executed for workflows 1-4.
    // The value received from makedir is forwarded unchanged to the next stage.
    input:
        val folder_depth from makedir_out
    output:
        val folder_depth into cold_extraction_out
    when:
        params.workflow==1 || params.workflow==2 || params.workflow==3 || params.workflow==4
    script:
    """
    python3 $pd/Modules/cold_extraction.py \\
        --StorageFolder $params.OutputDirectory/workflow_results/cold_extraction_results \\
        --FilePath $params.FilePath \\
        --CsvFile $params.CsvFile \\
        --NumberOfQueryAttributes $params.NumberOfQueryAttributes \\
        --FirstAttr $params.FirstAttr \\
        --FirstIndex $params.FirstIndex \\
        --SecondAttr $params.SecondAttr \\
        --SecondIndex $params.SecondIndex \\
        --ThirdAttr $params.ThirdAttr \\
        --ThirdIndex $params.ThirdIndex \\
        --DateFormat $params.DateFormat \\
        --SendEmail $params.SendEmail \\
        --YourEmail $params.YourEmail \\
        --DCM4CHEBin $params.DCM4CHEBin \\
        --SrcAet $params.SrcAet \\
        --QueryAet $params.QueryAet \\
        --DestAet $params.DestAet \\
        --NightlyOnly $params.NightlyOnly \\
        --StartHour $params.StartHour \\
        --EndHour $params.EndHour \\
        --NifflerID $params.NifflerID \\
        --MaxNifflerProcesses $params.MaxNifflerProcesses
    """
}
|
||
// PNG extraction stage. Exactly one of the two processes below is defined,
// depending on whether the selected workflow starts with cold extraction.
// Both write to workflow_results/png_extraction_results, which the downstream
// suvpar and meta_anon stages read metadata.csv from.
if(params.workflow==1 || params.workflow==2 || params.workflow==3 || params.workflow==4){

    // Workflows 1-4: the DICOM files are the ones cold_extraction just stored,
    // and the folder depth is the value makedir derived from params.FilePath.
    process png_extraction{
        input:
            val depth from cold_extraction_out
        output:
            val depth into png_ext_out
        script:
        """
        python3 $pd/Modules/ImageExtractor_nextflow.py --DICOMHome $params.OutputDirectory/workflow_results/cold_extraction_results --OutputDirectory $params.OutputDirectory/workflow_results/png_extraction_results --SplitIntoChunks $params.SplitIntoChunks --PrintImages $params.PrintImages --CommonHeadersOnly $params.CommonHeadersOnly --UseProcesses $params.UseProcesses --FlattenedToLevel $params.FlattenedToLevel --is16Bit $params.is16Bit --SendEmail $params.SendEmail --YourEmail $params.YourEmail --PublicHeadersOnly $params.PublicHeadersOnly --Depth $depth
        """
    }
}
else{
    // Workflows 5-8: cold extraction is skipped, so the DICOM files and their
    // folder depth come from the user-supplied params.DICOMHome / params.depth.
    // The makedir output is still consumed so that the result directories
    // exist before extraction starts; its value is only passed through.
    process png_extraction2{
        input:
            val makedir_depth from makedir_out
        output:
            val makedir_depth into png_ext_out
        script:
        """
        python3 $pd/Modules/ImageExtractor_nextflow.py --DICOMHome $params.DICOMHome --OutputDirectory $params.OutputDirectory/workflow_results/png_extraction_results --SplitIntoChunks $params.SplitIntoChunks --PrintImages $params.PrintImages --CommonHeadersOnly $params.CommonHeadersOnly --UseProcesses $params.UseProcesses --FlattenedToLevel $params.FlattenedToLevel --is16Bit $params.is16Bit --SendEmail $params.SendEmail --YourEmail $params.YourEmail --PublicHeadersOnly $params.PublicHeadersOnly --Depth $params.depth
        """
    }
}
|
||
|
||
process suvpar{
    // Runs Modules/suvpar.py on the metadata.csv produced by PNG extraction and
    // writes workflow_results/suvpar_resuts/output.csv.
    // Executed for workflows 1, 2, 5 and 6 only.
    input:
        val png_done from png_ext_out
    when:
        params.workflow==1 || params.workflow==2 || params.workflow==5 || params.workflow==6
    script:
    """
    python3 $pd/Modules/suvpar.py \\
        --InputFile $params.OutputDirectory/workflow_results/png_extraction_results/metadata.csv \\
        --OutputFile $params.OutputDirectory/workflow_results/suvpar_resuts/output.csv \\
        --FeaturesetFile $params.Featureset_File_for_suvpar
    """
}
process dicomAnon{
    // Runs DicomAnonymizer2.py, writing the anonymized DICOM files to
    // workflow_results/DicomAnon_resuts. Executed for workflows 1, 3, 5 and 7.
    input:
        val depth from png_ext_out
    when:
        params.workflow==1 || params.workflow==3 || params.workflow==5 || params.workflow==7
    script:
    // Workflows 1 and 3 anonymize the files stored by cold_extraction.
    // Workflows 5 and 7 skip cold extraction, so cold_extraction_results is
    // never populated and the user-supplied params.DICOMHome is the source.
    def dicom_source = (params.workflow==1 || params.workflow==3) ? "$params.OutputDirectory/workflow_results/cold_extraction_results" : params.DICOMHome
    """
    python3 $pd/../../dicom-anonymization/DicomAnonymizer2.py $dicom_source $params.OutputDirectory/workflow_results/DicomAnon_resuts
    """
}
|
||
process meta_anon{
    // Runs Modules/metadata_anonymization.py on the metadata.csv produced by
    // PNG extraction and writes workflow_results/metaAnon_resuts/output.csv.
    // Executed for workflows 1, 4, 5 and 8 only.
    input:
        val png_done from png_ext_out
    when:
        params.workflow==1 || params.workflow==4 || params.workflow==5 || params.workflow==8
    script:
    """
    python3 $pd/Modules/metadata_anonymization.py \\
        $params.OutputDirectory/workflow_results/png_extraction_results/metadata.csv \\
        $params.OutputDirectory/workflow_results/metaAnon_resuts/output.csv
    """
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,18 @@ | ||
params{ | ||
DCM4CHEBin = "/home/kmit/pavan/dcm4che-5.22.5/bin" | ||
SrcAet = "[email protected]:4243" | ||
QueryAet = "niffler:10244" | ||
DestAet = "niffler" | ||
workflow = 1 | ||
|
||
DCM4CHEBin = "/path/to/DCM4CHE/bin" | ||
SrcAet = "SOURCEAET@IP:PORT" | ||
QueryAet = "QUERYAET:PORT" | ||
DestAet = "DESTAET" | ||
NightlyOnly = false | ||
StartHour = 19 | ||
EndHour = 7 | ||
NifflerID = 1 | ||
MaxNifflerProcesses = 1 | ||
|
||
FilePath ="{00100020}/{0020000D}/{0020000E}/{00080018}.dcm" | ||
CsvFile ="/home/kmit/pavan/Niffler/modules/cold-extraction/unit_test_read_csv.csv" | ||
CsvFile ="path/to/csvfile" | ||
NumberOfQueryAttributes = 2 | ||
FirstAttr = "PatientID" | ||
FirstIndex = 0 | ||
|
@@ -22,7 +24,9 @@ params{ | |
SendEmail =true | ||
YourEmail ="[email protected]" | ||
|
||
OutputDirectory= "/home/kmit/pavan" | ||
OutputDirectory= "path/to/outputDirectory" | ||
DICOMHome="path/to/dicomfiles" | ||
depth=4 | ||
SplitIntoChunks= 1 | ||
PrintImages=true | ||
CommonHeadersOnly=false | ||
|
@@ -33,5 +37,5 @@ params{ | |
SpecificHeadersOnly=false | ||
Featureset_File_for_png_extraction="" | ||
|
||
Featureset_File_for_suvpar="/home/kmit/pavan/Niffler/modules/suvpar/featureset.txt" | ||
} | ||
Featureset_File_for_suvpar="/path/to/featureset" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
sqlalchemy | ||
pillow | ||
pycryptodomex | ||
tqdm | ||
pyparsing |
File renamed without changes.
File renamed without changes.
91 changes: 91 additions & 0 deletions
91
modules/workflows/Nextflow_workflows/src/metadata_anonymization.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from HITI_anon_internal.Anon import EmoryAnon | ||
import sys | ||
import pandas as pd | ||
|
||
# Columns removed from the anonymized output whenever they are present.
_COLS_TO_DROP = (
    'PatientID',
    'AccessionNumber',
    'StudyDate_modified',
    'InstitutionAddress',
    'InstitutionName',
    'OperatorsName',
    'OtherPatientIDs',
    'PatientAddress',
    'PatientBirthDate',
    'PatientName',
    'ReferringPhysicianName',
    'StationName',
    'ContentDate',
    'DeviceSerialNumber',
    'InstitutionalDepartmentName',
    'SeriesDate',
    'AcquisitionDate',
    'DateOfLastDetectorCalibration',
    'DetectorID',
    'PerformedProcedureStepID',
    'PerformedProcedureStepStartDate',
    'RequestingPhysician',
    'SOPClassUID',
    'SOPInstanceUID',
    'SeriesInstanceUID',
    'StudyInstanceUID',
    'FrameOfReferenceUID',
    'InstanceCreatorUID',
    'IrradiationEventUID',
    'file',
    'AcquisitionDateTime',
    'DateOfSecondaryCapture',
    'EthnicGroup',
    'InstanceCreationDate',
    'InstanceCreationTime',
    'InstanceNumber',
    'IssuerOfPatientID',
    'PatientBirthTime',
    'PatientComments',
    'PatientSex',
    'PerformedProcedureStepStartTime',
    'StudyID',
    'StudyTime',
    'TimeOfSecondaryCapture',
)


def anonymization(metadata, Anon):
    """Return an anonymized copy of *metadata*.

    Rows whose ``AccessionNumber`` is not exactly 16 characters long are
    discarded. Anonymized patient ID, accession number and study date are
    added as ``empi_anon``, ``acc_anon`` and ``study_date_anon``, and the
    identifying columns listed in ``_COLS_TO_DROP`` are removed when present.

    Side effects: the frame holding both original and anonymized values is
    written to ``metadata_orig_and_anon.csv`` in the current working
    directory, and ``Anon.save_keys()`` is called. The caller's DataFrame is
    left unmodified.

    Args:
        metadata: DataFrame with at least ``AccessionNumber``, ``PatientID``
            and ``StudyDate`` (formatted ``%Y%m%d``) columns.
        Anon: anonymizer object providing ``col_norm``, ``IDanon``, ``TScol``
            and ``save_keys``.

    Returns:
        The anonymized DataFrame.
    """
    # assign() builds a new frame, so the caller's DataFrame is not mutated.
    metadata = metadata.assign(
        AccessionNumber=metadata.AccessionNumber.map(str),
        PatientID=metadata.PatientID.map(int),
        StudyDate_modified=pd.to_datetime(metadata['StudyDate'], format='%Y%m%d'),
    )

    # reset_index(drop=True) returns a fresh frame; assigning columns to the
    # bare .loc[] slice would raise pandas' SettingWithCopyWarning.
    has_16_char_accession = metadata.AccessionNumber.str.len() == 16
    metadata = metadata.loc[has_16_char_accession].reset_index(drop=True)
    metadata['StudyDate'] = metadata.StudyDate.astype(str)

    metadata = Anon.col_norm(metadata)
    metadata['empi_anon'] = Anon.IDanon(metadata['PatientID'], data_type='empi')
    metadata['acc_anon'] = Anon.IDanon(metadata['AccessionNumber'], data_type='rad_acc')
    metadata['study_date_anon'] = Anon.TScol(metadata['PatientID'], metadata['StudyDate_modified'])
    # Snapshot containing both original and anonymized values.
    metadata.to_csv('metadata_orig_and_anon.csv')

    # Only drop columns that actually exist; drop() raises on unknown labels.
    present_cols_to_drop = [col for col in _COLS_TO_DROP if col in metadata]
    clean_data = metadata.drop(present_cols_to_drop, axis=1)
    Anon.save_keys()
    return clean_data
|
||
|
||
if __name__ == "__main__":
    # Usage: python3 metadata_anonymization.py <input_metadata.csv> <output.csv>
    if len(sys.argv) < 3:
        sys.exit("Usage: python3 metadata_anonymization.py <input_metadata.csv> <output.csv>")
    metadata_path = sys.argv[1]
    output_path = sys.argv[2]

    # NOTE(review): the key and whitelist locations are placeholders and must
    # be replaced with real paths before this script can run.
    Anon = EmoryAnon('/path/to/anon_keys', '/path/to/whitelist')
    Anon.load_recentMasterKey()

    metadata = pd.read_csv(metadata_path, low_memory=False)
    # NOTE(review): an earlier revision collected the columns that are more
    # than 90% null into a list that was never used; that dead computation was
    # removed. Confirm whether those columns were meant to be dropped here.

    clean_data = anonymization(metadata, Anon)
    clean_data.to_csv(output_path, index=False)
File renamed without changes.