From c51fda2e81be7043dca8fbf885b9a8bafe562696 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Fri, 29 Jul 2022 10:21:58 -0400 Subject: [PATCH 1/4] Add SliceMeasurementDuration to suvpar output --- modules/suvpar/featureset1.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/suvpar/featureset1.txt b/modules/suvpar/featureset1.txt index a691cdd..6154ad3 100644 --- a/modules/suvpar/featureset1.txt +++ b/modules/suvpar/featureset1.txt @@ -21,4 +21,5 @@ AcquisitionDuration Modality Manufacturer ManufacturerModelName -SeriesInstanceUID \ No newline at end of file +SeriesInstanceUID +[SliceMeasurementDuration] \ No newline at end of file From a7dd7148c915cbf60795929ef12195988141a046 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Fri, 29 Jul 2022 10:25:09 -0400 Subject: [PATCH 2/4] Use the minimal featureset1.txt by default. --- modules/suvpar/config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/suvpar/config.json b/modules/suvpar/config.json index b62c643..064cdcd 100644 --- a/modules/suvpar/config.json +++ b/modules/suvpar/config.json @@ -3,7 +3,7 @@ "OutputFile": "output.csv", "ScannerDetails": "scanner.txt", "ScannerFilter": false, - "FeaturesetFile": "featureset.txt", + "FeaturesetFile": "featureset1.txt", "IsStatistics": false, "Statistics_File": "statistic.csv" } From ef29bd94f7bb7270e1deaa2a8a8d0f76f388b9cc Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Fri, 29 Jul 2022 11:02:30 -0400 Subject: [PATCH 3/4] Add IsFinalCSV as a config param for Suvpar --- modules/suvpar/README.md | 2 ++ modules/suvpar/Suvpar.py | 4 ++-- modules/suvpar/config.json | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/suvpar/README.md b/modules/suvpar/README.md index 9bfd4b5..c1e892d 100644 --- a/modules/suvpar/README.md +++ b/modules/suvpar/README.md @@ -22,6 +22,8 @@ Find the config.json file in the folder and modify accordingly. 
* *IsStatistics*: If you want the statistics then set it as true otherwise false(default). +* *IsFinalCSV*: Whether to drop the intermediate fields and produce the final CSV. By default, true. If false, the data is only pre-processed (anonymized) to prepare an intermediate file that is ready for Suvpar processing. + # Running Niffler SUVPaR First, to run the script to trim the file. diff --git a/modules/suvpar/Suvpar.py b/modules/suvpar/Suvpar.py index 1e90479..6a55275 100644 --- a/modules/suvpar/Suvpar.py +++ b/modules/suvpar/Suvpar.py @@ -9,11 +9,10 @@ sta = {} statistics_csv = {} output_csv = {} -final_csv = True def initialize(): - global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics + global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv with open('config.json', 'r') as f: config = json.load(f) @@ -24,6 +23,7 @@ def initialize(): scanner_filter = bool(config['ScannerFilter']) statistics_csv = config['Statistics_File'] isStatistics = bool(config['IsStatistics']) + final_csv = bool(config['IsFinalCSV']) text_file = open(feature_file, "r") feature_list = text_file.read().split('\n') # Consider some Device Serial Number and remove other. 
diff --git a/modules/suvpar/config.json b/modules/suvpar/config.json index b62c643..ce589fe 100644 --- a/modules/suvpar/config.json +++ b/modules/suvpar/config.json @@ -5,5 +5,6 @@ "ScannerFilter": false, "FeaturesetFile": "featureset.txt", "IsStatistics": false, - "Statistics_File": "statistic.csv" + "Statistics_File": "statistic.csv", + "IsFinalCSV": true } From 07cd42b2536c130683ecb5a6244e878294982993 Mon Sep 17 00:00:00 2001 From: Pradeeban Kathiravelu Date: Fri, 29 Jul 2022 11:14:26 -0400 Subject: [PATCH 4/4] Add IsAnonymized as a config for Suvpar --- modules/suvpar/README.md | 2 ++ modules/suvpar/Suvpar.py | 44 ++++++++++++++++++++------------------ modules/suvpar/config.json | 3 ++- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/modules/suvpar/README.md b/modules/suvpar/README.md index c1e892d..0a6e96b 100644 --- a/modules/suvpar/README.md +++ b/modules/suvpar/README.md @@ -24,6 +24,8 @@ Find the config.json file in the folder and modify accordingly. * *IsFinalCSV*: Whether to drop the intermediate fields and produce the final CSV. By default, true. If false, the data is only pre-processed (anonymized) to prepare an intermediate file that is ready for Suvpar processing. +* *IsAnonymized*: Whether to anonymize the sensitive PHI headers (AccessionNumber, InstitutionAddress, PatientID, and SeriesInstanceUID) by replacing their values with SHA-256 hashes. By default, true. + # Running Niffler SUVPaR First, to run the script to trim the file. 
diff --git a/modules/suvpar/Suvpar.py b/modules/suvpar/Suvpar.py index 6a55275..362fda4 100644 --- a/modules/suvpar/Suvpar.py +++ b/modules/suvpar/Suvpar.py @@ -12,7 +12,7 @@ def initialize(): - global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv + global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv, isAnonymized with open('config.json', 'r') as f: config = json.load(f) @@ -24,6 +24,7 @@ def initialize(): statistics_csv = config['Statistics_File'] isStatistics = bool(config['IsStatistics']) final_csv = bool(config['IsFinalCSV']) + isAnonymized = bool(config['IsAnonymized']) text_file = open(feature_file, "r") feature_list = text_file.read().split('\n') # Consider some Device Serial Number and remove other. @@ -61,26 +62,27 @@ def suvpar(): # Check for the AcquisitionTime > SeriesTime case, currently observed in Philips and FONAR scanners. df['AltCase'] = numpy.where(df['Manufacturer'].str.contains('Philips|FONAR'), True, False) - # Apply hashing function to the column. - df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply( - lambda x: - hashlib.sha256(x.encode()).hexdigest() - ) - - df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply( - lambda x: - hashlib.sha256(x.encode()).hexdigest() - ) - - df['PatientID'] = df['PatientID'].astype(str).apply( - lambda x: - hashlib.sha256(x.encode()).hexdigest() - ) - - df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply( - lambda x: - hashlib.sha256(x.encode()).hexdigest() - ) + if isAnonymized: + # Apply hashing function to the column. 
+ df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply( + lambda x: + hashlib.sha256(x.encode()).hexdigest() + ) + + df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply( + lambda x: + hashlib.sha256(x.encode()).hexdigest() + ) + + df['PatientID'] = df['PatientID'].astype(str).apply( + lambda x: + hashlib.sha256(x.encode()).hexdigest() + ) + + df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply( + lambda x: + hashlib.sha256(x.encode()).hexdigest() + ) # Add computed non-DICOM fields and drop a few attributes, if we are producing a final_csv and not an intermediate. if final_csv: diff --git a/modules/suvpar/config.json b/modules/suvpar/config.json index dfffadf..1dae356 100644 --- a/modules/suvpar/config.json +++ b/modules/suvpar/config.json @@ -6,5 +6,6 @@ "FeaturesetFile": "featureset1.txt", "IsStatistics": false, "Statistics_File": "statistic.csv", - "IsFinalCSV": true + "IsFinalCSV": true, + "IsAnonymized": true }