Skip to content

Commit

Permalink
Merge pull request #331 from Emory-HITI/dev
Browse files Browse the repository at this point in the history
Make anonymizations configurable in Suvpar
  • Loading branch information
pradeeban authored Jul 29, 2022
2 parents 9abcdda + 07cd42b commit 72162a2
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 25 deletions.
4 changes: 4 additions & 0 deletions modules/suvpar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ Find the config.json file in the folder and modify accordingly.

* *IsStatistics*: Set to true to produce the statistics; otherwise set to false (default).

* *IsFinalCSV*: Whether to drop the intermediate fields and produce the final CSV. By default, true. If false, only the pre-processing is performed: the data is anonymized (when *IsAnonymized* is true) and an intermediate file that is ready for Suvpar processing is produced.

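* *IsAnonymized*: Whether to anonymize the sensitive PHI headers (AccessionNumber, InstitutionAddress, PatientID, and SeriesInstanceUID) by replacing their values with SHA-256 hashes. By default, true.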

# Running Niffler SUVPaR

First, run the script to trim the file.
Expand Down
46 changes: 24 additions & 22 deletions modules/suvpar/Suvpar.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
sta = {}
statistics_csv = {}
output_csv = {}
final_csv = True


def initialize():
global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics
global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv, isAnonymized
with open('config.json', 'r') as f:
config = json.load(f)

Expand All @@ -24,6 +23,8 @@ def initialize():
scanner_filter = bool(config['ScannerFilter'])
statistics_csv = config['Statistics_File']
isStatistics = bool(config['IsStatistics'])
final_csv = bool(config['IsFinalCSV'])
isAnonymized = bool(config['IsAnonymized'])
text_file = open(feature_file, "r")
feature_list = text_file.read().split('\n')
# Consider some Device Serial Number and remove other.
Expand Down Expand Up @@ -61,26 +62,27 @@ def suvpar():
# Check for the AcquisitionTime > SeriesTime case, currently observed in Philips and FONAR scanners.
df['AltCase'] = numpy.where(df['Manufacturer'].str.contains('Philips|FONAR'), True, False)

# Apply hashing function to the column.
df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['PatientID'] = df['PatientID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)
if isAnonymized:
# Apply hashing function to the column.
df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['PatientID'] = df['PatientID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

# Add computed non-DICOM fields and drop a few attributes, if we are producing a final_csv and not an intermediate.
if final_csv:
Expand Down
6 changes: 4 additions & 2 deletions modules/suvpar/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"OutputFile": "output.csv",
"ScannerDetails": "scanner.txt",
"ScannerFilter": false,
"FeaturesetFile": "featureset.txt",
"FeaturesetFile": "featureset1.txt",
"IsStatistics": false,
"Statistics_File": "statistic.csv"
"Statistics_File": "statistic.csv",
"IsFinalCSV": true,
"IsAnonymized": true
}
3 changes: 2 additions & 1 deletion modules/suvpar/featureset1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ AcquisitionDuration
Modality
Manufacturer
ManufacturerModelName
SeriesInstanceUID
SeriesInstanceUID
[SliceMeasurementDuration]

0 comments on commit 72162a2

Please sign in to comment.