Skip to content

Commit

Permalink
Add IsAnonymized as a config for Suvpar
Browse files Browse the repository at this point in the history
  • Loading branch information
pradeeban committed Jul 29, 2022
1 parent 300b3d3 commit 07cd42b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 22 deletions.
2 changes: 2 additions & 0 deletions modules/suvpar/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Find the config.json file in the folder and modify accordingly.

* *IsFinalCSV*: Whether to drop the intermediate fields and produce the final CSV. By default, true. If false, the script only pre-processes the data (including anonymization, when enabled) and prepares an intermediate file that is ready for Suvpar processing.

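* *IsAnonymized*: Whether to anonymize certain sensitive PHI headers (AccessionNumber, InstitutionAddress, PatientID, and SeriesInstanceUID) by replacing their values with SHA-256 hashes. By default, true.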

# Running Niffler SUVPaR

First, run the script to trim the file.
Expand Down
44 changes: 23 additions & 21 deletions modules/suvpar/Suvpar.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def initialize():
global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv
global output_csv, df, device_SN, scanner_filter, statistics_csv, isStatistics, final_csv, isAnonymized
with open('config.json', 'r') as f:
config = json.load(f)

Expand All @@ -24,6 +24,7 @@ def initialize():
statistics_csv = config['Statistics_File']
isStatistics = bool(config['IsStatistics'])
final_csv = bool(config['IsFinalCSV'])
isAnonymized = bool(config['IsAnonymized'])
text_file = open(feature_file, "r")
feature_list = text_file.read().split('\n')
# Consider some Device Serial Number and remove other.
Expand Down Expand Up @@ -61,26 +62,27 @@ def suvpar():
# Check for the AcquisitionTime > SeriesTime case, currently observed in Philips and FONAR scanners.
df['AltCase'] = numpy.where(df['Manufacturer'].str.contains('Philips|FONAR'), True, False)

# Apply hashing function to the column.
df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['PatientID'] = df['PatientID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)
if isAnonymized:
# Apply hashing function to the column.
df['AccessionNumber'] = df['AccessionNumber'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['InstitutionAddress'] = df['InstitutionAddress'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['PatientID'] = df['PatientID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

df['SeriesInstanceUID'] = df['SeriesInstanceUID'].astype(str).apply(
lambda x:
hashlib.sha256(x.encode()).hexdigest()
)

# Add computed non-DICOM fields and drop a few attributes, if we are producing a final_csv and not an intermediate.
if final_csv:
Expand Down
3 changes: 2 additions & 1 deletion modules/suvpar/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
"FeaturesetFile": "featureset1.txt",
"IsStatistics": false,
"Statistics_File": "statistic.csv",
"IsFinalCSV": true
"IsFinalCSV": true,
"IsAnonymized": true
}

0 comments on commit 07cd42b

Please sign in to comment.