Skip to content

Commit

Permalink
Handle large files that don't have PE info in VirusTotal
Browse files: browse the repository at this point in the history
  • Loading branch information
m417z committed Nov 22, 2023
1 parent 4de6807 commit 661e5aa
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions data/upd04_get_virustotal_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,8 +101,14 @@ def get_virustotal_data_for_file(session: requests.Session, file_hash, output_di
_ = virustotal_json['data']['attributes']['pe_info']['sections'][0]
result = 'ok'
except KeyError:
prefix = '_no_pe_info_' # no PE info, need to rescan it on VirusTotal
result = 'no_pe_info'
# VirusTotal often doesn't have PE information for large files.
# https://twitter.com/sixtyvividtails/status/1697355272568643970
if virustotal_json['data']['attributes']['size'] > 250000000:
prefix = '_too_large_no_pe_info_'
result = 'too_large_no_pe_info'
else:
prefix = '_no_pe_info_' # no PE info, need to rescan it on VirusTotal
result = 'no_pe_info'
except json.JSONDecodeError:
prefix = '_not_json_'
result = 'not_json'
Expand Down Expand Up @@ -174,6 +180,8 @@ def chunks(lst, n):
elif file_result == 'not_found':
assert False, (name, hash)
# result['not_found'].add((name, hash))
elif file_result == 'too_large_no_pe_info':
result['not_found'].add((name, hash))
else:
print(f'WARNING: got result {file_result} for {hash} ({name})')
result['failed'].add((name, hash))
Expand Down

0 comments on commit 661e5aa

Please sign in to comment.