Skip to content

Commit

Permalink
Merge pull request #1448 from psavery/dicomweb-infer-file-size
Browse files Browse the repository at this point in the history
Infer DICOM file size, when possible
  • Loading branch information
psavery authored Jan 31, 2024
2 parents b4cbf6f + c981c54 commit 90a3a26
Showing 1 changed file with 131 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,41 +139,14 @@ def setContentHeaders(self, file, offset, endByte, contentDisposition=None):
def downloadFile(self, file, offset=0, headers=True, endByte=None,
contentDisposition=None, extraParameters=None, **kwargs):

from dicomweb_client.web import _Transaction

dicom_uids = file['dicom_uids']
study_uid = dicom_uids['study_uid']
series_uid = dicom_uids['series_uid']
instance_uid = dicom_uids['instance_uid']

client = _create_dicomweb_client(self.assetstore_meta)

if headers:
setResponseHeader('Accept-Ranges', 'bytes')
self.setContentHeaders(file, offset, endByte, contentDisposition)

# Create the URL
url = client._get_instances_url(
_Transaction.RETRIEVE,
study_uid,
series_uid,
instance_uid,
)

# Build the headers
transfer_syntax = '*'
accept_parts = [
'multipart/related',
'type="application/dicom"',
f'transfer-syntax={transfer_syntax}',
]
request_headers = {
'Accept': '; '.join(accept_parts),
}

def stream():
# Perform the request
response = client._http_get(url, headers=request_headers, stream=True)
# Try a single-part download first. If that doesn't work, do multipart.
response = self._request_retrieve_instance_prefer_singlepart(file)

bytes_read = 0
for chunk in self._stream_retrieve_instance_response(response):
Expand Down Expand Up @@ -203,6 +176,76 @@ def stream():

return stream

def _request_retrieve_instance_prefer_singlepart(self, file, transfer_syntax='*'):
    """
    Request a DICOM instance, preferring a single-part response.

    A single-part request is attempted first. If that request raises
    ``requests.HTTPError``, or if the response it returns is not a
    single-part response, a multipart request is performed instead.

    :param file: the file document, passed through to
        ``_request_retrieve_instance``.
    :param transfer_syntax: the transfer syntax to ask for in the
        ``Accept`` header of both requests.
    :returns: the response of whichever request was ultimately used.
        Exceptions raised by the multipart fallback request are not
        caught here and propagate to the caller.
    """
    response = None
    try:
        response = self._request_retrieve_instance(file, multipart=False,
                                                   transfer_syntax=transfer_syntax)
    except requests.HTTPError:
        # If there is an HTTPError, the server might not accept single-part requests...
        pass

    if self._is_singlepart_response(response):
        return response

    if response is not None:
        # The single-part attempt was made with stream=True, so its body has
        # not been consumed. Close it before retrying; otherwise the
        # underlying connection is never released.
        response.close()

    # Perform the multipart request instead
    return self._request_retrieve_instance(file, transfer_syntax=transfer_syntax)

def _request_retrieve_instance(self, file, multipart=True, transfer_syntax='*'):
    """
    Issue a streaming GET request to retrieve a DICOM instance.

    Multipart requests are officially supported by the DICOMweb standard.
    Single-part requests are not officially supported, but they are easier
    to work with, and some servers such as the Google Healthcare API
    support them.
    See: https://cloud.google.com/healthcare-api/docs/dicom#dicom_instances

    :param file: the file document used to build the instance URL.
    :param multipart: if True, ask for ``multipart/related`` content;
        otherwise ask for a bare ``application/dicom`` body.
    :param transfer_syntax: the value placed in the ``transfer-syntax``
        parameter of the ``Accept`` header.
    :returns: whatever the client's ``_http_get`` returns for the request
        (made with ``stream=True``).
    """
    dicomweb_client = _create_dicomweb_client(self.assetstore_meta)
    instance_url = self._create_retrieve_instance_url(dicomweb_client, file)

    if not multipart:
        # Non-standard, but simpler to consume when the server supports it.
        accept = f'application/dicom; transfer-syntax={transfer_syntax}'
    else:
        # The form that the DICOMweb standard officially supports.
        accept = '; '.join([
            'multipart/related',
            'type="application/dicom"',
            f'transfer-syntax={transfer_syntax}',
        ])

    return dicomweb_client._http_get(instance_url, headers={'Accept': accept}, stream=True)

def _create_retrieve_instance_url(self, client, file):
    """
    Build the retrieve URL for the DICOM instance described by ``file``.

    :param client: the DICOMweb client whose ``_get_instances_url`` is used
        to build the URL.
    :param file: the file document; its ``dicom_uids`` entry must contain
        ``study_uid``, ``series_uid`` and ``instance_uid``.
    :returns: the value returned by ``client._get_instances_url``.
    """
    from dicomweb_client.web import _Transaction

    uids = file['dicom_uids']
    # Study, series and instance UIDs, in the order the client expects them.
    uid_path = [uids[key] for key in ('study_uid', 'series_uid', 'instance_uid')]

    return client._get_instances_url(_Transaction.RETRIEVE, *uid_path)

def _stream_retrieve_instance_response(self, response):
    """
    Yield the DICOM file content of ``response`` in chunks.

    The ``Accept`` header of the original request decides how the body is
    read: if it asked for ``multipart/related``, the body is handed to
    ``_stream_dicom_multipart_response``; otherwise the body is assumed to
    contain *only* the DICOM file and is streamed as-is.

    :param response: the response to a retrieve-instance request.
    """
    requested_accept = response.request.headers.get('Accept', '')

    if 'multipart/related' not in requested_accept:
        # Single-part: the raw content is the DICOM file itself.
        with response:
            yield from response.iter_content(BUF_SIZE)
        return

    yield from self._stream_dicom_multipart_response(response)

def _extract_media_type_and_boundary(self, response):
content_type = response.headers['content-type']
media_type, *ct_info = (ct.strip() for ct in content_type.split(';'))
Expand All @@ -215,7 +258,7 @@ def _extract_media_type_and_boundary(self, response):

return media_type, boundary

def _stream_retrieve_instance_response(self, response):
def _stream_dicom_multipart_response(self, response):
# The first part of this function was largely copied from dicomweb-client's
# _decode_multipart_message() function. But we can't use that function here
# because it relies on reading the whole DICOM file into memory. We want to
Expand Down Expand Up @@ -307,6 +350,50 @@ def _stream_retrieve_instance_response(self, response):
msg = 'Failed to find ending boundary in response content'
raise ValueError(msg)

def _infer_file_size(self, file):
    """
    Try to determine the file size without streaming the whole file.

    :param file: the file document.
    :returns: the size already stored in ``file['size']`` when it is not
        None; otherwise the result of the single-part Content-Length
        inference (currently the only inference method).
    """
    known_size = file.get('size')
    if known_size is None:
        # Not determined yet: fall back to the only inference method we have.
        return self._infer_file_size_singlepart_content_length(file)

    return known_size

def _is_singlepart_response(self, response):
    """
    Report whether ``response`` is a successful single-part response.

    :param response: a response object, or None if no response was obtained.
    :returns: True if the status code is 200 and the Content-Type header
        mentions neither ``multipart/related`` nor ``boundary``; False
        otherwise (including when ``response`` is None).
    """
    if response is None:
        return False

    # The header may be missing entirely, in which case `.get()` returns None
    # and the substring checks below would raise a TypeError. Media type names
    # are case-insensitive, so normalize the case before matching.
    content_type = (response.headers.get('Content-Type') or '').lower()
    return (
        response.status_code == 200 and
        not any(x in content_type for x in ('multipart/related', 'boundary'))
    )

def _infer_file_size_singlepart_content_length(self, file):
    """
    Infer the DICOM file size from a single-part response's Content-Length.

    A single-part retrieve request is made and only its headers are
    inspected; the body is never read. The original author notes that this
    works for the Google Healthcare API.

    :param file: the file document.
    :returns: the file size in bytes, or None if it could not be inferred
        (the request raised ``requests.HTTPError``, the response was not
        single-part, a content encoding was applied, or Content-Length was
        missing or not a valid non-negative integer).
    """
    try:
        response = self._request_retrieve_instance(file, multipart=False)
    except requests.HTTPError:
        # If there is an HTTPError, the server might not accept single-part requests...
        return None

    try:
        if not self._is_singlepart_response(response):
            # Does not support single-part requests...
            return None

        content_encoding = (response.headers.get('Content-Encoding') or '').strip().lower()
        if content_encoding not in ('', 'identity'):
            # With a content coding (e.g. gzip) the Content-Length counts the
            # encoded bytes on the wire, not the size of the DICOM file.
            return None

        content_length = response.headers.get('Content-Length')
        if not content_length:
            # The server did not return a Content-Length
            return None

        try:
            # The DICOM file size is equal to the Content-Length
            size = int(content_length)
        except ValueError:
            return None

        # A negative length is invalid; treat it as "unknown".
        return size if size >= 0 else None
    finally:
        # Only the headers were needed. The request was made with stream=True,
        # so close the response to release the connection without downloading
        # the body.
        if response is not None:
            response.close()

def importData(self, parent, parentType, params, progress, user, **kwargs):
"""
Import DICOMweb WSI instances from a DICOMweb server.
Expand Down Expand Up @@ -408,7 +495,10 @@ def importData(self, parent, parentType, params, progress, user, **kwargs):
'instance_uid': instance_uid,
}
file['imported'] = True
File().save(file)

# Try to infer the file size without streaming, if possible.
file['size'] = self._infer_file_size(file)
file = File().save(file)

items.append(item)

Expand All @@ -420,16 +510,23 @@ def auth_session(self):

def getFileSize(self, file):
# This function will compute the size of the DICOM file (a potentially
# expensive operation, since it may have to stream the whole file),
# and cache the result in file['size'].
# expensive operation, since it may have to stream the whole file).
# The caller is expected to cache the result in file['size'].
# This function is called when the size is needed, such as the girder
# fuse mount code, and range requests.
if file.get('size') is not None:
# It has already been computed once. Return the cached size.
return file['size']

# Try to infer the file size without streaming, if possible.
size = self._infer_file_size(file)
if size:
return size

# We must stream the whole file to get the file size...
size = 0
for chunk in self.downloadFile(file, headers=False)():
response = self._request_retrieve_instance_prefer_singlepart(file)
for chunk in self._stream_retrieve_instance_response(response):
size += len(chunk)

# This should get cached in file['size'] in File().updateSize().
Expand Down

0 comments on commit 90a3a26

Please sign in to comment.