Skip to content

Commit

Permalink
Infer DICOM file size, when possible
Browse files Browse the repository at this point in the history
Google Healthcare API supports single-part requests for DICOM files.
Single-part requests are not officially supported by the DICOMweb standard,
but they are easier to work with. Google Healthcare API also provides
the Content-Length. We can utilize this information to infer the DICOM file
size without having to stream the whole file.

This PR adds logic to automatically infer the DICOM file size and set
it, when possible, without streaming the whole file.

Unfortunately, dcm4chee does not support single-part requests, and it does
not return a Content-Length either. So we must still stream the whole file
to get the file size for dcm4chee servers. But if we determine a new way
to infer the file size, we can add it as well.

Signed-off-by: Patrick Avery <[email protected]>
  • Loading branch information
psavery committed Jan 29, 2024
1 parent 2f41f81 commit 721d6a2
Showing 1 changed file with 131 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -139,41 +139,14 @@ def setContentHeaders(self, file, offset, endByte, contentDisposition=None):
def downloadFile(self, file, offset=0, headers=True, endByte=None,
contentDisposition=None, extraParameters=None, **kwargs):

from dicomweb_client.web import _Transaction

dicom_uids = file['dicom_uids']
study_uid = dicom_uids['study_uid']
series_uid = dicom_uids['series_uid']
instance_uid = dicom_uids['instance_uid']

client = _create_dicomweb_client(self.assetstore_meta)

if headers:
setResponseHeader('Accept-Ranges', 'bytes')
self.setContentHeaders(file, offset, endByte, contentDisposition)

# Create the URL
url = client._get_instances_url(
_Transaction.RETRIEVE,
study_uid,
series_uid,
instance_uid,
)

# Build the headers
transfer_syntax = '*'
accept_parts = [
'multipart/related',
'type="application/dicom"',
f'transfer-syntax={transfer_syntax}',
]
request_headers = {
'Accept': '; '.join(accept_parts),
}

def stream():
# Perform the request
response = client._http_get(url, headers=request_headers, stream=True)
# Try a single-part download first. If that doesn't work, do multipart.
response = self._request_retrieve_instance_prefer_singlepart(file)

bytes_read = 0
for chunk in self._stream_retrieve_instance_response(response):
Expand Down Expand Up @@ -203,6 +176,76 @@ def stream():

return stream

def _request_retrieve_instance_prefer_singlepart(self, file, transfer_syntax='*'):
    """
    Retrieve a DICOM instance, preferring a single-part response.

    A single-part request is attempted first. If it raises an HTTP error,
    or the reply is not recognized as a single-part response, a multipart
    request is issued instead.

    :param file: file document forwarded to _request_retrieve_instance().
    :param transfer_syntax: transfer syntax forwarded to the request.
    :returns: the response of whichever request was accepted.
    """
    response = None
    try:
        response = self._request_retrieve_instance(
            file,
            multipart=False,
            transfer_syntax=transfer_syntax,
        )
    except requests.HTTPError:
        # Deliberate best effort: the server might simply not accept
        # single-part requests, so fall through to the multipart request.
        pass

    if self._is_singlepart_response(response):
        return response

    # The single-part attempt produced a response we are not going to use.
    # It was requested in streaming mode, so release its connection
    # explicitly rather than leaving it open.
    if response is not None:
        response.close()

    # Perform the multipart request instead
    return self._request_retrieve_instance(file, transfer_syntax=transfer_syntax)

def _request_retrieve_instance(self, file, multipart=True, transfer_syntax='*'):
    """
    Send a streamed GET request for the DICOM instance described by `file`.

    Multipart requests are officially supported by the DICOMweb standard.
    Single-part requests are not officially supported, but they are easier
    to work with, and some servers such as Google Healthcare API support
    them. See: https://cloud.google.com/healthcare-api/docs/dicom#dicom_instances

    :param file: file document used to build the retrieve URL.
    :param multipart: if True, request `multipart/related` content;
        otherwise request plain `application/dicom` content.
    :param transfer_syntax: value of the `transfer-syntax` Accept parameter.
    :returns: the result of the client's `_http_get()` call.
    """
    dicomweb_client = _create_dicomweb_client(self.assetstore_meta)
    instance_url = self._create_retrieve_instance_url(dicomweb_client, file)

    # Only the leading media type differs between the two request flavors;
    # the transfer-syntax parameter is always the final element.
    if multipart:
        accept_parts = ['multipart/related', 'type="application/dicom"']
    else:
        accept_parts = ['application/dicom']
    accept_parts.append(f'transfer-syntax={transfer_syntax}')

    request_headers = {'Accept': '; '.join(accept_parts)}
    return dicomweb_client._http_get(instance_url, headers=request_headers, stream=True)

def _create_retrieve_instance_url(self, client, file):
    """
    Build the retrieve URL for the instance recorded in `file['dicom_uids']`.

    :param client: DICOMweb client providing `_get_instances_url()`.
    :param file: file document holding the `study_uid`, `series_uid` and
        `instance_uid` entries under its `dicom_uids` key.
    :returns: the value returned by `client._get_instances_url()`.
    """
    from dicomweb_client.web import _Transaction

    uids = file['dicom_uids']
    # Look up all three identifiers before touching the client.
    study, series, instance = (
        uids[key] for key in ('study_uid', 'series_uid', 'instance_uid')
    )

    return client._get_instances_url(_Transaction.RETRIEVE, study, series, instance)

def _stream_retrieve_instance_response(self, response):
    """
    Yield the DICOM file bytes contained in `response`, chunk by chunk.

    The decision is based on the Accept header of the *request* that
    produced the response: multipart requests are decoded by
    _stream_dicom_multipart_response(), anything else is streamed as-is.
    """
    requested_accept = response.request.headers.get('Accept', '')

    if 'multipart/related' not in requested_accept:
        # The content should *only* contain the DICOM file
        with response:
            yield from response.iter_content(BUF_SIZE)
        return

    yield from self._stream_dicom_multipart_response(response)

def _extract_media_type_and_boundary(self, response):
content_type = response.headers['content-type']
media_type, *ct_info = (ct.strip() for ct in content_type.split(';'))
Expand All @@ -215,7 +258,7 @@ def _extract_media_type_and_boundary(self, response):

return media_type, boundary

def _stream_retrieve_instance_response(self, response):
def _stream_dicom_multipart_response(self, response):
# The first part of this function was largely copied from dicomweb-client's
# _decode_multipart_message() function. But we can't use that function here
# because it relies on reading the whole DICOM file into memory. We want to
Expand Down Expand Up @@ -307,6 +350,50 @@ def _stream_retrieve_instance_response(self, response):
msg = 'Failed to find ending boundary in response content'
raise ValueError(msg)

def _infer_file_size(self, file):
    """
    Try to determine the file size without streaming the whole file.

    :param file: file document; an existing non-None `size` entry is reused.
    :returns: the file size if it could be determined, otherwise `None`.
    """
    known_size = file.get('size')
    if known_size is not None:
        # A size was recorded earlier; nothing to infer.
        return known_size

    # The only inference method available right now is reading the
    # Content-Length of a single-part response.
    return self._infer_file_size_singlepart_content_length(file)

def _is_singlepart_response(self, response):
    """
    Report whether `response` is a successful single-part reply.

    :param response: response object exposing `status_code` and `headers`,
        or `None` if no response was obtained.
    :returns: True when the status code is 200 and the Content-Type shows
        no sign of a multipart body; False otherwise (including `None`).
    """
    if response is None:
        return False

    # A missing Content-Type header yields None, which would make the
    # membership tests below raise TypeError; treat it as an empty string.
    # Media types are case-insensitive, so compare in lower case.
    content_type = (response.headers.get('Content-Type') or '').lower()
    looks_multipart = any(
        marker in content_type for marker in ('multipart/related', 'boundary')
    )
    return response.status_code == 200 and not looks_multipart

def _infer_file_size_singlepart_content_length(self, file):
    """
    Infer the file size from the Content-Length of a single-part response.

    This works for servers that accept single-part requests and report a
    Content-Length (for example, Google Healthcare API).

    :param file: file document forwarded to _request_retrieve_instance().
    :returns: the size as an int, or `None` when the request fails, the
        reply is not single-part, or no usable Content-Length is present.
    """
    try:
        response = self._request_retrieve_instance(file, multipart=False)
    except requests.HTTPError:
        # If there is an HTTPError, the server might not accept single-part requests...
        return None

    try:
        if not self._is_singlepart_response(response):
            # Does not support single-part requests...
            return None

        content_length = response.headers.get('Content-Length')
        if not content_length:
            # The server did not return a Content-Length
            return None

        try:
            # The DICOM file size is equal to the Content-Length
            return int(content_length)
        except ValueError:
            return None
    finally:
        # Only the headers were needed. The request was made in streaming
        # mode, so close the response to release the connection without
        # downloading the body.
        if response is not None:
            response.close()

def importData(self, parent, parentType, params, progress, user, **kwargs):
"""
Import DICOMweb WSI instances from a DICOMweb server.
Expand Down Expand Up @@ -408,7 +495,10 @@ def importData(self, parent, parentType, params, progress, user, **kwargs):
'instance_uid': instance_uid,
}
file['imported'] = True
File().save(file)

# Try to infer the file size without streaming, if possible.
file['size'] = self._infer_file_size(file)
file = File().save(file)

items.append(item)

Expand All @@ -420,16 +510,23 @@ def auth_session(self):

def getFileSize(self, file):
# This function will compute the size of the DICOM file (a potentially
# expensive operation, since it may have to stream the whole file),
# and cache the result in file['size'].
# expensive operation, since it may have to stream the whole file).
# The caller is expected to cache the result in file['size'].
# This function is called when the size is needed, such as the girder
# fuse mount code, and range requests.
if file.get('size') is not None:
# It has already been computed once. Return the cached size.
return file['size']

# Try to infer the file size without streaming, if possible.
size = self._infer_file_size(file)
if size:
return size

# We must stream the whole file to get the file size...
size = 0
for chunk in self.downloadFile(file, headers=False)():
response = self._request_retrieve_instance_prefer_singlepart(file)
for chunk in self._stream_retrieve_instance_response(response):
size += len(chunk)

# This should get cached in file['size'] in File().updateSize().
Expand Down

0 comments on commit 721d6a2

Please sign in to comment.