diff --git a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py index 368270f37..123cc4cbf 100644 --- a/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py +++ b/sources/dicom/large_image_source_dicom/assetstore/dicomweb_assetstore_adapter.py @@ -139,41 +139,14 @@ def setContentHeaders(self, file, offset, endByte, contentDisposition=None): def downloadFile(self, file, offset=0, headers=True, endByte=None, contentDisposition=None, extraParameters=None, **kwargs): - from dicomweb_client.web import _Transaction - - dicom_uids = file['dicom_uids'] - study_uid = dicom_uids['study_uid'] - series_uid = dicom_uids['series_uid'] - instance_uid = dicom_uids['instance_uid'] - - client = _create_dicomweb_client(self.assetstore_meta) - if headers: setResponseHeader('Accept-Ranges', 'bytes') self.setContentHeaders(file, offset, endByte, contentDisposition) - # Create the URL - url = client._get_instances_url( - _Transaction.RETRIEVE, - study_uid, - series_uid, - instance_uid, - ) - - # Build the headers - transfer_syntax = '*' - accept_parts = [ - 'multipart/related', - 'type="application/dicom"', - f'transfer-syntax={transfer_syntax}', - ] - request_headers = { - 'Accept': '; '.join(accept_parts), - } - def stream(): # Perform the request - response = client._http_get(url, headers=request_headers, stream=True) + # Try a single-part download first. If that doesn't work, do multipart. + response = self._request_retrieve_instance_prefer_singlepart(file) bytes_read = 0 for chunk in self._stream_retrieve_instance_response(response): @@ -203,6 +176,76 @@ def stream(): return stream + def _request_retrieve_instance_prefer_singlepart(self, file, transfer_syntax='*'): + # Try to perform a singlepart request. If it fails, perform a multipart request + # instead. + response = None + try: + response = self._request_retrieve_instance(file, multipart=False, + transfer_syntax=transfer_syntax) + except requests.HTTPError: + # If there is an HTTPError, the server might not accept single-part requests... + pass + + if self._is_singlepart_response(response): + return response + + # Perform the multipart request instead + return self._request_retrieve_instance(file, transfer_syntax=transfer_syntax) + + def _request_retrieve_instance(self, file, multipart=True, transfer_syntax='*'): + # Multipart requests are officially supported by the DICOMweb standard. + # Singlepart requests are not officially supported, but they are easier + # to work with. + # Google Healthcare API support it. + # See here: https://cloud.google.com/healthcare-api/docs/dicom#dicom_instances + + # Create the URL + client = _create_dicomweb_client(self.assetstore_meta) + url = self._create_retrieve_instance_url(client, file) + + # Build the headers + headers = {} + if multipart: + # This is officially supported by the DICOMweb standard. + headers['Accept'] = '; '.join(( + 'multipart/related', + 'type="application/dicom"', + f'transfer-syntax={transfer_syntax}', + )) + else: + # This is not officially supported by the DICOMweb standard, + # but it is easier to work with, and some servers such as + # Google Healthcare API support it. + # See here: https://cloud.google.com/healthcare-api/docs/dicom#dicom_instances + headers['Accept'] = f'application/dicom; transfer-syntax={transfer_syntax}' + + return client._http_get(url, headers=headers, stream=True) + + def _create_retrieve_instance_url(self, client, file): + from dicomweb_client.web import _Transaction + + dicom_uids = file['dicom_uids'] + study_uid = dicom_uids['study_uid'] + series_uid = dicom_uids['series_uid'] + instance_uid = dicom_uids['instance_uid'] + + return client._get_instances_url( + _Transaction.RETRIEVE, + study_uid, + series_uid, + instance_uid, + ) + + def _stream_retrieve_instance_response(self, response): + # Check if the original request asked for multipart data + if 'multipart/related' in response.request.headers.get('Accept', ''): + yield from self._stream_dicom_multipart_response(response) + else: + # The content should *only* contain the DICOM file + with response: + yield from response.iter_content(BUF_SIZE) + def _extract_media_type_and_boundary(self, response): content_type = response.headers['content-type'] media_type, *ct_info = (ct.strip() for ct in content_type.split(';')) @@ -215,7 +258,7 @@ def _extract_media_type_and_boundary(self, response): return media_type, boundary - def _stream_retrieve_instance_response(self, response): + def _stream_dicom_multipart_response(self, response): # The first part of this function was largely copied from dicomweb-client's # _decode_multipart_message() function. But we can't use that function here # because it relies on reading the whole DICOM file into memory. We want to @@ -307,6 +350,50 @@ def _stream_retrieve_instance_response(self, response): msg = 'Failed to find ending boundary in response content' raise ValueError(msg) + def _infer_file_size(self, file): + # Try various methods to infer the file size, without streaming the + # whole file. Returns the file size if successful, or `None` if unsuccessful. + if file.get('size') is not None: + # The file size was already determined. + return file['size'] + + # Only method currently is inferring from single-part content_length + return self._infer_file_size_singlepart_content_length(file) + + def _is_singlepart_response(self, response): + if response is None: + return False + + content_type = response.headers.get('Content-Type') + return ( + response.status_code == 200 and + not any(x in content_type for x in ('multipart/related', 'boundary')) + ) + + def _infer_file_size_singlepart_content_length(self, file): + # First, try to see if single-part requests work, and if the Content-Length + # is returned. This works for Google Healthcare API. + try: + response = self._request_retrieve_instance(file, multipart=False) + except requests.HTTPError: + # If there is an HTTPError, the server might not accept single-part requests... + return + + if not self._is_singlepart_response(response): + # Does not support single-part requests... + return + + content_length = response.headers.get('Content-Length') + if not content_length: + # The server did not return a Content-Length + return + + try: + # The DICOM file size is equal to the Content-Length + return int(content_length) + except ValueError: + return + def importData(self, parent, parentType, params, progress, user, **kwargs): """ Import DICOMweb WSI instances from a DICOMweb server. @@ -408,7 +495,10 @@ def importData(self, parent, parentType, params, progress, user, **kwargs): 'instance_uid': instance_uid, } file['imported'] = True - File().save(file) + + # Try to infer the file size without streaming, if possible. + file['size'] = self._infer_file_size(file) + file = File().save(file) items.append(item) @@ -420,16 +510,23 @@ def auth_session(self): def getFileSize(self, file): # This function will compute the size of the DICOM file (a potentially - # expensive operation, since it may have to stream the whole file), - # and cache the result in file['size']. + # expensive operation, since it may have to stream the whole file). + # The caller is expected to cache the result in file['size']. # This function is called when the size is needed, such as the girder # fuse mount code, and range requests. if file.get('size') is not None: # It has already been computed once. Return the cached size. return file['size'] + # Try to infer the file size without streaming, if possible. + size = self._infer_file_size(file) + if size: + return size + + # We must stream the whole file to get the file size... size = 0 - for chunk in self.downloadFile(file, headers=False)(): + response = self._request_retrieve_instance_prefer_singlepart(file) + for chunk in self._stream_retrieve_instance_response(response): size += len(chunk) # This should get cached in file['size'] in File().updateSize().