From 4bce4816509456183c6822695c2f061a6c431836 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Mon, 28 Nov 2022 10:12:25 -0500 Subject: [PATCH] Add a dicom source. This uses wsidicom to read dicom files. So far it has only been tested with files that were generated with the wsi2dcm docker from the 1.0.3 deb from https://github.com/GoogleCloudPlatform/wsi-to-dicom-converter, which seems to have issues with some files. This may be refactored to use a different base library, and internal details should be considered provisional. It needs to have internal metadata exposed. --- CHANGELOG.md | 5 +- README.rst | 2 + docs/index.rst | 1 + docs/make_docs.sh | 1 + .../rest/large_image_resource.py | 2 +- requirements-dev-core.txt | 1 + requirements-dev.txt | 1 + requirements-worker.txt | 1 + setup.py | 5 +- .../large_image_source_dicom/__init__.py | 224 ++++++++++++++++++ .../large_image_source_dicom/girder_source.py | 38 +++ sources/dicom/setup.py | 73 ++++++ test/datastore.py | 4 + test/test_source_base.py | 8 +- 14 files changed, 361 insertions(+), 5 deletions(-) create mode 100644 sources/dicom/large_image_source_dicom/__init__.py create mode 100644 sources/dicom/large_image_source_dicom/girder_source.py create mode 100644 sources/dicom/setup.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 30f52f04f..afa23426d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,9 @@ # Change Log -## 1.17.4 +## 1.18.0 + +### Features +- Add a DICOM tile source ([#1005](../../pull/1005)) ### Improvements - Better control dtype on multi sources ([#993](../../pull/993)) diff --git a/README.rst b/README.rst index 20a923746..39712f4c8 100644 --- a/README.rst +++ b/README.rst @@ -134,6 +134,8 @@ Large Image consists of several Python modules designed to work together. These - ``large-image-source-tifffile``: A tile source using the tifffile library that can handle a wide variety of tiff-like files. + - ``large-image-source-dicom``: A tile source for reading DICOM WSI images. 
+ - ``large-image-source-test``: A tile source that generates test tiles, including a simple fractal pattern. Useful for testing extreme zoom levels. - ``large-image-source-dummy``: A tile source that does nothing. diff --git a/docs/index.rst b/docs/index.rst index af410feff..426b4e942 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,7 @@ _build/large_image/modules _build/large_image_source_bioformats/modules _build/large_image_source_deepzoom/modules + _build/large_image_source_dicom/modules _build/large_image_source_dummy/modules _build/large_image_source_gdal/modules _build/large_image_source_mapnik/modules diff --git a/docs/make_docs.sh b/docs/make_docs.sh index 9ec59c2b1..52299df92 100755 --- a/docs/make_docs.sh +++ b/docs/make_docs.sh @@ -20,6 +20,7 @@ python -c 'import large_image_source_multi, json;print(json.dumps(large_image_so sphinx-apidoc -f -o _build/large_image ../large_image sphinx-apidoc -f -o _build/large_image_source_bioformats ../sources/bioformats/large_image_source_bioformats sphinx-apidoc -f -o _build/large_image_source_deepzoom ../sources/deepzoom/large_image_source_deepzoom +sphinx-apidoc -f -o _build/large_image_source_dicom ../sources/dicom/large_image_source_dicom sphinx-apidoc -f -o _build/large_image_source_dummy ../sources/dummy/large_image_source_dummy sphinx-apidoc -f -o _build/large_image_source_gdal ../sources/gdal/large_image_source_gdal sphinx-apidoc -f -o _build/large_image_source_mapnik ../sources/mapnik/large_image_source_mapnik diff --git a/girder/girder_large_image/rest/large_image_resource.py b/girder/girder_large_image/rest/large_image_resource.py index 8c70eb868..98a265691 100644 --- a/girder/girder_large_image/rest/large_image_resource.py +++ b/girder/girder_large_image/rest/large_image_resource.py @@ -448,7 +448,7 @@ def deleteIncompleteTiles(self, params): @describeRoute( Description('List all Girder tile sources with associated extensions, ' 'mime types, and versions. 
Lower values indicate a ' - 'higher priority for an extension of mime type with that ' + 'higher priority for an extension or mime type with that ' 'source.') ) @access.public(scope=TokenScope.DATA_READ) diff --git a/requirements-dev-core.txt b/requirements-dev-core.txt index bae506ce9..8c66707e7 100644 --- a/requirements-dev-core.txt +++ b/requirements-dev-core.txt @@ -1,6 +1,7 @@ # Top level dependencies -e sources/bioformats -e sources/deepzoom +-e sources/dicom -e sources/dummy -e sources/gdal -e sources/multi diff --git a/requirements-dev.txt b/requirements-dev.txt index 7af379671..74bef18e2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,7 @@ girder>=3.0.13.dev6 ; python_version >= '3.8' girder-jobs>=3.0.3 -e sources/bioformats -e sources/deepzoom +-e sources/dicom -e sources/dummy -e sources/gdal -e sources/multi diff --git a/requirements-worker.txt b/requirements-worker.txt index 1f2823871..ca048e3ba 100644 --- a/requirements-worker.txt +++ b/requirements-worker.txt @@ -1,5 +1,6 @@ -e sources/bioformats -e sources/deepzoom +-e sources/dicom -e sources/dummy -e sources/gdal -e sources/multi diff --git a/setup.py b/setup.py index b1214c8a6..f9e868599 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,6 @@ def prerelease_local_scheme(version): 'gdal': [f'large-image-source-gdal{limit_version}'], 'mapnik': [f'large-image-source-mapnik{limit_version}'], 'multi': [f'large-image-source-multi{limit_version}'], - 'nd2': [f'large-image-source-nd2{limit_version}'], 'ometiff': [f'large-image-source-ometiff{limit_version}'], 'openjpeg': [f'large-image-source-openjpeg{limit_version}'], 'openslide': [f'large-image-source-openslide{limit_version}'], @@ -63,6 +62,10 @@ def prerelease_local_scheme(version): sources.update({ 'nd2': [f'large-image-source-nd2{limit_version}'], }) +if sys.version_info >= (3, 8): + sources.update({ + 'dicom': [f'large-image-source-dicom{limit_version}'], + }) extraReqs.update(sources) extraReqs['sources'] = 
list(set(itertools.chain.from_iterable(sources.values()))) extraReqs['all'] = list(set(itertools.chain.from_iterable(extraReqs.values()))) diff --git a/sources/dicom/large_image_source_dicom/__init__.py b/sources/dicom/large_image_source_dicom/__init__.py new file mode 100644 index 000000000..b68305973 --- /dev/null +++ b/sources/dicom/large_image_source_dicom/__init__.py @@ -0,0 +1,224 @@ +import math +import os +import warnings + +import numpy + +from large_image.cache_util import LruCacheMetaclass, methodcache +from large_image.constants import TILE_FORMAT_PIL, SourcePriority +from large_image.exceptions import TileSourceError, TileSourceFileNotFoundError +from large_image.tilesource import FileTileSource +from large_image.tilesource.utilities import _imageToNumpy, _imageToPIL + +wsidicom = None + +try: + from importlib.metadata import PackageNotFoundError + from importlib.metadata import version as _importlib_version +except ImportError: + from importlib_metadata import PackageNotFoundError + from importlib_metadata import version as _importlib_version +try: + __version__ = _importlib_version(__name__) +except PackageNotFoundError: + # package is not installed + pass + + +def _lazyImport(): + """ + Import the wsidicom module. This is done when needed rather than in the + module initialization because it is slow. + """ + global wsidicom + + if wsidicom is None: + try: + import wsidicom + except ImportError: + raise TileSourceError('wsidicom module not found.') + warnings.filterwarnings('ignore', category=UserWarning, module='wsidicom') + warnings.filterwarnings('ignore', category=UserWarning, module='pydicom') + + +class DICOMFileTileSource(FileTileSource, metaclass=LruCacheMetaclass): + """ + Provides tile access to dicom files the wsidicom library can read. 
+ """ + + cacheName = 'tilesource' + name = 'dicom' + extensions = { + None: SourcePriority.LOW, + 'dcm': SourcePriority.PREFERRED, + 'dic': SourcePriority.PREFERRED, + 'dicom': SourcePriority.PREFERRED, + } + mimeTypes = { + None: SourcePriority.FALLBACK, + 'application/dicom': SourcePriority.PREFERRED, + } + + def __init__(self, path, **kwargs): + """ + Initialize the tile class. See the base class for other available + parameters. + + :param path: a filesystem path for the tile source. + """ + super().__init__(path, **kwargs) + + # We want to make a list of paths of files in this item, if multiple, + # or adjacent items in the folder if the item is a single file. We + # filter files with names that have a preferred extension. + path = self._getLargeImagePath() + if not isinstance(path, list): + path = str(path) + if not os.path.isfile(path): + raise TileSourceFileNotFoundError(path) from None + root = os.path.dirname(path) + self._largeImagePath = [ + os.path.join(root, entry) for entry in os.listdir(root) + if os.path.isfile(os.path.join(root, entry)) and + os.path.splitext(entry)[-1][1:] in self.extensions] + if path not in self._largeImagePath: + self._largeImagePath = [path] + # TODO: fail if this file is level-(n) and a file that is + # level-(n-1) exists + else: + self._largeImagePath = path + _lazyImport() + try: + self._dicom = wsidicom.WsiDicom.open(self._largeImagePath) + except Exception: + raise TileSourceError('File cannot be opened via dicom tile source.') + self.sizeX = int(self._dicom.image_size.width) + self.sizeY = int(self._dicom.image_size.height) + self.tileWidth = int(self._dicom.tile_size.width) + self.tileHeight = int(self._dicom.tile_size.height) + self.levels = int(max(1, math.ceil(math.log( + float(max(self.sizeX, self.sizeY)) / self.tileWidth) / math.log(2)) + 1)) + + def __del__(self): + if getattr(self, '_dicom', None) is not None: + try: + self._dicom.close() + finally: + self._dicom = None + + def getNativeMagnification(self): + 
""" + Get the magnification at a particular level. + + :return: magnification, width of a pixel in mm, height of a pixel in mm. + """ + mm_x = mm_y = None + try: + mm_x = self._dicom.base_level.pixel_spacing.width or None + mm_y = self._dicom.base_level.pixel_spacing.height or None + except Exception: + pass + # Estimate the magnification; we don't have a direct value + mag = 0.01 / mm_x if mm_x else None + return { + 'magnification': mag, + 'mm_x': mm_x, + 'mm_y': mm_y, + } + + def getMetadata(self): + """ + Return a dictionary of metadata containing levels, sizeX, sizeY, + tileWidth, tileHeight, magnification, mm_x, mm_y, and frames. + + :returns: metadata dictionary. + """ + result = super().getMetadata() + return result + + def getInternalMetadata(self, **kwargs): + """ + Return additional known metadata about the tile source. Data returned + from this method is not guaranteed to be in any particular format or + have specific values. + + :returns: a dictionary of data or None. + """ + result = {} + return result + + @methodcache() + def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): + frame = self._getFrame(**kwargs) + self._xyzInRange(x, y, z, frame) + x0, y0, x1, y1, step = self._xyzToCorners(x, y, z) + bw = self.tileWidth * step + bh = self.tileHeight * step + level = 0 + levelfactor = 1 + basefactor = self._dicom.base_level.pixel_spacing.width + for checklevel in range(1, len(self._dicom.levels)): + factor = round(self._dicom.levels[checklevel].pixel_spacing.width / basefactor) + if factor <= step: + level = checklevel + levelfactor = factor + else: + break + x0f = int(x0 // levelfactor) + y0f = int(y0 // levelfactor) + x1f = min(int(math.ceil(x1 / levelfactor)), self._dicom.levels[level].size.width) + y1f = min(int(math.ceil(y1 / levelfactor)), self._dicom.levels[level].size.height) + bw = int(bw // levelfactor) + bh = int(bh // levelfactor) + tile = self._dicom.read_region( + (x0f, y0f), self._dicom.levels[level].level, (x1f 
- x0f, y1f - y0f)) + format = TILE_FORMAT_PIL + if tile.width < bw or tile.height < bh: + tile = _imageToNumpy(tile)[0] + tile = numpy.pad( + tile, + ((0, bh - tile.shape[0]), (0, bw - tile.shape[1]), (0, 0)), + 'constant', constant_values=0) + tile = _imageToPIL(tile) + if bw > self.tileWidth or bh > self.tileHeight: + tile = tile.resize((self.tileWidth, self.tileHeight)) + return self._outputTile(tile, format, x, y, z, + pilImageAllowed, numpyAllowed, **kwargs) + + def getAssociatedImagesList(self): + """ + Return a list of associated images. + + :return: the list of image keys. + """ + return [key for key in ['label', 'macro'] if self._getAssociatedImage(key)] + + def _getAssociatedImage(self, imageKey): + """ + Get an associated image in PIL format. + + :param imageKey: the key of the associated image. + :return: the image in PIL format or None. + """ + keyMap = { + 'label': 'read_label', + 'macro': 'read_overview', + } + try: + return getattr(self._dicom, keyMap[imageKey])() + except Exception: + return None + + +def open(*args, **kwargs): + """ + Create an instance of the module class. + """ + return DICOMFileTileSource(*args, **kwargs) + + +def canRead(*args, **kwargs): + """ + Check if an input can be read by the module class. + """ + return DICOMFileTileSource.canRead(*args, **kwargs) diff --git a/sources/dicom/large_image_source_dicom/girder_source.py b/sources/dicom/large_image_source_dicom/girder_source.py new file mode 100644 index 000000000..f9918c32f --- /dev/null +++ b/sources/dicom/large_image_source_dicom/girder_source.py @@ -0,0 +1,38 @@ +import os + +from girder_large_image.girder_tilesource import GirderTileSource + +from girder.models.file import File +from girder.models.folder import Folder +from girder.models.item import Item + +from . 
import DICOMFileTileSource + + +class DICOMGirderTileSource(DICOMFileTileSource, GirderTileSource): + """ + Provides tile access to Girder items with a DICOM file or other files that + the wsidicom library can read. + """ + + cacheName = 'tilesource' + name = 'dicom' + + _mayHaveAdjacentFiles = True + + def _getLargeImagePath(self): + filelist = [ + File().getLocalFilePath(file) for file in Item().childFiles(self.item) + if os.path.splitext(file['name'])[-1][1:] in self.extensions] + if len(filelist) > 1: + return filelist + filelist = [] + folder = Folder().load(self.item['folderId'], force=True) + for item in Folder().childItems(folder): + if len(list(Item().childFiles(item, limit=2))) == 1: + file = next(Item().childFiles(item, limit=2)) + if os.path.splitext(file['name'])[-1][1:] in self.extensions: + filelist.append(File().getLocalFilePath(file)) + # TODO: fail if this file is level-(n) and a file that is + # level-(n-1) exists + return filelist diff --git a/sources/dicom/setup.py b/sources/dicom/setup.py new file mode 100644 index 000000000..ac1c6e359 --- /dev/null +++ b/sources/dicom/setup.py @@ -0,0 +1,73 @@ +import os + +from setuptools import find_packages, setup + +description = 'A DICOM tilesource for large_image.' +long_description = description + '\n\nSee the large-image package for more details.' + + +def prerelease_local_scheme(version): + """ + Return local scheme version unless building on master in CircleCI. + + This function returns the local scheme version number + (e.g. 0.0.0.dev+g) unless building on CircleCI for a + pre-release in which case it ignores the hash and produces a + PEP440 compliant pre-release version number (e.g. 0.0.0.dev). 
+ """ + from setuptools_scm.version import get_local_node_and_date + + if os.getenv('CIRCLE_BRANCH') in ('master', ): + return '' + else: + return get_local_node_and_date(version) + + +try: + from setuptools_scm import get_version + + version = get_version(root='../..', local_scheme=prerelease_local_scheme) + limit_version = f'>={version}' if '+' not in version else '' +except (ImportError, LookupError): + limit_version = '' + +setup( + name='large-image-source-dicom', + use_scm_version={'root': '../..', 'local_scheme': prerelease_local_scheme, + 'fallback_version': 'development'}, + setup_requires=['setuptools-scm'], + description=description, + long_description=long_description, + license='Apache Software License 2.0', + author='Kitware, Inc.', + author_email='kitware@kitware.com', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + ], + install_requires=[ + f'large-image{limit_version}', + 'wsidicom ; python_version >= "3.8"', + 'importlib-metadata<5 ; python_version < "3.8"', + ], + extras_require={ + 'girder': f'girder-large-image{limit_version}', + }, + keywords='large_image, tile source', + packages=find_packages(exclude=['test', 'test.*']), + url='https://github.com/girder/large_image', + python_requires='>=3.6', + entry_points={ + 'large_image.source': [ + 'dicom = large_image_source_dicom:DICOMFileTileSource' + ], + 'girder_large_image.source': [ + 'dicom = large_image_source_dicom.girder_source:DICOMGirderTileSource' + ] + }, +) diff --git a/test/datastore.py b/test/datastore.py index fe9f007ba..51b0a6017 100644 --- a/test/datastore.py +++ b/test/datastore.py @@ -83,6 +83,10 @@ # Source: generated from a tifftools dump with the image descriptions and # topmost layer 
removed. 'extraoverview.tiff': 'sha512:22793cc6285ad11fbb47927c3d546d35e531a73852b79a9248ba489b421792e3a55da61e00079372bcf72a7e11b12e1ee69d553620edf46ff8d86ad2a9da9fc5', # noqa + # DICOM WSI files generated from TCGA-02-0010-01Z-00-DX4...svs and only + # keeping two levels + 'level-0-frames-0-320.dcm': 'sha512:f5002131a6ed1956b563f11159912a5eb502ffece0613d85f917fe42e2e00552767f9d3a5cfb74fd1f1821bd4cf55d9a918e0b8aa900568fc1b6d06058f7cdb9', # noqa + 'level-1-frames-0-20.dcm': 'sha512:f985bd45e934d6a6976c6c6a06704e5db928e8adf819289e5b8efb8e85ce6ca5cf79d6f641d7b9253dfb796ff2039fb52b056f733b27e754bc7d412ae72f86ca', # noqa } diff --git a/test/test_source_base.py b/test/test_source_base.py index 99ac8dde4..2e9f9dd05 100644 --- a/test/test_source_base.py +++ b/test/test_source_base.py @@ -29,6 +29,10 @@ 'skipTiles': r'(TCGA-DU-6399|sample_jp2k_33003)', }, 'deepzoom': {}, + 'dicom': { + 'read': r'\.dcm$', + 'python': sys.version_info >= (3, 8), + }, 'dummy': {'any': True, 'skipTiles': r''}, 'gdal': { 'read': r'\.(jpeg|jp2|ptif|scn|svs|tif.*)$', @@ -69,12 +73,12 @@ 'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles)'}, 'tifffile': { 'read': r'', - 'noread': r'\.(nc|nd2|yml|yaml|json|czi|png|jpeg|jp2)$', + 'noread': r'\.(nc|nd2|yml|yaml|json|czi|png|jpeg|jp2|dcm)$', 'python': sys.version_info >= (3, 7) and sys.version_info < (3, 11), }, 'vips': { 'read': r'', - 'noread': r'\.(nc|nd2|yml|yaml|json|czi|png|svs|scn)$', + 'noread': r'\.(nc|nd2|yml|yaml|json|czi|png|svs|scn|dcm)$', 'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview)' # noqa }, }