From c52f8e5c86530f1dae0a44ff69494c8906481e87 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Mon, 15 Jul 2024 16:58:27 -0400 Subject: [PATCH] Have many sources use lazy imports --- CHANGELOG.md | 2 + .../gdal/large_image_source_gdal/__init__.py | 65 +++++++++++----- .../large_image_source_multi/__init__.py | 25 +++++- .../large_image_source_openjpeg/__init__.py | 19 ++++- .../large_image_source_rasterio/__init__.py | 78 ++++++++++++------- .../large_image_source_tifffile/__init__.py | 5 +- .../zarr/large_image_source_zarr/__init__.py | 19 ++++- 7 files changed, 162 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9d2245d5..51ad74ac1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ - Improve plottable data endpoint to better fetch adjacent items and annotations ([#1573](../../pull/1573), [#1574](../../pull/1574))), [#1575](../../pull/1575))) - Support Girder flat-mount paths ([#1576](../../pull/1576)) +- Lazily import some modules to speed up large_image import speed ([#1577](../../pull/1577)) +- Create or check large images for each item in a folder ([#1572](../../pull/1572)) ## 1.29.2 diff --git a/sources/gdal/large_image_source_gdal/__init__.py b/sources/gdal/large_image_source_gdal/__init__.py index d1fd608f2..49f429c37 100644 --- a/sources/gdal/large_image_source_gdal/__init__.py +++ b/sources/gdal/large_image_source_gdal/__init__.py @@ -20,28 +20,11 @@ import struct import tempfile import threading +from importlib.metadata import PackageNotFoundError +from importlib.metadata import version as _importlib_version import numpy as np import PIL.Image -from osgeo import gdal, gdal_array, gdalconst, osr - -try: - gdal.UseExceptions() -except Exception: - pass - -# isort: off - -# pyproj stopped supporting older pythons, so on those versions its database is -# aging; as such, if on those older versions of python if it is imported before -# gdal, there can be a database version conflict; importing after gdal avoids -# this. -import pyproj - -# isort: on - -from importlib.metadata import PackageNotFoundError -from importlib.metadata import version as _importlib_version from large_image.cache_util import LruCacheMetaclass, methodcache from large_image.constants import (TILE_FORMAT_IMAGE, TILE_FORMAT_NUMPY, @@ -62,6 +45,42 @@ # package is not installed pass +gdal = None +gdal_array = None +gdalconst = None +osr = None +pyproj = None + + +def _lazyImport(): + """ + Import the gdal module. This is done when needed rather than in the + module initialization because it is slow. + """ + global gdal, gdal_array, gdalconst, osr, pyproj + + if gdal is None: + try: + from osgeo import gdal, gdal_array, gdalconst, osr + + try: + gdal.UseExceptions() + except Exception: + pass + + # isort: off + + # pyproj stopped supporting older pythons, so on those versions its + # database is aging; as such, if on those older versions of python + # if it is imported before gdal, there can be a database version + # conflict; importing after gdal avoids this. + import pyproj + + # isort: on + except ImportError: + msg = 'gdal module not found.' + raise TileSourceError(msg) + class GDALFileTileSource(GDALBaseFileTileSource, metaclass=LruCacheMetaclass): """ @@ -91,6 +110,8 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): specify unitsPerPixel. """ super().__init__(path, **kwargs) + _lazyImport() + self.addKnownExtensions() self._bounds = {} self._largeImagePath = self._getLargeImagePath() @@ -300,6 +321,8 @@ def _proj4Proj(proj): :returns: a proj4 projection object. None if the specified projection cannot be created. """ + _lazyImport() + if isinstance(proj, bytes): proj = proj.decode() if not isinstance(proj, str): @@ -940,6 +963,8 @@ def isGeospatial(path): :param path: The path to the file :returns: True if geospatial. """ + _lazyImport() + try: ds = gdal.Open(str(path), gdalconst.GA_ReadOnly) except Exception: @@ -958,6 +983,8 @@ def isGeospatial(path): @classmethod def addKnownExtensions(cls): if not hasattr(cls, '_addedExtensions'): + _lazyImport() + cls._addedExtensions = True cls.extensions = cls.extensions.copy() cls.mimeTypes = cls.mimeTypes.copy() diff --git a/sources/multi/large_image_source_multi/__init__.py b/sources/multi/large_image_source_multi/__init__.py index 21abd152f..1d9743b57 100644 --- a/sources/multi/large_image_source_multi/__init__.py +++ b/sources/multi/large_image_source_multi/__init__.py @@ -10,7 +10,6 @@ from importlib.metadata import version as _importlib_version from pathlib import Path -import jsonschema import numpy as np import yaml @@ -27,6 +26,26 @@ # package is not installed pass +jsonschema = None +_validator = None + + +def _lazyImport(): + """ + Import the jsonschema module. This is done when needed rather than in the + module initialization because it is slow. + """ + global jsonschema, _validator + + if jsonschema is None: + try: + import jsonschema + + _validator = jsonschema.Draft6Validator(MultiSourceSchema) + except ImportError: + msg = 'jsonschema module not found.' + raise TileSourceError(msg) + SourceEntrySchema = { 'type': 'object', @@ -387,8 +406,6 @@ class MultiFileTileSource(FileTileSource, metaclass=LruCacheMetaclass): _defaultTileSize = 256 _maxOpenHandles = 6 - _validator = jsonschema.Draft6Validator(MultiSourceSchema) - def __init__(self, path, **kwargs): """ Initialize the tile class. See the base class for other available @@ -398,6 +415,8 @@ def __init__(self, path, **kwargs): """ super().__init__(path, **kwargs) + _lazyImport() + self._validator = _validator self._largeImagePath = self._getLargeImagePath() self._lastOpenSourceLock = threading.RLock() # 'c' must be first as channels are special because they can have names diff --git a/sources/openjpeg/large_image_source_openjpeg/__init__.py b/sources/openjpeg/large_image_source_openjpeg/__init__.py index 1ba16af3b..607eab6f5 100644 --- a/sources/openjpeg/large_image_source_openjpeg/__init__.py +++ b/sources/openjpeg/large_image_source_openjpeg/__init__.py @@ -25,7 +25,6 @@ from importlib.metadata import version as _importlib_version from xml.etree import ElementTree -import glymur import PIL.Image import large_image @@ -41,6 +40,23 @@ # package is not installed pass +glymur = None + + +def _lazyImport(): + """ + Import the glymur module. This is done when needed rather than in the module + initialization because it is slow. + """ + global glymur + + if glymur is None: + try: + import glymur + except ImportError: + msg = 'glymur module not found.' + raise TileSourceError(msg) + warnings.filterwarnings('ignore', category=UserWarning, module='glymur') @@ -88,6 +104,7 @@ def __init__(self, path, **kwargs): """ super().__init__(path, **kwargs) + _lazyImport() self._largeImagePath = str(self._getLargeImagePath()) self._pixelInfo = {} try: diff --git a/sources/rasterio/large_image_source_rasterio/__init__.py b/sources/rasterio/large_image_source_rasterio/__init__.py index 416912575..f93f9c236 100644 --- a/sources/rasterio/large_image_source_rasterio/__init__.py +++ b/sources/rasterio/large_image_source_rasterio/__init__.py @@ -26,11 +26,6 @@ import numpy as np import PIL.Image -import rasterio as rio -from affine import Affine -from rasterio import warp -from rasterio.enums import ColorInterp, Resampling -from rasterio.errors import RasterioIOError import large_image from large_image.cache_util import LruCacheMetaclass, methodcache @@ -51,11 +46,37 @@ # package is not installed pass -warnings.filterwarnings('ignore', category=rio.errors.NotGeoreferencedWarning, module='rasterio') -rio._env.code_map.pop(1, None) +rio = None +Affine = None + + +def _lazyImport(): + """ + Import the rasterio module. This is done when needed rather than in the + module initialization because it is slow. + """ + global Affine, rio + + print('XXXXXXXXXXXX') + if rio is None: + try: + import affine + import rasterio as rio + import rasterio.warp + + Affine = affine.Affine + + warnings.filterwarnings( + 'ignore', category=rio.errors.NotGeoreferencedWarning, module='rasterio') + rio._env.code_map.pop(1, None) + except ImportError: + msg = 'rasterio module not found.' + raise TileSourceError(msg) def make_crs(projection): + _lazyImport() + if isinstance(projection, str): return rio.CRS.from_string(projection) if isinstance(projection, dict): @@ -87,6 +108,7 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): """ # init the object super().__init__(path, **kwargs) + _lazyImport() self.addKnownExtensions() # create a thread lock @@ -109,7 +131,7 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs): raise TileSourceFileNotFoundError(self._largeImagePath) from None try: self.dataset = rio.open(self._largeImagePath) - except RasterioIOError: + except rio.errors.RasterioIOError: msg = 'File cannot be opened via rasterio.' raise TileSourceError(msg) if self.dataset.driver == 'netCDF': @@ -246,8 +268,8 @@ def _initWithProjection(self, unitsPerPixel=None): # If unitsPerPixel is not specified, the horizontal distance # between -180,0 and +180,0 is used. Some projections (such as # stereographic) will fail in this case; they must have a unitsPerPixel specified. - east, _ = warp.transform(srcCrs, dstCrs, [-180], [0]) - west, _ = warp.transform(srcCrs, dstCrs, [180], [0]) + east, _ = rio.warp.transform(srcCrs, dstCrs, [-180], [0]) + west, _ = rio.warp.transform(srcCrs, dstCrs, [180], [0]) self.unitsAcrossLevel0 = abs(east[0] - west[0]) if not self.unitsAcrossLevel0: msg = 'unitsPerPixel must be specified for this projection' @@ -385,7 +407,7 @@ def getBounds(self, crs=None, **kwargs): # set the vertical bounds # some projection system don't cover the poles so we need to adapt # the values of ybounds accordingly - has_poles = warp.transform(4326, dstCrs, [0], [90])[1][0] != float('inf') + has_poles = rio.warp.transform(4326, dstCrs, [0], [90])[1][0] != float('inf') yBounds = 90 if has_poles else 89.999999 # for each corner fix the latitude within -yBounds yBounds @@ -409,7 +431,7 @@ def getBounds(self, crs=None, **kwargs): needProjection = dstCrs and dstCrs != srcCrs if needProjection: for pt in bounds.values(): - [pt['x']], [pt['y']] = warp.transform(srcCrs, dstCrs, [pt['x']], [pt['y']]) + [pt['x']], [pt['y']] = rio.warp.transform(srcCrs, dstCrs, [pt['x']], [pt['y']]) # extract min max coordinates from the corners ll = bounds['ll']['x'], bounds['ll']['y'] @@ -573,7 +595,7 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): tile = self.dataset.read( window=window, out_shape=(count, h, w), - resampling=Resampling.nearest, + resampling=rio.enums.Resampling.nearest, ) else: @@ -600,7 +622,7 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): # It will result in surprisingly unmasked data. src_alpha_band = 0 for i, interp in enumerate(self.dataset.colorinterp): - if interp == ColorInterp.alpha: + if interp == rio.enums.ColorInterp.alpha: src_alpha_band = i add_alpha = not src_alpha_band @@ -608,14 +630,14 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): with self._getDatasetLock: with rio.vrt.WarpedVRT( self.dataset, - resampling=Resampling.nearest, + resampling=rio.enums.Resampling.nearest, crs=self.projection, transform=dst_transform, height=self.tileHeight, width=self.tileWidth, add_alpha=add_alpha, ) as vrt: - tile = vrt.read(resampling=Resampling.nearest) + tile = vrt.read(resampling=rio.enums.Resampling.nearest) # necessary for multispectral images: # set the coordinates first and the bands at the end @@ -686,12 +708,14 @@ def _convertProjectionUnits( units = units.split(':', 1)[1] srcCrs = make_crs(units) dstCrs = self.projection # instance projection -- do not use the CRS native to the file - [pleft], [ptop] = warp.transform(srcCrs, dstCrs, - [right if left is None else left], - [bottom if top is None else top]) - [pright], [pbottom] = warp.transform(srcCrs, dstCrs, - [left if right is None else right], - [top if bottom is None else bottom]) + [pleft], [ptop] = rio.warp.transform( + srcCrs, dstCrs, + [right if left is None else left], + [bottom if top is None else top]) + [pright], [pbottom] = rio.warp.transform( + srcCrs, dstCrs, + [left if right is None else right], + [top if bottom is None else bottom]) units = 'projection' # set the corner value in pixel coordinates if the coordinate was initially @@ -837,7 +861,7 @@ def toNativePixelCoordinates(self, x, y, crs=None, roundResults=True): # convert to the native projection dstCrs = make_crs(self.getCrs()) - [px], [py] = warp.transform(srcCrs, dstCrs, [x], [y]) + [px], [py] = rio.warp.transform(srcCrs, dstCrs, [x], [y]) # convert to native pixel coordinates af = self._getAffine() @@ -884,7 +908,7 @@ def getPixel(self, **kwargs): window = rio.windows.Window(int(x), int(y), 1, 1) try: value = self.dataset.read( - i, window=window, resampling=Resampling.nearest, + i, window=window, resampling=rio.enums.Resampling.nearest, ) value = value[0][0] # there should be 1 single pixel pixel.setdefault('bands', {})[i] = value.item() @@ -950,13 +974,13 @@ def getRegion(self, format=(TILE_FORMAT_IMAGE,), **kwargs): with rio.vrt.WarpedVRT( self.dataset, - resampling=Resampling.nearest, + resampling=rio.enums.Resampling.nearest, crs=self.projection, transform=dst_transform, height=height, width=width, ) as vrt: - data = vrt.read(resampling=Resampling.nearest) + data = vrt.read(resampling=rio.enums.Resampling.nearest) profile = self.dataset.meta.copy() profile.update( @@ -1019,6 +1043,8 @@ def isGeospatial(path): :param path: The path to the file :returns: True if geospatial. """ + _lazyImport() + if isinstance(path, rio.io.DatasetReaderBase): ds = path else: diff --git a/sources/tifffile/large_image_source_tifffile/__init__.py b/sources/tifffile/large_image_source_tifffile/__init__.py index 613e7015e..6adeac3b5 100644 --- a/sources/tifffile/large_image_source_tifffile/__init__.py +++ b/sources/tifffile/large_image_source_tifffile/__init__.py @@ -7,7 +7,6 @@ from importlib.metadata import version as _importlib_version import numpy as np -import zarr import large_image from large_image.cache_util import LruCacheMetaclass, methodcache @@ -16,6 +15,7 @@ from large_image.tilesource import FileTileSource tifffile = None +zarr = None try: __version__ = _importlib_version(__name__) @@ -37,6 +37,7 @@ def _lazyImport(): module initialization because it is slow. """ global tifffile + global zarr if tifffile is None: try: @@ -55,6 +56,8 @@ def _lazyImport(): logging.getLogger('tifffile.tifffile').addHandler(checkForMissingDataHandler()) logging.getLogger('tifffile').setLevel(logging.WARNING) logging.getLogger('tifffile').addHandler(checkForMissingDataHandler()) + if zarr is None: + import zarr def et_findall(tag, text): diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index aedce1808..29f36cea3 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -11,7 +11,6 @@ import numpy as np import packaging.version -import zarr import large_image from large_image.cache_util import LruCacheMetaclass, methodcache @@ -30,6 +29,23 @@ warnings.filterwarnings('ignore', category=FutureWarning, module='zarr') +zarr = None + + +def _lazyImport(): + """ + Import the zarr module. This is done when needed rather than in the module + initialization because it is slow. + """ + global zarr + + if zarr is None: + try: + import zarr + except ImportError: + msg = 'zarr module not found.' + raise TileSourceError(msg) + class ZarrFileTileSource(FileTileSource, metaclass=LruCacheMetaclass): """ @@ -70,6 +86,7 @@ def __init__(self, path, **kwargs): """ super().__init__(path, **kwargs) + _lazyImport() if str(path).startswith(NEW_IMAGE_PATH_FLAG): self._initNew(path, **kwargs) else: