From f1e68a24be5682769a0f076b5db2cc4dec778123 Mon Sep 17 00:00:00 2001
From: David Manthey
Date: Tue, 16 Jan 2024 08:29:11 -0500
Subject: [PATCH] Add more type annotations

---
 CHANGELOG.md                                  |  2 +-
 large_image/cache_util/__init__.py            | 32 ++++++---
 large_image/cache_util/base.py                | 31 +++++----
 large_image/cache_util/cache.py               | 11 +--
 large_image/cache_util/cachefactory.py        | 35 ++++++----
 large_image/cache_util/memcache.py            | 69 ++++++++++---------
 large_image/exceptions.py                     |  2 +-
 large_image/tilesource/tiledict.py            |  3 +-
 .../vips/large_image_source_vips/__init__.py  |  3 +-
 test/lisource_compare.py                      |  1 +
 tox.ini                                       |  5 +-
 11 files changed, 113 insertions(+), 81 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1cd929666..e8c7ce92f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@
 - Optimizing when reading arrays rather than images from tiff files ([#1423](../../pull/1423))
 - Better filter DICOM adjacent files to ensure they share series instance IDs ([#1424](../../pull/1424), [#1436](../../pull/1436))
 - Optimizing small getRegion calls and some tiff tile fetches ([#1427](../../pull/1427))
-- Started adding python types to the core library ([#1432](../../pull/1432), [#1433](../../pull/1433))
+- Started adding python types to the core library ([#1432](../../pull/1432), [#1433](../../pull/1433), [#1437](../../pull/1437))
 - Use parallelism in computing tile frames ([#1434](../../pull/1434))
 
 ### Changed
diff --git a/large_image/cache_util/__init__.py b/large_image/cache_util/__init__.py
index bbbe22e70..6d7486143 100644
--- a/large_image/cache_util/__init__.py
+++ b/large_image/cache_util/__init__.py
@@ -1,4 +1,4 @@
-###############################################################################
+#############################################################################
 # Copyright Kitware Inc.
 #
 # Licensed under the Apache License, Version 2.0 ( the "License" );
@@ -12,25 +12,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-###############################################################################
+#############################################################################
 
 import atexit
+from typing import Any, Callable, Dict, List
 
 from .cache import (CacheProperties, LruCacheMetaclass, getTileCache,
                     isTileCacheSetup, methodcache, strhash)
+from .cachefactory import CacheFactory, pickAvailableCache
 
+MemCache: Any
 try:
     from .memcache import MemCache
 except ImportError:
     MemCache = None
 
-from .cachefactory import CacheFactory, pickAvailableCache
-
-_cacheClearFuncs = []
+_cacheClearFuncs: List[Callable] = []
 
 
 @atexit.register
-def cachesClearExceptTile(*args, **kwargs):
+def cachesClearExceptTile(*args, **kwargs) -> None:
     """
     Clear the tilesource caches and the load model cache.  Note that this
     does not clear memcached (which could be done with tileCache._client.flush_all,
@@ -43,7 +44,7 @@ def cachesClearExceptTile(*args, **kwargs):
             func()
 
 
-def cachesClear(*args, **kwargs):
+def cachesClear(*args, **kwargs) -> None:
     """
     Clear the tilesource caches, the load model cache, and the tile cache.
     """
@@ -51,13 +52,16 @@ def cachesClear(*args, **kwargs):
     if isTileCacheSetup():
         tileCache, tileLock = getTileCache()
         try:
-            with tileLock:
+            if tileLock:
+                with tileLock:
+                    tileCache.clear()
+            else:
                 tileCache.clear()
         except Exception:
             pass
 
 
-def cachesInfo(*args, **kwargs):
+def cachesInfo(*args, **kwargs) -> Dict[str, Dict[str, int]]:
     """
     Report on each cache.
 
@@ -75,7 +79,15 @@ def cachesInfo(*args, **kwargs):
     if isTileCacheSetup():
         tileCache, tileLock = getTileCache()
         try:
-            with tileLock:
+            if tileLock:
+                with tileLock:
+                    info['tileCache'] = {
+                        'maxsize': tileCache.maxsize,
+                        'used': tileCache.currsize,
+                        'items': getattr(tileCache, 'curritems' if hasattr(
+                            tileCache, 'curritems') else 'currsize', None),
+                    }
+            else:
                 info['tileCache'] = {
                     'maxsize': tileCache.maxsize,
                     'used': tileCache.currsize,
diff --git a/large_image/cache_util/base.py b/large_image/cache_util/base.py
index 51797c9b0..f8a0d2417 100644
--- a/large_image/cache_util/base.py
+++ b/large_image/cache_util/base.py
@@ -1,20 +1,25 @@
 import hashlib
 import threading
 import time
-from typing import Tuple
+from typing import Any, Callable, Dict, Optional, Tuple, TypeVar
 
 import cachetools
 
+_VT = TypeVar('_VT')
+
 
 class BaseCache(cachetools.Cache):
     """Base interface to cachetools.Cache for use with large-image."""
 
-    def __init__(self, *args, getsizeof=None, **kwargs):
-        super().__init__(*args, getsizeof=getsizeof, **kwargs)
-        self.lastError = {}
+    def __init__(
+            self, maxsize: float,
+            getsizeof: Optional[Callable[[_VT], float]] = None,
+            **kwargs) -> None:
+        super().__init__(maxsize=maxsize, getsizeof=getsizeof, **kwargs)
+        self.lastError: Dict[Tuple[Any, Callable], Dict[str, Any]] = {}
         self.throttleErrors = 10  # seconds between logging errors
 
-    def logError(self, err, func, msg):
+    def logError(self, err: Any, func: Callable, msg: str) -> None:
         """
         Log errors, but throttle them so as not to spam the logs.
 
@@ -40,16 +45,16 @@ def __repr__(self):
     def __iter__(self):
         raise NotImplementedError
 
-    def __len__(self):
+    def __len__(self) -> int:
         raise NotImplementedError
 
-    def __contains__(self, key):
+    def __contains__(self, item) -> bool:
         raise NotImplementedError
 
     def __delitem__(self, key):
         raise NotImplementedError
 
-    def _hashKey(self, key):
+    def _hashKey(self, key) -> str:
         return hashlib.sha256(key.encode()).hexdigest()
 
     def __getitem__(self, key):
@@ -61,21 +66,21 @@ def __setitem__(self, key, value):
         raise NotImplementedError
 
     @property
-    def curritems(self):
+    def curritems(self) -> int:
         raise NotImplementedError
 
     @property
-    def currsize(self):
+    def currsize(self) -> int:
         raise NotImplementedError
 
     @property
-    def maxsize(self):
+    def maxsize(self) -> int:
         raise NotImplementedError
 
-    def clear(self):
+    def clear(self) -> None:
         raise NotImplementedError
 
     @staticmethod
-    def getCache() -> Tuple['BaseCache', threading.Lock]:
+    def getCache() -> Tuple[Optional['BaseCache'], threading.Lock]:
         # return cache, cacheLock
         raise NotImplementedError
diff --git a/large_image/cache_util/cache.py b/large_image/cache_util/cache.py
index 9a51418e1..38c19d463 100644
--- a/large_image/cache_util/cache.py
+++ b/large_image/cache_util/cache.py
@@ -2,8 +2,9 @@
 import pickle
 import threading
 import uuid
-from typing import Any, Callable, Dict, Optional, TypeVar
+from typing import Any, Callable, Dict, Optional, Tuple, TypeVar
 
+import cachetools
 from typing_extensions import ParamSpec
 
 try:
@@ -18,8 +19,8 @@
 P = ParamSpec('P')
 T = TypeVar('T')
 
-_tileCache = None
-_tileLock = None
+_tileCache: Optional[cachetools.Cache] = None
+_tileLock: Optional[threading.Lock] = None
 
 _cacheLockKeyToken = '_cacheLock_key'
 
@@ -175,7 +176,7 @@ def __new__(metacls, name, bases, namespace, **kwargs):
         return cls
 
-    def __call__(cls, *args, **kwargs):  # noqa - N805
+    def __call__(cls, *args, **kwargs) -> Any:  # noqa - N805
         if kwargs.get('noCache') or (
                 kwargs.get('noCache') is None and
                 config.getConfig('cache_sources') is False):
             instance = super().__call__(*args, **kwargs)
@@ -259,7 +260,7 @@ def __call__(cls, *args, **kwargs):  # noqa - N805
         return instance
 
 
-def getTileCache():
+def getTileCache() -> Tuple[cachetools.Cache, Optional[threading.Lock]]:
     """
     Get the preferred tile cache and lock.
 
diff --git a/large_image/cache_util/cachefactory.py b/large_image/cache_util/cachefactory.py
index 20ee875da..1174ab995 100644
--- a/large_image/cache_util/cachefactory.py
+++ b/large_image/cache_util/cachefactory.py
@@ -16,30 +16,30 @@
 
 import math
 import threading
+from typing import Dict, Optional, Tuple, Type
 
 import cachetools
 
 try:
     import psutil
+    HAS_PSUTIL = True
 except ImportError:
-    psutil = None
+    HAS_PSUTIL = False
 
 from importlib.metadata import entry_points
 
 from .. import config
 from ..exceptions import TileCacheError
-
-try:
-    from .memcache import MemCache
-except ImportError:
-    MemCache = None
+from .memcache import MemCache
 
 # DO NOT MANUALLY ADD ANYTHING TO `_availableCaches`
 # use entrypoints and let loadCaches fill in `_availableCaches`
-_availableCaches = {}
+_availableCaches: Dict[str, Type[cachetools.Cache]] = {}
 
 
-def loadCaches(entryPointName='large_image.cache', sourceDict=_availableCaches):
+def loadCaches(
+        entryPointName: str = 'large_image.cache',
+        sourceDict: Dict[str, Type[cachetools.Cache]] = _availableCaches) -> None:
     """
     Load all caches from entrypoints and add them to the availableCaches
     dictionary.
@@ -69,7 +69,9 @@ def loadCaches(entryPointName='large_image.cache', sourceDict=_availableCaches):
 
 
 # NOTE: `python` cache is viewed as a fallback and isn't listed in `availableCaches`
-def pickAvailableCache(sizeEach, portion=8, maxItems=None, cacheName=None):
+def pickAvailableCache(
+        sizeEach: int, portion: int = 8, maxItems: Optional[int] = None,
+        cacheName: Optional[str] = None) -> int:
     """
     Given an estimated size of an item, return how many of those items would
     fit in a fixed portion of the available virtual memory.
@@ -90,7 +92,7 @@ def pickAvailableCache(sizeEach, portion=8, maxItems=None, cacheName=None):
         if configMaxItems > 0:
             maxItems = configMaxItems
     # Estimate usage based on (1 / portion) of the total virtual memory.
-    if psutil:
+    if HAS_PSUTIL:
         memory = psutil.virtual_memory().total
     else:
         memory = 1024 ** 3
@@ -100,7 +102,7 @@ def pickAvailableCache(sizeEach, portion=8, maxItems=None, cacheName=None):
     return numItems
 
 
-def getFirstAvailableCache():
+def getFirstAvailableCache() -> Tuple[cachetools.Cache, Optional[threading.Lock]]:
     cacheBackend = config.getConfig('cache_backend', None)
     if cacheBackend is not None:
         msg = 'cache_backend already set'
@@ -109,7 +111,7 @@ def getFirstAvailableCache():
     cache, cacheLock = None, None
     for cacheBackend in _availableCaches:
         try:
-            cache, cacheLock = _availableCaches[cacheBackend].getCache()
+            cache, cacheLock = _availableCaches[cacheBackend].getCache()  # type: ignore
             break
         except TileCacheError:
             continue
@@ -124,7 +126,7 @@ class CacheFactory:
     logged = False
 
-    def getCacheSize(self, numItems, cacheName=None):
+    def getCacheSize(self, numItems: Optional[int], cacheName: Optional[str] = None) -> int:
         if numItems is None:
             defaultPortion = 32
             try:
@@ -145,7 +147,10 @@ def getCacheSize(self, numItems, cacheName=None):
                 pass
         return numItems
 
-    def getCache(self, numItems=None, cacheName=None, inProcess=False):
+    def getCache(
+            self, numItems: Optional[int] = None,
+            cacheName: Optional[str] = None,
+            inProcess: bool = False) -> Tuple[cachetools.Cache, Optional[threading.Lock]]:
         loadCaches()
 
         # Default to `python` cache for inProcess
@@ -156,7 +161,7 @@ def getCache(self, numItems=None, cacheName=None, inProcess=False):
 
         cache = None
         if not inProcess and cacheBackend in _availableCaches:
-            cache, cacheLock = _availableCaches[cacheBackend].getCache()
+            cache, cacheLock = _availableCaches[cacheBackend].getCache()  # type: ignore
         elif not inProcess and cacheBackend is None:
             cache, cacheLock = getFirstAvailableCache()
 
diff --git a/large_image/cache_util/memcache.py b/large_image/cache_util/memcache.py
index 72dc7725f..00ab86d37 100644
--- a/large_image/cache_util/memcache.py
+++ b/large_image/cache_util/memcache.py
@@ -17,20 +17,25 @@
 import copy
 import threading
 import time
-from typing import Tuple
+from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union
 
 from .. import config
 from .base import BaseCache
 
+_VT = TypeVar('_VT')
+
 
 class MemCache(BaseCache):
     """Use memcached as the backing cache."""
 
-    def __init__(self, url='127.0.0.1', username=None, password=None,
-                 getsizeof=None, mustBeAvailable=False):
-        global pylibmc
+    def __init__(
+            self, url: Union[str, List[str]] = '127.0.0.1',
+            username: Optional[str] = None, password: Optional[str] = None,
+            getsizeof: Optional[Callable[[_VT], float]] = None,
+            mustBeAvailable: bool = False) -> None:
         import pylibmc
 
+        self.pylibmc = pylibmc
         super().__init__(0, getsizeof=getsizeof)
         if isinstance(url, str):
             url = [url]
@@ -56,42 +61,42 @@ def __init__(self, url='127.0.0.1', username=None, password=None,
         # unreachable, so we don't bother trying to use it.
         self._client['large_image_cache_test'] = time.time()
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "Memcache doesn't list its keys"
 
     def __iter__(self):
         # return invalid iter
         return None
 
-    def __len__(self):
+    def __len__(self) -> int:
         # return invalid length
         return -1
 
-    def __contains__(self, key):
+    def __contains__(self, item: object) -> bool:
         # cache never contains key
-        return None
+        return False
 
-    def __delitem__(self, key):
+    def __delitem__(self, key: str) -> None:
         hashedKey = self._hashKey(key)
         del self._client[hashedKey]
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> Any:
         hashedKey = self._hashKey(key)
         try:
             return self._client[hashedKey]
         except KeyError:
             return self.__missing__(key)
-        except pylibmc.ServerDown:
-            self.logError(pylibmc.ServerDown, config.getLogger('logprint').info,
+        except self.pylibmc.ServerDown:
+            self.logError(self.pylibmc.ServerDown, config.getLogger('logprint').info,
                           'Memcached ServerDown')
             self._reconnect()
             return self.__missing__(key)
-        except pylibmc.Error:
-            self.logError(pylibmc.Error, config.getLogger('logprint').exception,
+        except self.pylibmc.Error:
+            self.logError(self.pylibmc.Error, config.getLogger('logprint').exception,
                           'pylibmc exception')
             return self.__missing__(key)
 
-    def __setitem__(self, key, value):
+    def __setitem__(self, key: str, value: Any) -> None:
         hashedKey = self._hashKey(key)
         try:
             self._client[hashedKey] = value
@@ -106,60 +111,60 @@ def __setitem__(self, key, value):
                 exc.__class__, config.getLogger('logprint').error,
                 '%s: Failed to save value (size %r) with key %s' % (
                     exc.__class__.__name__, valueSize, hashedKey))
-        except pylibmc.ServerDown:
-            self.logError(pylibmc.ServerDown, config.getLogger('logprint').info,
+        except self.pylibmc.ServerDown:
+            self.logError(self.pylibmc.ServerDown, config.getLogger('logprint').info,
                           'Memcached ServerDown')
             self._reconnect()
-        except pylibmc.TooBig:
+        except self.pylibmc.TooBig:
             pass
-        except pylibmc.Error as exc:
+        except self.pylibmc.Error as exc:
             # memcached won't cache items larger than 1 Mb (or a configured
             # size), but this returns a 'SUCCESS' error.  Raise other errors.
             if 'SUCCESS' not in repr(exc.args):
-                self.logError(pylibmc.Error, config.getLogger('logprint').exception,
+                self.logError(self.pylibmc.Error, config.getLogger('logprint').exception,
                               'pylibmc exception')
 
     @property
-    def curritems(self):
+    def curritems(self) -> int:
        return self._getStat('curr_items')
 
     @property
-    def currsize(self):
+    def currsize(self) -> int:
         return self._getStat('bytes')
 
     @property
-    def maxsize(self):
+    def maxsize(self) -> int:
         return self._getStat('limit_maxbytes')
 
-    def _reconnect(self):
+    def _reconnect(self) -> None:
         try:
             self._lastReconnectBackoff = getattr(self, '_lastReconnectBackoff', 2)
             if time.time() - getattr(self, '_lastReconnect', 0) > self._lastReconnectBackoff:
                 config.getLogger('logprint').info('Trying to reconnect to memcached server')
-                self._client = pylibmc.Client(self._clientParams[0], **self._clientParams[1])
+                self._client = self.pylibmc.Client(self._clientParams[0], **self._clientParams[1])
                 self._lastReconnectBackoff = min(self._lastReconnectBackoff + 1, 30)
                 self._lastReconnect = time.time()
         except Exception:
             pass
 
-    def _blockingClient(self):
+    def _blockingClient(self) -> Any:
         params = copy.deepcopy(self._clientParams)
-        params[1]['behaviors']['no_block'] = False
-        return pylibmc.Client(params[0], **params[1])
+        params[1]['behaviors']['no_block'] = False  # type: ignore
+        return self.pylibmc.Client(params[0], **params[1])
 
-    def _getStat(self, key):
+    def _getStat(self, key: str) -> int:
         try:
             stats = self._blockingClient().get_stats()
             value = sum(int(s[key]) for server, s in stats)
         except Exception:
-            return None
+            return 0
         return value
 
-    def clear(self):
+    def clear(self) -> None:
         self._client.flush_all()
 
     @staticmethod
-    def getCache() -> Tuple['MemCache', threading.Lock]:
+    def getCache() -> Tuple[Optional['MemCache'], threading.Lock]:
         # lock needed because pylibmc(memcached client) is not threadsafe
         cacheLock = threading.Lock()
 
diff --git a/large_image/exceptions.py b/large_image/exceptions.py
index a79ad6d3a..bacddcb6d 100644
--- a/large_image/exceptions.py
+++ b/large_image/exceptions.py
@@ -23,7 +23,7 @@ class TileSourceInefficientError(TileSourceError):
 
 class TileSourceFileNotFoundError(TileSourceError, FileNotFoundError):
     def __init__(self, *args, **kwargs) -> None:
-        return super().__init__(errno.ENOENT, *args, **kwargs)
+        super().__init__(errno.ENOENT, *args, **kwargs)
 
 
 class TileCacheError(TileGeneralError):
diff --git a/large_image/tilesource/tiledict.py b/large_image/tilesource/tiledict.py
index c6b0b17a8..f9b2599cc 100644
--- a/large_image/tilesource/tiledict.py
+++ b/large_image/tilesource/tiledict.py
@@ -48,14 +48,13 @@ def __init__(self, tileInfo, *args, **kwargs):
         self.alwaysAllowPIL = True
         self.imageKwargs = {}
         self.loaded = False
-        result = super().__init__(*args, **kwargs)
+        super().__init__(*args, **kwargs)
         # We set this initially so that they are listed in known keys using the
         # native dictionary methods
         self['tile'] = None
         self['format'] = None
         self.width = self['width']
         self.height = self['height']
-        return result
 
     def setFormat(self, format, resample=False, imageKwargs=None):
         """
diff --git a/sources/vips/large_image_source_vips/__init__.py b/sources/vips/large_image_source_vips/__init__.py
index e15314d5a..65ecbea2a 100644
--- a/sources/vips/large_image_source_vips/__init__.py
+++ b/sources/vips/large_image_source_vips/__init__.py
@@ -63,7 +63,8 @@ def __init__(self, path, **kwargs):
         super().__init__(path, **kwargs)
 
         if str(path).startswith(NEW_IMAGE_PATH_FLAG):
-            return self._initNew(**kwargs)
+            self._initNew(**kwargs)
+            return
         self._largeImagePath = str(self._getLargeImagePath())
 
         self._editable = False
diff --git a/test/lisource_compare.py b/test/lisource_compare.py
index d2e3e33d6..47934e521 100755
--- a/test/lisource_compare.py
+++ b/test/lisource_compare.py
@@ -20,6 +20,7 @@
 
 
 logging.getLogger('tifftools').setLevel(logging.ERROR)
+logging.root.addHandler(logging.NullHandler())
 
 
 def yaml_dict_dump(dumper, data):
diff --git a/tox.ini b/tox.ini
index ae73d6fbf..9e669f0dc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -181,6 +181,7 @@ skipsdist = true
 deps =
     -rrequirements-dev.txt
     mypy
+    types-cachetools
     types-pillow
     types-psutil
 commands =
@@ -444,12 +445,14 @@ warn_return_any = False
 
 # files = .
 files =
-    large_image/cache_util/cache.py,
+    large_image/__init__.py,
+    large_image/cache_util/,
     large_image/config.py,
     large_image/constants.py,
     large_image/exceptions.py,
     large_image/tilesource/__init__.py,
     large_image/tilesource/base.py
+# large_image/tilesource/utilities.py
# large_image/,
 # sources/,
 # girder/,
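
A minimal usage sketch of the cache helpers annotated above. This is only a
sketch: it assumes large_image is installed with its default in-process cache
configuration, and the exact keys reported by cachesInfo() may vary by backend.

    from large_image.cache_util import (
        cachesClear, cachesInfo, getTileCache, isTileCacheSetup)

    # cachesInfo() is annotated as Dict[str, Dict[str, int]]: a mapping of
    # cache name to size statistics such as 'maxsize' and 'used'.
    for name, stats in cachesInfo().items():
        print(name, stats)

    # getTileCache() is annotated as returning
    # Tuple[cachetools.Cache, Optional[threading.Lock]], so guard the lock
    # the same way the patched helpers do before touching the shared cache.
    if isTileCacheSetup():
        tileCache, tileLock = getTileCache()
        if tileLock:
            with tileLock:
                print('tile cache used:', tileCache.currsize)
        else:
            print('tile cache used:', tileCache.currsize)

    # Clear the tile source caches, the load model cache, and the tile cache.
    cachesClear()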