Skip to content

Commit

Permalink
Cache histograms.
Browse files Browse the repository at this point in the history
This caches them in memory in the core library and to attached files in
Girder.
  • Loading branch information
manthey committed May 7, 2021
1 parent 556da41 commit d59939e
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 47 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Change Log

## Unreleased

### Improvements
- Cache histogram requests (#598)

## Version 1.5.0

### Features
Expand Down
117 changes: 72 additions & 45 deletions girder/girder_large_image/models/image_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import io
import json
import pickle
import pymongo

from girder import logger
Expand Down Expand Up @@ -43,11 +44,18 @@ class ImageItem(Item):
def initialize(self):
super().initialize()
self.ensureIndices(['largeImage.fileId'])
File().ensureIndices([([
('isLargeImageThumbnail', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {})])
File().ensureIndices([
([
('isLargeImageThumbnail', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {}),
([
('isLargeImageData', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {}),
])

def createImageItem(self, item, fileObj, user=None, token=None,
createJob=True, notify=False, **kwargs):
Expand Down Expand Up @@ -302,18 +310,19 @@ def getThumbnail(self, item, checkAndCreate=False, width=None, height=None, **kw
"""
# check if a thumbnail file exists with a particular key
keydict = dict(kwargs, width=width, height=height)
return self._getAndCacheImage(
return self._getAndCacheImageOrData(
item, 'getThumbnail', checkAndCreate, keydict, width=width, height=height, **kwargs)

def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs):
def _getAndCacheImageOrData(
self, item, imageFunc, checkAndCreate, keydict, pickleCache=False, **kwargs):
if 'fill' in keydict and (keydict['fill']).lower() == 'none':
del keydict['fill']
keydict = {k: v for k, v in keydict.items() if v is not None}
key = json.dumps(keydict, sort_keys=True, separators=(',', ':'))
existing = File().findOne({
'attachedToType': 'item',
'attachedToId': item['_id'],
'isLargeImageThumbnail': True,
'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
'thumbnailKey': key
})
if existing:
Expand All @@ -323,38 +332,45 @@ def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs):
contentDisposition = 'inline'
else:
contentDisposition = kwargs['contentDisposition']
if pickleCache:
data = File().open(existing).read()
return pickle.loads(data), 'application/octet-stream'
return File().download(existing, contentDisposition=contentDisposition)
tileSource = self._loadTileSource(item, **kwargs)
result = getattr(tileSource, imageFunc)(**kwargs)
if result is None:
thumbData, thumbMime = b'', 'application/octet-stream'
imageData, imageMime = b'', 'application/octet-stream'
elif pickleCache:
imageData, imageMime = result, 'application/octet-stream'
else:
thumbData, thumbMime = result
# The logic on which files to save could be more sophisticated.
maxThumbnailFiles = int(Setting().get(
constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES))
saveFile = maxThumbnailFiles > 0
if saveFile:
imageData, imageMime = result
saveFile = True
if not pickleCache:
# The logic on which files to save could be more sophisticated.
maxThumbnailFiles = int(Setting().get(
constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES))
saveFile = maxThumbnailFiles > 0
# Make sure we don't exceed the desired number of thumbnails
self.removeThumbnailFiles(item, maxThumbnailFiles - 1)
# Save the thumbnail as a file
thumbfile = Upload().uploadFromFile(
io.BytesIO(thumbData), size=len(thumbData),
if saveFile:
dataStored = imageData if not pickleCache else pickle.dumps(imageData, protocol=4)
# Save the data as a file
datafile = Upload().uploadFromFile(
io.BytesIO(dataStored), size=len(dataStored),
name='_largeImageThumbnail', parentType='item', parent=item,
user=None, mimeType=thumbMime, attachParent=True)
if not len(thumbData) and 'received' in thumbfile:
thumbfile = Upload().finalizeUpload(
thumbfile, Assetstore().load(thumbfile['assetstoreId']))
thumbfile.update({
'isLargeImageThumbnail': True,
user=None, mimeType=imageMime, attachParent=True)
if not len(dataStored) and 'received' in datafile:
datafile = Upload().finalizeUpload(
datafile, Assetstore().load(datafile['assetstoreId']))
datafile.update({
'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
'thumbnailKey': key,
})
# Ideally, we would check that the file is still wanted before we
# save it. This is probably impossible without true transactions in
# Mongo.
File().save(thumbfile)
# Return the data
return thumbData, thumbMime
File().save(datafile)
return imageData, imageMime

def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs):
"""
Expand All @@ -369,23 +385,27 @@ def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs):
:returns: a tuple of (the number of files before removal, the number of
files removed).
"""
keys = ['isLargeImageThumbnail']
if not keep:
keys.append('isLargeImageData')
if not sort:
sort = [('_id', SortDir.DESCENDING)]
query = {
'attachedToType': 'item',
'attachedToId': item['_id'],
'isLargeImageThumbnail': True,
}
query.update(kwargs)
present = 0
removed = 0
for file in File().find(query, sort=sort):
present += 1
if keep > 0:
keep -= 1
continue
File().remove(file)
removed += 1
for key in keys:
query = {
'attachedToType': 'item',
'attachedToId': item['_id'],
key: True,
}
query.update(kwargs)
present = 0
removed = 0
for file in File().find(query, sort=sort):
present += 1
if keep > 0:
keep -= 1
continue
File().remove(file)
removed += 1
return (present, removed)

def getRegion(self, item, **kwargs):
Expand Down Expand Up @@ -425,8 +445,15 @@ def histogram(self, item, **kwargs):
method.
:returns: histogram object.
"""
tileSource = self._loadTileSource(item, **kwargs)
return tileSource.histogram(**kwargs)
if kwargs.get('range') is not None:
tileSource = self._loadTileSource(item, **kwargs)
result = tileSource.histogram(**kwargs)
else:
imageKey = 'histogram'
result = self._getAndCacheImageOrData(
item, 'histogram', False, dict(kwargs, imageKey=imageKey),
imageKey=imageKey, pickleCache=True, **kwargs)[0]
return result

def tileSource(self, item, **kwargs):
"""
Expand Down Expand Up @@ -459,5 +486,5 @@ def getAssociatedImage(self, item, imageKey, checkAndCreate=False, *args, **kwar
None if the associated image doesn't exist.
"""
keydict = dict(kwargs, imageKey=imageKey)
return self._getAndCacheImage(
return self._getAndCacheImageOrData(
item, 'getAssociatedImage', checkAndCreate, keydict, imageKey=imageKey, **kwargs)
34 changes: 32 additions & 2 deletions girder/girder_large_image/rest/large_image_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ def __init__(self):
self.route('DELETE', ('thumbnails',), self.deleteThumbnails)
self.route('GET', ('associated_images',), self.countAssociatedImages)
self.route('DELETE', ('associated_images',), self.deleteAssociatedImages)
self.route('DELETE', ('tiles', 'incomplete'),
self.deleteIncompleteTiles)
self.route('GET', ('histograms',), self.countHistograms)
self.route('DELETE', ('histograms',), self.deleteHistograms)
self.route('DELETE', ('tiles', 'incomplete'), self.deleteIncompleteTiles)

@describeRoute(
Description('Clear tile source caches to release resources and file handles.')
Expand Down Expand Up @@ -438,3 +439,32 @@ def listSources(self, params):
except Exception:
pass
return results

@describeRoute(
Description('Count the number of cached histograms for large_image items.')
)
@access.admin
def countHistograms(self, params):
query = {
'isLargeImageData': True,
'attachedToType': 'item',
'thumbnailKey': {'$regex': '"imageKey":"histogram"'},
}
count = File().find(query).count()
return count

@describeRoute(
Description('Delete cached histograms from large_image items.')
)
@access.admin
def deleteHistograms(self, params):
query = {
'isLargeImageData': True,
'attachedToType': 'item',
'thumbnailKey': {'$regex': '"imageKey":"histogram"'},
}
removed = 0
for file in File().find(query):
File().remove(file)
removed += 1
return removed
27 changes: 27 additions & 0 deletions girder/test_girder/test_large_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,30 @@ def testGetLargeImagePath(server, admin, fsAssetstore):
path = ts._getLargeImagePath()
assert path == abspath
file = File().save(origFile)


@pytest.mark.usefixtures('unbindLargeImage')
@pytest.mark.plugin('large_image')
def testHistogramCaching(server, admin, user, fsAssetstore):
file = utilities.uploadExternalFile('sample_image.ptif', admin, fsAssetstore)
itemId = str(file['itemId'])
resp = server.request(path='/item/%s/tiles/histogram' % itemId,
user=admin, isJson=False)
assert utilities.respStatus(resp) == 200
# Test GET histograms
resp = server.request(path='/large_image/histograms', user=user)
assert utilities.respStatus(resp) == 403
resp = server.request(path='/large_image/histograms', user=admin)
assert utilities.respStatus(resp) == 200
assert resp.json == 1
# Test DELETE histograms
resp = server.request(
method='DELETE', path='/large_image/histograms', user=user)
assert utilities.respStatus(resp) == 403
resp = server.request(
method='DELETE', path='/large_image/histograms', user=admin)
assert utilities.respStatus(resp) == 200
assert resp.json == 1
resp = server.request(path='/large_image/histograms', user=admin)
assert utilities.respStatus(resp) == 200
assert resp.json == 0
21 changes: 21 additions & 0 deletions girder/test_girder/test_tiles_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,27 @@ def testTilesHistogram(server, admin, fsAssetstore):
assert len(resp.json[0]['hist']) == 256
assert resp.json[1]['samples'] == 2801664
assert resp.json[1]['hist'][128] == 176
# A second query will fetch it from cache
resp = server.request(
path='/item/%s/tiles/histogram' % itemId,
params={'width': 2048, 'height': 2048, 'resample': False})
assert len(resp.json) == 3
assert len(resp.json[0]['hist']) == 256


@pytest.mark.usefixtures('unbindLargeImage')
@pytest.mark.plugin('large_image')
def testTilesHistogramWithRange(server, admin, fsAssetstore):
file = utilities.uploadExternalFile(
'sample_image.ptif', admin, fsAssetstore)
itemId = str(file['itemId'])
resp = server.request(
path='/item/%s/tiles/histogram' % itemId,
params={'width': 2048, 'height': 2048, 'resample': False, 'rangeMin': 10, 'rangeMax': 240})
assert len(resp.json) == 3
assert len(resp.json[0]['hist']) == 256
assert resp.json[1]['samples'] == 685979
assert resp.json[1]['hist'][128] == 186


@pytest.mark.usefixtures('unbindLargeImage')
Expand Down
1 change: 1 addition & 0 deletions large_image/tilesource/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,7 @@ def _pilFormatMatches(self, image, match=True, **kwargs):
# compatibility could be an issue.
return False

@methodcache()
def histogram(self, dtype=None, onlyMinMax=False, bins=256,
density=False, format=None, *args, **kwargs):
"""
Expand Down
6 changes: 6 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ setenv =
GDAL_PAM_ENABLED=no
PIP_FIND_LINKS=https://girder.github.io/large_image_wheels

# To run just some non-web-client tests, do tox -e server -- -k <test name>
# Don't use this for CI or full tests.
[testenv:server]
commands =
pytest {posargs}

[testenv:flake8]
skipsdist = true
skip_install = true
Expand Down

0 comments on commit d59939e

Please sign in to comment.