Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache histograms. #598

Merged
merged 1 commit into from
May 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Change Log

## Unreleased

### Improvements
- Cache histogram requests (#598)

## Version 1.5.0

### Features
Expand Down
117 changes: 72 additions & 45 deletions girder/girder_large_image/models/image_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import io
import json
import pickle
import pymongo

from girder import logger
Expand Down Expand Up @@ -43,11 +44,18 @@ class ImageItem(Item):
def initialize(self):
super().initialize()
self.ensureIndices(['largeImage.fileId'])
File().ensureIndices([([
('isLargeImageThumbnail', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {})])
File().ensureIndices([
([
('isLargeImageThumbnail', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {}),
([
('isLargeImageData', pymongo.ASCENDING),
('attachedToType', pymongo.ASCENDING),
('attachedToId', pymongo.ASCENDING),
], {}),
])

def createImageItem(self, item, fileObj, user=None, token=None,
createJob=True, notify=False, **kwargs):
Expand Down Expand Up @@ -302,18 +310,19 @@ def getThumbnail(self, item, checkAndCreate=False, width=None, height=None, **kw
"""
# check if a thumbnail file exists with a particular key
keydict = dict(kwargs, width=width, height=height)
return self._getAndCacheImage(
return self._getAndCacheImageOrData(
item, 'getThumbnail', checkAndCreate, keydict, width=width, height=height, **kwargs)

def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs):
def _getAndCacheImageOrData(
self, item, imageFunc, checkAndCreate, keydict, pickleCache=False, **kwargs):
if 'fill' in keydict and (keydict['fill']).lower() == 'none':
del keydict['fill']
keydict = {k: v for k, v in keydict.items() if v is not None}
key = json.dumps(keydict, sort_keys=True, separators=(',', ':'))
existing = File().findOne({
'attachedToType': 'item',
'attachedToId': item['_id'],
'isLargeImageThumbnail': True,
'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
'thumbnailKey': key
})
if existing:
Expand All @@ -323,38 +332,45 @@ def _getAndCacheImage(self, item, imageFunc, checkAndCreate, keydict, **kwargs):
contentDisposition = 'inline'
else:
contentDisposition = kwargs['contentDisposition']
if pickleCache:
data = File().open(existing).read()
return pickle.loads(data), 'application/octet-stream'
return File().download(existing, contentDisposition=contentDisposition)
tileSource = self._loadTileSource(item, **kwargs)
result = getattr(tileSource, imageFunc)(**kwargs)
if result is None:
thumbData, thumbMime = b'', 'application/octet-stream'
imageData, imageMime = b'', 'application/octet-stream'
elif pickleCache:
imageData, imageMime = result, 'application/octet-stream'
else:
thumbData, thumbMime = result
# The logic on which files to save could be more sophisticated.
maxThumbnailFiles = int(Setting().get(
constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES))
saveFile = maxThumbnailFiles > 0
if saveFile:
imageData, imageMime = result
saveFile = True
if not pickleCache:
# The logic on which files to save could be more sophisticated.
maxThumbnailFiles = int(Setting().get(
constants.PluginSettings.LARGE_IMAGE_MAX_THUMBNAIL_FILES))
saveFile = maxThumbnailFiles > 0
# Make sure we don't exceed the desired number of thumbnails
self.removeThumbnailFiles(item, maxThumbnailFiles - 1)
# Save the thumbnail as a file
thumbfile = Upload().uploadFromFile(
io.BytesIO(thumbData), size=len(thumbData),
if saveFile:
dataStored = imageData if not pickleCache else pickle.dumps(imageData, protocol=4)
# Save the data as a file
datafile = Upload().uploadFromFile(
io.BytesIO(dataStored), size=len(dataStored),
name='_largeImageThumbnail', parentType='item', parent=item,
user=None, mimeType=thumbMime, attachParent=True)
if not len(thumbData) and 'received' in thumbfile:
thumbfile = Upload().finalizeUpload(
thumbfile, Assetstore().load(thumbfile['assetstoreId']))
thumbfile.update({
'isLargeImageThumbnail': True,
user=None, mimeType=imageMime, attachParent=True)
if not len(dataStored) and 'received' in datafile:
datafile = Upload().finalizeUpload(
datafile, Assetstore().load(datafile['assetstoreId']))
datafile.update({
'isLargeImageThumbnail' if not pickleCache else 'isLargeImageData': True,
'thumbnailKey': key,
})
# Ideally, we would check that the file is still wanted before we
# save it. This is probably impossible without true transactions in
# Mongo.
File().save(thumbfile)
# Return the data
return thumbData, thumbMime
File().save(datafile)
return imageData, imageMime

def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs):
"""
Expand All @@ -369,23 +385,27 @@ def removeThumbnailFiles(self, item, keep=0, sort=None, **kwargs):
:returns: a tuple of (the number of files before removal, the number of
files removed).
"""
keys = ['isLargeImageThumbnail']
if not keep:
keys.append('isLargeImageData')
if not sort:
sort = [('_id', SortDir.DESCENDING)]
query = {
'attachedToType': 'item',
'attachedToId': item['_id'],
'isLargeImageThumbnail': True,
}
query.update(kwargs)
present = 0
removed = 0
for file in File().find(query, sort=sort):
present += 1
if keep > 0:
keep -= 1
continue
File().remove(file)
removed += 1
for key in keys:
query = {
'attachedToType': 'item',
'attachedToId': item['_id'],
key: True,
}
query.update(kwargs)
present = 0
removed = 0
for file in File().find(query, sort=sort):
present += 1
if keep > 0:
keep -= 1
continue
File().remove(file)
removed += 1
return (present, removed)

def getRegion(self, item, **kwargs):
Expand Down Expand Up @@ -425,8 +445,15 @@ def histogram(self, item, **kwargs):
method.
:returns: histogram object.
"""
tileSource = self._loadTileSource(item, **kwargs)
return tileSource.histogram(**kwargs)
if kwargs.get('range') is not None:
tileSource = self._loadTileSource(item, **kwargs)
result = tileSource.histogram(**kwargs)
else:
imageKey = 'histogram'
result = self._getAndCacheImageOrData(
item, 'histogram', False, dict(kwargs, imageKey=imageKey),
imageKey=imageKey, pickleCache=True, **kwargs)[0]
return result

def tileSource(self, item, **kwargs):
"""
Expand Down Expand Up @@ -459,5 +486,5 @@ def getAssociatedImage(self, item, imageKey, checkAndCreate=False, *args, **kwar
None if the associated image doesn't exist.
"""
keydict = dict(kwargs, imageKey=imageKey)
return self._getAndCacheImage(
return self._getAndCacheImageOrData(
item, 'getAssociatedImage', checkAndCreate, keydict, imageKey=imageKey, **kwargs)
34 changes: 32 additions & 2 deletions girder/girder_large_image/rest/large_image_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,9 @@ def __init__(self):
self.route('DELETE', ('thumbnails',), self.deleteThumbnails)
self.route('GET', ('associated_images',), self.countAssociatedImages)
self.route('DELETE', ('associated_images',), self.deleteAssociatedImages)
self.route('DELETE', ('tiles', 'incomplete'),
self.deleteIncompleteTiles)
self.route('GET', ('histograms',), self.countHistograms)
self.route('DELETE', ('histograms',), self.deleteHistograms)
self.route('DELETE', ('tiles', 'incomplete'), self.deleteIncompleteTiles)

@describeRoute(
Description('Clear tile source caches to release resources and file handles.')
Expand Down Expand Up @@ -438,3 +439,32 @@ def listSources(self, params):
except Exception:
pass
return results

@describeRoute(
    Description('Count the number of cached histograms for large_image items.')
)
@access.admin
def countHistograms(self, params):
    """
    Report how many cached histogram files are attached to items.

    A file is counted when it is flagged as large image data, is attached
    to an item, and its thumbnail key contains the histogram image key.

    :param params: request parameters; not used.
    :returns: the number of matching file records.
    """
    histogramFiles = File().find({
        'isLargeImageData': True,
        'attachedToType': 'item',
        'thumbnailKey': {'$regex': '"imageKey":"histogram"'},
    })
    return histogramFiles.count()

@describeRoute(
    Description('Delete cached histograms from large_image items.')
)
@access.admin
def deleteHistograms(self, params):
    """
    Remove every cached histogram file that is attached to an item.

    The files are selected with the same criteria used for counting them:
    flagged as large image data, attached to an item, and carrying a
    thumbnail key that contains the histogram image key.

    :param params: request parameters; not used.
    :returns: the number of file records that were removed.
    """
    histogramQuery = {
        'isLargeImageData': True,
        'attachedToType': 'item',
        'thumbnailKey': {'$regex': '"imageKey":"histogram"'},
    }
    # Stays at zero when the query matches nothing.
    removed = 0
    for removed, cachedFile in enumerate(File().find(histogramQuery), 1):
        File().remove(cachedFile)
    return removed
27 changes: 27 additions & 0 deletions girder/test_girder/test_large_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,30 @@ def testGetLargeImagePath(server, admin, fsAssetstore):
path = ts._getLargeImagePath()
assert path == abspath
file = File().save(origFile)


@pytest.mark.usefixtures('unbindLargeImage')
@pytest.mark.plugin('large_image')
def testHistogramCaching(server, admin, user, fsAssetstore):
    """
    Check that a histogram request leaves one cached entry, and that the
    count and delete endpoints for cached histograms are admin-only.
    """
    uploaded = utilities.uploadExternalFile('sample_image.ptif', admin, fsAssetstore)
    itemId = str(uploaded['itemId'])
    histogramsPath = '/large_image/histograms'

    # Request a histogram once so that there is something in the cache.
    resp = server.request(
        path='/item/%s/tiles/histogram' % itemId, user=admin, isJson=False)
    assert utilities.respStatus(resp) == 200

    # Test GET histograms: refused for a regular user, one entry for admin.
    resp = server.request(path=histogramsPath, user=user)
    assert utilities.respStatus(resp) == 403
    resp = server.request(path=histogramsPath, user=admin)
    assert utilities.respStatus(resp) == 200
    assert resp.json == 1

    # Test DELETE histograms: refused for a regular user, removes the one
    # entry for admin.
    resp = server.request(method='DELETE', path=histogramsPath, user=user)
    assert utilities.respStatus(resp) == 403
    resp = server.request(method='DELETE', path=histogramsPath, user=admin)
    assert utilities.respStatus(resp) == 200
    assert resp.json == 1

    # After deletion, nothing is left to count.
    resp = server.request(path=histogramsPath, user=admin)
    assert utilities.respStatus(resp) == 200
    assert resp.json == 0
21 changes: 21 additions & 0 deletions girder/test_girder/test_tiles_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,27 @@ def testTilesHistogram(server, admin, fsAssetstore):
assert len(resp.json[0]['hist']) == 256
assert resp.json[1]['samples'] == 2801664
assert resp.json[1]['hist'][128] == 176
# A second query will fetch it from cache
resp = server.request(
path='/item/%s/tiles/histogram' % itemId,
params={'width': 2048, 'height': 2048, 'resample': False})
assert len(resp.json) == 3
assert len(resp.json[0]['hist']) == 256


@pytest.mark.usefixtures('unbindLargeImage')
@pytest.mark.plugin('large_image')
def testTilesHistogramWithRange(server, admin, fsAssetstore):
    """
    Check the histogram endpoint when rangeMin and rangeMax are supplied.
    """
    uploaded = utilities.uploadExternalFile('sample_image.ptif', admin, fsAssetstore)
    itemId = str(uploaded['itemId'])
    histogramParams = {
        'width': 2048, 'height': 2048, 'resample': False,
        'rangeMin': 10, 'rangeMax': 240,
    }
    resp = server.request(
        path='/item/%s/tiles/histogram' % itemId, params=histogramParams)
    # One histogram per band, each with 256 bins.
    assert len(resp.json) == 3
    assert len(resp.json[0]['hist']) == 256
    # Expected values for the second band of the sample image.
    secondBand = resp.json[1]
    assert secondBand['samples'] == 685979
    assert secondBand['hist'][128] == 186


@pytest.mark.usefixtures('unbindLargeImage')
Expand Down
1 change: 1 addition & 0 deletions large_image/tilesource/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,7 @@ def _pilFormatMatches(self, image, match=True, **kwargs):
# compatibility could be an issue.
return False

@methodcache()
def histogram(self, dtype=None, onlyMinMax=False, bins=256,
density=False, format=None, *args, **kwargs):
"""
Expand Down
6 changes: 6 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ setenv =
GDAL_PAM_ENABLED=no
PIP_FIND_LINKS=https://girder.github.io/large_image_wheels

# To run just some non-web-client tests, do tox -e server -- -k <test name>
# Don't use this for CI or full tests.
[testenv:server]
commands =
pytest {posargs}

[testenv:flake8]
skipsdist = true
skip_install = true
Expand Down