From 051c88d33c4fc9ec7edfed68ad5bbaaca6f1592c Mon Sep 17 00:00:00 2001 From: David Manthey Date: Tue, 30 Aug 2022 08:55:48 -0400 Subject: [PATCH] Handle large user records in annotation elements. These are subject to the mongo limit of 16Mb documents, but it can be useful to store large vectors of supplemental information in them. --- CHANGELOG.md | 1 + .../models/annotationelement.py | 35 ++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index faea2adee..c6dfaf8a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Fix iterating tiles where the overlap larger than the tile size ([940](../../pull/940)) - Better ignore tiff directories that aren't part of the pyramid ([943](../../pull/943)) - Fix an issue with styling frames in ome tiffs ([945](../../pull/945)) +- Better handle large user records in annotation elements ([949](../../pull/949)) ### Changes - Adjusted rest request logging rates for region endpoint ([948](../../pull/948)) diff --git a/girder_annotation/girder_large_image_annotation/models/annotationelement.py b/girder_annotation/girder_large_image_annotation/models/annotationelement.py index e919175cf..13f63bca5 100644 --- a/girder_annotation/girder_large_image_annotation/models/annotationelement.py +++ b/girder_annotation/girder_large_image_annotation/models/annotationelement.py @@ -36,6 +36,7 @@ # store part of them in an associated file. This is slower, so don't do it for # small ones. 
MAX_ELEMENT_DOCUMENT = 10000 +MAX_ELEMENT_USER_DOCUMENT = 1000000 class Annotationelement(Model): @@ -291,6 +292,17 @@ def yieldElements(self, annotation, region=None, info=None): # noqa data.write(chunk) data.seek(0) element[datafile['key']] = pickle.load(data) + if 'userFileId' in datafile: + data = io.BytesIO() + chunksize = 1024 ** 2 + with File().open(File().load(datafile['userFileId'], force=True)) as fptr: + while True: + chunk = fptr.read(chunksize) + if not len(chunk): + break + data.write(chunk) + data.seek(0) + element['user'] = pickle.load(data) if region.get('bbox') and 'bbox' in entry: element['_bbox'] = entry['bbox'] if 'bbox' not in info: @@ -327,9 +339,11 @@ def removeWithQuery(self, query): attachedQuery = query.copy() attachedQuery['datafile'] = {'$exists': True} for element in self.collection.find(attachedQuery): - file = File().load(element['datafile']['fileId'], force=True) - if file: - File().remove(file) + for key in {'fileId', 'userFileId'}: + if key in element['datafile']: + file = File().load(element['datafile'][key], force=True) + if file: + File().remove(file) self.collection.bulk_write([pymongo.DeleteMany(query)], ordered=False) def removeElements(self, annotation): @@ -505,10 +519,19 @@ def saveElementAsFile(self, annotation, entries): io.BytesIO(data), size=len(data), name='_annotationElementData', parentType='item', parent=item, user=None, mimeType='application/json', attachParent=True) + userdata = None + if 'user' in element: + userdata = pickle.dumps(element.pop('user'), protocol=4) + userFile = Upload().uploadFromFile( + io.BytesIO(userdata), size=len(userdata), name='_annotationElementUserData', + parentType='item', parent=item, user=None, + mimeType='application/json', attachParent=True) entries[0]['datafile'] = { 'key': key, 'fileId': elementFile['_id'], } + if userdata: + entries[0]['datafile']['userFileId'] = userFile['_id'] def updateElementChunk(self, elements, chunk, chunkSize, annotation, now): """ @@ -525,8 +548,10 @@ 
def updateElementChunk(self, elements, chunk, chunkSize, annotation, now): 'element': element } for element in elements[chunk:chunk + chunkSize]] prepTime = time.time() - chunkStartTime - if (len(entries) == 1 and len(entries[0]['element'].get( - 'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT): + if (len(entries) == 1 and (len(entries[0]['element'].get( + 'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT or ( + 'user' in entries[0]['element'] and + len(pickle.dumps(entries[0]['element'], protocol=4)) > MAX_ELEMENT_USER_DOCUMENT))): self.saveElementAsFile(annotation, entries) res = self.collection.insert_many(entries, ordered=False) for pos, entry in enumerate(entries):