Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle large user records in annotation elements. #949

Merged
merged 1 commit into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
- Fix iterating tiles where the overlap is larger than the tile size ([940](../../pull/940))
- Better ignore tiff directories that aren't part of the pyramid ([943](../../pull/943))
- Fix an issue with styling frames in ome tiffs ([945](../../pull/945))
- Better handle large user records in annotation elements ([949](../../pull/949))

### Changes
- Adjusted rest request logging rates for region endpoint ([948](../../pull/948))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
# store part of them in an associated file. This is slower, so don't do it for
# small ones.
MAX_ELEMENT_DOCUMENT = 10000
MAX_ELEMENT_USER_DOCUMENT = 1000000


class Annotationelement(Model):
Expand Down Expand Up @@ -291,6 +292,17 @@ def yieldElements(self, annotation, region=None, info=None): # noqa
data.write(chunk)
data.seek(0)
element[datafile['key']] = pickle.load(data)
if 'userFileId' in datafile:
data = io.BytesIO()
chunksize = 1024 ** 2
with File().open(File().load(datafile['userFileId'], force=True)) as fptr:
while True:
chunk = fptr.read(chunksize)
if not len(chunk):
break
data.write(chunk)
data.seek(0)
element['user'] = pickle.load(data)
if region.get('bbox') and 'bbox' in entry:
element['_bbox'] = entry['bbox']
if 'bbox' not in info:
Expand Down Expand Up @@ -327,9 +339,11 @@ def removeWithQuery(self, query):
attachedQuery = query.copy()
attachedQuery['datafile'] = {'$exists': True}
for element in self.collection.find(attachedQuery):
file = File().load(element['datafile']['fileId'], force=True)
if file:
File().remove(file)
for key in {'fileId', 'userFileId'}:
if key in element['datafile']:
file = File().load(element['datafile'][key], force=True)
if file:
File().remove(file)
self.collection.bulk_write([pymongo.DeleteMany(query)], ordered=False)

def removeElements(self, annotation):
Expand Down Expand Up @@ -505,10 +519,19 @@ def saveElementAsFile(self, annotation, entries):
io.BytesIO(data), size=len(data), name='_annotationElementData',
parentType='item', parent=item, user=None,
mimeType='application/json', attachParent=True)
userdata = None
if 'user' in element:
userdata = pickle.dumps(element.pop('user'), protocol=4)
userFile = Upload().uploadFromFile(
io.BytesIO(userdata), size=len(userdata), name='_annotationElementUserData',
parentType='item', parent=item, user=None,
mimeType='application/json', attachParent=True)
entries[0]['datafile'] = {
'key': key,
'fileId': elementFile['_id'],
}
if userdata:
entries[0]['datafile']['userFileId'] = userFile['_id']

def updateElementChunk(self, elements, chunk, chunkSize, annotation, now):
"""
Expand All @@ -525,8 +548,10 @@ def updateElementChunk(self, elements, chunk, chunkSize, annotation, now):
'element': element
} for element in elements[chunk:chunk + chunkSize]]
prepTime = time.time() - chunkStartTime
if (len(entries) == 1 and len(entries[0]['element'].get(
'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT):
if (len(entries) == 1 and (len(entries[0]['element'].get(
'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT or (
'user' in entries[0]['element'] and
len(pickle.dumps(entries[0]['element'], protocol=4)) > MAX_ELEMENT_USER_DOCUMENT))):
self.saveElementAsFile(annotation, entries)
res = self.collection.insert_many(entries, ordered=False)
for pos, entry in enumerate(entries):
Expand Down