diff --git a/girder_annotation/girder_large_image_annotation/handlers.py b/girder_annotation/girder_large_image_annotation/handlers.py
index 8f18d8ea9..a53f34aed 100644
--- a/girder_annotation/girder_large_image_annotation/handlers.py
+++ b/girder_annotation/girder_large_image_annotation/handlers.py
@@ -1,4 +1,5 @@
 import json
+import time
 import uuid
 
 import cachetools
@@ -102,7 +103,7 @@ def resolveAnnotationGirderIds(event, results, data, possibleGirderIds):
     return True
 
 
-def process_annotations(event):
+def process_annotations(event):  # noqa: C901
     """Add annotations to an image on a ``data.process`` event"""
     results = _itemFromEvent(event, 'LargeImageAnnotationUpload')
     if not results:
@@ -110,10 +111,14 @@ def process_annotations(event):
     item = results['item']
     user = results['user']
 
+    startTime = time.time()
     file = File().load(
         event.info.get('file', {}).get('_id'),
         level=AccessType.READ, user=user
     )
+    if time.time() - startTime > 10:
+        logger.info('Loaded annotation file in %5.3fs', time.time() - startTime)
+    startTime = time.time()
 
     if not file:
         logger.error('Could not load models from the database')
@@ -123,6 +128,8 @@ def process_annotations(event):
     except Exception:
         logger.error('Could not parse annotation file')
         raise
+    if time.time() - startTime > 10:
+        logger.info('Decoded json in %5.3fs', time.time() - startTime)
 
     if not isinstance(data, list):
         data = [data]
diff --git a/girder_annotation/girder_large_image_annotation/models/annotation.py b/girder_annotation/girder_large_image_annotation/models/annotation.py
index 4fab8cde1..a33ca09ed 100644
--- a/girder_annotation/girder_large_image_annotation/models/annotation.py
+++ b/girder_annotation/girder_large_image_annotation/models/annotation.py
@@ -1016,9 +1016,13 @@ def _similarElementStructure(self, a, b, parentKey=None):  # noqa
             if parentKey == 'holes':
                 return all(
                     len(hole) == 3 and
-                    isinstance(hole[0], self.numberInstance) and
-                    isinstance(hole[1], self.numberInstance) and
-                    isinstance(hole[2], self.numberInstance)
+                    # this is faster than checking the instance type, and, if
+                    # it raises an exception, it would have failed validation
+                    # any way.
+                    1 + hole[0] + hole[1] + hole[2] is not None
+                    # isinstance(hole[0], self.numberInstance) and
+                    # isinstance(hole[1], self.numberInstance) and
+                    # isinstance(hole[2], self.numberInstance)
                     for hlist in b
                     for hole in hlist)
             if len(a) != len(b):
@@ -1027,9 +1031,13 @@ def _similarElementStructure(self, a, b, parentKey=None):  # noqa
                 # If this is an array of points, let it pass
                 return all(
                     len(elem) == 3 and
-                    isinstance(elem[0], self.numberInstance) and
-                    isinstance(elem[1], self.numberInstance) and
-                    isinstance(elem[2], self.numberInstance)
+                    # this is faster than checking the instance type, and, if
+                    # it raises an exception, it would have failed validation
+                    # any way.
+                    1 + elem[0] + elem[1] + elem[2] is not None
+                    # isinstance(elem[0], self.numberInstance) and
+                    # isinstance(elem[1], self.numberInstance) and
+                    # isinstance(elem[2], self.numberInstance)
                     for elem in b)
             for idx in range(len(a)):
                 if not self._similarElementStructure(a[idx], b[idx], parentKey):
@@ -1083,11 +1091,14 @@ def validate(self, doc):  # noqa
                         element[key] = []
                     except Exception:
                         pass
-                if (not self._similarElementStructure(element, lastValidatedElement) and
-                        not self._similarElementStructure(element, lastValidatedElement2)):
+                try:
+                    if (not self._similarElementStructure(element, lastValidatedElement) and
+                            not self._similarElementStructure(element, lastValidatedElement2)):
+                        self.validatorAnnotationElement.validate(element)
+                        lastValidatedElement2 = lastValidatedElement
+                        lastValidatedElement = element
+                except TypeError:
                     self.validatorAnnotationElement.validate(element)
-                    lastValidatedElement2 = lastValidatedElement
-                    lastValidatedElement = element
                 if keys:
                     element.update(keys)
                 if time.time() - lastTime > 10:
diff --git a/girder_annotation/girder_large_image_annotation/models/annotationelement.py b/girder_annotation/girder_large_image_annotation/models/annotationelement.py
index 751322f0b..b596ce642 100644
--- a/girder_annotation/girder_large_image_annotation/models/annotationelement.py
+++ b/girder_annotation/girder_large_image_annotation/models/annotationelement.py
@@ -14,9 +14,11 @@
 # limitations under the License.
 ##############################################################################
 
+import concurrent.futures
 import datetime
 import io
 import math
+import multiprocessing
 import pickle
 import time
 
@@ -417,12 +419,15 @@ def _boundingBox(self, element):
         bbox = {}
         if 'points' in element:
             pts = element['points']
-            bbox['lowx'] = min(p[0] for p in pts)
-            bbox['lowy'] = min(p[1] for p in pts)
-            bbox['lowz'] = min(p[2] for p in pts)
-            bbox['highx'] = max(p[0] for p in pts)
-            bbox['highy'] = max(p[1] for p in pts)
-            bbox['highz'] = max(p[2] for p in pts)
+            p0 = [p[0] for p in pts]
+            p1 = [p[1] for p in pts]
+            p2 = [p[2] for p in pts]
+            bbox['lowx'] = min(p0)
+            bbox['lowy'] = min(p1)
+            bbox['lowz'] = min(p2)
+            bbox['highx'] = max(p0)
+            bbox['highy'] = max(p1)
+            bbox['highz'] = max(p2)
             bbox['details'] = len(pts)
         elif element.get('type') == 'griddata':
             x0, y0, z = element['origin']
@@ -504,6 +509,40 @@ def saveElementAsFile(self, annotation, entries):
             'fileId': elementFile['_id'],
         }
 
+    def updateElementChunk(self, elements, chunk, chunkSize, annotation, now):
+        """
+        Update the database for a chunk of elements. See the updateElements
+        method for details.
+
+        :param elements: the complete list of the annotation's elements.
+        :param chunk: the index in elements of the first element to insert.
+        :param chunkSize: the maximum number of elements to insert.
+        :param annotation: the annotation that the elements belong to.
+        :param now: the creation time recorded on each inserted element.
+        """
+        chunkStartTime = time.time()
+        entries = [{
+            'annotationId': annotation['_id'],
+            '_version': annotation['_version'],
+            'created': now,
+            'bbox': self._boundingBox(element),
+            'element': element
+        } for element in elements[chunk:chunk + chunkSize]]
+        prepTime = time.time() - chunkStartTime
+        if (len(entries) == 1 and len(entries[0]['element'].get(
+                'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT):
+            self.saveElementAsFile(annotation, entries)
+        res = self.collection.insert_many(entries, ordered=False)
+        for pos, entry in enumerate(entries):
+            if 'id' not in entry['element']:
+                entry['element']['id'] = str(res.inserted_ids[pos])
+        # If the insert is slow, log information about it.
+        chunkTime = time.time() - chunkStartTime
+        if chunkTime > 10:
+            logger.info('insert %d elements in %4.2fs (prep time %4.2fs), chunk %d/%d' % (
+                len(entries), chunkTime, prepTime,
+                chunk + len(entries), len(elements)))
+
     def updateElements(self, annotation):
         """
         Given an annotation, extract the elements from it and update the
@@ -511,35 +550,22 @@ def updateElements(self, annotation):
 
         :param annotation: the annotation to save elements for. Modified.
         """
-        startTime = lastTime = time.time()
+        startTime = time.time()
         elements = annotation['annotation'].get('elements', [])
         if not len(elements):
             return
         now = datetime.datetime.utcnow()
-        chunkSize = 100000
-        for chunk in range(0, len(elements), chunkSize):
-            chunkStartTime = time.time()
-            entries = [{
-                'annotationId': annotation['_id'],
-                '_version': annotation['_version'],
-                'created': now,
-                'bbox': self._boundingBox(element),
-                'element': element
-            } for element in elements[chunk:chunk + chunkSize]]
-            prepTime = time.time() - chunkStartTime
-            if (len(entries) == 1 and len(entries[0]['element'].get(
-                    'points', entries[0]['element'].get('values', []))) > MAX_ELEMENT_DOCUMENT):
-                self.saveElementAsFile(annotation, entries)
-            res = self.collection.insert_many(entries)
-            for pos, entry in enumerate(entries):
-                if 'id' not in entry['element']:
-                    entry['element']['id'] = str(res.inserted_ids[pos])
-            # If the whole insert is slow, log information about it.
-            if time.time() - lastTime > 10:
-                logger.info('insert %d elements in %4.2fs (prep time %4.2fs), done %d/%d' % (
-                    len(entries), time.time() - chunkStartTime, prepTime,
-                    chunk + len(entries), len(elements)))
-                lastTime = time.time()
+        threads = multiprocessing.cpu_count()
+        chunkSize = int(max(100000 // threads, 10000))
+        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as pool:
+            futures = [
+                pool.submit(self.updateElementChunk, elements, chunk, chunkSize, annotation, now)
+                for chunk in range(0, len(elements), chunkSize)]
+        # Collect the results so that an exception raised in a worker thread
+        # (such as a failed insert) propagates to the caller rather than being
+        # silently discarded with the future.
+        for future in futures:
+            future.result()
         if time.time() - startTime > 10:
             logger.info('inserted %d elements in %4.2fs' % (
                 len(elements), time.time() - startTime))
diff --git a/girder_annotation/test_annotation/test_annotations.py b/girder_annotation/test_annotation/test_annotations.py
index 849adb2ac..f59646b87 100644
--- a/girder_annotation/test_annotation/test_annotations.py
+++ b/girder_annotation/test_annotation/test_annotations.py
@@ -94,7 +94,13 @@ def testAnnotationCreate(self, admin):
         assert len(result['annotation']['elements']) == 1
 
     def testSimilarElementStructure(self, db):
-        ses = Annotation()._similarElementStructure
+
+        def ses(a, b):
+            try:
+                return Annotation()._similarElementStructure(a, b)
+            except TypeError:
+                return False
+
         assert ses('a', 'a')
         assert not ses('a', 'b')
         assert ses(10, 10)