From e0337744ac31665895ee879a973f021ae85fd073 Mon Sep 17 00:00:00 2001
From: David Manthey
Date: Wed, 24 Apr 2024 15:53:05 -0400
Subject: [PATCH] Get all plottable data

---
 .../rest/annotation.py                        |  41 ++-
 .../utils/__init__.py                         | 354 ++++++++++++++++++
 2 files changed, 394 insertions(+), 1 deletion(-)

diff --git a/girder_annotation/girder_large_image_annotation/rest/annotation.py b/girder_annotation/girder_large_image_annotation/rest/annotation.py
index 30a18349f..c1086868c 100644
--- a/girder_annotation/girder_large_image_annotation/rest/annotation.py
+++ b/girder_annotation/girder_large_image_annotation/rest/annotation.py
@@ -35,7 +35,7 @@
 from girder.utility import JsonEncoder
 from girder.utility.progress import setResponseTimeLimit
 
-from .. import constants
+from .. import constants, utils
 from ..models.annotation import Annotation, AnnotationSchema
 from ..models.annotationelement import Annotationelement
 
@@ -65,6 +65,8 @@ def __init__(self):
         self.route('GET', ('item', ':id'), self.getItemAnnotations)
         self.route('POST', ('item', ':id'), self.createItemAnnotations)
         self.route('DELETE', ('item', ':id'), self.deleteItemAnnotations)
+        self.route('POST', ('item', ':id', 'plot', 'list'), self.getItemPlottableElements)
+        self.route('POST', ('item', ':id', 'plot', 'data'), self.getItemPlottableData)
         self.route('GET', ('folder', ':id'), self.returnFolderAnnotations)
         self.route('GET', ('folder', ':id', 'present'), self.existFolderAnnotations)
         self.route('GET', ('folder', ':id', 'create'), self.canCreateFolderAnnotations)
@@ -617,6 +619,43 @@ def deleteItemAnnotations(self, item):
             count += 1
         return count
 
+    @autoDescribeRoute(
+        Description('Get a list of plottable data related to an item and its annotations.')
+        .modelParam('id', model=Item, level=AccessType.READ)
+        .jsonParam('annotations', 'A JSON list of annotation IDs that should '
+                   'be included. An entry of __all__ will include all '
+                   'annotations.', paramType='formData', requireArray=True,
+                   required=False)
+        .errorResponse('ID was invalid.')
+        .errorResponse('Read access was denied for the item.', 403),
+    )
+    @access.public(cookie=True, scope=TokenScope.DATA_READ)
+    def getItemPlottableElements(self, item, annotations):
+        user = self.getCurrentUser()
+        data = utils.PlottableItemData(user, item, annotations=annotations)
+        return data.columns
+
+    @autoDescribeRoute(
+        Description('Get plottable data related to an item and its annotations.')
+        .modelParam('id', model=Item, level=AccessType.READ)
+        .param('adjacentItems', 'Whether to include adjacent item data.',
+               required=False, default=True, dataType='boolean')
+        .param('keys', 'A comma separated list of data keys to retrieve (not json).',
+               required=True)
+        .jsonParam('annotations', 'A JSON list of annotation IDs that should '
+                   'be included. An entry of __all__ will include all '
+                   'annotations.', paramType='formData', requireArray=True,
+                   required=False)
+        .errorResponse('ID was invalid.')
+        .errorResponse('Read access was denied for the item.', 403),
+    )
+    @access.public(cookie=True, scope=TokenScope.DATA_READ)
+    def getItemPlottableData(self, item, keys, adjacentItems, annotations):
+        user = self.getCurrentUser()
+        data = utils.PlottableItemData(user, item, annotations=annotations,
+                                       adjacentItems=adjacentItems)
+        return data.data(keys)
+
     def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
                              sortDir=False, count=False):
 
diff --git a/girder_annotation/girder_large_image_annotation/utils/__init__.py b/girder_annotation/girder_large_image_annotation/utils/__init__.py
index 4f9f99e90..e82a56f81 100644
--- a/girder_annotation/girder_large_image_annotation/utils/__init__.py
+++ b/girder_annotation/girder_large_image_annotation/utils/__init__.py
@@ -1,5 +1,11 @@
 import json
 import math
+import re
+
+from bson.objectid import ObjectId
+
+from girder.constants import AccessType
+from girder.models.folder import Folder
 
 
 class AnnotationGeoJSON:
@@ -334,3 +340,351 @@ def isGeoJSON(annotation):
         'Feature', 'FeatureCollection', 'GeometryCollection', 'Point',
         'LineString', 'Polygon', 'MultiPoint', 'MultiLineString',
         'MultiPolygon'}
+
+
+class PlottableItemData:
+    """
+    Collect plottable columns and values from an item, its folder, and its
+    annotations.
+    """
+
+    # The most items, including the base item, that are examined.
+    maxItems = 1000
+    # Not referenced by any method of this class yet.
+    maxAnnotationElements = 10000
+    # A column only reports its distinct values if there are no more than this.
+    maxDistinct = 20
+    # Only values of these types are treated as plottable.
+    allowedTypes = (str, bool, int, float)
+
+    def __init__(self, user, item, annotations=None, adjacentItems=False):
+        """
+        Get plottable data associated with an item.
+
+        :param user: authenticating user.
+        :param item: the item record.
+        :param annotations: None, a list of annotation ids, or __all__. If
+            adjacent items are included, the most recent annotation with the
+            same name will also be included.
+        :param adjacentItems: if True, include data from other items in the
+            same folder.
+        """
+        self.user = user
+        self._columns = None
+        self._datacolumns = None
+        self._data = None
+        self._findItems(item, adjacentItems)
+        self._findAnnotations(annotations)
+
+    def _findItems(self, item, adjacentItems=False):
+        """
+        Load the item's folder and build the list of items to examine.
+
+        The base item is always the first entry of self.items.
+
+        :param item: the base item record.
+        :param adjacentItems: if True, add other items from the same folder
+            until there are maxItems items in total.
+        """
+        self._columns = None
+        self.item = item
+        self.folder = Folder().load(id=item['folderId'], user=self.user, level=AccessType.READ)
+        self.items = [item]
+        if adjacentItems:
+            for entry in Folder().childItems(self.folder):
+                if len(self.items) >= self.maxItems:
+                    break
+                if entry['_id'] != item['_id']:
+                    # skip if item doesn't have appropriate metadata or
+                    # annotations. If skipping, add to list to check if
+                    # dataframe
+                    # TODO: DWM::
+                    self.items.append(entry)
+        # TODO: find csv/xlsx/dataframe items in the folder, exclude them from
+        # the item list but include them in general
+
+    def _findAnnotations(self, annotations):
+        """
+        Find the annotations to use for the base item and adjacent items.
+
+        self.annotations is set to None if no annotations are used.
+        Otherwise, it is a list with one entry per item in self.items. The
+        first entry is the list of the base item's annotations. Each later
+        entry is a list of the same length. The position of the first base
+        annotation with a name holds the adjacent item's annotation of that
+        name with the highest _version; all other positions hold None.
+
+        :param annotations: None, a list of annotation ids, a comma-separated
+            string of annotation ids, or __all__.
+        """
+        from ..models.annotation import Annotation
+
+        self._columns = None
+        if isinstance(annotations, str):
+            annotations = annotations.split(',')
+        self.annotations = None
+        if annotations:
+            self.annotations = []
+            query = {'_active': {'$ne': False}, 'itemId': self.item['_id']}
+            if annotations[0] != '__all__':
+                query['_id'] = {'$in': [ObjectId(annotId) for annotId in annotations]}
+            self.annotations.append(list(Annotation().find(
+                query, limit=0, sort=[('_version', -1)])))
+            if not len(self.annotations[0]):
+                self.annotations = None
+        # Find adjacent annotations. Check self.annotations rather than the
+        # request, since it is None when no requested annotation was found.
+        if self.annotations and len(self.items) > 1:
+            names = {}
+            for idx, annot in enumerate(self.annotations[0]):
+                if annot['annotation']['name'] not in names:
+                    names[annot['annotation']['name']] = idx
+            for adjitem in self.items[1:]:
+                query = {'_active': {'$ne': False}, 'itemId': adjitem['_id']}
+                annotList = [None] * len(self.annotations[0])
+                for annot in Annotation().find(query, limit=0, sort=[('_version', -1)]):
+                    if annot['annotation']['name'] in names and annotList[
+                            names[annot['annotation']['name']]] is None:
+                        annotList[names[annot['annotation']['name']]] = annot
+                self.annotations.append(annotList)
+
+    def _addColumn(self, columns, fullkey, title, root, key, source):
+        """
+        Ensure a column record exists and lists a data location.
+
+        :param columns: a dictionary of column records keyed by fullkey.
+        :param fullkey: the unique key of the column.
+        :param title: a human readable title. Only used for a new column.
+        :param root: the root data array.
+        :param key: the specific data tag.
+        :param source: the source of the data.
+        :returns: the index of [root, key, source] in the column's where list.
+        """
+        if fullkey not in columns:
+            columns[fullkey] = {
+                'key': fullkey,
+                'type': 'number',
+                'where': [[root, key, source]], 'title': title,
+                'count': 0, 'distinct': set(), 'min': None,
+                'max': None}
+            return 0
+        elif [root, key, source] not in columns[fullkey]['where']:
+            columns[fullkey]['where'].append([root, key, source])
+            return len(columns[fullkey]['where']) - 1
+        return columns[fullkey]['where'].index([root, key, source])
+
+    def _columnKey(self, source, root, key):
+        """
+        Get the column key and title for a data location.
+
+        Keys that match a known pattern are mapped to a fixed column key and
+        title regardless of root and source. Results are cached on the
+        instance.
+
+        :param source: the source of the data.
+        :param root: the root data array.
+        :param key: the specific data tag.
+        :returns: a tuple of (fullkey, title).
+        """
+        if not hasattr(self, '_columnKeyCache'):
+            self._columnKeyCache = {}
+        hashkey = (source, root, key)
+        if hashkey in self._columnKeyCache:
+            return self._columnKeyCache[hashkey]
+        fullkey = f'{root}.{key}.{source}'.lower()
+        title = f'{root} {key}'
+        keymap = {
+            r'(?i)(item|image)_(id|name)$': {'key': '_0_item.name', 'title': 'Item Name'},
+            r'(?i)(low|min)(_|)x': {'key': '_bbox.x0', 'title': 'Bounding Box Low X'},
+            r'(?i)(low|min)(_|)y': {'key': '_bbox.y0', 'title': 'Bounding Box Low Y'},
+            r'(?i)(high|max)(_|)x': {'key': '_bbox.x1', 'title': 'Bounding Box High X'},
+            r'(?i)(high|max)(_|)y': {'key': '_bbox.y1', 'title': 'Bounding Box High Y'},
+        }
+        for k, v in keymap.items():
+            if re.match(k, key):
+                fullkey = v['key']
+                title = v['title']
+                break
+        self._columnKeyCache[hashkey] = fullkey, title
+        return fullkey, title
+
+    def _scanColumnByKey(self, result, key, entry, where=0):
+        """
+        Update a column record from one key of a list of records.
+
+        A column starts as a number. If any value cannot be converted to a
+        float, the column becomes a string. If data is being collected for
+        this column, each value is stored keyed by (where, record index).
+
+        :param result: the column record to update.
+        :param key: the key to read from each record.
+        :param entry: a list of dictionaries.
+        :param where: the index of the data location in the column's where
+            list.
+        """
+        if result['type'] == 'number':
+            try:
+                for record in entry:
+                    if isinstance(record.get(key), self.allowedTypes):
+                        float(record[key])
+            except (TypeError, ValueError, OverflowError):
+                result['type'] = 'string'
+                result['distinct'] = {str(v) for v in result['distinct']}
+        for ridx, record in enumerate(entry):
+            v = record.get(key)
+            if not isinstance(v, self.allowedTypes):
+                continue
+            result['count'] += 1
+            v = float(v) if result['type'] == 'number' else str(v)
+            # Allow one value past maxDistinct so an overflow can be detected.
+            if len(result['distinct']) <= self.maxDistinct:
+                result['distinct'].add(v)
+            if result['type'] == 'number':
+                if result['min'] is None:
+                    result['min'] = result['max'] = v
+                result['min'] = min(result['min'], v)
+                result['max'] = max(result['max'], v)
+            if self._datacolumns and result['key'] in self._datacolumns:
+                self._datacolumns[result['key']][(where, ridx)] = v
+
+    def _scanColumn(self, meta, source, columns, auxmeta=None):
+        """
+        Scan a metadata dictionary for lists of records and add their keys
+        as columns.
+
+        Only values that are non-empty lists whose first element is a
+        dictionary are used, and only the keys of that first element whose
+        value is one of the allowed types.
+
+        :param meta: a dictionary of metadata.
+        :param source: the source of the data.
+        :param columns: a dictionary of column records to update.
+        :param auxmeta: an optional list of metadata dictionaries that are
+            scanned for the same roots and keys.
+        """
+        for root, entry in meta.items():
+            if not isinstance(entry, list) or not len(entry) or not isinstance(entry[0], dict):
+                continue
+            for key in entry[0]:
+                if not isinstance(entry[0][key], self.allowedTypes):
+                    continue
+                fullkey, title = self._columnKey(source, root, key)
+                where = self._addColumn(columns, fullkey, title, root, key, source)
+                result = columns[fullkey]
+                self._scanColumnByKey(result, key, entry, where)
+                if auxmeta:
+                    for aux in auxmeta:
+                        if (isinstance(aux.get(root), list) and
+                                len(aux[root]) and
+                                isinstance(aux[root][0], dict) and
+                                key in aux[root][0]):
+                            self._scanColumnByKey(result, key, aux[root], where)
+
+    @property
+    def columns(self):
+        """
+        Get a sorted list of plottable columns with some metadata for each.
+
+        Each data entry contains
+
+            :key: a unique string. This is a good first-order sort
+            :where: a list of [root, key, source] locations of the data,
+                where root is the root data array, key is the specific data
+                tag, and source is the source of the data (folder, item,
+                annotation, annotationelement, file)
+            :type: string or number
+            :title: a human readable title
+            :count: the number of values that were found
+            :[distinct]: a list of distinct values if there are less than some
+                maximum number of distinct values. This might not include
+                values from adjacent items
+            :[min]: for number data types, the lowest value present
+            :[max]: for number data types, the highest value present
+
+        :returns: a sorted list of data entries.
+        """
+        if self._columns is not None:
+            return self._columns
+        columns = {}
+        self._addColumn(
+            columns, '_0_item.name', 'Item Name', 'Item', 'name', 'base')
+        self._addColumn(
+            columns, '_2_item.id', 'Item ID', 'Item', '_id', 'base')
+        self._scanColumn(self.folder.get('meta', {}), 'folder', columns)
+        self._scanColumn(self.item.get('meta', {}), 'item', columns,
+                         [item.get('meta', {}) for item in self.items[1:]])
+        for anidx, annot in enumerate(self.annotations[0] if self.annotations is not None else []):
+            # Attributes are stored inside the annotation document's
+            # annotation record, next to its name.
+            self._scanColumn(
+                annot['annotation'].get('attributes', {}), 'annotation', columns,
+                [itemannot[anidx]['annotation'].get('attributes', {})
+                 for itemannot in self.annotations[1:]
+                 if itemannot[anidx] is not None])
+            if not anidx:
+                self._addColumn(
+                    columns, '_1_annotation.name', 'Annotation Name',
+                    'Annotation', 'name', 'base')
+                self._addColumn(
+                    columns, '_3_annotation.id', 'Annotation ID',
+                    'Annotation', '_id', 'base')
+                self._addColumn(
+                    columns, '_bbox.x0', 'Bounding Box Low X', 'bbox', 'lowx',
+                    'annotationelement')
+                self._addColumn(
+                    columns, '_bbox.y0', 'Bounding Box Low Y', 'bbox', 'lowy',
+                    'annotationelement')
+                self._addColumn(
+                    columns, '_bbox.x1', 'Bounding Box High X', 'bbox',
+                    'highx', 'annotationelement')
+                self._addColumn(
+                    columns, '_bbox.y1', 'Bounding Box High Y', 'bbox',
+                    'highy', 'annotationelement')
+        # ##DWM:: add annotation elements
+        # ##DWM:: bbox could be from min/max query
+        # ##DWM:: Add csv
+        for result in columns.values():
+            if len(result['distinct']) <= self.maxDistinct:
+                result['distinct'] = sorted(result['distinct'])
+            else:
+                result.pop('distinct', None)
+            if result['type'] != 'number' or result['min'] is None:
+                result.pop('min', None)
+                result.pop('max', None)
+        self._columns = sorted(columns.values(), key=lambda x: x['key'])
+        return self._columns
+
+    def data(self, columns):
+        """
+        Get plottable data.
+
+        :param columns: a list of column keys or a comma-separated string of
+            column keys.
+        :returns: a dictionary with 'columns', the column records of the
+            requested keys that exist, sorted by key, and 'data', a list of
+            rows. Each row is a list with one value per returned column,
+            using None where a column has no value for that row.
+        """
+        if not isinstance(columns, list):
+            columns = columns.split(',')
+        # TODO: Always augment columns with item id, annotation id?
+        self._datacolumns = {c: {} for c in columns}
+        # Data is only collected while the columns are scanned, so discard
+        # any cached column list to force a new scan.
+        self._columns = None
+        collist = self.columns
+        rows = set()
+        for coldata in self._datacolumns.values():
+            rows.update(coldata)
+        rows = sorted(rows)
+        colsout = [col for col in collist if col['key'] in self._datacolumns]
+        # Size the rows by the returned columns; requested keys that do not
+        # exist would otherwise leave unused trailing None entries.
+        data = [[None] * len(colsout) for _ in rows]
+        for cidx, col in enumerate(colsout):
+            coldata = self._datacolumns[col['key']]
+            for ridx, rowid in enumerate(rows):
+                data[ridx][cidx] = coldata.get(rowid)
+        return {
+            'columns': colsout,
+            'data': data}