Skip to content

Commit

Permalink
Merge pull request #1524 from girder/plottable-data
Browse files Browse the repository at this point in the history
Get all plottable data
  • Loading branch information
manthey authored Jul 10, 2024
2 parents d8d094c + b20bd99 commit 87034d0
Show file tree
Hide file tree
Showing 3 changed files with 327 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Show a loading spinner on the image display in geojs in girder ([#1559](../../pull/1559))
- Better handle images that are composed of a folder and an item ([#1561](../../pull/1561))
- Allow specifying which sources are checked with canReadList ([#1562](../../pull/1562))
- Added endpoints to get plottable data related to annotations ([#1524](../../pull/1524))

### Bug Fixes
- Fix a compositing error in transformed multi source images ([#1560](../../pull/1560))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from girder.utility import JsonEncoder
from girder.utility.progress import setResponseTimeLimit

from .. import constants
from .. import constants, utils
from ..models.annotation import Annotation, AnnotationSchema
from ..models.annotationelement import Annotationelement

Expand Down Expand Up @@ -65,6 +65,8 @@ def __init__(self):
self.route('GET', ('item', ':id'), self.getItemAnnotations)
self.route('POST', ('item', ':id'), self.createItemAnnotations)
self.route('DELETE', ('item', ':id'), self.deleteItemAnnotations)
self.route('POST', ('item', ':id', 'plot', 'list'), self.getItemPlottableElements)
self.route('POST', ('item', ':id', 'plot', 'data'), self.getItemPlottableData)
self.route('GET', ('folder', ':id'), self.returnFolderAnnotations)
self.route('GET', ('folder', ':id', 'present'), self.existFolderAnnotations)
self.route('GET', ('folder', ':id', 'create'), self.canCreateFolderAnnotations)
Expand Down Expand Up @@ -617,6 +619,45 @@ def deleteItemAnnotations(self, item):
count += 1
return count

@autoDescribeRoute(
    Description('Get a list of plottable data related to an item and its annotations.')
    .modelParam('id', model=Item, level=AccessType.READ)
    # The leading backslash keeps markdown renderers from treating the
    # double underscores as emphasis; this matches the description used by
    # the sibling plot/data endpoint.
    .jsonParam('annotations', 'A JSON list of annotation IDs that should '
               'be included. An entry of \\__all__ will include all '
               'annotations.', paramType='formData', requireArray=True,
               required=False)
    .errorResponse('ID was invalid.')
    .errorResponse('Read access was denied for the item.', 403),
)
@access.public(cookie=True, scope=TokenScope.DATA_READ)
def getItemPlottableElements(self, item, annotations):
    """
    List the plottable columns available for an item.

    :param item: the item document resolved from the ``id`` route
        parameter (loaded with READ access).
    :param annotations: None, or a list of annotation ids whose data
        should be considered; a first entry of ``__all__`` selects every
        annotation on the item.
    :returns: the ``columns`` list produced by
        ``utils.PlottableItemData`` for this item.
    """
    user = self.getCurrentUser()
    data = utils.PlottableItemData(user, item, annotations=annotations)
    return data.columns

@autoDescribeRoute(
    Description('Get plottable data related to an item and its annotations.')
    .modelParam('id', model=Item, level=AccessType.READ)
    .param('adjacentItems', 'Whether to include adjacent item data.',
           required=False, default=True, dataType='boolean')
    .param('keys', 'A comma separated list of data keys to retrieve (not json).',
           required=True)
    .param('requiredKeys', 'A comma separated list of data keys that must '
           'be non null in all response rows (not json).', required=False)
    .jsonParam('annotations', 'A JSON list of annotation IDs that should '
               'be included. An entry of \\__all__ will include all '
               'annotations.', paramType='formData', requireArray=True,
               required=False)
    .errorResponse('ID was invalid.')
    .errorResponse('Read access was denied for the item.', 403),
)
@access.public(cookie=True, scope=TokenScope.DATA_READ)
def getItemPlottableData(self, item, keys, adjacentItems, annotations, requiredKeys):
    """
    Return rows of plottable data for an item.

    :param item: the item document resolved from the ``id`` route
        parameter (loaded with READ access).
    :param keys: comma separated list of column keys to retrieve.
    :param adjacentItems: whether data from adjacent items is included.
    :param annotations: None, or a list of annotation ids to include; a
        first entry of ``__all__`` selects every annotation on the item.
    :param requiredKeys: optional comma separated list of column keys
        that must be non-null in every returned row.
    :returns: the result of ``utils.PlottableItemData.data`` for the
        requested keys.
    """
    plottable = utils.PlottableItemData(
        self.getCurrentUser(),
        item,
        annotations=annotations,
        adjacentItems=adjacentItems,
    )
    return plottable.data(keys, requiredKeys)

def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
sortDir=False, count=False):

Expand Down
284 changes: 284 additions & 0 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import json
import math
import re

from bson.objectid import ObjectId

from girder import logger
from girder.constants import AccessType
from girder.models.folder import Folder


class AnnotationGeoJSON:
Expand Down Expand Up @@ -334,3 +341,280 @@ def isGeoJSON(annotation):
'Feature', 'FeatureCollection', 'GeometryCollection', 'Point',
'LineString', 'Polygon', 'MultiPoint', 'MultiLineString',
'MultiPolygon'}


class PlottableItemData:
    """
    Collect plottable, table-like data associated with an item.

    Data is gathered from the ``meta`` of the item's folder, of the item
    itself, optionally of other items in the same folder, and from the
    attributes of selected annotations.  Within any of those dictionaries,
    a value that is a non-empty list whose first entry is a dictionary is
    treated as a table: each dictionary key whose value is one of
    ``allowedTypes`` becomes a column and each list entry becomes a row.
    """

    # Upper bound on the number of items (primary item included) that are
    # examined when adjacent items are requested.
    maxItems = 1000
    # Not referenced inside this class; presumably reserved for the
    # annotation element support noted in the TODOs below.
    maxAnnotationElements = 10000
    # Columns with more distinct values than this do not report them.
    maxDistinct = 20
    # Only values of these types are considered plottable.
    allowedTypes = (str, bool, int, float)

    def __init__(self, user, item, annotations=None, adjacentItems=False):
        """
        Get plottable data associated with an item.

        :param user: authenticating user.
        :param item: the item record.
        :param annotations: None, a list of annotation ids, or __all__.  If
            adjacent items are included, the most recent annotation with the
            same name will also be included.
        :param adjacentItems: if True, include data from other items in the
            same folder.
        """
        self.user = user
        self._columns = None
        self._datacolumns = None
        self._data = None
        self._findItems(item, adjacentItems)
        self._findAnnotations(annotations)

    def _findItems(self, item, adjacentItems=False):
        """
        Record the primary item, its folder, and optionally its siblings.

        Sets ``self.item``, ``self.folder`` and ``self.items``; the primary
        item is always ``self.items[0]``.

        :param item: the primary item record.
        :param adjacentItems: if True, append other items from the same
            folder until ``maxItems`` items have been collected.
        """
        self._columns = None
        self.item = item
        self.folder = Folder().load(id=item['folderId'], user=self.user, level=AccessType.READ)
        self.items = [item]
        if adjacentItems:
            for entry in Folder().childItems(self.folder):
                if len(self.items) >= self.maxItems:
                    break
                if entry['_id'] != item['_id']:
                    # skip if item doesn't have appropriate metadata or
                    # annotations.  If skipping, add to list to check if
                    # dataframe
                    # TODO:
                    self.items.append(entry)
        # TODO: find csv/xlsx/dataframe items in the folder, exclude them from
        # the item list but include them in general

    def _findAnnotations(self, annotations):
        """
        Locate the annotations whose attributes should be scanned.

        Sets ``self.annotations`` to None when nothing was requested or
        nothing matched.  Otherwise it is a list with one entry per item in
        ``self.items``: entry 0 is the list of matching annotations on the
        primary item, and each later entry is a parallel list holding, for
        every primary annotation, the first annotation (sorted by
        descending ``_version``) on that adjacent item with the same name,
        or None if there is none.

        :param annotations: None, a list of annotation ids, a
            comma-separated string of ids, or a list whose first entry is
            ``__all__``.
        """
        from ..models.annotation import Annotation

        self._columns = None
        if isinstance(annotations, str):
            annotations = annotations.split(',')
        self.annotations = None
        if annotations:
            query = {'_active': {'$ne': False}, 'itemId': self.item['_id']}
            if annotations[0] != '__all__':
                query['_id'] = {'$in': [ObjectId(annotId) for annotId in annotations]}
            found = list(Annotation().find(
                query, limit=0, sort=[('_version', -1)]))
            if found:
                self.annotations = [found]
        # Find adjacent annotations.  This must be guarded on the annotations
        # that were actually found, not on the request: if nothing matched,
        # self.annotations is None and cannot be indexed.
        if self.annotations is None or len(self.items) <= 1:
            return
        names = {}
        for idx, annot in enumerate(self.annotations[0]):
            # Only the first annotation with a given name is matched against
            # adjacent items.
            names.setdefault(annot['annotation']['name'], idx)
        for adjitem in self.items[1:]:
            query = {'_active': {'$ne': False}, 'itemId': adjitem['_id']}
            annotList = [None] * len(self.annotations[0])
            for annot in Annotation().find(query, limit=0, sort=[('_version', -1)]):
                name = annot['annotation']['name']
                if name in names and annotList[names[name]] is None:
                    annotList[names[name]] = annot
            self.annotations.append(annotList)

    def _addColumn(self, columns, fullkey, title, root, key, source):
        """
        Register a column, or an additional location for an existing column.

        :param columns: dictionary of column records, keyed by ``fullkey``;
            modified in place.
        :param fullkey: the unique key of the column.
        :param title: a human readable title, used when the column is new.
        :param root: the name of the data array the column comes from.
        :param key: the key within each record of the data array.
        :param source: the source of the data (e.g., folder, item,
            annotation).
        :returns: a tuple of (root, source, index), where index is the
            position of this [root, key, source] entry among the column's
            ``where`` entries that share the same root and source.
        """
        if fullkey not in columns:
            columns[fullkey] = {
                'key': fullkey,
                'type': 'number',
                'where': [[root, key, source]], 'title': title,
                'count': 0, 'distinct': set(), 'min': None,
                'max': None}
            return (root, source, 0)
        if [root, key, source] not in columns[fullkey]['where']:
            columns[fullkey]['where'].append([root, key, source])
        where = -1
        for colwhere in columns[fullkey]['where']:
            if colwhere[0] == root and colwhere[2] == source:
                where += 1
                if tuple(colwhere) == (root, key, source):
                    break
        return (root, source, where)

    def _columnKey(self, source, root, key):
        """
        Compute the unique column key and title for a data location.

        Keys that look like item names or bounding box coordinates are
        mapped to shared, well-known columns; everything else gets a key of
        the form ``root.key.source`` in lower case.  Results are memoized
        per instance.

        :param source: the source of the data.
        :param root: the name of the data array.
        :param key: the key within each record of the data array.
        :returns: a tuple of (fullkey, title).
        """
        if not hasattr(self, '_columnKeyCache'):
            self._columnKeyCache = {}
        hashkey = (source, root, key)
        if hashkey in self._columnKeyCache:
            return self._columnKeyCache[hashkey]
        fullkey = f'{root}.{key}.{source}'.lower()
        title = f'{root} {key}'
        keymap = {
            r'(?i)(item|image)_(id|name)$': {'key': '_0_item.name', 'title': 'Item Name'},
            r'(?i)(low|min)(_|)x': {'key': '_bbox.x0', 'title': 'Bounding Box Low X'},
            r'(?i)(low|min)(_|)y': {'key': '_bbox.y0', 'title': 'Bounding Box Low Y'},
            r'(?i)(high|max)(_|)x': {'key': '_bbox.x1', 'title': 'Bounding Box High X'},
            r'(?i)(high|max)(_|)y': {'key': '_bbox.y1', 'title': 'Bounding Box High Y'},
        }
        # re.match anchors only at the start, so the bounding box patterns
        # also match longer keys that begin with, e.g., "min_x".
        for k, v in keymap.items():
            if re.match(k, key):
                fullkey = v['key']
                title = v['title']
                break
        self._columnKeyCache[hashkey] = fullkey, title
        return fullkey, title

    def _scanColumnByKey(self, result, key, entry, where=0):
        """
        Fold the values of one key of a data array into a column record.

        Updates the column's type, count, distinct values, and min/max.  If
        data collection is active (``self._datacolumns`` contains this
        column), each value is also stored under the row id
        ``(where, index_in_entry)``.

        :param result: the column record to update in place.
        :param key: the key to read from each record.
        :param entry: a list of records.  Entries that are not dictionaries
            are ignored rather than raising or changing the column type.
        :param where: the location identifier returned by ``_addColumn``.
        """
        if result['type'] == 'number':
            # A column stays numeric only while every value seen can be
            # converted to a float; otherwise it is demoted to a string
            # column and the distinct values so far are converted too.
            try:
                for record in entry:
                    if (isinstance(record, dict) and
                            isinstance(record.get(key), self.allowedTypes)):
                        float(record[key])
            except (TypeError, ValueError, OverflowError):
                result['type'] = 'string'
                result['distinct'] = {str(v) for v in result['distinct']}
        for ridx, record in enumerate(entry):
            if not isinstance(record, dict):
                continue
            v = record.get(key)
            if not isinstance(v, self.allowedTypes):
                continue
            result['count'] += 1
            v = float(v) if result['type'] == 'number' else str(v)
            # Allow one more than maxDistinct so that "too many distinct
            # values" can be told apart from "exactly maxDistinct".
            if len(result['distinct']) <= self.maxDistinct:
                result['distinct'].add(v)
            if result['type'] == 'number':
                if result['min'] is None:
                    result['min'] = result['max'] = v
                result['min'] = min(result['min'], v)
                result['max'] = max(result['max'], v)
            if self._datacolumns and result['key'] in self._datacolumns:
                self._datacolumns[result['key']][(where, ridx)] = v

    def _scanColumn(self, meta, source, columns, auxmeta=None):
        """
        Scan a metadata-like dictionary for data arrays and their columns.

        :param meta: a dictionary; each value that is a non-empty list whose
            first entry is a dictionary is treated as a data array.
        :param source: the source name to record for the columns.
        :param columns: dictionary of column records; modified in place.
        :param auxmeta: optional list of additional dictionaries (e.g., from
            adjacent items) that are scanned for the same roots and keys.
        """
        for root, entry in meta.items():
            if not isinstance(entry, list) or not len(entry) or not isinstance(entry[0], dict):
                continue
            # Only the keys of the first record define columns.
            for key in entry[0]:
                if not isinstance(entry[0][key], self.allowedTypes):
                    continue
                fullkey, title = self._columnKey(source, root, key)
                where = self._addColumn(columns, fullkey, title, root, key, source)
                result = columns[fullkey]
                self._scanColumnByKey(result, key, entry, where)
                if auxmeta:
                    # NOTE(review): auxiliary records are stored with the same
                    # `where` as the primary records, so collected data rows
                    # share row ids with the primary item - confirm intended.
                    for aux in auxmeta:
                        if (isinstance(aux.get(root), list) and
                                len(aux[root]) and
                                isinstance(aux[root][0], dict) and
                                key in aux[root][0]):
                            self._scanColumnByKey(result, key, aux[root], where)

    @property
    def columns(self):
        """
        Get a sorted list of plottable columns with some metadata for each.

        Each column entry is a dictionary containing

        :key: a unique string.  This is a good first-order sort
        :where: a list of [root, key, source] locations that feed the
            column, where root is the data array, key the specific data
            tag, and source the origin of the data (folder, item,
            annotation, annotationelement, base)
        :type: string or number
        :title: a human readable title
        :count: the number of values that were scanned
        :[distinct]: a sorted list of distinct values if there are no more
            than some maximum number of distinct values
        :[distinctcount]: the number of distinct values, if distinct is
            present
        :[min]: for number data types, the lowest value present
        :[max]: for number data types, the highest value present

        The result is cached until the items or annotations change.

        :returns: a list of column entries sorted by key.
        """
        if self._columns is not None:
            return self._columns
        columns = {}
        self._addColumn(
            columns, '_0_item.name', 'Item Name', 'Item', 'name', 'base')
        self._addColumn(
            columns, '_2_item.id', 'Item ID', 'Item', '_id', 'base')
        self._scanColumn(self.folder.get('meta', {}), 'folder', columns)
        self._scanColumn(self.item.get('meta', {}), 'item', columns,
                         [item.get('meta', {}) for item in self.items[1:]])
        for anidx, annot in enumerate(self.annotations[0] if self.annotations is not None else []):
            # NOTE(review): attributes are read from the top level of the
            # annotation record, while names are read from
            # annot['annotation'] elsewhere in this class - confirm which
            # level holds the attributes.
            self._scanColumn(
                annot.get('attributes', {}), 'annotation', columns,
                [itemannot[anidx].get('attributes', {})
                 for itemannot in self.annotations[1:]
                 if itemannot[anidx] is not None])
            if not anidx:
                self._addColumn(
                    columns, '_1_annotation.name', 'Annotation Name',
                    'Annotation', 'name', 'base')
                self._addColumn(
                    columns, '_3_annotation.id', 'Annotation ID',
                    'Annotation', '_id', 'base')
                self._addColumn(
                    columns, '_bbox.x0', 'Bounding Box Low X', 'bbox', 'lowx',
                    'annotationelement')
                self._addColumn(
                    columns, '_bbox.y0', 'Bounding Box Low Y', 'bbox', 'lowy',
                    'annotationelement')
                self._addColumn(
                    columns, '_bbox.x1', 'Bounding Box High X', 'bbox',
                    'highx', 'annotationelement')
                self._addColumn(
                    columns, '_bbox.y1', 'Bounding Box High Y', 'bbox',
                    'highy', 'annotationelement')
        # TODO: add annotation elements
        # TODO: bbox could be from min/max query
        # TODO: Add csv
        for result in columns.values():
            if len(result['distinct']) <= self.maxDistinct:
                result['distinct'] = sorted(result['distinct'])
                result['distinctcount'] = len(result['distinct'])
            else:
                result.pop('distinct', None)
            if result['type'] != 'number' or result['min'] is None:
                result.pop('min', None)
                result.pop('max', None)
        self._columns = sorted(columns.values(), key=lambda x: x['key'])
        return self._columns

    def data(self, columns, requiredColumns=None):
        """
        Get plottable data.

        :param columns: the columns to return.  Either a list of column names
            or a comma-delimited string.
        :param requiredColumns: only return data rows where all of these
            columns are non-None.  Either a list of column names or a
            comma-delimited string.
        :returns: a dictionary with ``columns``, a list of column records
            (as from the ``columns`` property, each with an added ``index``
            giving its position within a data row), and ``data``, a list of
            rows, each of which is a list of values.
        """
        if not isinstance(columns, list):
            columns = columns.split(',')
        if not isinstance(requiredColumns, list):
            requiredColumns = requiredColumns.split(',') if requiredColumns is not None else []
        # TODO: Always augment columns with item id, annotation id?
        self._datacolumns = {c: {} for c in columns}
        rows = set()
        # Data is collected as a side effect of scanning for columns, so any
        # cached column list must be discarded first; otherwise the scan is
        # skipped and no rows are gathered.
        self._columns = None
        collist = self.columns
        for coldata in self._datacolumns.values():
            rows |= set(coldata.keys())
        rows = sorted(rows)
        colsout = [col.copy() for col in collist if col['key'] in columns]
        for cidx, col in enumerate(colsout):
            col['index'] = cidx
        logger.info(f'Gathering {len(self._datacolumns)} x {len(rows)} data')
        data = [[None] * len(self._datacolumns) for _ in range(len(rows))]
        for cidx, col in enumerate(colsout):
            colkey = col['key']
            if colkey in self._datacolumns:
                datacol = self._datacolumns[colkey]
                for ridx, rowid in enumerate(rows):
                    data[ridx][cidx] = datacol.get(rowid, None)
        for cidx, col in enumerate(colsout):
            colkey = col['key']
            numrows = len(data)
            if colkey in requiredColumns:
                data = [row for row in data if row[cidx] is not None]
            if len(data) < numrows:
                logger.info(f'Reduced row count from {numrows} to {len(data)} '
                            f'because of None values in column {colkey}')
        return {
            'columns': colsout,
            'data': data}

0 comments on commit 87034d0

Please sign in to comment.