From 31af4b42d7e77b45075df552e0b6fec9bd4bf1e4 Mon Sep 17 00:00:00 2001
From: David Manthey <david.manthey@kitware.com>
Date: Fri, 17 Apr 2020 15:11:35 -0400
Subject: [PATCH] Report internal metadata as a separate method and endpoint.

This also moves some extraneous source-specific metadata to the new
internal metadata (for instance, nd2 files reported a lot of nd2-specific
information in the general metadata).
---
 .../girder_large_image/models/image_item.py   |  6 ++++
 girder/girder_large_image/rest/tiles.py       | 17 ++++++++++-
 girder/test_girder/test_tiles_rest.py         | 10 +++++++
 large_image/tilesource/base.py                | 10 +++++++
 .../gdal/large_image_source_gdal/__init__.py  | 25 +++++++++++++++++
 .../nd2/large_image_source_nd2/__init__.py    | 28 +++++++++++++------
 .../large_image_source_ometiff/__init__.py    | 11 ++++++++
 .../large_image_source_openjpeg/__init__.py   | 13 +++++++++
 .../large_image_source_openslide/__init__.py  | 13 +++++++++
 .../pil/large_image_source_pil/__init__.py    | 16 +++++++++++
 .../test/large_image_source_test/__init__.py  | 10 +++++++
 .../tiff/large_image_source_tiff/__init__.py  | 15 ++++++++++
 test/test_source_gdal.py                      |  8 ++++++
 test/test_source_nd2.py                       |  7 +++++
 test/test_source_ometiff.py                   |  7 +++++
 test/test_source_openjpeg.py                  |  7 +++++
 test/test_source_openslide.py                 |  9 ++++++
 test/test_source_pil.py                       |  7 +++++
 test/test_source_tiff.py                      |  7 +++++
 19 files changed, 216 insertions(+), 10 deletions(-)

diff --git a/girder/girder_large_image/models/image_item.py b/girder/girder_large_image/models/image_item.py
index e99b58dfd..b6fcd0923 100644
--- a/girder/girder_large_image/models/image_item.py
+++ b/girder/girder_large_image/models/image_item.py
@@ -173,6 +173,12 @@ def getMetadata(self, item, **kwargs):
         tileSource = self._loadTileSource(item, **kwargs)
         return tileSource.getMetadata()
 
+    def getInternalMetadata(self, item, **kwargs):
+        """Return the source's internal metadata plus its 'tilesource' name."""
+        tileSource = self._loadTileSource(item, **kwargs)
+        # Copy so that the dictionary owned by the tile source is not altered.
+        return dict(tileSource.getInternalMetadata() or {}, tilesource=tileSource.name)
+
     def getTile(self, item, x, y, z, mayRedirect=False, **kwargs):
         tileSource = self._loadTileSource(item, **kwargs)
         imageParams = {}
diff --git a/girder/girder_large_image/rest/tiles.py b/girder/girder_large_image/rest/tiles.py
index 86955969d..850f0aaf6 100644
--- a/girder/girder_large_image/rest/tiles.py
+++ b/girder/girder_large_image/rest/tiles.py
@@ -103,6 +103,8 @@ def __init__(self, apiRoot):
                            self.getDZIInfo)
         apiRoot.item.route('GET', (':itemId', 'tiles', 'dzi_files', ':level', ':xandy'),
                            self.getDZITile)
+        apiRoot.item.route('GET', (':itemId', 'tiles', 'internal_metadata'),
+                           self.getInternalMetadata)
         filter_logging.addLoggingFilter(
             'GET (/[^/ ?#]+)*/item/[^/ ?#]+/tiles/zxy(/[^/ ?#]+){3}',
             frequency=250)
@@ -248,9 +250,22 @@ def _setContentDisposition(self, item, contentDisposition, mime, subname):
     @access.public
     @loadmodel(model='item', map={'itemId': 'item'}, level=AccessType.READ)
     def getTilesInfo(self, item, params):
-        # TODO: parse params?
         return self._getTilesInfo(item, params)
 
+    @describeRoute(
+        Description('Get large image internal metadata.')
+        .param('itemId', 'The ID of the item.', paramType='path')
+        .errorResponse('ID was invalid.')
+        .errorResponse('Read access was denied for the item.', 403)
+    )
+    @access.public
+    @loadmodel(model='item', map={'itemId': 'item'}, level=AccessType.READ)
+    def getInternalMetadata(self, item, params):
+        try:
+            return self.imageItemModel.getInternalMetadata(item, **params)
+        except TileGeneralException as exc:  # report tile source errors as a 400
+            raise RestException(exc.args[0], code=400)
+
     @describeRoute(
         Description('Get test large image metadata.')
     )
diff --git a/girder/test_girder/test_tiles_rest.py b/girder/test_girder/test_tiles_rest.py
index ec76882c0..0bcf2a207 100644
--- a/girder/test_girder/test_tiles_rest.py
+++ b/girder/test_girder/test_tiles_rest.py
@@ -1119,3 +1119,13 @@ def testTilesHistogram(server, admin, fsAssetstore):
     assert len(resp.json[0]['hist']) == 256
     assert resp.json[1]['samples'] == 2801664
     assert resp.json[1]['hist'][128] == 176
+
+
+@pytest.mark.usefixtures('unbindLargeImage')
+@pytest.mark.plugin('large_image')
+def testTilesInternalMetadata(server, admin, fsAssetstore):
+    uploaded = utilities.uploadExternalFile(
+        'data/sample_image.ptif.sha512', admin, fsAssetstore)
+    endpoint = '/item/%s/tiles/internal_metadata' % str(uploaded['itemId'])
+    response = server.request(path=endpoint)
+    assert response.json['tilesource'] == 'tiff'
diff --git a/large_image/tilesource/base.py b/large_image/tilesource/base.py
index 8737bb5c7..b68dab4c5 100644
--- a/large_image/tilesource/base.py
+++ b/large_image/tilesource/base.py
@@ -1494,6 +1494,16 @@ def getMetadata(self):
             'mm_y': mag['mm_y'],
         }
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return any extra, source-specific metadata known about this tile
+        source.  The base implementation has none to report.  No format or set
+        of values is guaranteed for the returned data.
+
+        :returns: a dictionary of data or None.
+        """
+        return None
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
                 sparseFallback=False, frame=None):
diff --git a/sources/gdal/large_image_source_gdal/__init__.py b/sources/gdal/large_image_source_gdal/__init__.py
index 2b73b5818..43d032639 100644
--- a/sources/gdal/large_image_source_gdal/__init__.py
+++ b/sources/gdal/large_image_source_gdal/__init__.py
@@ -546,6 +546,31 @@ def getMetadata(self):
             metadata['netcdf'] = self._netcdf
         return metadata
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  Data returned
+        from this method is not guaranteed to be in any particular format or
+        have specific values.
+
+        :returns: a dictionary of values read from the GDAL dataset.
+        """
+        result = {}
+        with self._getDatasetLock:
+            driver = self.dataset.GetDriver()
+            result['driverShortName'] = driver.ShortName
+            result['driverLongName'] = driver.LongName
+            result['fileList'] = self.dataset.GetFileList()
+            result['RasterXSize'] = self.dataset.RasterXSize
+            result['RasterYSize'] = self.dataset.RasterYSize
+            result['GeoTransform'] = self.dataset.GetGeoTransform()
+            result['GCPProjection'] = self.dataset.GetGCPProjection()
+            result['Metadata'] = self.dataset.GetMetadata_List()
+            for key in ['IMAGE_STRUCTURE', 'SUBDATASETS', 'GEOLOCATION', 'RPC']:
+                metadatalist = self.dataset.GetMetadata_List(key)
+                if metadatalist:
+                    result['Metadata_' + key] = metadatalist
+        return result
+
     def getTileCorners(self, z, x, y):
         """
         Returns bounds of a tile for a given x,y,z index.
diff --git a/sources/nd2/large_image_source_nd2/__init__.py b/sources/nd2/large_image_source_nd2/__init__.py
index d6af48823..32db57fb4 100644
--- a/sources/nd2/large_image_source_nd2/__init__.py
+++ b/sources/nd2/large_image_source_nd2/__init__.py
@@ -237,15 +237,7 @@ def getMetadata(self):  # noqa
         """
         result = super(ND2FileTileSource, self).getMetadata()
 
-        # If two imgeas haven't panned by this factor of their size, treat them
-        # as the same IndexXY
-        result['nd2'] = self._metadata
-        result['nd2'].pop('custom_data', None)
-        result['nd2'].pop('image_metadata', None)
-        result['nd2'].pop('image_metadata_sequence', None)
-        result['nd2_sizes'] = sizes = self._nd2.sizes
-        result['nd2_axes'] = self._nd2.axes
-        result['nd2_iter_axes'] = self._nd2.iter_axes
+        sizes = self._nd2.sizes
         # We may want to reformat the frames to standardize this across sources
         # An example of frames from OMETiff: {
         #   "DeltaT": "3532.529541",
@@ -312,6 +304,24 @@ def getMetadata(self):  # noqa
         }
         return result
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  Data returned
+        from this method is not guaranteed to be in any particular format or
+        have specific values.
+
+        :returns: a dictionary with the nd2 metadata, sizes, and axes.
+        """
+        # NOTE(review): 'custom_data', 'image_metadata', and
+        # 'image_metadata_sequence' are returned as part of 'nd2' rather than
+        # being popped; confirm these entries are safe to serialize.
+        return {
+            'nd2': self._metadata,
+            'nd2_sizes': self._nd2.sizes,
+            'nd2_axes': self._nd2.axes,
+            'nd2_iter_axes': self._nd2.iter_axes,
+        }
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
         if z < 0 or z >= self.levels:
diff --git a/sources/ometiff/large_image_source_ometiff/__init__.py b/sources/ometiff/large_image_source_ometiff/__init__.py
index 62a015d9c..02a7d9609 100644
--- a/sources/ometiff/large_image_source_ometiff/__init__.py
+++ b/sources/ometiff/large_image_source_ometiff/__init__.py
@@ -259,6 +259,17 @@ def getMetadata(self):
                 cname: c for c, cname in enumerate(channels[:maxref.get('IndexC', 1)])}
             for frame in result['frames']:
                 frame['Channel'] = channels[frame.get('IndexC', 0)]
+        return result
+
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  Data returned
+        from this method is not guaranteed to be in any particular format or
+        have specific values.
+
+        :returns: a dictionary whose 'omeinfo' entry is self._omeinfo.
+        """
+        result = {}
         result['omeinfo'] = self._omeinfo
         return result
 
diff --git a/sources/openjpeg/large_image_source_openjpeg/__init__.py b/sources/openjpeg/large_image_source_openjpeg/__init__.py
index aa63da842..04d949cd2 100644
--- a/sources/openjpeg/large_image_source_openjpeg/__init__.py
+++ b/sources/openjpeg/large_image_source_openjpeg/__init__.py
@@ -213,6 +213,19 @@ def _readbox(self, box):
         except Exception:
             pass
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  No particular
+        format or set of values is guaranteed.  The 'xml' entry is present
+        only when this source has a _description_xml attribute.
+
+        :returns: a dictionary of data, possibly empty.
+        """
+        try:
+            return {'xml': self._description_xml}
+        except AttributeError:
+            return {}
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
         if z < 0 or z >= self.levels:
diff --git a/sources/openslide/large_image_source_openslide/__init__.py b/sources/openslide/large_image_source_openslide/__init__.py
index 43d95e6bd..7d82c9767 100644
--- a/sources/openslide/large_image_source_openslide/__init__.py
+++ b/sources/openslide/large_image_source_openslide/__init__.py
@@ -244,6 +244,19 @@ def getNativeMagnification(self):
             'mm_y': mm_y,
         }
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  No particular
+        format or set of values is guaranteed.  Every key of the openslide
+        properties is copied into the 'openslide' entry.
+
+        :returns: a dictionary with an 'openslide' dictionary of properties.
+        """
+        properties = self._openslide.properties
+        return {
+            'openslide': {name: properties[name] for name in properties},
+        }
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
         if z < 0:
diff --git a/sources/pil/large_image_source_pil/__init__.py b/sources/pil/large_image_source_pil/__init__.py
index fc9fa4367..2f8d1967c 100644
--- a/sources/pil/large_image_source_pil/__init__.py
+++ b/sources/pil/large_image_source_pil/__init__.py
@@ -147,6 +147,22 @@ def getState(self):
         return super(PILFileTileSource, self).getState() + ',' + str(
             self._maxSize)
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  No particular
+        format or set of values is guaranteed.  Attributes of the PIL image
+        that cannot be read are left out of the result.
+
+        :returns: a dictionary with a 'pil' dictionary of image attributes.
+        """
+        pilInfo = {}
+        for key in ('filename', 'format', 'mode', 'size', 'width', 'height', 'palette', 'info'):
+            try:
+                pilInfo[key] = getattr(self._pilImage, key)
+            except Exception:  # best effort: skip anything unreadable
+                pass
+        return {'pil': pilInfo}
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
                 mayRedirect=False, **kwargs):
diff --git a/sources/test/large_image_source_test/__init__.py b/sources/test/large_image_source_test/__init__.py
index 452a6087c..306eb1543 100644
--- a/sources/test/large_image_source_test/__init__.py
+++ b/sources/test/large_image_source_test/__init__.py
@@ -109,6 +109,16 @@ def fractalTile(self, image, x, y, widthCount, color=(0, 0, 0)):
                             ], color, None)
             sq //= 2
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return additional known metadata about the tile source.  No particular
+        format or set of values is guaranteed.  For this source the only entry
+        is the value of the fractal attribute.
+
+        :returns: a dictionary with a 'fractal' entry.
+        """
+        return {'fractal': self.fractal}
+
     @methodcache()
     def getTile(self, x, y, z, *args, **kwargs):
         widthCount = 2 ** z
diff --git a/sources/tiff/large_image_source_tiff/__init__.py b/sources/tiff/large_image_source_tiff/__init__.py
index 96dc7e62d..a4c7a855d 100644
--- a/sources/tiff/large_image_source_tiff/__init__.py
+++ b/sources/tiff/large_image_source_tiff/__init__.py
@@ -249,6 +249,21 @@ def getNativeMagnification(self):
             'mm_y': mm_y,
         }
 
+    def getInternalMetadata(self, **kwargs):
+        """
+        Return the _description_xml values of the tiff directories.  They are
+        scanned in reverse order; the first one found is stored as 'xml' and
+        any later one as 'xml_<index>' (its index in the reversed list).
+
+        :returns: a dictionary of data; empty if no directory has a description.
+        """
+        results = {}
+        for idx, tiffDir in enumerate(self._tiffDirectories[::-1]):
+            if tiffDir and hasattr(tiffDir, '_description_xml'):
+                key = 'xml' if 'xml' not in results else 'xml_' + str(idx)
+                results[key] = tiffDir._description_xml
+        return results
+
     @methodcache()
     def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
                 sparseFallback=False, **kwargs):
diff --git a/test/test_source_gdal.py b/test/test_source_gdal.py
index 7899d17c5..cd8034e87 100644
--- a/test/test_source_gdal.py
+++ b/test/test_source_gdal.py
@@ -332,3 +332,11 @@ def testRetileProjection():
     assert ti['tile'].size == 3000000
     tile = ts.getTile(1178, 1507, 12)
     assert len(tile) > 1000
+
+
+def testInternalMetadata():
+    here = os.path.dirname(os.path.realpath(__file__))
+    geotiffPath = os.path.join(here, 'test_files', 'rgb_geotiff.tiff')
+    tileSource = large_image_source_gdal.GDALFileTileSource(geotiffPath)
+    internal = tileSource.getInternalMetadata()
+    assert internal['driverShortName'] == 'GTiff'
diff --git a/test/test_source_nd2.py b/test/test_source_nd2.py
index 97b6af07a..8206ba01b 100644
--- a/test/test_source_nd2.py
+++ b/test/test_source_nd2.py
@@ -27,3 +27,10 @@ def testTilesFromND2():
     assert tileMetadata['channels'] == ['Brightfield', 'YFP', 'A594', 'DAPI']
     assert tileMetadata['IndexRange'] == {'IndexC': 4, 'IndexXY': 2, 'IndexZ': 29}
     utilities.checkTilesZXY(source, tileMetadata)
+
+
+def testInternalMetadata():
+    nd2Path = utilities.externaldata('data/ITGA3Hi_export_crop2.nd2.sha512')
+    tileSource = large_image_source_nd2.ND2FileTileSource(nd2Path)
+    internal = tileSource.getInternalMetadata()
+    assert 'nd2' in internal
diff --git a/test/test_source_ometiff.py b/test/test_source_ometiff.py
index 6b6d9a954..3d44073ba 100644
--- a/test/test_source_ometiff.py
+++ b/test/test_source_ometiff.py
@@ -74,3 +74,10 @@ def testStyleAutoMinMax():
     assert image[128][128][0] < imageB[128][128][0]
     assert image[0][128][0] < imageB[0][128][0]
     assert image[240][128][0] < imageB[240][128][0]
+
+
+def testInternalMetadata():
+    omePath = utilities.externaldata('data/sample.ome.tif.sha512')
+    tileSource = large_image_source_ometiff.OMETiffFileTileSource(omePath)
+    internal = tileSource.getInternalMetadata()
+    assert 'omeinfo' in internal
diff --git a/test/test_source_openjpeg.py b/test/test_source_openjpeg.py
index d02fbeb80..dcedf6ee2 100644
--- a/test/test_source_openjpeg.py
+++ b/test/test_source_openjpeg.py
@@ -54,3 +54,10 @@ def testBelowLevelTilesFromOpenJPEG():
     large_image_source_openjpeg.OpenjpegFileTileSource._minTileSize = origMin
     large_image_source_openjpeg.OpenjpegFileTileSource._maxTileSize = origMax
     cachesClear()
+
+
+def testInternalMetadata():
+    jp2Path = utilities.externaldata('data/sample_image.jp2.sha512')
+    tileSource = large_image_source_openjpeg.OpenjpegFileTileSource(jp2Path)
+    internal = tileSource.getInternalMetadata()
+    assert 'ScanInfo' in internal['xml']
diff --git a/test/test_source_openslide.py b/test/test_source_openslide.py
index 0c959ee5c..30d288495 100644
--- a/test/test_source_openslide.py
+++ b/test/test_source_openslide.py
@@ -442,3 +442,12 @@ def testEdgeOptions():
     assert width == 240
     assert height == 240
     assert imageB != image
+
+
+def testInternalMetadata():
+    svsPath = utilities.externaldata(
+        'data/sample_jp2k_33003_TCGA-CV-7242-11A-01-TS1.1838afb1-9eee-'
+        '4a70-9ae3-50e3ab45e242.svs.sha512')
+    tileSource = large_image_source_openslide.OpenslideFileTileSource(svsPath)
+    internal = tileSource.getInternalMetadata()
+    assert 'openslide' in internal
diff --git a/test/test_source_pil.py b/test/test_source_pil.py
index 63d698f65..ba5718b20 100644
--- a/test/test_source_pil.py
+++ b/test/test_source_pil.py
@@ -60,3 +60,10 @@ def testReadingVariousColorFormats():
     for name in files:
         imagePath = os.path.join(testDir, 'test_files', name)
         assert large_image_source_pil.PILFileTileSource.canRead(imagePath) is True
+
+
+def testInternalMetadata():
+    pngPath = utilities.externaldata('data/sample_Easy1.png.sha512')
+    tileSource = large_image_source_pil.PILFileTileSource(pngPath)
+    internal = tileSource.getInternalMetadata()
+    assert 'pil' in internal
diff --git a/test/test_source_tiff.py b/test/test_source_tiff.py
index c42bc22a2..5a7b0d7a2 100644
--- a/test/test_source_tiff.py
+++ b/test/test_source_tiff.py
@@ -646,3 +646,10 @@ def testSingleTileIteratorResample():
     assert tile['width'] == 255
     assert tile['tile_mm_x'] == 0.0005
     assert tile['tile_magnification'] == 20.0
+
+
+def testInternalMetadata():
+    ptifPath = utilities.externaldata('data/sample_image.ptif.sha512')
+    tileSource = large_image_source_tiff.TiffFileTileSource(ptifPath)
+    internal = tileSource.getInternalMetadata()
+    assert 'xml' in internal