Skip to content

Commit

Permalink
Merge pull request #1774 from girder/harden-sources
Browse files Browse the repository at this point in the history
Harden sources based on more fuzz testing
  • Loading branch information
manthey authored Jan 15, 2025
2 parents 8e0a464 + b4d43c7 commit 363a8bb
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 6 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
- Better report if rasterized vector files are geospatial ([#1769](../../pull/1769))
- Provide some latitude in vips multiframe detection ([#1770](../../pull/1770))
- Don't read multiplane ndpi files with openslide ([#1772](../../pull/1772))
- Harden sources based on more fuzz testing ([#1774](../../pull/1774))

### Bug Fixes

- Fix scaling tiles from stripped tiffs in some instances ([#1773](../../pull/1773))
- Fix scaling tiles from stripped tiffs in some instances ([#1773](../../pull/1773))

## 1.30.6

Expand Down
21 changes: 17 additions & 4 deletions sources/bioformats/large_image_source_bioformats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,15 @@ def __init__(self, path, **kwargs): # noqa
if self.sizeX <= 0 or self.sizeY <= 0:
msg = 'Bioformats tile size is invalid.'
raise TileSourceError(msg)
if ('JPEG' in self._metadata['readerClassName'] and
(self._metadata['optimalTileWidth'] > 16384 or
self._metadata['optimalTileHeight'] > 16384)):
msg = 'Bioformats will be too inefficient to read this file.'
raise TileSourceError(msg)
try:
self._lastGetTileException = 'raise'
self.getTile(0, 0, self.levels - 1)
delattr(self, '_lastGetTileException')
except Exception as exc:
raise TileSourceError('Bioformats cannot read a tile: %r' % exc)
self._populatedLevels = len([
Expand Down Expand Up @@ -370,6 +377,7 @@ def _metadataForCurrentSeries(self, rdr):
'optimalTileWidth': rdr.getOptimalTileWidth(),
'optimalTileHeight': rdr.getOptimalTileHeight(),
'resolutionCount': rdr.getResolutionCount(),
'readerClassName': rdr.get_class_name(),
})

def _getSeriesStarts(self, rdr): # noqa
Expand Down Expand Up @@ -613,7 +621,7 @@ def getInternalMetadata(self, **kwargs):
return self._metadata

@methodcache()
def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): # noqa
self._xyzInRange(x, y, z)
ft = fc = fz = 0
fseries = self._metadata['frameSeries'][0]
Expand Down Expand Up @@ -668,9 +676,14 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
format = TILE_FORMAT_NUMPY
except javabridge.JavaException as exc:
es = javabridge.to_string(exc.throwable)
raise TileSourceError('Failed to get Bioformat region (%s, %r).' % (es, (
fc, fz, ft, fseries, self.sizeX, self.sizeY, offsetx,
offsety, width, height)))
self.logger.exception('Failed to getTile (%r)', es)
if getattr(self, '_lastGetTileException', None) == 'raise':
raise TileSourceError('Failed to get Bioformat region (%s, %r).' % (es, (
fc, fz, ft, fseries, self.sizeX, self.sizeY, offsetx,
offsety, width, height)))
self._lastGetTileException = repr(es)
tile = np.zeros((1, 1))
format = TILE_FORMAT_NUMPY
finally:
if javabridge.get_env():
javabridge.detach()
Expand Down
3 changes: 3 additions & 0 deletions sources/ometiff/large_image_source_ometiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,9 @@ def _parseOMEInfo(self): # noqa
if isinstance(self._omeinfo['Image'], dict):
self._omeinfo['Image'] = [self._omeinfo['Image']]
for img in self._omeinfo['Image']:
if isinstance(img['Pixels'], list):
msg = 'OME Tiff has multiple pixels'
raise TileSourceError(msg)
if isinstance(img['Pixels'].get('TiffData'), dict):
img['Pixels']['TiffData'] = [img['Pixels']['TiffData']]
if isinstance(img['Pixels'].get('Plane'), dict):
Expand Down
2 changes: 1 addition & 1 deletion sources/pil/large_image_source_pil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def _checkForFrames(self):
except Exception:
self._frames = None
self._frameCount = 1
self._pilImage.seek(0)
self._pilImage = PIL.Image.open(self._getLargeImagePath())

def _fromRawpy(self, largeImagePath):
"""
Expand Down
39 changes: 39 additions & 0 deletions sources/tifffile/large_image_source_tifffile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import threading
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _importlib_version
from pathlib import Path

import numpy as np

Expand Down Expand Up @@ -83,6 +84,7 @@ class TifffileFileTileSource(FileTileSource, metaclass=LruCacheMetaclass):
'scn': SourcePriority.PREFERRED,
'tif': SourcePriority.LOW,
'tiff': SourcePriority.LOW,
'ome': SourcePriority.HIGHER,
}
mimeTypes = {
None: SourcePriority.FALLBACK,
Expand Down Expand Up @@ -120,13 +122,17 @@ def __init__(self, path, **kwargs): # noqa
raise TileSourceFileNotFoundError(self._largeImagePath) from None
msg = 'File cannot be opened via tifffile.'
raise TileSourceError(msg)
self._checkForOmeBinaryonly()
maxseries, maxsamples = self._biggestSeries()
self.tileWidth = self.tileHeight = self._tileSize
s = self._tf.series[maxseries]
self._baseSeries = s
if len(s.levels) == 1:
self.tileWidth = self.tileHeight = self._singleTileSize
page = s.pages[0]
if not hasattr(page, 'tags'):
msg = 'File will not be opened via tifffile.'
raise TileSourceError(msg)
if ('TileWidth' in page.tags and
self._minTileSize <= page.tags['TileWidth'].value <= self._maxTileSize):
self.tileWidth = page.tags['TileWidth'].value
Expand All @@ -137,6 +143,10 @@ def __init__(self, path, **kwargs): # noqa
self._iccprofiles = [page.tags['InterColorProfile'].value]
self.sizeX = s.shape[s.axes.index('X')]
self.sizeY = s.shape[s.axes.index('Y')]
while (self.tileWidth // 2 >= self.sizeX and self.tileHeight // 2 >= self.sizeY and
min(self.tileWidth, self.tileHeight) // 2 >= self._minTileSize):
self.tileWidth //= 2
self.tileHeight //= 2
self._mm_x = self._mm_y = None
try:
unit = {2: 25.4, 3: 10}[page.tags['ResolutionUnit'].value.real]
Expand Down Expand Up @@ -170,6 +180,35 @@ def __init__(self, path, **kwargs): # noqa
msg = 'File cannot be opened via tifffile: axes and shape do not match access pattern.'
raise TileSourceError(msg)

def _checkForOmeBinaryonly(self):
from xml.etree import ElementTree as etree

omexml = getattr(self._tf, 'ome_metadata', None)
if not omexml:
return
try:
root = etree.fromstring(omexml)
except Exception:
return
metadatafile = None
for element in root:
if element.tag.endswith('BinaryOnly'):
metadatafile = element.attrib.get('MetadataFile', '')
if not metadatafile:
return
path = Path(self._largeImagePath).parent / metadatafile
if not path.is_file():
return
try:
newxml = path.open('r').read()
except Exception:
return
try:
root = etree.fromstring(newxml)
except Exception:
return
self._tf._omexml = newxml

def _biggestSeries(self):
"""
Find the series with the most pixels. Use all series that have the
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ setenv =
GDAL_HTTP_RETRY_DELAY=5

[testenv:test]
passenv = {[testenv]passenv}
description = Run all tests, including Girder
deps =
-rrequirements-test.txt
Expand Down

0 comments on commit 363a8bb

Please sign in to comment.