Skip to content

Commit

Permalink
Merge branch 'master' into amend-dicomweb-assetstore
Browse the repository at this point in the history
  • Loading branch information
willdunklin authored Apr 25, 2024
2 parents 420d7a8 + 6e91b8c commit 73d4e1b
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 34 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change Log

## 1.28.2

### Improvements
- Improve uint16 image scaling ([#1511](../../pull/1511))
- Read some untiled tiffs using the tiff source ([#1512](../../pull/1512))
- Speed up multi source compositing in tiled cases ([#1513](../../pull/1513))

### Changes
- Limit internal metadata on multi-source files with huge numbers of sources ([#1514](../../pull/1514))

## 1.28.1

### Improvements
Expand Down
29 changes: 25 additions & 4 deletions examples/algorithm_progression.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
class SweepAlgorithm:
def __init__(self, algorithm, input_filename, input_params, param_order,
output_filename, max_workers, multiprocessing, overlay,
lossy=False, scale=1):
lossy=False, scale=1, dedup=False):
self.algorithm = algorithm
self.input_filename = input_filename
self.output_filename = output_filename
Expand All @@ -38,6 +38,7 @@ def __init__(self, algorithm, input_filename, input_params, param_order,
self.overlay = overlay
self.lossy = lossy
self.scale = float(scale)
self.dedup = dedup

self.combos = list(itertools.product(*[p['range'] for p in input_params.values()]))

Expand Down Expand Up @@ -121,7 +122,10 @@ def run(self):
poolExecutor = (
concurrent.futures.ProcessPoolExecutor if self.multiprocessing else
concurrent.futures.ThreadPoolExecutor)
with poolExecutor(max_workers=self.max_workers) as executor:
poolParams = {}
if self.multiprocessing and sys.version_info >= (3, 11):
poolParams['max_tasks_per_child'] = 1
with poolExecutor(max_workers=self.max_workers, **poolParams) as executor:
futures = [
executor.submit(
self.applyAlgorithm,
Expand Down Expand Up @@ -202,7 +206,9 @@ def writeOverallSink(self, sink):
f'Collected {idx + 1} of {len(self.yaml_dict["sources"])} frames\n')
sys.stdout.flush()
lastlogtime = time.time()
tifftools.write_tiff(info, self.output_filename, allowExisting=True)
tifftools.write_tiff(
info, self.output_filename, allowExisting=True,
ifdsFirst=self.dedup, dedup=self.dedup)
rts = None
info = None
else:
Expand Down Expand Up @@ -342,6 +348,13 @@ def create_argparser():
help='Only process a lower resolution version of the source data. '
'Values greater than 1 reduce the size of the data processed.',
)
argparser.add_argument(
'--dedup',
action='store_true',
help='If specified and the destination is a tiff file, rewrite the '
'output with the dedup option. This may make a smaller output tiff '
        'file at the cost of a substantially longer combination time.',
)
argparser.add_argument(
'-p',
'--param',
Expand Down Expand Up @@ -414,8 +427,16 @@ def main(argv):

sweep = cls(algorithm, input_filename, params, input_params,
args.output_filename, args.num_workers, args.multiprocessing,
args.overlay, args.lossy, args.scale)
args.overlay, args.lossy, args.scale, args.dedup)
sweep.run()
if (args.dedup and args.sink in {'zarr'} and
os.path.splitext(args.output_filename)[1] in {'.tif', '.tiff'}):
print('Rewriting with dedup')
starttime = time.time()
ti = tifftools.read_tiff(args.output_filename)
tifftools.write_tiff(ti, args.output_filename, allowExisting=True,
ifdsFirst=True, dedup=True)
print(f'Rewrite time {time.time() - starttime:5.3f}')


if __name__ == '__main__':
Expand Down
23 changes: 14 additions & 9 deletions large_image/tilesource/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1773,15 +1773,20 @@ def getRegion(self, format: Union[str, Tuple[str]] = (TILE_FORMAT_IMAGE, ), **kw
cast(Dict[str, Any], tiledimage), outWidth, outHeight, tileIter.info, **kwargs)
if outWidth != regionWidth or outHeight != regionHeight:
dtype = cast(np.ndarray, image).dtype
image = _imageToPIL(cast(np.ndarray, image), mode).resize(
(outWidth, outHeight),
getattr(PIL.Image, 'Resampling', PIL.Image).NEAREST
if resample is None else
getattr(PIL.Image, 'Resampling', PIL.Image).BICUBIC
if outWidth > regionWidth else
getattr(PIL.Image, 'Resampling', PIL.Image).LANCZOS)
if dtype == np.uint16 and TILE_FORMAT_NUMPY in format:
image = _imageToNumpy(image)[0].astype(dtype) * 257
if dtype == np.uint8 or resample is not None:
image = _imageToPIL(cast(np.ndarray, image), mode).resize(
(outWidth, outHeight),
getattr(PIL.Image, 'Resampling', PIL.Image).NEAREST
if resample is None else
getattr(PIL.Image, 'Resampling', PIL.Image).BICUBIC
if outWidth > regionWidth else
getattr(PIL.Image, 'Resampling', PIL.Image).LANCZOS)
if dtype == np.uint16 and TILE_FORMAT_NUMPY in format:
image = _imageToNumpy(image)[0].astype(dtype) * 257
else:
cols = [int(idx * regionWidth / outWidth) for idx in range(outWidth)]
rows = [int(idx * regionHeight / outHeight) for idx in range(outHeight)]
image = np.take(np.take(image, rows, axis=0), cols, axis=1)
maxWidth = kwargs.get('output', {}).get('maxWidth')
maxHeight = kwargs.get('output', {}).get('maxHeight')
if kwargs.get('fill') and maxWidth and maxHeight:
Expand Down
6 changes: 3 additions & 3 deletions sources/multi/large_image_source_multi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ def getInternalMetadata(self, **kwargs):
"""
Return additional known metadata about the tile source. Data returned
from this method is not guaranteed to be in any particular format or
have specific values.
have specific values. Also, only the first 100 sources are used.
:returns: a dictionary of data or None.
"""
Expand All @@ -903,7 +903,7 @@ def getInternalMetadata(self, **kwargs):
'sources': copy.deepcopy(self._sources),
'sourceFiles': [],
}
for path in self._sourcePaths.values():
for path in list(self._sourcePaths.values())[:100]:
source = self._sources[min(path['sourcenum'])]
ts = self._openSource(source)
result['sourceFiles'].append({
Expand Down Expand Up @@ -1098,12 +1098,12 @@ def _addSourceToTile(self, tile, sourceEntry, corners, scale):
:returns: a numpy array of the tile.
"""
source = self._sources[sourceEntry['sourcenum']]
ts = self._openSource(source, sourceEntry['kwargs'])
# If tile is outside of bounding box, skip it
bbox = source['bbox']
if (corners[2][0] <= bbox['left'] or corners[0][0] >= bbox['right'] or
corners[2][1] <= bbox['top'] or corners[0][1] >= bbox['bottom']):
return tile
ts = self._openSource(source, sourceEntry['kwargs'])
transform = bbox.get('transform')
x = y = 0
# If there is no transform or the diagonals are positive and there is
Expand Down
51 changes: 42 additions & 9 deletions sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class TiffFileTileSource(FileTileSource, metaclass=LruCacheMetaclass):
}

_maxAssociatedImageSize = 8192
_maxUntiledImage = 4096

def __init__(self, path, **kwargs): # noqa
"""
Expand All @@ -85,18 +86,18 @@ def __init__(self, path, **kwargs): # noqa

self._largeImagePath = str(self._getLargeImagePath())

lastException = None
try:
self._initWithTiffTools()
return
except Exception as exc:
self.logger.debug('Cannot read with tifftools route; %r', exc)
lastException = exc

alldir = []
try:
if hasattr(self, '_info'):
alldir = self._scanDirectories()
else:
lastException = 'Could not parse file with tifftools'
except IOOpenTiffError:
msg = 'File cannot be opened via tiff source.'
raise TileSourceError(msg)
Expand Down Expand Up @@ -157,7 +158,7 @@ def __init__(self, path, **kwargs): # noqa
tifftools.constants.SampleFormat[sampleformat or 1].name,
bitspersample,
))
self._bandCount = highest._tiffInfo.get('samplesperpixel')
self._bandCount = highest._tiffInfo.get('samplesperpixel', 1)
# Sort the directories so that the highest resolution is the last one;
# if a level is missing, put a None value in its place.
self._tiffDirectories = [directories.get(key) for key in
Expand Down Expand Up @@ -252,8 +253,13 @@ def _levelFromIfd(self, ifd, baseifd):
"""
sizeX = ifd['tags'][tifftools.Tag.ImageWidth.value]['data'][0]
sizeY = ifd['tags'][tifftools.Tag.ImageLength.value]['data'][0]
tileWidth = baseifd['tags'][tifftools.Tag.TileWidth.value]['data'][0]
tileHeight = baseifd['tags'][tifftools.Tag.TileLength.value]['data'][0]
if tifftools.Tag.TileWidth.value in baseifd['tags']:
tileWidth = baseifd['tags'][tifftools.Tag.TileWidth.value]['data'][0]
tileHeight = baseifd['tags'][tifftools.Tag.TileLength.value]['data'][0]
else:
tileWidth = sizeX
tileHeight = baseifd['tags'][tifftools.Tag.RowsPerStrip.value]['data'][0]

for tag in {
tifftools.Tag.SamplesPerPixel.value,
tifftools.Tag.BitsPerSample.value,
Expand Down Expand Up @@ -298,7 +304,7 @@ def _initWithTiffTools(self): # noqa
directories are the same size and format; all non-tiled directories are
treated as associated images.
"""
dir0 = self.getTiffDir(0)
dir0 = self.getTiffDir(0, mustBeTiled=None)
self.tileWidth = dir0.tileWidth
self.tileHeight = dir0.tileHeight
self.sizeX = dir0.imageWidth
Expand All @@ -312,12 +318,11 @@ def _initWithTiffTools(self): # noqa
tifftools.constants.SampleFormat[sampleformat or 1].name,
bitspersample,
))
self._bandCount = dir0._tiffInfo.get('samplesperpixel')
self._bandCount = dir0._tiffInfo.get('samplesperpixel', 1)
info = _cached_read_tiff(self._largeImagePath)
self._info = info
frames = []
associated = [] # for now, a list of directories
curframe = -1
for idx, ifd in enumerate(info['ifds']):
# if not tiles, add to associated images
if tifftools.Tag.tileWidth.value not in ifd['tags']:
Expand All @@ -326,7 +331,6 @@ def _initWithTiffTools(self): # noqa
level = self._levelFromIfd(ifd, info['ifds'][0])
# if the same resolution as the main image, add a frame
if level == self.levels - 1:
curframe += 1
frames.append({'dirs': [None] * self.levels})
frames[-1]['dirs'][-1] = (idx, 0)
try:
Expand Down Expand Up @@ -365,6 +369,35 @@ def _initWithTiffTools(self): # noqa
else:
msg = 'Tile layers are in a surprising order'
raise TileSourceError(msg)
# If we have a single untiled ifd that is "small", use it
if tifftools.Tag.tileWidth.value not in info['ifds'][0]['tags']:
if (
self.sizeX > self._maxUntiledImage or self.sizeY > self._maxUntiledImage or
(len(info['ifds']) != 1 or tifftools.Tag.SubIfd.value in ifd['tags']) or
(tifftools.Tag.ImageDescription.value in ifd['tags'] and
'ImageJ' in ifd['tags'][tifftools.Tag.ImageDescription.value]['data'])
):
msg = 'A tiled TIFF is required.'
raise ValidationTiffError(msg)
associated = []
level = self._levelFromIfd(ifd, info['ifds'][0])
frames.append({'dirs': [None] * self.levels})
frames[-1]['dirs'][-1] = (idx, 0)
try:
frameMetadata = json.loads(
ifd['tags'][tifftools.Tag.ImageDescription.value]['data'])
for key in {'channels', 'frame'}:
if key in frameMetadata:
frames[-1][key] = frameMetadata[key]
except Exception:
pass
if tifftools.Tag.ICCProfile.value in ifd['tags']:
if not hasattr(self, '_iccprofiles'):
self._iccprofiles = []
while len(self._iccprofiles) < len(frames) - 1:
self._iccprofiles.append(None)
self._iccprofiles.append(ifd['tags'][
tifftools.Tag.ICCProfile.value]['data'])
self._associatedImages = {}
for dirNum in associated:
self._addAssociatedImage(dirNum)
Expand Down
8 changes: 4 additions & 4 deletions sources/tiff/large_image_source_tiff/tiff_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ def _validate(self): # noqa
# the create_image.py script, such as flatten or colourspace. These
# should only be done if necessary, which would require the conversion
# job to check output and perform subsequent processing as needed.
if (not self._tiffInfo.get('samplesperpixel') or
self._tiffInfo.get('samplesperpixel') < 1):
if (not self._tiffInfo.get('samplesperpixel', 1) or
self._tiffInfo.get('samplesperpixel', 1) < 1):
msg = 'Only RGB and greyscale TIFF files are supported'
raise ValidationTiffError(msg)

Expand Down Expand Up @@ -607,7 +607,7 @@ def _getUncompressedTile(self, tileNum):
self._tiffInfo.get('bitspersample'),
self._tiffInfo.get('sampleformat') if self._tiffInfo.get(
'sampleformat') is not None else libtiff_ctypes.SAMPLEFORMAT_UINT)
image = np.empty((th, tw, self._tiffInfo['samplesperpixel']),
image = np.empty((th, tw, self._tiffInfo.get('samplesperpixel', 1)),
dtype=_ctypesFormattbl[format])
imageBuffer = image.ctypes.data_as(ctypes.POINTER(ctypes.c_char))
if self._tiffInfo.get('istiled'):
Expand Down Expand Up @@ -635,7 +635,7 @@ def _getUncompressedTile(self, tileNum):
raise IOTiffError(
'Read an unexpected number of bytes from an encoded tile' if readSize >= 0 else
'Failed to read from an encoded tile')
if (self._tiffInfo.get('samplesperpixel') == 3 and
if (self._tiffInfo.get('samplesperpixel', 1) == 3 and
self._tiffInfo.get('photometric') == libtiff_ctypes.PHOTOMETRIC_YCBCR):
if self._tiffInfo.get('bitspersample') == 16:
image = np.floor_divide(image, 256).astype(np.uint8)
Expand Down
2 changes: 2 additions & 0 deletions test/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@
'a131592c-a069-4aa7-8031-398654aa8a3d.dcm': 'sha512:99bd3da4b8e11ce7b4f7ed8a294ed0c37437320667a06c40c383f4b29be85fe8e6094043e0600bee0ba879f2401de4c57285800a4a23da2caf2eb94e5b847ee0', # noqa
# Synthetic newer ndpi with binary data and nonblank image labelled as RGB
'synthetic_ndpi_2024.ndpi': 'sha512:192cdcf551a824277ef70358b8ed6225dca0c5b5d0817fe0d800b72638e0ad9416cd5bc40cb186219da60fd324b676d1b32cc05a276a33b71b485d665a31e42e', # noqa
# Synthetic uint16 untiled tiff that can be read with the tiff source
'synthetic_untiled_16.tiff': 'sha512:f4773fcfa749ba9c2db25319c9e8ad8586dd148de4366dae0393a3703906dace9f11233eafdb24418b598170d6372ef1ca861bf8d7a8212cac21a0eb8636ee77', # noqa
}


Expand Down
8 changes: 3 additions & 5 deletions test/test_source_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
'openjpeg': {'read': r'\.(jp2)$'},
'openslide': {
'read': r'\.(ptif|svs|ndpi|tif.*|qptiff|dcm)$',
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes)', # noqa
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled)', # noqa
'skip': r'nokeyframe\.ome\.tiff$',
'skipTiles': r'one_layer_missing',
},
Expand All @@ -78,9 +78,7 @@
'test': {'any': True, 'skipTiles': r''},
'tiff': {
'read': r'(\.(ptif|scn|svs|tif.*|qptiff)|[-0-9a-f]{36}\.dcm)$',
'noread': r'(oahu|DDX58_AXL|G10-3_pelvis_crop|'
r'd042-353\.crop\.small\.float|landcover_sample|US_Geo\.|'
r'imagej|bad_axes|nokeyframe\.ome\.tiff$)',
'noread': r'(DDX58_AXL|G10-3_pelvis_crop|landcover_sample|US_Geo\.|imagej)',
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles)'},
'tifffile': {
'read': r'',
Expand All @@ -91,7 +89,7 @@
'vips': {
'read': r'',
'noread': r'\.(nc|nd2|yml|yaml|json|czi|png|svs|scn|zarr\.db|zarr\.zip)$',
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview)' # noqa
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview|synthetic_untiled)' # noqa
},
'zarr': {'read': r'\.(zarr|zgroup|zattrs|db|zarr\.zip)$'},
}
Expand Down

0 comments on commit 73d4e1b

Please sign in to comment.