From 42dffc6039691e5aa0863b7bc1fea8f2ae93dc0e Mon Sep 17 00:00:00 2001 From: David Manthey Date: Thu, 30 Sep 2021 16:08:11 -0400 Subject: [PATCH] Allow excluding associated images from the conversion. --- .../large_image_converter/__init__.py | 25 ++++++++++++++++++- .../large_image_converter/__main__.py | 9 +++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/utilities/converter/large_image_converter/__init__.py b/utilities/converter/large_image_converter/__init__.py index 0b80a1786..11f7109c8 100644 --- a/utilities/converter/large_image_converter/__init__.py +++ b/utilities/converter/large_image_converter/__init__.py @@ -5,6 +5,7 @@ import logging import math import os +import re import struct import threading import time @@ -40,6 +41,25 @@ FrameMemoryEstimate = 3 * 1024 ** 3 +def _use_associated_image(key, **kwargs): + """ + Check if an associated image key should be used. If a list of images to + keep was specified, it must match at least one of the regex in that list. + If a list of images to exclude was specified, it must not match any regex in that + list. The exclude list takes priority. + """ + if kwargs.get('_exclude_associated'): + for exp in kwargs['_exclude_associated']: + if re.match(exp, key): + return False + if kwargs.get('_keep_associated'): + for exp in kwargs['_keep_associated']: + if re.match(exp, key): + return True + return False + return True + + def _data_from_large_image(path, outputPath, **kwargs): """ Check if the input file can be read by installed large_image tile sources. 
@@ -75,6 +95,8 @@ def _data_from_large_image(path, outputPath, **kwargs): tasks = [] pool = _get_thread_pool(**kwargs) for key in ts.getAssociatedImagesList(): + if not _use_associated_image(key, **kwargs): + continue try: img, mime = ts.getAssociatedImage(key) except Exception: @@ -185,6 +207,8 @@ def _generate_multiframe_tiff(inputPath, outputPath, tempPath, lidata, **kwargs) for w, h, subInputPath, page in imageSizes: if (w, h) not in possibleSizes: key = 'image_%d' % page + if not _use_associated_image(key, **kwargs): + continue savePath = tempPath + '-%s-%s.tiff' % (key, time.strftime('%Y%m%d-%H%M%S')) _pool_add(tasks, (pool.submit( _convert_via_vips, subInputPath, savePath, tempPath, False), )) @@ -601,7 +625,6 @@ def _output_tiff(inputs, outputPath, tempPath, lidata, extraImages=None, **kwarg if extraImages: assocList += list(extraImages.items()) for key, assocPath in assocList: - logger.debug('Reading %s', assocPath) assocInfo = tifftools.read_tiff(assocPath) assocInfo['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value] = { 'data': key, diff --git a/utilities/converter/large_image_converter/__main__.py b/utilities/converter/large_image_converter/__main__.py index d009a3d52..750dd5fff 100644 --- a/utilities/converter/large_image_converter/__main__.py +++ b/utilities/converter/large_image_converter/__main__.py @@ -84,6 +84,15 @@ def get_parser(): help='JP2K peak signal to noise ratio. 0 for lossless.') parser.add_argument( '--cr', type=int, help='JP2K compression ratio. 1 for lossless.') + parser.add_argument( + '--only-associated', dest='_keep_associated', action='append', + help='Only keep associated images with the specified keys. The value ' + 'is used as a matching regex.') + parser.add_argument( + '--exclude-associated', dest='_exclude_associated', action='append', + help='Exclude associated images with the specified keys. The value ' + 'is used as a matching regex. 
If a key is specified for both ' + 'exclusion and inclusion, it will be excluded.') parser.add_argument( '--concurrency', '-j', type=int, dest='_concurrency', help='Maximum processor concurrency. Some conversion tasks can use '