Skip to content

Commit

Permalink
Merge pull request #541 from girder/more-stats
Browse files Browse the repository at this point in the history
Add an option to compute additional statistics.
  • Loading branch information
manthey authored Feb 2, 2021
2 parents baeeaaf + 1eeed2e commit 18e6f4c
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 3 deletions.
4 changes: 2 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ girder-jobs>=3.0.3
-e sources/ometiff
# must be after source/gdal
-e sources/mapnik
# Don't specify extras for the converter; they are already present above
-e utilities/converter
# Don't specify other extras for the converter; they are already present above
-e utilities/converter[stats]
# Girder and worker dependencies are already installed above
-e utilities/tasks
-e girder/.
Expand Down
9 changes: 9 additions & 0 deletions test/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,12 @@ def testConverterMainStats(tmpdir):
info = tifftools.read_tiff(outputPath)
desc = json.loads(info['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value]['data'])
assert 'conversion_stats' in desc['large_image_converter']


def testConverterMainFullStats(tmpdir):
    # Convert a sample image with full stats enabled and verify that the
    # noise metrics (e.g., psnr) were recorded in the ImageDescription.
    srcPath = utilities.externaldata('data/sample_Easy1.png.sha512')
    destPath = os.path.join(tmpdir, 'out.tiff')
    main.main([srcPath, destPath, '--full-stats'])
    tiffInfo = tifftools.read_tiff(destPath)
    descTag = tiffInfo['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value]
    desc = json.loads(descTag['data'])
    assert 'psnr' in desc['large_image_converter']['conversion_stats']
3 changes: 2 additions & 1 deletion utilities/converter/large_image_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ def _convert_large_image(inputPath, outputPath, tempPath, lidata, **kwargs):
if data.dtype.char not in dtypeToGValue:
data = data.astype('d')
vimg = pyvips.Image.new_from_memory(
data.data, data.shape[1], data.shape[0], data.shape[2],
numpy.ascontiguousarray(data).data,
data.shape[1], data.shape[0], data.shape[2],
dtypeToGValue[data.dtype.char])
x = tile['x']
ty = tile['tile_position']['level_y']
Expand Down
106 changes: 106 additions & 0 deletions utilities/converter/large_image_converter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import large_image_converter


logger = logging.getLogger('large-image-converter')


def get_parser():
parser = argparse.ArgumentParser(description='Large Image image converter')
parser.add_argument(
Expand Down Expand Up @@ -60,9 +63,100 @@ def get_parser():
help='Add conversion stats (time and size) to the ImageDescription of '
'the output file. This involves writing the file an extra time; the '
'stats do not include the extra write.')
parser.add_argument(
'--stats-full', '--full-stats',
action='store_const', const='full', dest='_stats',
help='Add conversion stats, including noise metrics (PSNR, etc.) to '
'the output file. This takes more time and temporary disk space.')
return parser


def compute_error_metrics(original, altered, results, converterOpts=None):
    """
    Compute the amount of error introduced via conversion compared to
    conversion using a lossless method.  Note that this is not compared to
    the original, as we may not be able to read that in an efficient way.
    This is a very time-consuming way to compute error metrics, since it
    first reprocesses the input file to a lossless format, then steps
    through each tile and computes RMSE and SSIM errors per tile, producing
    a weighted-by-number-of-pixels average of each of these.  The RMSE is
    used to compute a PSNR value.

    :param original: the original file path.
    :param altered: the path of compressed file to compare.
    :param results: a dictionary to store results.  Modified.
    :param converterOpts: an optional dictionary of parameters used for the
        original conversion.  Only parameters that would affect the selected
        pixels are used.
    """
    # Imported locally so these optional dependencies are only required when
    # full stats are requested.
    import math
    from tempfile import TemporaryDirectory

    import large_image
    import numpy
    import skimage.metrics

    lastlog = 0
    with TemporaryDirectory() as tempDir:
        # Reconvert the source losslessly so tiles can be compared without
        # having to read the original image in an arbitrary format.
        tempPath = os.path.join(tempDir, os.path.basename(original))
        orig = large_image_converter.convert(original, tempPath, compression='lzw')
        tsOrig = large_image.getTileSource(orig)
        numFrames = len(tsOrig.getMetadata().get('frames', [0]))
        tsAlt = large_image.getTileSource(altered)
        mse = 0  # sum of per-tile mean squared error, weighted by pixel count
        ssim = 0  # sum of per-tile SSIM, weighted by pixel count
        ssim_count = 0  # number of samples that contributed to the SSIM sum
        maxval = 0  # maximum sample value seen in either image
        maxdiff = 0  # maximum absolute per-sample difference
        sumdiff = 0  # total absolute difference; don't shadow builtin `sum`
        count = 0  # total number of samples compared
        tileSize = 2048
        for frame in range(numFrames):
            tiAlt = tsAlt.tileIterator(tile_size=dict(width=tileSize), frame=frame)
            for tileOrig in tsOrig.tileIterator(tile_size=dict(width=tileSize), frame=frame):
                tileAlt = next(tiAlt)
                do = tileOrig['tile'].astype(int)
                da = tileAlt['tile'].astype(int)
                maxval = max(maxval, do.max(), da.max())
                # Compare only the channels present in both images (one may
                # have an extra channel, such as alpha, that the other lacks).
                if do.shape[2] > da.shape[2]:
                    do = do[:, :, :da.shape[2]]
                if da.shape[2] > do.shape[2]:
                    da = da[:, :, :do.shape[2]]
                diff = numpy.absolute(do - da)
                maxdiff = max(maxdiff, diff.max())
                sumdiff += diff.sum()
                count += diff.size
                last_mse = numpy.mean(diff ** 2)
                mse += last_mse * diff.size
                last_ssim = 0
                try:
                    last_ssim = skimage.metrics.structural_similarity(
                        do.astype(float), da.astype(float),
                        gaussian_weights=True, sigma=1.5, use_sample_covariance=False,
                        multichannel=do.shape[2] > 1)
                    ssim += last_ssim * diff.size
                    ssim_count += diff.size
                except ValueError:
                    # structural_similarity raises for tiles smaller than its
                    # filter window; those tiles are skipped for SSIM.
                    pass
                if time.time() - lastlog >= 10:
                    # Guard the division: ssim_count can still be zero if no
                    # tile has produced a valid SSIM yet.
                    logger.debug(
                        'Calculating error (%d/%d): rmse %4.2f ssim %6.4f '
                        'last rmse %4.2f ssim %6.4f' % (
                            tileOrig['tile_position']['position'] + 1,
                            tileOrig['iterator_range']['position'],
                            (mse / count) ** 0.5,
                            ssim / ssim_count if ssim_count else 0,
                            last_mse ** 0.5, last_ssim))
                    lastlog = time.time()
        results['maximum_error'] = maxdiff
        results['average_error'] = sumdiff / count
        results['rmse'] = (mse / count) ** 0.5
        # PSNR is undefined when the images are identical (mse == 0).
        results['psnr'] = 10 * math.log10(
            maxval ** 2 / (mse / count)) if mse else None
        if ssim_count:
            results['ssim'] = ssim / ssim_count
        # Use .get(): 'ssim' is absent when no tile yielded a valid SSIM, and
        # results['ssim'] would raise a KeyError here.
        logger.debug('Calculated error: rmse %4.2f psnr %3.1f ssim %6.4f' % (
            results['rmse'], results['psnr'] or 0, results.get('ssim', 0)))


def main(args=sys.argv[1:]):
parser = get_parser()
opts = parser.parse_args(args=args)
Expand Down Expand Up @@ -102,7 +196,19 @@ def main(args=sys.argv[1:]):
desc['large_image_converter']['conversion_stats'] = {
'time': end_time - start_time,
'filesize': os.path.getsize(dest),
'original_filesize': os.path.getsize(opts.source),
'compression_ratio':
desc['large_image_converter'].get('frames', 1) *
sum(info['ifds'][0]['tags'][tifftools.Tag.BitsPerSample.value]['data']) / 8 *
info['ifds'][0]['tags'][tifftools.Tag.ImageWidth.value]['data'][0] *
info['ifds'][0]['tags'][tifftools.Tag.ImageLength.value]['data'][0] /
os.path.getsize(dest),
}
if opts._stats == 'full' and opts.compression not in {
'deflate', 'zip', 'lzw', 'zstd', 'packbits'}:
compute_error_metrics(
opts.source, dest, desc['large_image_converter']['conversion_stats'],
converterOpts)
tifftools.commands.tiff_set(dest, overwrite=True, setlist=[(
'ImageDescription', json.dumps(
desc, separators=(',', ':'), sort_keys=True,
Expand Down
3 changes: 3 additions & 0 deletions utilities/converter/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def prerelease_local_scheme(version):
'sources': [
'large_image[sources]',
],
'stats': [
'scikit-image',
],
},
packages=find_packages(),
entry_points={
Expand Down

0 comments on commit 18e6f4c

Please sign in to comment.