From d2369f7cf4d1a670852f67492a2848c17bd23a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20Antol=C3=ADn?= Date: Wed, 4 Dec 2024 18:17:52 +0100 Subject: [PATCH] fix: Avoid computing top_values and quantiles when not necessary --- raster_loader/io/common.py | 44 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/raster_loader/io/common.py b/raster_loader/io/common.py index 48df3cc..1dc0ada 100644 --- a/raster_loader/io/common.py +++ b/raster_loader/io/common.py @@ -498,15 +498,15 @@ def raster_band_approx_stats( _sum = int(np.sum(samples_band)) sum_squares = int(np.sum(np.array(samples_band) ** 2)) - quantiles = compute_quantiles(samples_band, int) - - most_common = dict() - if not band_is_float(raster_dataset, band): - most_common = most_common_approx(samples_band) - if omit_stats: quantiles = None most_common = None + else: + quantiles = compute_quantiles(samples_band, int) + + most_common = dict() + if not band_is_float(raster_dataset, band): + most_common = most_common_approx(samples_band) return { "min": stats.min, @@ -588,24 +588,26 @@ def raster_band_stats( print("Removing masked data...") qdata = raster_band.compressed() - casting_function = ( - int if np.issubdtype(raster_band.dtype, np.integer) else float - ) - - quantiles = compute_quantiles(qdata, casting_function) - - print("Computing most commons values...") - warnings.warn( - "Most common values are meant for categorical data. " - "Computing them for float bands can be meaningless." - ) - most_common = Counter(qdata).most_common(100) - most_common.sort(key=lambda x: x[1], reverse=True) - most_common = dict([(casting_function(x[0]), x[1]) for x in most_common]) - if omit_stats: quantiles = None most_common = None + else: + casting_function = ( + int if np.issubdtype(raster_band.dtype, np.integer) else float + ) + + quantiles = compute_quantiles(qdata, casting_function) + + print("Computing most commons values...") + if casting_function == float: + warnings.warn( + "Most common values are meant for categorical data. " + "Computing them for float bands can be meaningless.\n" + "Please, consider to use the --omit_stats option.", + ) + most_common = Counter(qdata).most_common(100) + most_common.sort(key=lambda x: x[1], reverse=True) + most_common = dict([(casting_function(x[0]), x[1]) for x in most_common]) version = ".".join(__version__.split(".")[:3])