Skip to content

Commit

Permalink
Add logging when data range is large enough to trigger slow path.
Browse files: browse the repository at this point in the history
  • Loading branch information
erykoff committed Dec 11, 2023
1 parent f5773bd commit f80d39a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
6 changes: 4 additions & 2 deletions python/lsst/summit/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,7 @@ def getFilterSeeingCorrection(filterName):
raise ValueError(f"Unknown filter name: {filterName}")


def getCdf(data, scale, nBinsMax=131072):
def getCdf(data, scale, nBinsMax=300_000):
"""Return an approximate cumulative distribution function scaled to
the [0, scale] range.
Expand Down Expand Up @@ -1008,8 +1008,10 @@ def getQuantiles(data, nColors):
A monotonically increasing sequence of size (nColors + 1). These are
the edges of nColors intervals.
"""
if (np.nanmax(data) - np.nanmin(data)) > 131072:
if (np.nanmax(data) - np.nanmin(data)) > 300_000:
# Use the slower but memory-efficient nanquantile
logger = logging.getLogger(__name__)
logger.warning("Data range is very large; using slower quantile code.")
boundaries = np.nanquantile(data, np.linspace(0, 1, nColors + 1))
else:
cdf, minVal, maxVal = getCdf(data, nColors)
Expand Down
8 changes: 7 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,13 @@ def test_quantiles(self):
for nColors, (mean, width, decimal) in itertools.product(colorRanges, dataRanges):
data = np.random.normal(mean, width, (100, 100))
data[10, 10] = np.nan # check we're still nan-safe
edges1 = getQuantiles(data, nColors)
if np.nanmax(data) - np.nanmin(data) > 300_000:
with self.assertLogs(level="WARNING") as cm:
edges1 = getQuantiles(data, nColors)
self.assertIn("Data range is very large", cm.output[0])
else:
with self.assertNoLogs(level="WARNING") as cm:
edges1 = getQuantiles(data, nColors)
edges2 = np.nanquantile(data, np.linspace(0, 1, nColors + 1)) # must check with nanquantile
np.testing.assert_almost_equal(edges1, edges2, decimal=decimal)

Expand Down

0 comments on commit f80d39a

Please sign in to comment.