diff --git a/datashader/bundling.py b/datashader/bundling.py index 98026fca0..4cfac06ef 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -15,9 +15,17 @@ from math import ceil -from dask import compute, delayed from pandas import DataFrame +try: + import dask + from dask import compute, delayed +except ImportError: + dask, compute = None, None + def delayed(*args, **kwargs): + def func(*args, **kwargs): + raise ImportError("dask is required to use delayed functions") + return func try: import skimage from skimage.filters import gaussian, sobel_h, sobel_v @@ -457,8 +465,8 @@ class hammer_bundle(connect_edges): Column name for each edge weight. If None, weights are ignored.""") def __call__(self, nodes, edges, **params): - if skimage is None: - raise ImportError("hammer_bundle operation requires scikit-image. " + if dask is None or skimage is None: + raise ImportError("hammer_bundle operation requires dask and scikit-image. " "Ensure you install the dependency before applying " "bundling.") diff --git a/datashader/core.py b/datashader/core.py index 3473050e4..282180639 100644 --- a/datashader/core.py +++ b/datashader/core.py @@ -7,8 +7,6 @@ import numpy as np import pandas as pd -import dask.dataframe as dd -import dask.array as da from packaging.version import Version from xarray import DataArray, Dataset @@ -19,6 +17,12 @@ from .resampling import resample_2d, resample_2d_distributed from . import reductions as rd +try: + import dask.dataframe as dd + import dask.array as da +except ImportError: + dd, da = None, None + try: import cudf except Exception: @@ -1143,7 +1147,7 @@ def raster(self, source_window = array[rmin:rmax+1, cmin:cmax+1] if ds_method in ['var', 'std']: source_window = source_window.astype('f') - if isinstance(source_window, da.Array): + if da and isinstance(source_window, da.Array): data = resample_2d_distributed( source_window, chunksize=chunksize, max_mem=max_mem, **kwargs) @@ -1156,7 +1160,7 @@ def raster(self, source_window = source_window.astype('f') arrays = [] for arr in source_window: - if isinstance(arr, da.Array): + if da and isinstance(arr, da.Array): arr = resample_2d_distributed( arr, chunksize=chunksize, max_mem=max_mem, **kwargs) @@ -1192,7 +1196,7 @@ def raster(self, top_pad = np.full(tshape, fill_value, source_window.dtype) bottom_pad = np.full(bshape, fill_value, source_window.dtype) - concat = da.concatenate if isinstance(data, da.Array) else np.concatenate + concat = da.concatenate if da and isinstance(data, da.Array) else np.concatenate arrays = (top_pad, data) if top_pad.shape[0] > 0 else (data,) if bottom_pad.shape[0] > 0: arrays += (bottom_pad,) @@ -1350,7 +1354,10 @@ def _bypixel_sanitise(source, glyph, agg): columns = list(source.coords.keys()) + list(source.data_vars.keys()) cols_to_keep = _cols_to_keep(columns, glyph, agg) source = source.drop_vars([col for col in columns if col not in cols_to_keep]) - source = source.to_dask_dataframe() + if dd: + source = source.to_dask_dataframe() + else: + source = source.to_dataframe() if (isinstance(source, pd.DataFrame) or (cudf and isinstance(source, cudf.DataFrame))): @@ -1372,7 +1379,7 @@ def _bypixel_sanitise(source, glyph, agg): getattr(source[glyph.geometry].array, "_sindex", None) is None): source[glyph.geometry].array._sindex = sindex dshape = dshape_from_pandas(source) - elif isinstance(source, dd.DataFrame): + elif dd and isinstance(source, dd.DataFrame): dshape, source = dshape_from_dask(source) elif isinstance(source, Dataset): # Multi-dimensional Dataset diff --git a/datashader/resampling.py b/datashader/resampling.py index 83540eb9e..0ab97e94f 100644 --- a/datashader/resampling.py +++ b/datashader/resampling.py @@ -29,16 +29,20 @@ from itertools import groupby from math import floor, ceil -import dask.array as da import numpy as np -from dask.delayed import delayed from numba import prange from .utils import ngjit, ngjit_parallel +try: + import dask.array as da + from dask.delayed import delayed +except ImportError: + da, delayed = None, None + try: import cupy -except Exception: +except ImportError: cupy = None @@ -242,6 +246,8 @@ def resample_2d_distributed(src, w, h, ds_method='mean', us_method='linear', resampled : dask.array.Array A resampled version of the *src* array. """ + if da is None: + raise ImportError('dask is required for distributed regridding') temp_chunks = compute_chunksize(src, w, h, chunksize, max_mem) if chunksize is None: chunksize = src.chunksize @@ -343,7 +349,7 @@ def resample_2d(src, w, h, ds_method='mean', us_method='linear', return _mask_or_not(resampled, src, fill_value) -_resample_2d_delayed = delayed(resample_2d) +_resample_2d_delayed = delayed(resample_2d) if delayed else None def upsample_2d(src, w, h, method=US_LINEAR, fill_value=None, out=None): diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index 0c57a1d64..05345c352 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -1,11 +1,9 @@ from __future__ import annotations -import dask.dataframe as dd import numpy as np import pandas as pd import xarray as xr -from dask.context import config from numpy import nan from packaging.version import Version @@ -27,7 +25,14 @@ assert_eq_xr, assert_eq_ndarray, values ) -config.set(scheduler='synchronous') + +try: + import dask.dataframe as dd + from dask.context import config + config.set(scheduler='synchronous') +except ImportError: + pytestmark = pytest.importorskip("dask") + @dask_switcher(query=False) diff --git a/datashader/tests/test_geopandas.py b/datashader/tests/test_geopandas.py index e7eafdd06..504778036 100644 --- a/datashader/tests/test_geopandas.py +++ b/datashader/tests/test_geopandas.py @@ -1,7 +1,6 @@ # Testing GeoPandas and SpatialPandas import contextlib -import dask.dataframe as dd import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray import numpy as np @@ -10,6 +9,11 @@ from datashader.tests.utils import dask_switcher from packaging.version import Version +try: + import dask.dataframe as dd +except ImportError: + dd = None + _backends = [ pytest.param(False, id="dask"), ] diff --git a/datashader/tests/test_polygons.py b/datashader/tests/test_polygons.py index fa8593971..5d9c81296 100644 --- a/datashader/tests/test_polygons.py +++ b/datashader/tests/test_polygons.py @@ -4,9 +4,12 @@ import xarray as xr import datashader as ds from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr -import dask.dataframe as dd from datashader.tests.utils import dask_switcher +try: + import dask.dataframe as dd +except ImportError: + dd = None @pytest.fixture(autouse=True) def _classic_dd(): diff --git a/datashader/tests/test_quadmesh.py b/datashader/tests/test_quadmesh.py index e5c95e9c7..c629e447a 100644 --- a/datashader/tests/test_quadmesh.py +++ b/datashader/tests/test_quadmesh.py @@ -5,10 +5,20 @@ import datashader as ds import pytest -import dask.array from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr +from datashader.tests.utils import dask_skip + +array_modules = [np] + +try: + import dask + import dask.array + dask.config.set(scheduler='single-threaded') + array_modules.append(dask.array) +except ImportError: + class dask: + array = None -array_modules = [np, dask.array] try: import cudf import cupy @@ -18,9 +28,6 @@ cupy = None -dask.config.set(scheduler='single-threaded') - - # Raster @pytest.mark.parametrize('array_module', array_modules) def test_raster_quadmesh_autorange_downsample(array_module): @@ -90,6 +97,7 @@ def test_raster_quadmesh_autorange(array_module): assert_eq_ndarray(res.y_range, (0.5, 2.5), close=True) +@dask_skip def test_raster_quadmesh_autorange_chunked(): c = ds.Canvas(plot_width=8, plot_height=6) da = xr.DataArray( @@ -331,6 +339,7 @@ def test_rectilinear_quadmesh_autorange(array_module): assert_eq_ndarray(res.y_range, (0.5, 2.5), close=True) +@dask_skip def test_rectilinear_quadmesh_autorange_chunked(): c = ds.Canvas(plot_width=8, plot_height=6) da = xr.DataArray( @@ -572,7 +581,7 @@ def test_curve_quadmesh_autorange(array_module): assert_eq_ndarray(res.x_range, (0.5, 2.5), close=True) assert_eq_ndarray(res.y_range, (-1, 7), close=True) - +@dask_skip def test_curve_quadmesh_autorange_chunked(): c = ds.Canvas(plot_width=4, plot_height=8) diff --git a/datashader/tests/test_raster.py b/datashader/tests/test_raster.py index c554b5491..9ea5723f9 100644 --- a/datashader/tests/test_raster.py +++ b/datashader/tests/test_raster.py @@ -1,32 +1,35 @@ from __future__ import annotations import pytest -try: - import rasterio -except ImportError: - rasterio = None - -try: - import rioxarray -except ImportError: - rioxarray = None - -from dask.context import config - from os import path from itertools import product import datashader as ds import xarray as xr import numpy as np -import dask.array as da import pandas as pd from datashader.resampling import compute_chunksize import datashader.transfer_functions as tf from packaging.version import Version +from .utils import dask_skip + +try: + import rasterio +except ImportError: + rasterio = None + +try: + import rioxarray +except ImportError: + rioxarray = None -config.set(scheduler='synchronous') +try: + from dask.context import config + import dask.array as da + config.set(scheduler='synchronous') +except ImportError: + da = None open_rasterio_available = pytest.mark.skipif(rioxarray is None and rasterio is None, reason="requires rioxarray or rasterio") @@ -454,6 +457,7 @@ def test_raster_single_pixel_range_with_padding(): assert np.allclose(agg.y.values, np.array([-0.399875, -0.199625, 0.000625, 0.200875])) +@dask_skip @pytest.mark.parametrize( 'in_size, out_size, agg', product(range(5, 8), range(2, 5), @@ -481,6 +485,7 @@ def test_raster_distributed_downsample(in_size, out_size, agg): assert np.allclose(agg_arr.y.values, agg_darr.y.values) +@dask_skip @pytest.mark.parametrize('in_size, out_size', product(range(2, 5), range(7, 9))) def test_raster_distributed_upsample(in_size, out_size): """ @@ -505,6 +510,7 @@ def test_raster_distributed_upsample(in_size, out_size): assert np.allclose(agg_arr.y.values, agg_darr.y.values) +@dask_skip def test_raster_distributed_regrid_chunksize(): """ Ensure that distributed regrid respects explicit chunk size. @@ -523,7 +529,7 @@ def test_raster_distributed_regrid_chunksize(): assert agg_darr.data.chunksize == (1, 1) - +@dask_skip def test_resample_compute_chunksize(): """ Ensure chunksize computation is correct. diff --git a/datashader/tests/test_tiles.py b/datashader/tests/test_tiles.py index 188e76efd..a959b33b0 100644 --- a/datashader/tests/test_tiles.py +++ b/datashader/tests/test_tiles.py @@ -10,6 +10,8 @@ from datashader.tiles import calculate_zoom_level_stats from datashader.tiles import MercatorTileDefinition +from datashader.tests.utils import dask_skip + import numpy as np import pandas as pd @@ -57,6 +59,7 @@ def mock_post_render_func(img, **kwargs): # TODO: mark with slow_test +@dask_skip def test_render_tiles(): full_extent_of_data = (-500000, -500000, 500000, 500000) @@ -107,6 +110,7 @@ def test_get_super_tile_min_max(): assert_is_numeric(result[0]) assert_is_numeric(result[1]) +@dask_skip def test_calculate_zoom_level_stats_with_fullscan_ranging_strategy(): full_extent = (-MERCATOR_CONST, -MERCATOR_CONST, MERCATOR_CONST, MERCATOR_CONST) diff --git a/datashader/tests/test_transfer_functions.py b/datashader/tests/test_transfer_functions.py index d26d51495..993188e7b 100644 --- a/datashader/tests/test_transfer_functions.py +++ b/datashader/tests/test_transfer_functions.py @@ -4,12 +4,16 @@ import numpy as np import xarray as xr -import dask.array as da import PIL import pytest import datashader.transfer_functions as tf from datashader.tests.test_pandas import assert_eq_ndarray, assert_eq_xr, assert_image_close +try: + import dask.array as da +except ImportError: + da = None + coords = dict([('x_axis', [3, 4, 5]), ('y_axis', [0, 1, 2])]) dims = ['y_axis', 'x_axis'] @@ -34,6 +38,8 @@ def build_agg(array_module=np): def build_agg_dask(): # Dask arrays are immutable `build_agg(da)` won't work. # Create numpy based DataArray and convert to Dask by forcing chunking. + if da is None: + pytest.skip("dask is not available") return build_agg(np).chunk({d: 1 for d in dims}) @@ -55,6 +61,8 @@ def agg(request): def create_dask_array(*args, **kwargs): """Create a dask array wrapping around a numpy array.""" + if da is None: + pytest.skip("dask is not available") return da.from_array(np.array(*args, **kwargs)) @@ -113,7 +121,7 @@ def array(request): def check_span(x, cmap, how, sol): # Copy inputs that will be modified sol = sol.copy() - if isinstance(x, xr.DataArray) and isinstance(x.data, da.Array): + if isinstance(x, xr.DataArray) and da and isinstance(x.data, da.Array): x = x.compute() else: x = x.copy() diff --git a/datashader/tests/test_xarray.py b/datashader/tests/test_xarray.py index 71f3f1f40..ad6956e1b 100644 --- a/datashader/tests/test_xarray.py +++ b/datashader/tests/test_xarray.py @@ -28,8 +28,12 @@ xda.f64[2] = np.nan xds = xda.to_dataset(name='value').reset_coords(names=['i32', 'i64']) -xdda = xda.chunk(chunks=5) -xdds = xds.chunk(chunks=5) +try: + import dask + xdda = xda.chunk(chunks=5) + xdds = xds.chunk(chunks=5) +except ImportError: + dask, xdda, xdds = None, None, None c = ds.Canvas(plot_width=2, plot_height=2, x_range=(0, 1), y_range=(0, 1)) @@ -45,6 +49,8 @@ def assert_eq(agg, b): @pytest.mark.parametrize("source", [xda, xdda, xds, xdds]) def test_count(source): + if source is None: + pytest.skip("Dask not available") out = xr.DataArray(np.array([[5, 5], [5, 5]], dtype='i4'), coords=coords, dims=dims) assert_eq(c.points(source, 'x', 'y', ds.count('i32')), out) @@ -110,6 +116,8 @@ def test_lines_xarray_common_x(ds2d, on_gpu, chunksizes): source.name.data = cupy.asarray(source.name.data) if chunksizes is not None: + if dask is None: + pytest.skip("Dask not available") source = source.chunk(chunksizes) canvas = ds.Canvas(plot_height=3, plot_width=7) diff --git a/datashader/tests/utils.py b/datashader/tests/utils.py index bbae5695c..17100611c 100644 --- a/datashader/tests/utils.py +++ b/datashader/tests/utils.py @@ -3,13 +3,14 @@ from importlib import reload from importlib.util import find_spec -import dask import pytest -__all__ = ("dask_switcher",) +__all__ = ("dask_switcher", "DASK_UNAVAILABLE", "EXPR_UNAVAILABLE", "dask_skip") +DASK_UNAVAILABLE = find_spec("dask") is None EXPR_UNAVAILABLE = find_spec("dask_expr") is None +dask_skip = pytest.mark.skipif(DASK_UNAVAILABLE, reason="dask is not available") @contextmanager def dask_switcher(*, query=False, extras=None): @@ -19,9 +20,13 @@ def dask_switcher(*, query=False, extras=None): Using a context manager as it is an easy way to change the function to a decorator. """ + if DASK_UNAVAILABLE: + pytest.skip("dask is not available") if query and EXPR_UNAVAILABLE: pytest.skip("dask-expr is not available") + import dask + dask.config.set(**{"dataframe.query-planning": query}) for module in ("dask.dataframe", *(extras or ())): if module in sys.modules: diff --git a/datashader/tiles.py b/datashader/tiles.py index 524ab7f29..7149dd973 100644 --- a/datashader/tiles.py +++ b/datashader/tiles.py @@ -4,12 +4,15 @@ import math import os -import dask -import dask.bag as db import numpy as np from PIL.Image import fromarray +try: + import dask + import dask.bag as db +except ImportError: + dask, db = None, None __all__ = ['render_tiles', 'MercatorTileDefinition'] @@ -51,9 +54,11 @@ def calculate_zoom_level_stats(super_tiles, load_data_func, stats.append(np.nanmax(agg.data)) if is_bool: span = (0, 1) - else: + elif dask: b = db.from_sequence(stats) span = dask.compute(b.min(), b.max()) + else: + raise ValueError('Dask is required for non-boolean data') return super_tiles, span else: raise ValueError('Invalid color_ranging_strategy option') @@ -62,6 +67,8 @@ def calculate_zoom_level_stats(super_tiles, load_data_func, def render_tiles(full_extent, levels, load_data_func, rasterize_func, shader_func, post_render_func, output_path, color_ranging_strategy='fullscan'): + if not dask: + raise ImportError('Dask is required for rendering tiles') results = {} for level in levels: print('calculating statistics for level {}'.format(level)) diff --git a/datashader/transfer_functions/__init__.py b/datashader/transfer_functions/__init__.py index c75a6bdc0..1cd9e2b6a 100644 --- a/datashader/transfer_functions/__init__.py +++ b/datashader/transfer_functions/__init__.py @@ -10,12 +10,16 @@ import numba as nb import toolz as tz import xarray as xr -import dask.array as da from PIL.Image import fromarray from datashader.colors import rgb, Sets1to3 from datashader.utils import nansum_missing, ngjit +try: + import dask.array as da +except ImportError: + da = None + try: import cupy except Exception: @@ -252,7 +256,7 @@ def _interpolate(agg, cmap, how, alpha, span, min_alpha, name, rescale_discrete_ interpolater = _normalize_interpolate_how(how) data = agg.data - if isinstance(data, da.Array): + if da and isinstance(data, da.Array): data = data.compute() else: data = data.copy() @@ -381,7 +385,7 @@ def _colorize(agg, color_key, how, alpha, span, min_alpha, name, color_baseline, # Reorient array (transposing the category dimension first) agg_t = agg.transpose(*((agg.dims[-1],)+agg.dims[:2])) data = agg_t.data.transpose([1, 2, 0]) - if isinstance(data, da.Array): + if da and isinstance(data, da.Array): data = data.compute() color_data = data.copy() @@ -528,7 +532,7 @@ def _apply_discrete_colorkey(agg, color_key, alpha, name, color_baseline): raise ValueError("Color key must be provided as a dictionary") agg_data = agg.data - if isinstance(agg_data, da.Array): + if da and isinstance(agg_data, da.Array): agg_data = agg_data.compute() cats = color_key.keys() diff --git a/datashader/utils.py b/datashader/utils.py index f68583c4a..acab29133 100644 --- a/datashader/utils.py +++ b/datashader/utils.py @@ -12,9 +12,13 @@ from toolz import memoize from xarray import DataArray -import dask.dataframe as dd import datashader.datashape as datashape +try: + import dask.dataframe as dd +except ImportError: + dd = None + try: from datashader.datatypes import RaggedDtype except ImportError: @@ -417,7 +421,7 @@ def dshape_from_pandas_helper(col): cudf and isinstance(col.dtype, cudf.core.dtypes.CategoricalDtype)): # Compute category dtype pd_categories = col.cat.categories - if isinstance(pd_categories, dd.Index): + if dd and isinstance(pd_categories, dd.Index): pd_categories = pd_categories.compute() if cudf and isinstance(pd_categories, cudf.Index): pd_categories = pd_categories.to_pandas() @@ -581,7 +585,7 @@ def mesh(vertices, simplices): 'If no vertex weight column is provided, a triangle weight column is required.' - if isinstance(vertices, dd.DataFrame) and isinstance(simplices, dd.DataFrame): + if dd and isinstance(vertices, dd.DataFrame) and isinstance(simplices, dd.DataFrame): return _dd_mesh(vertices, simplices) return _pd_mesh(vertices, simplices) diff --git a/pixi.toml b/pixi.toml index fd4d3303f..4e1cd47b7 100644 --- a/pixi.toml +++ b/pixi.toml @@ -23,7 +23,6 @@ lint = ["py311", "lint"] [dependencies] colorcet = "*" -dask-core = "*" multipledispatch = "*" numba = "*" numpy = "*" @@ -55,6 +54,7 @@ bokeh_sampledata = "*" [feature.example.dependencies] bokeh = ">3.1" +dask-core = "*" dask-geopandas = "*" fastparquet = "*" geodatasets = "*" @@ -90,6 +90,7 @@ test-unit-nojit = { cmd = 'pytest datashader -k "not test_tiles" -n logical --di test-benchmark = 'pytest datashader/tests --benchmark' [feature.test.dependencies] +dask-core = "*" dask-expr = "*" dask-geopandas = "*" geodatasets = "*" diff --git a/pyproject.toml b/pyproject.toml index 689fe4777..d7b545236 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ classifiers = [ ] dependencies = [ 'colorcet', - 'dask', 'multipledispatch', 'numba', 'numpy',