Remove Dask as a hard dependency of Datashader #1350

Open
wants to merge 14 commits into main
16 changes: 10 additions & 6 deletions datashader/core.py
@@ -7,8 +7,6 @@

 import numpy as np
 import pandas as pd
-import dask.dataframe as dd
-import dask.array as da
 from packaging.version import Version
 from xarray import DataArray, Dataset

@@ -19,6 +17,12 @@
 from .resampling import resample_2d, resample_2d_distributed
 from . import reductions as rd

+try:
+    import dask.dataframe as dd
+    import dask.array as da
+except ImportError:
+    dd, da = None, None
+
 try:
     import cudf
 except Exception:
@@ -1143,7 +1147,7 @@ def raster(self,
         source_window = array[rmin:rmax+1, cmin:cmax+1]
         if ds_method in ['var', 'std']:
             source_window = source_window.astype('f')
-        if isinstance(source_window, da.Array):
+        if da and isinstance(source_window, da.Array):
             data = resample_2d_distributed(
                 source_window, chunksize=chunksize, max_mem=max_mem,
                 **kwargs)
@@ -1156,7 +1160,7 @@ def raster(self,
                 source_window = source_window.astype('f')
             arrays = []
             for arr in source_window:
-                if isinstance(arr, da.Array):
+                if da and isinstance(arr, da.Array):
                     arr = resample_2d_distributed(
                         arr, chunksize=chunksize, max_mem=max_mem,
                         **kwargs)
@@ -1192,7 +1196,7 @@ def raster(self,
             top_pad = np.full(tshape, fill_value, source_window.dtype)
             bottom_pad = np.full(bshape, fill_value, source_window.dtype)

-            concat = da.concatenate if isinstance(data, da.Array) else np.concatenate
+            concat = da.concatenate if da and isinstance(data, da.Array) else np.concatenate
             arrays = (top_pad, data) if top_pad.shape[0] > 0 else (data,)
             if bottom_pad.shape[0] > 0:
                 arrays += (bottom_pad,)
@@ -1372,7 +1376,7 @@ def _bypixel_sanitise(source, glyph, agg):
                 getattr(source[glyph.geometry].array, "_sindex", None) is None):
             source[glyph.geometry].array._sindex = sindex
         dshape = dshape_from_pandas(source)
-    elif isinstance(source, dd.DataFrame):
+    elif dd and isinstance(source, dd.DataFrame):
         dshape, source = dshape_from_dask(source)
     elif isinstance(source, Dataset):
         # Multi-dimensional Dataset
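Reviewer note: with the guarded imports above, `dd` and `da` become `None` sentinels when Dask is missing, and every `isinstance` check now tests the sentinel first (an imported module is always truthy). A minimal, self-contained sketch of the pattern — the `to_numpy` helper is illustrative only, not part of this PR:

    try:
        import dask.array as da
    except ImportError:
        da = None  # sentinel: Dask is not installed

    def to_numpy(arr):
        # Materialize lazy Dask arrays; pass eager NumPy arrays through.
        # Testing `da` first short-circuits the expression, so `da.Array`
        # is never evaluated when the import failed.
        if da and isinstance(arr, da.Array):
            return arr.compute()
        return arr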
17 changes: 13 additions & 4 deletions datashader/resampling.py
@@ -29,16 +29,20 @@
 from itertools import groupby
 from math import floor, ceil

-import dask.array as da
 import numpy as np

-from dask.delayed import delayed
 from numba import prange
 from .utils import ngjit, ngjit_parallel

+try:
+    import dask.array as da
+    from dask.delayed import delayed
+except ImportError:
+    da, delayed = None, None
+
 try:
     import cupy
-except Exception:
+except ImportError:
     cupy = None


@@ -242,6 +246,8 @@ def resample_2d_distributed(src, w, h, ds_method='mean', us_method='linear',
     resampled : dask.array.Array
         A resampled version of the *src* array.
     """
+    if da is None:
+        raise ImportError('dask is required for distributed regridding')
     temp_chunks = compute_chunksize(src, w, h, chunksize, max_mem)
     if chunksize is None:
         chunksize = src.chunksize
@@ -343,7 +349,10 @@ def resample_2d(src, w, h, ds_method='mean', us_method='linear',
     return _mask_or_not(resampled, src, fill_value)


-_resample_2d_delayed = delayed(resample_2d)
+if delayed:
+    _resample_2d_delayed = delayed(resample_2d)
+else:
+    _resample_2d_delayed = None


 def upsample_2d(src, w, h, method=US_LINEAR, fill_value=None, out=None):
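Reviewer note: because the guard is the first statement in `resample_2d_distributed`, a Dask-free environment fails fast with a clear message instead of an AttributeError deep inside the function. A hedged sketch of a regression test for this (assumes pytest and its `monkeypatch` fixture; not part of this PR):

    import pytest

    def test_distributed_resampling_requires_dask(monkeypatch):
        import datashader.resampling as rs
        # Simulate a missing Dask install by forcing the sentinel to None.
        monkeypatch.setattr(rs, 'da', None)
        with pytest.raises(ImportError):
            rs.resample_2d_distributed(None, 10, 10)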
16 changes: 12 additions & 4 deletions datashader/tiles.py
@@ -4,12 +4,15 @@
 import math
 import os

-import dask
-import dask.bag as db

 import numpy as np

 from PIL.Image import fromarray
+try:
+    import dask
+    import dask.bag as db
+except ImportError:
+    dask, db = None, None

 __all__ = ['render_tiles', 'MercatorTileDefinition']

@@ -52,8 +55,11 @@ def calculate_zoom_level_stats(super_tiles, load_data_func,
         if is_bool:
             span = (0, 1)
         else:
-            b = db.from_sequence(stats)
-            span = dask.compute(b.min(), b.max())
+            if dask:
+                b = db.from_sequence(stats)
+                span = dask.compute(b.min(), b.max())
+            else:
+                raise ValueError('Dask is required for non-boolean data')
         return super_tiles, span
     else:
         raise ValueError('Invalid color_ranging_strategy option')
@@ -62,6 +68,8 @@ def render_tiles(full_extent, levels, load_data_func,
 def render_tiles(full_extent, levels, load_data_func,
                  rasterize_func, shader_func,
                  post_render_func, output_path, color_ranging_strategy='fullscan'):
+    if not dask:
+        raise ImportError('Dask is required for rendering tiles')
     results = {}
     for level in levels:
         print('calculating statistics for level {}'.format(level))
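Reviewer note: `render_tiles` now refuses to start at all without Dask, rather than failing mid-run. A hedged usage sketch — the extent, levels, and Ellipsis callback placeholders below are illustrative only:

    from datashader.tiles import render_tiles

    try:
        render_tiles(full_extent=(-20e6, -20e6, 20e6, 20e6),
                     levels=range(0, 3),
                     load_data_func=..., rasterize_func=...,
                     shader_func=..., post_render_func=...,
                     output_path='tiles_output')
    except ImportError as e:
        # Raised immediately when Dask is absent:
        # 'Dask is required for rendering tiles'
        print(e)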
12 changes: 8 additions & 4 deletions datashader/transfer_functions/__init__.py
@@ -10,12 +10,16 @@
 import numba as nb
 import toolz as tz
 import xarray as xr
-import dask.array as da
 from PIL.Image import fromarray

 from datashader.colors import rgb, Sets1to3
 from datashader.utils import nansum_missing, ngjit

+try:
+    import dask.array as da
+except ImportError:
+    da = None
+
 try:
     import cupy
 except Exception:
@@ -252,7 +256,7 @@ def _interpolate(agg, cmap, how, alpha, span, min_alpha, name, rescale_discrete_
     interpolater = _normalize_interpolate_how(how)

     data = agg.data
-    if isinstance(data, da.Array):
+    if da and isinstance(data, da.Array):
         data = data.compute()
     else:
         data = data.copy()
@@ -381,7 +385,7 @@ def _colorize(agg, color_key, how, alpha, span, min_alpha, name, color_baseline,
     # Reorient array (transposing the category dimension first)
     agg_t = agg.transpose(*((agg.dims[-1],)+agg.dims[:2]))
     data = agg_t.data.transpose([1, 2, 0])
-    if isinstance(data, da.Array):
+    if da and isinstance(data, da.Array):
         data = data.compute()
     color_data = data.copy()

@@ -528,7 +532,7 @@ def _apply_discrete_colorkey(agg, color_key, alpha, name, color_baseline):
         raise ValueError("Color key must be provided as a dictionary")

     agg_data = agg.data
-    if isinstance(agg_data, da.Array):
+    if da and isinstance(agg_data, da.Array):
         agg_data = agg_data.compute()

     cats = color_key.keys()
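Reviewer note: the `_interpolate` branch above reduces to a small compute-or-copy helper. A minimal sketch of that shape in isolation (the `materialize` name is illustrative, not part of this PR):

    try:
        import dask.array as da
    except ImportError:
        da = None

    def materialize(data):
        # Dask-backed aggregates are pulled into memory; eager NumPy
        # data is copied so the caller's aggregate is never mutated.
        if da and isinstance(data, da.Array):
            return data.compute()
        return data.copy()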
8 changes: 6 additions & 2 deletions datashader/utils.py
@@ -12,9 +12,13 @@
 from toolz import memoize
 from xarray import DataArray

-import dask.dataframe as dd
 import datashader.datashape as datashape

+try:
+    import dask.dataframe as dd
+except ImportError:
+    dd = None
+
 try:
     from datashader.datatypes import RaggedDtype
 except ImportError:
@@ -581,7 +585,7 @@ def mesh(vertices, simplices):
         'If no vertex weight column is provided, a triangle weight column is required.'


-    if isinstance(vertices, dd.DataFrame) and isinstance(simplices, dd.DataFrame):
+    if dd and isinstance(vertices, dd.DataFrame) and isinstance(simplices, dd.DataFrame):
         return _dd_mesh(vertices, simplices)

     return _pd_mesh(vertices, simplices)
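Reviewer note: with `dd` as a `None` sentinel, `mesh()` on plain pandas input falls through to `_pd_mesh` and needs no Dask at all. A hedged smoke-test sketch (the column layout assumes the documented vertices/simplices format with a per-vertex weight column; not part of this PR):

    import pandas as pd
    from datashader.utils import mesh

    vertices = pd.DataFrame({'x': [0., 1., 0.], 'y': [0., 0., 1.],
                             'z': [1., 2., 3.]})       # per-vertex weight
    simplices = pd.DataFrame({'v0': [0], 'v1': [1], 'v2': [2]})
    trimesh = mesh(vertices, simplices)  # pandas path; Dask not required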
3 changes: 2 additions & 1 deletion setup.py
@@ -10,7 +10,6 @@

 install_requires = [
     'colorcet',
-    'dask',
     'multipledispatch',
     'numba',
     'numpy',
@@ -33,6 +32,7 @@

 examples = geopandas + [
     'bokeh >3.1',
+    'dask',
     'geodatasets',
     'holoviews',
     'matplotlib >=3.3',
@@ -48,6 +48,7 @@

 extras_require = {
     'tests': geopandas + [
+        'dask',
         'geodatasets',
         'nbval',
         'netcdf4',
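Reviewer note: the net effect on installs, sketched with pip (extra names taken from the dependency groups edited above):

    pip install datashader          # core install; Dask no longer pulled in
    pip install datashader[tests]   # test extra keeps Dask available
    pip install datashader dask     # opt back in to the distributed paths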