Skip to content

Commit

Permalink
January 2021 CEOS Data Cube Utilities Release 2.22 Hotfix
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrattz committed Mar 16, 2021
2 parents 8887d1f + 7dc9a7e commit 4a2ce91
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
22 changes: 16 additions & 6 deletions data_cube_utilities/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_bin_intervals(data, num_bins):


def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
frac_res=None, abs_res=None):
frac_res=None, abs_res=None, val_interp_method='linear'):
"""
Scales the resolution of an `xarray.Dataset` or `xarray.DataArray`
to a fraction of its original resolution or an absolute resolution.
Expand All @@ -46,6 +46,10 @@ def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
abs_res: list-like
A list-like of the number of pixels for the x and y axes, respectively.
Overrides `frac_res` if specified.
val_interp_method: str
The interpolation method for the values. This is the `method` parameter
input to `xarray.Dataset.interp()` after the coordinates have been interpolated.
Can be one of ['nearest', 'linear'].
Returns
-------
Expand All @@ -65,7 +69,8 @@ def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
interp_param = 'num'
x_px, y_px = abs_res
return xr_interp(dataset, {x_coord: ('interp', {interp_param: x_px}), \
y_coord: ('interp', {interp_param: y_px})})
y_coord: ('interp', {interp_param: y_px})},
val_interp_method=val_interp_method)


def xr_sel_time_by_bin(dataset, num_bins, time_coord='time'):
Expand All @@ -92,15 +97,15 @@ def xr_sel_time_by_bin(dataset, num_bins, time_coord='time'):
return xr_interp(dataset, {time_coord: ('bin', {'num': num_bins})})


def xr_interp(dataset, interp_config):
def xr_interp(dataset, interp_config, val_interp_method='nearest'):
"""
Interpolates an `xarray.Dataset` or `xarray.DataArray`.
This is often done to match dimensions between xarray objects or
downsample to reduce memory consumption.
First, coordinates are interpolated according to `interp_config`.
Then the data values for those interpolated coordinates are obtained
through nearest neighbors interpolation.
through interpolation.
Parameters
----------
Expand All @@ -121,6 +126,10 @@ def xr_interp(dataset, interp_config):
The following is an example value:
`{'latitude':('interp',{'frac':0.5}),'longitude':('interp',{'frac':0.5}),
'time':('bin',{'num':20})}`.
val_interp_method: str
The interpolation method for the values. This is the `method` parameter
input to `xarray.Dataset.interp()` after the coordinates have been interpolated.
Can be one of ['nearest', 'linear'].
Returns
-------
Expand Down Expand Up @@ -160,11 +169,12 @@ def xr_interp(dataset, interp_config):
interp_vals = np.array(list(map(_scalar_to_n64_datetime, interp_vals)))
new_coords[dim] = interp_vals
# Interpolate data values using the method given by `val_interp_method`.
interp_data = dataset.interp(coords=new_coords, method='nearest')
interp_data = dataset.interp(coords=new_coords, method=val_interp_method)
# xarray.Dataset.interp() converts to dtype float64, so cast back to the original dtypes.
if isinstance(dataset, xr.DataArray):
interp_data = interp_data.astype(dataset.dtype)
elif isinstance(dataset, xr.Dataset):
for data_var_name in interp_data.data_vars:
interp_data[data_var_name] = interp_data[data_var_name].astype(dataset[data_var_name].dtype)
return interp_data
return interp_data

5 changes: 3 additions & 2 deletions data_cube_utilities/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ def create_local_dask_cluster(spare_mem='3Gb',
# start up a local cluster
num_physical_cpu = psutil.cpu_count(logical=False)
num_logical_cpu = psutil.cpu_count(logical=True)
start_local_dask_kwargs['n_workers'] = num_physical_cpu - 1
start_local_dask_kwargs['threads_per_worker'] = int(num_logical_cpu / num_physical_cpu)
num_logical_per_physical = num_logical_cpu / num_physical_cpu
start_local_dask_kwargs.setdefault('n_workers', num_physical_cpu - 1)
start_local_dask_kwargs.setdefault('threads_per_worker', int(num_logical_per_physical * start_local_dask_kwargs['n_workers']))
client = start_local_dask(mem_safety_margin=spare_mem, **start_local_dask_kwargs)

## Configure GDAL for s3 access
Expand Down

0 comments on commit 4a2ce91

Please sign in to comment.