Skip to content

Commit

Permalink
January 2021 CEOS Data Cube Utilities Release 2.22 Hotfix
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrattz committed Mar 16, 2021
2 parents 8887d1f + 7dc9a7e commit 4a2ce91
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
22 changes: 16 additions & 6 deletions data_cube_utilities/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_bin_intervals(data, num_bins):


def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
frac_res=None, abs_res=None):
frac_res=None, abs_res=None, val_interp_method='linear'):
"""
Scales the resolution of an `xarray.Dataset` or `xarray.DataArray`
to a fraction of its original resolution or an absolute resolution.
Expand All @@ -46,6 +46,10 @@ def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
abs_res: list-like
A list-like of the number of pixels for the x and y axes, respectively.
Overrides `frac_res` if specified.
val_interp_method: str
The interpolation method for the values. This is the `method` parameter
input to `xarray.Dataset.interp()` after the coordinates have been interpolated.
Can be one of ['nearest', 'linear'].
Returns
-------
Expand All @@ -65,7 +69,8 @@ def xr_scale_res(dataset, x_coord='longitude', y_coord='latitude',
interp_param = 'num'
x_px, y_px = abs_res
return xr_interp(dataset, {x_coord: ('interp', {interp_param: x_px}), \
y_coord: ('interp', {interp_param: y_px})})
y_coord: ('interp', {interp_param: y_px})},
val_interp_method=val_interp_method)


def xr_sel_time_by_bin(dataset, num_bins, time_coord='time'):
Expand All @@ -92,15 +97,15 @@ def xr_sel_time_by_bin(dataset, num_bins, time_coord='time'):
return xr_interp(dataset, {time_coord: ('bin', {'num': num_bins})})


def xr_interp(dataset, interp_config):
def xr_interp(dataset, interp_config, val_interp_method='nearest'):
"""
Interpolates an `xarray.Dataset` or `xarray.DataArray`.
This is often done to match dimensions between xarray objects or
downsample to reduce memory consumption.
First, coordinates are interpolated according to `interp_config`.
Then the data values for those interpolated coordinates are obtained
through nearest neighbors interpolation.
through interpolation.
Parameters
----------
Expand All @@ -121,6 +126,10 @@ def xr_interp(dataset, interp_config):
The following is an example value:
`{'latitude':('interp',{'frac':0.5}),'longitude':('interp',{'frac':0.5}),
'time':('bin',{'num':20})}`.
val_interp_method: str
The interpolation method for the values. This is the `method` parameter
input to `xarray.Dataset.interp()` after the coordinates have been interpolated.
Can be one of ['nearest', 'linear'].
Returns
-------
Expand Down Expand Up @@ -160,11 +169,12 @@ def xr_interp(dataset, interp_config):
interp_vals = np.array(list(map(_scalar_to_n64_datetime, interp_vals)))
new_coords[dim] = interp_vals
# Interpolate data values using the method given by `val_interp_method`.
interp_data = dataset.interp(coords=new_coords, method='nearest')
interp_data = dataset.interp(coords=new_coords, method=val_interp_method)
# xarray.Dataset.interp() converts to dtype float64, so cast back to the original dtypes.
if isinstance(dataset, xr.DataArray):
interp_data = interp_data.astype(dataset.dtype)
elif isinstance(dataset, xr.Dataset):
for data_var_name in interp_data.data_vars:
interp_data[data_var_name] = interp_data[data_var_name].astype(dataset[data_var_name].dtype)
return interp_data
return interp_data

5 changes: 3 additions & 2 deletions data_cube_utilities/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ def create_local_dask_cluster(spare_mem='3Gb',
# start up a local cluster
num_physical_cpu = psutil.cpu_count(logical=False)
num_logical_cpu = psutil.cpu_count(logical=True)
start_local_dask_kwargs['n_workers'] = num_physical_cpu - 1
start_local_dask_kwargs['threads_per_worker'] = int(num_logical_cpu / num_physical_cpu)
num_logical_per_physical = num_logical_cpu / num_physical_cpu
start_local_dask_kwargs.setdefault('n_workers', num_physical_cpu - 1)
start_local_dask_kwargs.setdefault('threads_per_worker', int(num_logical_per_physical * start_local_dask_kwargs['n_workers']))
client = start_local_dask(mem_safety_margin=spare_mem, **start_local_dask_kwargs)

## Configure GDAL for s3 access
Expand Down

0 comments on commit 4a2ce91

Please sign in to comment.