-
Notifications
You must be signed in to change notification settings - Fork 106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixing issues related to dtype=object arrays in interpolation routines #1649
Changes from all commits
2b3edef
80b1d8e
30626fe
380ee09
c90044a
bd73b42
600fecd
3e3ea87
16474de
b6a912a
2828057
53c7760
77e17f3
e80c43d
3b38e7f
7c5e31f
9b3f4f4
de6bc52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
from builtins import object | ||
from functools import partial | ||
from itertools import product | ||
from warnings import warn | ||
|
||
import numpy as np | ||
|
||
|
@@ -119,7 +120,7 @@ def point_collocation(func, points, out=None, **kwargs): | |
>>> ys = [3, 4] | ||
>>> mesh = sparse_meshgrid(xs, ys) | ||
>>> def vec_valued(x): | ||
... return (x[0] - 1, 0, x[0] + x[1]) # broadcasting | ||
... return (x[0] - 1., 0., x[0] + x[1]) # broadcasting | ||
>>> # For a function with several output components, we must specify the | ||
>>> # shape explicitly in the `out_dtype` parameter | ||
>>> func1 = sampling_function( | ||
|
@@ -621,6 +622,13 @@ def _find_indices(self, x): | |
|
||
# iterate through dimensions | ||
for xi, cvec in zip(x, self.coord_vecs): | ||
try: | ||
xi = np.asarray(xi).astype(self.values.dtype, casting='safe') | ||
except TypeError: | ||
warn("Unable to infer accurate dtype for" | ||
+" interpolation coefficients, defaulting to `float`.") | ||
xi = np.asarray(xi, dtype=float) | ||
|
||
idcs = np.searchsorted(cvec, xi) - 1 | ||
|
||
idcs[idcs < 0] = 0 | ||
|
@@ -706,6 +714,8 @@ def _compute_nearest_weights_edge(idcs, ndist): | |
|
||
def _compute_linear_weights_edge(idcs, ndist): | ||
"""Helper for linear interpolation.""" | ||
ndist = np.asarray(ndist) | ||
|
||
# Get out-of-bounds indices from the norm_distances. Negative | ||
# means "too low", larger than or equal to 1 means "too high" | ||
lo = np.where(ndist < 0) | ||
|
@@ -799,7 +809,7 @@ def _evaluate(self, indices, norm_distances, out=None): | |
# axis, resulting in a loop of length 2**ndim | ||
for lo_hi, edge in zip(product(*([['l', 'h']] * len(indices))), | ||
product(*edge_indices)): | ||
weight = 1.0 | ||
weight = np.array([1.0], dtype=self.values.dtype) | ||
# TODO(kohr-h): determine best summation order from array strides | ||
for lh, w_lo, w_hi in zip(lo_hi, low_weights, high_weights): | ||
|
||
|
@@ -923,6 +933,23 @@ def _func_out_type(func): | |
|
||
return has_out, out_optional | ||
|
||
def _broadcast_nested_list(arr_lists, element_shape, ndim): | ||
""" A generalisation of `np.broadcast_to`, applied to an arbitrarily | ||
deep list (or tuple) eventually containing arrays or scalars. """ | ||
if isinstance(arr_lists, np.ndarray) or np.isscalar(arr_lists): | ||
if ndim == 1: | ||
# As usual, 1d is tedious to deal with. This | ||
# code deals with extra dimensions in result | ||
# components that stem from using x instead of | ||
# x[0] in a function. | ||
# Without this, broadcasting fails. | ||
shp = getattr(arr_lists, 'shape', ()) | ||
if shp and shp[0] == 1: | ||
arr_lists = arr_lists.reshape(arr_lists.shape[1:]) | ||
return np.broadcast_to(arr_lists, element_shape) | ||
else: | ||
return [_broadcast_nested_list(row, element_shape, ndim) for row in arr_lists] | ||
|
||
|
||
def sampling_function(func_or_arr, domain, out_dtype=None): | ||
"""Return a function that can be used for sampling. | ||
|
@@ -987,19 +1014,20 @@ def _default_oop(func_ip, x, **kwargs): | |
|
||
def _default_ip(func_oop, x, out, **kwargs): | ||
"""Default in-place variant of an out-of-place-only function.""" | ||
result = np.array(func_oop(x, **kwargs), copy=False) | ||
if result.dtype == object: | ||
result = func_oop(x, **kwargs) | ||
try: | ||
result = np.array(result, copy=False) | ||
except ValueError: | ||
# Different shapes encountered, need to broadcast | ||
flat_results = result.ravel() | ||
if is_valid_input_array(x, domain.ndim): | ||
scalar_out_shape = out_shape_from_array(x) | ||
elif is_valid_input_meshgrid(x, domain.ndim): | ||
scalar_out_shape = out_shape_from_meshgrid(x) | ||
else: | ||
raise TypeError('invalid input `x`') | ||
|
||
bcast_results = [np.broadcast_to(res, scalar_out_shape) | ||
for res in flat_results] | ||
bcast_results = _broadcast_nested_list(result, scalar_out_shape, domain.ndim) | ||
|
||
# New array that is flat in the `out_shape` axes, reshape it | ||
# to the final `out_shape + scalar_shape`, using the same | ||
# order ('C') as the initial `result.ravel()`. | ||
|
@@ -1332,33 +1360,17 @@ def dual_use_func(x, out=None, **kwargs): | |
elif tensor_valued: | ||
# The out object can be any array-like of objects with shapes | ||
# that should all be broadcastable to scalar_out_shape. | ||
results = np.array(out) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test relates to the function |
||
if results.dtype == object or scalar_in: | ||
# Some results don't have correct shape, need to | ||
# broadcast | ||
bcast_res = [] | ||
for res in results.ravel(): | ||
if ndim == 1: | ||
# As usual, 1d is tedious to deal with. This | ||
# code deals with extra dimensions in result | ||
# components that stem from using x instead of | ||
# x[0] in a function. | ||
# Without this, broadcasting fails. | ||
shp = getattr(res, 'shape', ()) | ||
if shp and shp[0] == 1: | ||
res = res.reshape(res.shape[1:]) | ||
bcast_res.append( | ||
np.broadcast_to(res, scalar_out_shape)) | ||
try: | ||
out_arr = np.asarray(out) | ||
except ValueError: | ||
out_arr = np.asarray(_broadcast_nested_list(out, scalar_out_shape, ndim=ndim)) | ||
|
||
out_arr = np.array(bcast_res, dtype=scalar_out_dtype) | ||
elif results.dtype != scalar_out_dtype: | ||
if out_arr.dtype != scalar_out_dtype: | ||
raise ValueError( | ||
'result is of dtype {}, expected {}' | ||
''.format(dtype_repr(results.dtype), | ||
''.format(dtype_repr(out_arr.dtype), | ||
dtype_repr(scalar_out_dtype)) | ||
) | ||
else: | ||
out_arr = results | ||
|
||
out = out_arr.reshape(out_shape) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1111,8 +1111,8 @@ def uniform_grid_fromintv(intv_prod, shape, nodes_on_bdry=True): | |
|
||
shape = normalized_scalar_param_list(shape, intv_prod.ndim, safe_int_conv) | ||
|
||
if np.shape(nodes_on_bdry) == (): | ||
nodes_on_bdry = ([(bool(nodes_on_bdry), bool(nodes_on_bdry))] * | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be a type hinting for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, but am unsure how best to do it – as I often am in Python, where there is no unambiguous "correct type". A direct version would be
That expresses fairly well the different ways the function can be called, but I reckon it looks daunting to the user. An alternative could be to define this union type separately:
This has the advantage that the type signature remains short, though it may also be less useful. And arguably, if we make a definition at all then it would be consequent to make it a dedicated class, though that seems overkill here. It is also debatable whether
more confusing than the version with To be fair, the Sphinx documentation is actually pretty clear about what the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is how I understand the nodes_on_bdry argument:
What do you think about that? :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Emvlt that's not how it currently works. Your examples should actually be written
So for example
or alternatively and IMO clearer
However, the specification can also be shortened to I'm not sure if anybody really need this flexibility just to make their user code a few characters shorter. We could use a simplified type hint and then just remark in the docs that it can also be shortened by collapsing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we will come to a good decision here and then, let's leave the interface as it is for now. But I opened an issue to discuss the use of type hints for the future. |
||
if isinstance(nodes_on_bdry, bool): | ||
nodes_on_bdry = ([(nodes_on_bdry, nodes_on_bdry)] * | ||
intv_prod.ndim) | ||
elif intv_prod.ndim == 1 and len(nodes_on_bdry) == 2: | ||
nodes_on_bdry = [nodes_on_bdry] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -278,11 +278,11 @@ def normalized_nodes_on_bdry(nodes_on_bdry, length): | |
>>> normalized_nodes_on_bdry([[True, False], False, True], length=3) | ||
[(True, False), (False, False), (True, True)] | ||
""" | ||
shape = np.shape(nodes_on_bdry) | ||
if shape == (): | ||
out_list = [(bool(nodes_on_bdry), bool(nodes_on_bdry))] * length | ||
elif length == 1 and shape == (2,): | ||
out_list = [(bool(nodes_on_bdry[0]), bool(nodes_on_bdry[1]))] | ||
if isinstance(nodes_on_bdry, bool): | ||
return [(nodes_on_bdry, nodes_on_bdry)] * length | ||
elif (length == 1 and len(nodes_on_bdry) == 2 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. bool(nodes_on_bdry) can be replaced by nodes_on_bdry |
||
and all(isinstance(d, bool) for d in nodes_on_bdry)): | ||
return [nodes_on_bdry[0], nodes_on_bdry[1]] | ||
elif len(nodes_on_bdry) == length: | ||
out_list = [] | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add a warning so the user knows the values are cast as floats?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in 53c7760.