Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interpolations for data operations #62

Draft
wants to merge 13 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions sasdata/data_util/interpolations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Interpolation functions for 1d data sets.
"""

import numpy as np
from numpy.typing import ArrayLike
from typing import Optional, Union


def linear(x_interp: ArrayLike, x: ArrayLike, y: ArrayLike, dy: Optional[ArrayLike] = None)\
        -> tuple[np.ndarray, Union[np.ndarray, None]]:
    """
    Computes linear interpolation of dataset (x, y) at the points x_interp.
    Error propagation is implemented when dy is provided.
    Requires that min(x) <= x_interp <= max(x)

    :param x_interp: points at which the interpolant is evaluated
    :param x: abscissae of the dataset (any order; sorted internally)
    :param y: ordinates of the dataset
    :param dy: optional uncertainties on y; used only when len(dy) == len(y)
    :return: (y_interp, dy_interp) where dy_interp is None when dy was not usable

    TODO: reductus package has a similar function in err1d if we add the additional dependency
    """
    x_interp = np.array(x_interp)
    sort = np.argsort(x)
    x = np.array(x)[sort]
    y = np.array(y)[sort]
    dy = np.array(dy)[sort] if (dy is not None and len(dy) == len(y)) else None

    # find out where the interpolated points fit into the existing data;
    # clip to [1, len(x)-1] so that x_interp == min(x) uses the first interval
    # (previously index_1 became -1 and wrapped to the last element) and
    # x_interp == max(x) uses the last interval
    index_2 = np.clip(np.searchsorted(x, x_interp), 1, len(x) - 1)
    index_1 = index_2 - 1

    # linear interpolation of new y points
    y_interp_1 = y[index_1] * (x_interp - x[index_2]) / (x[index_1] - x[index_2])
    y_interp_2 = y[index_2] * (x_interp - x[index_1]) / (x[index_2] - x[index_1])
    y_interp = y_interp_1 + y_interp_2

    # error propagation: the interpolant is a weighted sum of two independent
    # measurements, so variances add with squared weights
    if dy is not None:
        dy_interp_1 = dy[index_1] ** 2 * ((x_interp - x[index_2]) / (x[index_1] - x[index_2])) ** 2
        dy_interp_2 = dy[index_2] ** 2 * ((x_interp - x[index_1]) / (x[index_2] - x[index_1])) ** 2
        dy_interp = np.sqrt(dy_interp_1 + dy_interp_2)
    else:
        dy_interp = None

    return y_interp, dy_interp


def linear_scales(x_interp: ArrayLike,
                  x: ArrayLike,
                  y: ArrayLike,
                  dy: Optional[ArrayLike] = None,
                  scale: Optional[str] = "linear") -> tuple[np.ndarray, Union[np.ndarray, None]]:
    """
    Perform linear interpolation on different scales.
    Error propagation is implemented when dy is provided.

    Scale is set to "linear" by default.
    Setting scale to "log" will perform the interpolation of (log(x), log(y)) at log(x_interp); log(y_interp) will be
    converted back to y_interp in the return.

    :param x_interp: points at which the interpolant is evaluated
    :param x: abscissae of the dataset
    :param y: ordinates of the dataset
    :param dy: optional uncertainties on y
    :param scale: "linear" (default) or "log"
    :raise ValueError: when scale is neither "linear" nor "log"

    Returns (y_interp, dy_interp | None)
    """
    x = np.array(x)
    y = np.array(y)

    if scale == "linear":
        return linear(x_interp=x_interp, x=x, y=y, dy=dy)

    elif scale == "log":
        # carry relative errors through the log transform: d(log y) = dy / y
        dy = np.array(dy) / y if (dy is not None and len(dy) == len(y)) else None
        x_interp = np.log(x_interp)
        x = np.log(x)
        y = np.log(y)
        log_y_interp, log_dy_interp = linear(x_interp=x_interp, x=x, y=y, dy=dy)
        y_interp = np.exp(log_y_interp)
        # convert back: dy = y * d(log y)
        dy_interp = None if log_dy_interp is None else y_interp * log_dy_interp
        return y_interp, dy_interp

    # previously an unrecognized scale silently returned None; fail loudly instead
    raise ValueError(f"Unknown scale '{scale}'; expected 'linear' or 'log'.")


177 changes: 106 additions & 71 deletions sasdata/dataloader/data_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@
# TODO: This module should be independent of plottables. We should write
# an adapter class for plottables when needed.

import logging
import math
from math import fabs
import copy

import numpy as np
from typing import Optional

from sasdata.data_util.uncertainty import Uncertainty
from sasdata.data_util import interpolations

logger = logging.getLogger(__name__)


class plottable_1D(object):
Expand All @@ -52,22 +57,25 @@ class plottable_1D(object):
_yaxis = ''
_yunit = ''

# operation data
_operation = None # Data1D object that stores points used for operations

def __init__(self, x, y, dx=None, dy=None, dxl=None, dxw=None,
lam=None, dlam=None):
self.x = np.asarray(x)
self.y = np.asarray(y)
if dx is not None:
self.dx = np.asarray(dx)
self.dx = np.asarray(dx, dtype=float)
if dy is not None:
self.dy = np.asarray(dy)
self.dy = np.asarray(dy, dtype=float)
if dxl is not None:
self.dxl = np.asarray(dxl)
self.dxl = np.asarray(dxl, dtype=float)
if dxw is not None:
self.dxw = np.asarray(dxw)
self.dxw = np.asarray(dxw, dtype=float)
if lam is not None:
self.lam = np.asarray(lam)
self.lam = np.asarray(lam, dtype=float)
if dlam is not None:
self.dlam = np.asarray(dlam)
self.dlam = np.asarray(dlam, dtype=float)

def xaxis(self, label, unit):
"""
Expand Down Expand Up @@ -791,7 +799,7 @@ def clone_without_data(self, length=0, clone=None):
dy = np.zeros(length)
lam = np.zeros(length)
dlam = np.zeros(length)
clone = Data1D(x, y, lam=lam, dx=dx, dy=dy, dlam=dlam)
clone = Data1D(x, y, lam=lam, dx=dx, dy=dy, dlam=dlam, isSesans=self.isSesans)

clone.title = self.title
clone.run = self.run
Expand Down Expand Up @@ -828,84 +836,111 @@ def copy_from_datainfo(self, data1d):
self.yaxis(data1d._yaxis, data1d._yunit)
self.title = data1d.title

def _validity_check(self, other):
def _interpolation_operation(self, other, tolerance: Optional[float] = 0.01, scale: Optional[str] = 'log'):
"""
Checks that the data lengths are compatible.
Checks that the x vectors are compatible.
Returns errors vectors equal to original
errors vectors if they were present or vectors
of zeros when none was found.
Checks that x values for two datasets have overlapping ranges for an operation.
If so, x, y, and dy will be updated to values used in the operation.
Resolutions, including dx, dxl, and dxw, are not kept through in the operation.
Wavelength parameters for SESANS datasets are also not kept through the operation.
:param other: other data set for operation
:return: dy for self, dy for other [numpy arrays]
:raise ValueError: when lengths are not compatible
:param other: other data for operation
:param tolerance: acceptable deviation in matching x data points, default 0.01 (equivalent to 1 % deviation)
:param scale: default 'log', performs interpolation on log scale; use 'linear' for sesans data
:raise ValueError: x-ranges of self and other do not overlap
"""
dy_other = None
if isinstance(other, Data1D):
# Check that data lengths are the same
if len(self.x) != len(other.x) or len(self.y) != len(other.y):
msg = "Unable to perform operation: data length are not equal"
# check if ranges of self.x and other.x overlap at all
if np.min(other.x) > np.max(self.x) or np.max(other.x) < np.min(self.x):
msg = "Unable to perform operation: x ranges of two datasets do not overlap."
raise ValueError(msg)
# Here we could also extrapolate between data points
TOLERANCE = 0.01
for i in range(len(self.x)):
if fabs(self.x[i] - other.x[i]) > self.x[i]*TOLERANCE:
msg = "Incompatible data sets: x-values do not match"
raise ValueError(msg)

# Check that the other data set has errors, otherwise
# create zero vector
dy_other = other.dy
if other.dy is None or (len(other.dy) != len(other.y)):
dy_other = np.zeros(len(other.y))

# Check that we have errors, otherwise create zero vector
dy = self.dy
if self.dy is None or (len(self.dy) != len(self.y)):
dy = np.zeros(len(self.y))
# check if data points match (within tolerance) across the overlap range of self.x and other.x
self_overlap_bool = np.abs((self.x[:, None] - other.x[None, :]) / self.x[:, None]).min(axis=1) <= tolerance
self_overlap_index = np.flatnonzero(self_overlap_bool)
if len(self_overlap_index) == self_overlap_index.max() - self_overlap_index.min() + 1:
# all the points in overlap region of self.x found close matches in overlap region of other.x
Comment on lines +857 to +860
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really understand what exactly this is doing (I'm just a poor white belt) but it does seem that the goal here was to either do no interpolation if the two data sets have exactly the same number of points AND all the points match up in x within tolerance, otherwise ALL the data will be interpolated even if the x points match up? Given the functionality review of the paired sasview PR, is this really what is happening?

x_op = self.x[self_overlap_bool]
other._operation = other.clone_without_data(x_op.size)
other._operation.copy_from_datainfo(other)
Comment on lines +861 to +863
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so, if my understanding from the previous comment is correct, why do we need more than x_op = self.x? i.e. why is it instead x_op = self.x[self_overlap_bool]?

If it is because my understanding is incorrect, and two data sets with different sizes and q ranges can still make it through to this code if there are matching x (within tolerance), what happens when setting other._operation to be that length before copying the data from other and the size of the other data is different?

Sorry for my naive questions but I got a bit lost here.

other_overlap_index = (np.abs(x_op[:, None] - other.x[None, :])).argmin(axis=1)
y_op_other = np.copy(other.y)[other_overlap_index]
dy_op_other = np.zeros(x_op.size) if (other.dy is None or other.dy.size == 0) \
else np.copy(other.dy)[other_overlap_index]
else:
# not all the points found a close match so implementing interpolation on log scale
self_overlap_bool = (self.x >= max([self.x.min(), other.x.min()])) & (self.x <= min([self.x.max(), other.x.max()]))
self_overlap_index = np.flatnonzero(self_overlap_bool)
x_op = self.x[self_overlap_bool]
other._operation = other.clone_without_data(x_op.size)
other._operation.copy_from_datainfo(other)
if scale == 'log':
y_op_other, dy_op_other = \
interpolations.linear_scales(x_interp=x_op, x=other.x, y=other.y, dy=other.dy, scale='log')
else:
y_op_other, dy_op_other = interpolations.linear(x_interp=x_op, x=other.x, y=other.y, dy=other.dy)

# setting resolutions and wavelength parameters to None as these parameters are not carried through
dx_op_other = None
dxl_op_other = None
dxw_op_other = None
lam_op_other = None
dlam_op_other = None

other._operation.x = x_op
other._operation.y = y_op_other
other._operation.dy = dy_op_other if dy_op_other is not None else np.zeros(x_op.size)
other._operation.dx = dx_op_other
other._operation.dxl = dxl_op_other
other._operation.dxw = dxw_op_other
other._operation.lam = lam_op_other
other._operation.dlam = dlam_op_other

return dy, dy_other
else:
# other is something besides Data1D and so all points in self should be used for operation
self_overlap_bool = np.ones(self.x.size, dtype=bool)
self_overlap_index = np.arange(0, self.x.size+1, 1)

# setup _operation Data1D for self
self._operation = self.clone_without_data(self_overlap_index.size)
self._operation.copy_from_datainfo(self)
self._operation.x = self.x[self_overlap_bool]
self._operation.y = self.y[self_overlap_bool]
self._operation.dy = np.zeros(self._operation.y.size, dtype=float) if (self.dy is None or self.dy.size==0) \
else self.dy[self_overlap_bool]
self._operation.dx = None
self._operation.dxl = None
self._operation.dxw = None
self._operation.lam = None
self._operation.dlam = None

def _perform_operation(self, other, operation):
"""
"""
# First, check the data compatibility
dy, dy_other = self._validity_check(other)
result = self.clone_without_data(len(self.x))
if self.dxw is None:
result.dxw = None
# Check for compatibility of the x-ranges and populate the data used for the operation
# sets up _operation for both datasets
# interpolation will be implemented on the 'other' dataset as needed
if self.isSesans:
self._interpolation_operation(other, scale='linear')
Comment on lines +921 to +922
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there should be a second if here: if other.isSesans then the linear interpolation otherwise raise error with message: the two data types must be the same .. or some such. Maybe more specific about not mixing sas and sesans data?

else:
result.dxw = np.zeros(len(self.x))
if self.dxl is None:
result.dxl = None
else:
result.dxl = np.zeros(len(self.x))

for i in range(len(self.x)):
result.x[i] = self.x[i]
if self.dx is not None and len(self.x) == len(self.dx):
result.dx[i] = self.dx[i]
if self.dxw is not None and len(self.x) == len(self.dxw):
result.dxw[i] = self.dxw[i]
if self.dxl is not None and len(self.x) == len(self.dxl):
result.dxl[i] = self.dxl[i]

a = Uncertainty(self.y[i], dy[i]**2)
self._interpolation_operation(other, scale='log')

result = self.clone_without_data(self._operation.x.size)
result.copy_from_datainfo(self)
result.x = np.copy(self._operation.x)
result.y = np.zeros(self._operation.x.size)
result.dy = np.zeros(self._operation.x.size)
result.dx = None if self._operation.dx is None else np.copy(self._operation.dx)
result.dxl = None if self._operation.dxl is None else np.copy(self._operation.dxl)
result.dxw = None if self._operation.dxw is None else np.copy(self._operation.dxw)
result.lam = None if self._operation.lam is None else np.copy(self._operation.lam)
result.dlam = None if self._operation.dlam is None else np.copy(self._operation.dlam)

for i in range(result.x.size):

a = Uncertainty(self._operation.y[i], self._operation.dy[i]**2)
if isinstance(other, Data1D):
b = Uncertainty(other.y[i], dy_other[i]**2)
if other.dx is not None:
result.dx[i] *= self.dx[i]
result.dx[i] += (other.dx[i]**2)
result.dx[i] /= 2
result.dx[i] = math.sqrt(result.dx[i])
if result.dxl is not None and other.dxl is not None:
result.dxl[i] *= self.dxl[i]
result.dxl[i] += (other.dxl[i]**2)
result.dxl[i] /= 2
result.dxl[i] = math.sqrt(result.dxl[i])
b = Uncertainty(other._operation.y[i], other._operation.dy[i]**2)
else:
b = other

output = operation(a, b)
result.y[i] = output.x
result.dy[i] = math.sqrt(math.fabs(output.variance))
Expand Down
Loading
Loading