Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GEOPY-1516 - Improve scaling of cross-gradient regularization #48

Merged
merged 11 commits into from
May 14, 2024
140 changes: 81 additions & 59 deletions SimPEG/regularization/cross_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,22 @@ def _model_gradients(self, models):
Compute gradient on faces
"""
gradients = []

for unit, wire in zip(self.units, self.wire_map):
model = wire * models
if unit == "radian":
gradient = []
components = "xyz" if self.regularization_mesh.dim == 3 else "xy"
for comp in components:
distances = getattr(self.regularization_mesh, f"cell_distances_{comp}")
distances = getattr(
self.regularization_mesh, f"cell_distances_{comp}"
)
cell_grad = getattr(
self.regularization_mesh, f"cell_gradient_{comp}"
)
gradient.append(coterminal(cell_grad * model * distances) / distances)
gradient.append(
coterminal(cell_grad * model * distances) / distances
)

gradient = np.hstack(gradient) / np.pi
else:
Expand Down Expand Up @@ -134,7 +139,9 @@ def calculate_cross_gradient(self, model, normalized=False, rtol=1e-6):
The norm of the cross gradient vector in each active cell.
"""
# Compute the gradients and concatenate components.
grad_m1, grad_m2 = self._calculate_gradient(model, normalized=normalized, rtol=rtol)
grad_m1, grad_m2 = self._calculate_gradient(
model, normalized=normalized, rtol=rtol
)

# for each model cell, compute the cross product of the gradient vectors.
cross_prod = np.cross(grad_m1, grad_m2)
Expand Down Expand Up @@ -166,13 +173,12 @@ def __call__(self, model):
(optional strategy, not used in this script)

"""
# m1, m2 = (wire * model for wire in self.wire_map)
Av = self._Av
G = self._G
g_m1, g_m2 = self._model_gradients(model)

return 0.5 * np.sum(
(Av @ g_m1**2) * (Av @ g_m2**2) - (Av @ (g_m1 * g_m2)) ** 2
np.sum((Av @ g_m1**2) * (Av @ g_m2**2) - (Av @ (g_m1 * g_m2)) ** 2)
)

def deriv(self, model):
Expand All @@ -189,88 +195,104 @@ def deriv(self, model):
G = self._G
g_m1, g_m2 = self._model_gradients(model)

return self.wire_map_deriv.T * np.r_[
deriv = np.r_[
(((Av @ g_m2**2) @ Av) * g_m1) @ G
- (((Av @ (g_m1 * g_m2)) @ Av) * g_m2) @ G,
(((Av @ g_m1**2) @ Av) * g_m2) @ G
- (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
]

return self.wire_map_deriv.T * deriv

def deriv2(self, model, v=None):
"""
Computes the Hessian of the cross-gradient.
r"""Hessian of the regularization function evaluated for the model provided.

:param list of numpy.ndarray ind_models: [model1, model2, ...]
:param numpy.ndarray v: vector to be multiplied by Hessian
Where :math:`\phi (\mathbf{m})` is the discrete regularization function (objective function),
this method evalutate and returns the second derivative (Hessian) with respect to the model parameters:
For a model :math:`\mathbf{m}` consisting of two physical properties such that:

:rtype: scipy.sparse.csr_matrix if v is None
numpy.ndarray if v is not None
:return Hessian matrix if v is None
Hessian multiplied by vector if v is not No
.. math::
\mathbf{m} = \begin{bmatrix} \mathbf{m_1} \\ \mathbf{m_2} \end{bmatrix}

The Hessian has the form:

.. math::
\frac{\partial^2 \phi}{\partial \mathbf{m}^2} =
\begin{bmatrix}
\dfrac{\partial^2 \phi}{\partial \mathbf{m_1}^2} &
\dfrac{\partial^2 \phi}{\partial \mathbf{m_1} \partial \mathbf{m_2}} \\
\dfrac{\partial^2 \phi}{\partial \mathbf{m_2} \partial \mathbf{m_1}} &
\dfrac{\partial^2 \phi}{\partial \mathbf{m_2}^2}
\end{bmatrix}

When a vector :math:`(\mathbf{v})` is supplied, the method returns the Hessian
times the vector:

.. math::
\frac{\partial^2 \phi}{\partial \mathbf{m}^2} \, \mathbf{v}

Parameters
----------
model : (n_param, ) numpy.ndarray
The model; a vector array containing all physical properties.
v : None, (n_param, ) numpy.ndarray (optional)
A numpy array to model the Hessian by.

Returns
-------
(n_param, n_param) scipy.sparse.csr_matrix | (n_param, ) numpy.ndarray
If the input argument *v* is ``None``, the Hessian
for the models provided is returned. If *v* is not ``None``,
the Hessian multiplied by the vector provided is returned.
"""
Av = self._Av
G = self._G

g_m1, g_m2 = self._model_gradients(model)

if v is None:
A = (
G.T
@ (
sp.diags(Av.T @ (Av @ g_m2**2))
- sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
)
@ G
)

C = (
G.T
@ (
sp.diags(Av.T @ (Av @ g_m1**2))
- sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
)
@ G
)
d11_mid = Av.T @ (Av @ g_m2**2)
d12_mid = -(Av.T @ (Av @ (g_m1 * g_m2)))
d22_mid = Av.T @ (Av @ g_m1**2)

B = None
BT = None
if v is None:
D11_mid = sp.diags(d11_mid)
D12_mid = sp.diags(d12_mid)
D22_mid = sp.diags(d22_mid)
if not self.approx_hessian:
# d_m1_d_m2
B = (
G.T
@ (
2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
- sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
- sp.diags(Av.T @ Av @ (g_m1 * g_m2))
)
@ G
D11_mid = D11_mid - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
D12_mid = (
D12_mid
+ 2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
- sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
)
BT = B.T
D22_mid = D22_mid - sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
D11 = G.T @ D11_mid @ G
D12 = G.T @ D12_mid @ G
D22 = G.T @ D22_mid @ G

return (
self.wire_map_deriv.T
@ sp.bmat([[D11, D12], [D12.T, D22]], format="csr")
* self.wire_map_deriv
) # factor of 2 from derviative of | grad m1 x grad m2 | ^2

return self.wire_map_deriv.T * sp.bmat([[A, B], [BT, C]], format="csr") * self.wire_map_deriv
else:
v1, v2 = (wire * v for wire in self.wire_map)

Gv1 = G @ v1
Gv2 = G @ v2

p1 = G.T @ (
(Av.T @ (Av @ g_m2**2)) * Gv1 - g_m2 * (Av.T @ (Av @ (g_m2 * Gv1)))
)
p2 = G.T @ (
(Av.T @ (Av @ g_m1**2)) * Gv2 - g_m1 * (Av.T @ (Av @ (g_m1 * Gv2)))
)

p1 = G.T @ (d11_mid * Gv1 + d12_mid * Gv2)
p2 = G.T @ (d12_mid * Gv1 + d22_mid * Gv2)
if not self.approx_hessian:
p1 += G.T @ (
2 * g_m1 * (Av.T @ (Av @ (g_m2 * Gv2)))
- g_m2 * (Av.T @ (Av @ (g_m1 * Gv2)))
- (Av.T @ (Av @ (g_m1 * g_m2))) * Gv2
-g_m2 * (Av.T @ (Av @ (g_m2 * Gv1))) # d11*v1 full addition
+ 2 * g_m1 * (Av.T @ (Av @ (g_m2 * Gv2))) # d12*v2 full addition
- g_m2 * (Av.T @ (Av @ (g_m1 * Gv2))) # d12*v2 continued
)

p2 += G.T @ (
2 * g_m2 * (Av.T @ (Av @ (g_m1 * Gv1)))
- g_m1 * (Av.T @ (Av @ (g_m2 * Gv1)))
- (Av.T @ (Av @ (g_m2 * g_m1))) * Gv1
-g_m1 * (Av.T @ (Av @ (g_m1 * Gv2))) # d22*v2 full addition
+ 2 * g_m2 * (Av.T @ (Av @ (g_m1 * Gv1))) # d12.T*v1 full addition
- g_m1 * (Av.T @ (Av @ (g_m2 * Gv1))) # d12.T*v1 fcontinued
)
return self.wire_map_deriv.T * np.r_[p1, p2]