From b0f9a9034815a30cce6dbb1b25647af65d35babd Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Sat, 11 May 2024 12:33:22 -0700
Subject: [PATCH 01/10] Use full volume scaling

---
 SimPEG/regularization/cross_gradient.py | 35 +++++++++++++++++--------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index cae4714003..a7d507b511 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -34,7 +34,7 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
-        self._Av = sp.diags(np.sqrt(regmesh.vol)) * regmesh.average_face_to_cell
+        self._Av = sp.diags(regmesh.vol) * regmesh.average_face_to_cell
 
     @property
     def approx_hessian(self):
@@ -60,11 +60,15 @@ def _model_gradients(self, models):
                 gradient = []
                 components = "xyz" if self.regularization_mesh.dim == 3 else "xy"
                 for comp in components:
-                    distances = getattr(self.regularization_mesh, f"cell_distances_{comp}")
+                    distances = getattr(
+                        self.regularization_mesh, f"cell_distances_{comp}"
+                    )
                     cell_grad = getattr(
                         self.regularization_mesh, f"cell_gradient_{comp}"
                     )
-                    gradient.append(coterminal(cell_grad * model * distances) / distances)
+                    gradient.append(
+                        coterminal(cell_grad * model * distances) / distances
+                    )
 
                 gradient = np.hstack(gradient) / np.pi
             else:
@@ -134,7 +138,9 @@ def calculate_cross_gradient(self, model, normalized=False, rtol=1e-6):
             The norm of the cross gradient vector in each active cell.
         """
         # Compute the gradients and concatenate components.
-        grad_m1, grad_m2 = self._calculate_gradient(model, normalized=normalized, rtol=rtol)
+        grad_m1, grad_m2 = self._calculate_gradient(
+            model, normalized=normalized, rtol=rtol
+        )
 
         # for each model cell, compute the cross product of the gradient vectors.
         cross_prod = np.cross(grad_m1, grad_m2)
@@ -189,12 +195,15 @@ def deriv(self, model):
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
 
-        return self.wire_map_deriv.T * np.r_[
-            (((Av @ g_m2**2) @ Av) * g_m1) @ G
-            - (((Av @ (g_m1 * g_m2)) @ Av) * g_m2) @ G,
-            (((Av @ g_m1**2) @ Av) * g_m2) @ G
-            - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
-        ]
+        return (
+            self.wire_map_deriv.T
+            * np.r_[
+                (((Av @ g_m2**2) @ Av) * g_m1) @ G
+                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m2) @ G,
+                (((Av @ g_m1**2) @ Av) * g_m2) @ G
+                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
+            ]
+        )
 
     def deriv2(self, model, v=None):
         """
@@ -247,7 +256,11 @@ def deriv2(self, model, v=None):
                 )
                 BT = B.T
 
-            return self.wire_map_deriv.T * sp.bmat([[A, B], [BT, C]], format="csr") * self.wire_map_deriv
+            return (
+                self.wire_map_deriv.T
+                * sp.bmat([[A, B], [BT, C]], format="csr")
+                * self.wire_map_deriv
+            )
         else:
             v1, v2 = (wire * v for wire in self.wire_map)
 

From 044e85e08865237e7ead9a547b7015bd1c7c3fe6 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Sun, 12 May 2024 17:46:43 -0700
Subject: [PATCH 02/10] Pass sensitivity weights to cross-gradient

---
 SimPEG/directives/directives.py         |  8 +--
 SimPEG/regularization/cross_gradient.py | 85 ++++++++++++++++++++-----
 2 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/SimPEG/directives/directives.py b/SimPEG/directives/directives.py
index 71f1ec6cdf..b674253ff4 100644
--- a/SimPEG/directives/directives.py
+++ b/SimPEG/directives/directives.py
@@ -2780,10 +2780,10 @@ def update(self):
 
         # Add sensitivity weighting to all model objective functions
         for reg in self.reg.objfcts:
-            if not isinstance(reg, BaseSimilarityMeasure):
-                sub_regs = getattr(reg, "objfcts", [reg])
-                for sub_reg in sub_regs:
-                    sub_reg.set_weights(sensitivity=sub_reg.mapping * wr)
+            # if not isinstance(reg, BaseSimilarityMeasure):
+            sub_regs = getattr(reg, "objfcts", [reg])
+            for sub_reg in sub_regs:
+                sub_reg.set_weights(sensitivity=sub_reg.mapping * wr)
 
     def validate(self, directiveList):
         """Validate directive against directives list.
diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index a7d507b511..cd9a051518 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -30,11 +30,12 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         self._units = ["metric", "metric"]
         self.normalize = normalize
         regmesh = self.regularization_mesh
+        self.set_weights(volume=self.regularization_mesh.vol)
 
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
-        self._Av = sp.diags(regmesh.vol) * regmesh.average_face_to_cell
+        self._Av = regmesh.average_face_to_cell
 
     @property
     def approx_hessian(self):
@@ -173,12 +174,15 @@ def __call__(self, model):
 
         """
         # m1, m2 = (wire * model for wire in self.wire_map)
+        W1 = self.W[0]
+        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
 
         return 0.5 * np.sum(
-            (Av @ g_m1**2) * (Av @ g_m2**2) - (Av @ (g_m1 * g_m2)) ** 2
+            (W1.T @ W1 @ Av @ g_m1**2) * (W2.T @ W2 @ Av @ g_m2**2)
+            - ((W1 @ Av @ g_m1) * (W2 @ Av @ g_m2)) ** 2
         )
 
     def deriv(self, model):
@@ -191,6 +195,8 @@ def deriv(self, model):
         :return: result: gradient of the cross-gradient with respect to model1, model2
 
         """
+        W1 = self.W[0]
+        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
@@ -198,10 +204,10 @@ def deriv(self, model):
         return (
             self.wire_map_deriv.T
             * np.r_[
-                (((Av @ g_m2**2) @ Av) * g_m1) @ G
-                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m2) @ G,
-                (((Av @ g_m1**2) @ Av) * g_m2) @ G
-                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
+                (((W1 @ Av @ g_m2**2) @ W1 @ Av) * g_m1) @ G
+                - (((W1 @ Av @ (g_m1 * g_m2)) @ W1 @ Av) * g_m2) @ G,
+                (((W2 @ Av @ g_m1**2) @ W2 @ Av) * g_m2) @ G
+                - (((W2 @ Av @ (g_m1 * g_m2)) @ W2 @ Av) * g_m1) @ G,
             ]
         )
 
@@ -218,6 +224,8 @@ def deriv2(self, model, v=None):
                 Hessian multiplied by vector if v is not No
 
         """
+        W1 = self.W[0]
+        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
@@ -226,8 +234,8 @@ def deriv2(self, model, v=None):
             A = (
                 G.T
                 @ (
-                    sp.diags(Av.T @ (Av @ g_m2**2))
-                    - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
+                    sp.diags(Av.T @ W1.T @ W1 @ (Av @ g_m2**2))
+                    - sp.diags(g_m2) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m2)
                 )
                 @ G
             )
@@ -235,8 +243,8 @@ def deriv2(self, model, v=None):
             C = (
                 G.T
                 @ (
-                    sp.diags(Av.T @ (Av @ g_m1**2))
-                    - sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
+                    sp.diags(Av.T @ W2.T @ W2 @ (Av @ g_m1**2))
+                    - sp.diags(g_m1) @ Av.T @ W2.T @ W2 @ Av @ sp.diags(g_m1)
                 )
                 @ G
             )
@@ -248,9 +256,11 @@ def deriv2(self, model, v=None):
                 B = (
                     G.T
                     @ (
-                        2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
-                        - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
-                        - sp.diags(Av.T @ Av @ (g_m1 * g_m2))
+                        2 * sp.diags(g_m1) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m2)
+                        - sp.diags(g_m2) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m1)
+                        - sp.diags(
+                            Av.T @ ((W2.T @ W2 @ Av @ g_m2) * ((W1.T @ W1 @ Av @ g_m1)))
+                        )
                     )
                     @ G
                 )
@@ -268,10 +278,12 @@ def deriv2(self, model, v=None):
             Gv2 = G @ v2
 
             p1 = G.T @ (
-                (Av.T @ (Av @ g_m2**2)) * Gv1 - g_m2 * (Av.T @ (Av @ (g_m2 * Gv1)))
+                (Av.T @ W1.T @ W1 @ (Av @ g_m2**2)) * Gv1
+                - g_m2 * (Av.T @ W1.T @ W1 @ (Av @ (g_m2 * Gv1)))
             )
             p2 = G.T @ (
-                (Av.T @ (Av @ g_m1**2)) * Gv2 - g_m1 * (Av.T @ (Av @ (g_m1 * Gv2)))
+                (Av.T @ W2.T @ W2 @ (Av @ g_m1**2)) * Gv2
+                - g_m1 * (Av.T @ W2.T @ W2 @ (Av @ (g_m1 * Gv2)))
             )
 
             if not self.approx_hessian:
@@ -287,3 +299,46 @@ def deriv2(self, model, v=None):
                     - (Av.T @ (Av @ (g_m2 * g_m1))) * Gv1
                 )
             return self.wire_map_deriv.T * np.r_[p1, p2]
+
+    def set_weights(self, **weights):
+        """Adds (or updates) the specified weights to the regularization
+
+        Parameters:
+        -----------
+        **kwargs : key, numpy.ndarray
+            Each keyword argument is added to the weights used by the regularization.
+            They can be accessed with their keyword argument.
+
+        Examples
+        --------
+        >>> import discretize
+        >>> from SimPEG.regularization import Smallness
+        >>> mesh = discretize.TensorMesh([2, 3, 2])
+        >>> reg = Smallness(mesh)
+        >>> reg.set_weights(my_weight=np.ones(mesh.n_cells))
+        >>> reg.get_weights('my_weight')
+        array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
+        """
+        for key, values in weights.items():
+            # values = validate_ndarray_with_shape(
+            #     "weights", values, shape=self._weights_shapes, dtype=float
+            # )
+            if values.shape == self._weights_shapes:
+                values = np.r_[values, values]
+
+            self._weights[key] = values
+        self._W = None
+
+    @property
+    def W(self) -> tuple:
+        """
+        Weighting matrix
+        """
+        if getattr(self, "_W", None) is None:
+            weights = np.prod(list(self._weights.values()), axis=0)
+
+            self._W = (
+                sp.diags(weights[self.regularization_mesh.nC :]),
+                sp.diags(weights[: self.regularization_mesh.nC]),
+            )
+        return self._W

From 256d476930679cba8335924a7412ff34b902c3e6 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Sun, 12 May 2024 20:03:09 -0700
Subject: [PATCH 03/10] Simplify implementation

---
 SimPEG/regularization/cross_gradient.py | 59 +++++++++++--------------
 1 file changed, 27 insertions(+), 32 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index cd9a051518..11e2a90e1a 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -36,6 +36,10 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
         self._Av = regmesh.average_face_to_cell
+        acf = [regmesh.aveCC2Fx, regmesh.aveCC2Fy]
+        if regmesh.dim == 3:
+            acf.append(regmesh.aveCC2Fz)
+        self._average_cell_to_faces = sp.vstack(acf)
 
     @property
     def approx_hessian(self):
@@ -55,7 +59,8 @@ def _model_gradients(self, models):
         Compute gradient on faces
         """
         gradients = []
-        for unit, wire in zip(self.units, self.wire_map):
+
+        for unit, wire, weights in zip(self.units, self.wire_map, self.W):
             model = wire * models
             if unit == "radian":
                 gradient = []
@@ -75,7 +80,7 @@ def _model_gradients(self, models):
             else:
                 gradient = self._G @ model
 
-            gradients.append(gradient)
+            gradients.append(weights @ gradient)
 
         return gradients
 
@@ -174,15 +179,12 @@ def __call__(self, model):
 
         """
         # m1, m2 = (wire * model for wire in self.wire_map)
-        W1 = self.W[0]
-        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
 
         return 0.5 * np.sum(
-            (W1.T @ W1 @ Av @ g_m1**2) * (W2.T @ W2 @ Av @ g_m2**2)
-            - ((W1 @ Av @ g_m1) * (W2 @ Av @ g_m2)) ** 2
+            (Av @ (g_m1) ** 2) * (Av @ (g_m2) ** 2) - ((Av @ g_m1) * (Av @ g_m2)) ** 2
         )
 
     def deriv(self, model):
@@ -195,8 +197,6 @@ def deriv(self, model):
         :return: result: gradient of the cross-gradient with respect to model1, model2
 
         """
-        W1 = self.W[0]
-        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
@@ -204,10 +204,10 @@ def deriv(self, model):
         return (
             self.wire_map_deriv.T
             * np.r_[
-                (((W1 @ Av @ g_m2**2) @ W1 @ Av) * g_m1) @ G
-                - (((W1 @ Av @ (g_m1 * g_m2)) @ W1 @ Av) * g_m2) @ G,
-                (((W2 @ Av @ g_m1**2) @ W2 @ Av) * g_m2) @ G
-                - (((W2 @ Av @ (g_m1 * g_m2)) @ W2 @ Av) * g_m1) @ G,
+                (((Av @ (g_m2) ** 2) @ Av) * g_m1) @ G
+                - (((Av @ ((g_m1) * (g_m2))) @ Av) * g_m2) @ G,
+                (((Av @ (g_m1) ** 2) @ Av) * g_m2) @ G
+                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
             ]
         )
 
@@ -224,8 +224,6 @@ def deriv2(self, model, v=None):
                 Hessian multiplied by vector if v is not No
 
         """
-        W1 = self.W[0]
-        W2 = self.W[1]
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
@@ -234,8 +232,8 @@ def deriv2(self, model, v=None):
             A = (
                 G.T
                 @ (
-                    sp.diags(Av.T @ W1.T @ W1 @ (Av @ g_m2**2))
-                    - sp.diags(g_m2) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m2)
+                    sp.diags(Av.T @ (Av @ (g_m2) ** 2))
+                    - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
                 )
                 @ G
             )
@@ -243,8 +241,8 @@ def deriv2(self, model, v=None):
             C = (
                 G.T
                 @ (
-                    sp.diags(Av.T @ W2.T @ W2 @ (Av @ g_m1**2))
-                    - sp.diags(g_m1) @ Av.T @ W2.T @ W2 @ Av @ sp.diags(g_m1)
+                    sp.diags(Av.T @ (Av @ (g_m1) ** 2))
+                    - sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
                 )
                 @ G
             )
@@ -256,11 +254,9 @@ def deriv2(self, model, v=None):
                 B = (
                     G.T
                     @ (
-                        2 * sp.diags(g_m1) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m2)
-                        - sp.diags(g_m2) @ Av.T @ W1.T @ W1 @ Av @ sp.diags(g_m1)
-                        - sp.diags(
-                            Av.T @ ((W2.T @ W2 @ Av @ g_m2) * ((W1.T @ W1 @ Av @ g_m1)))
-                        )
+                        2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
+                        - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
+                        - sp.diags(Av.T @ ((Av @ g_m2) * ((Av @ g_m1))))
                     )
                     @ G
                 )
@@ -278,12 +274,10 @@ def deriv2(self, model, v=None):
             Gv2 = G @ v2
 
             p1 = G.T @ (
-                (Av.T @ W1.T @ W1 @ (Av @ g_m2**2)) * Gv1
-                - g_m2 * (Av.T @ W1.T @ W1 @ (Av @ (g_m2 * Gv1)))
+                (Av.T @ (Av @ g_m2**2)) * Gv1 - g_m2 * (Av.T @ (Av @ (g_m2 * Gv1)))
             )
             p2 = G.T @ (
-                (Av.T @ W2.T @ W2 @ (Av @ g_m1**2)) * Gv2
-                - g_m1 * (Av.T @ W2.T @ W2 @ (Av @ (g_m1 * Gv2)))
+                (Av.T @ (Av @ g_m1**2)) * Gv2 - g_m1 * (Av.T @ (Av @ (g_m1 * Gv2)))
             )
 
             if not self.approx_hessian:
@@ -320,9 +314,6 @@ def set_weights(self, **weights):
         array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
         """
         for key, values in weights.items():
-            # values = validate_ndarray_with_shape(
-            #     "weights", values, shape=self._weights_shapes, dtype=float
-            # )
             if values.shape == self._weights_shapes:
                 values = np.r_[values, values]
 
@@ -338,7 +329,11 @@ def W(self) -> tuple:
             weights = np.prod(list(self._weights.values()), axis=0)
 
             self._W = (
-                sp.diags(weights[self.regularization_mesh.nC :]),
-                sp.diags(weights[: self.regularization_mesh.nC]),
+                sp.diags(
+                    self._average_cell_to_faces @ weights[self.regularization_mesh.nC :]
+                ),
+                sp.diags(
+                    self._average_cell_to_faces @ weights[: self.regularization_mesh.nC]
+                ),
             )
         return self._W

From cea55d08de449076c1a033c20048d5e17ba3f730 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Sun, 12 May 2024 22:09:12 -0700
Subject: [PATCH 04/10] Roll back sens weights on cross

---
 SimPEG/regularization/cross_gradient.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 11e2a90e1a..6bcf78909e 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -35,7 +35,7 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
-        self._Av = regmesh.average_face_to_cell
+        self._Av = sp.diags(np.sqrt(regmesh.vol)) * regmesh.average_face_to_cell
         acf = [regmesh.aveCC2Fx, regmesh.aveCC2Fy]
         if regmesh.dim == 3:
             acf.append(regmesh.aveCC2Fz)
@@ -80,7 +80,7 @@ def _model_gradients(self, models):
             else:
                 gradient = self._G @ model
 
-            gradients.append(weights @ gradient)
+            gradients.append(gradient)
 
         return gradients
 

From 1c3b266fa8e35613b016b69d7c3c30c2efd3a422 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Mon, 13 May 2024 00:03:12 -0700
Subject: [PATCH 05/10] Fix bug introduced in calcs

---
 SimPEG/regularization/cross_gradient.py | 153 ++++++++++++++----------
 1 file changed, 88 insertions(+), 65 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 6bcf78909e..ad188f7e40 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -29,8 +29,10 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         self.approx_hessian = approx_hessian
         self._units = ["metric", "metric"]
         self.normalize = normalize
+
         regmesh = self.regularization_mesh
         self.set_weights(volume=self.regularization_mesh.vol)
+        self.scaling = np.ones(2 * regmesh.nC)
 
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
@@ -179,12 +181,13 @@ def __call__(self, model):
 
         """
         # m1, m2 = (wire * model for wire in self.wire_map)
+        sp.diags(self.scaling)
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
 
         return 0.5 * np.sum(
-            (Av @ (g_m1) ** 2) * (Av @ (g_m2) ** 2) - ((Av @ g_m1) * (Av @ g_m2)) ** 2
+            np.sum((Av @ g_m1**2) * (Av @ g_m2**2) - (Av @ (g_m1 * g_m2)) ** 2)
         )
 
     def deriv(self, model):
@@ -201,98 +204,118 @@ def deriv(self, model):
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
 
-        return (
-            self.wire_map_deriv.T
-            * np.r_[
-                (((Av @ (g_m2) ** 2) @ Av) * g_m1) @ G
-                - (((Av @ ((g_m1) * (g_m2))) @ Av) * g_m2) @ G,
-                (((Av @ (g_m1) ** 2) @ Av) * g_m2) @ G
-                - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
-            ]
-        )
+        deriv = np.r_[
+            (((Av @ g_m2**2) @ Av) * g_m1) @ G
+            - (((Av @ (g_m1 * g_m2)) @ Av) * g_m2) @ G,
+            (((Av @ g_m1**2) @ Av) * g_m2) @ G
+            - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
+        ]
+        n_cells = self.regularization_mesh.nC
+        max_derivs = np.r_[
+            np.ones(n_cells) * np.abs(deriv[:n_cells]).max(),
+            np.ones(n_cells) * np.abs(deriv[n_cells:]).max(),
+        ]
+
+        if np.max(max_derivs) > 1e-12:
+            self.scaling = np.max(max_derivs) / max_derivs
+
+        return self.scaling * deriv
 
     def deriv2(self, model, v=None):
-        """
-        Computes the Hessian of the cross-gradient.
+        r"""Hessian of the regularization function evaluated for the model provided.
+
+        Where :math:`\phi (\mathbf{m})` is the discrete regularization function (objective function),
+        this method evalutate and returns the second derivative (Hessian) with respect to the model parameters:
+        For a model :math:`\mathbf{m}` consisting of two physical properties such that:
+
+        .. math::
+            \mathbf{m} = \begin{bmatrix} \mathbf{m_1} \\ \mathbf{m_2} \end{bmatrix}
+
+        The Hessian has the form:
 
-        :param list of numpy.ndarray ind_models: [model1, model2, ...]
-        :param numpy.ndarray v: vector to be multiplied by Hessian
+        .. math::
+            \frac{\partial^2 \phi}{\partial \mathbf{m}^2} =
+            \begin{bmatrix}
+            \dfrac{\partial^2 \phi}{\partial \mathbf{m_1}^2} &
+            \dfrac{\partial^2 \phi}{\partial \mathbf{m_1} \partial \mathbf{m_2}} \\
+            \dfrac{\partial^2 \phi}{\partial \mathbf{m_2} \partial \mathbf{m_1}} &
+            \dfrac{\partial^2 \phi}{\partial \mathbf{m_2}^2}
+            \end{bmatrix}
 
-        :rtype: scipy.sparse.csr_matrix if v is None
-                numpy.ndarray if v is not None
-        :return Hessian matrix if v is None
-                Hessian multiplied by vector if v is not No
+        When a vector :math:`(\mathbf{v})` is supplied, the method returns the Hessian
+        times the vector:
 
+        .. math::
+            \frac{\partial^2 \phi}{\partial \mathbf{m}^2} \, \mathbf{v}
+
+        Parameters
+        ----------
+        model : (n_param, ) numpy.ndarray
+            The model; a vector array containing all physical properties.
+        v : None, (n_param, ) numpy.ndarray (optional)
+            A numpy array to model the Hessian by.
+
+        Returns
+        -------
+        (n_param, n_param) scipy.sparse.csr_matrix | (n_param, ) numpy.ndarray
+            If the input argument *v* is ``None``, the Hessian
+            for the models provided is returned. If *v* is not ``None``,
+            the Hessian multiplied by the vector provided is returned.
         """
         Av = self._Av
         G = self._G
-        g_m1, g_m2 = self._model_gradients(model)
 
-        if v is None:
-            A = (
-                G.T
-                @ (
-                    sp.diags(Av.T @ (Av @ (g_m2) ** 2))
-                    - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
-                )
-                @ G
-            )
+        scaling = sp.diags(self.scaling)
 
-            C = (
-                G.T
-                @ (
-                    sp.diags(Av.T @ (Av @ (g_m1) ** 2))
-                    - sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
-                )
-                @ G
-            )
+        g_m1, g_m2 = self._model_gradients(model)
 
-            B = None
-            BT = None
+        d11_mid = Av.T @ (Av @ g_m2**2)
+        d12_mid = -(Av.T @ (Av @ (g_m1 * g_m2)))
+        d22_mid = Av.T @ (Av @ g_m1**2)
+
+        if v is None:
+            D11_mid = sp.diags(d11_mid)
+            D12_mid = sp.diags(d12_mid)
+            D22_mid = sp.diags(d22_mid)
             if not self.approx_hessian:
-                # d_m1_d_m2
-                B = (
-                    G.T
-                    @ (
-                        2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
-                        - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
-                        - sp.diags(Av.T @ ((Av @ g_m2) * ((Av @ g_m1))))
-                    )
-                    @ G
+                D11_mid = D11_mid - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m2)
+                D12_mid = (
+                    D12_mid
+                    + 2 * sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m2)
+                    - sp.diags(g_m2) @ Av.T @ Av @ sp.diags(g_m1)
                 )
-                BT = B.T
+                D22_mid = D22_mid - sp.diags(g_m1) @ Av.T @ Av @ sp.diags(g_m1)
+            D11 = G.T @ D11_mid @ G
+            D12 = G.T @ D12_mid @ G
+            D22 = G.T @ D22_mid @ G
 
             return (
                 self.wire_map_deriv.T
-                * sp.bmat([[A, B], [BT, C]], format="csr")
+                * scaling
+                @ sp.bmat([[D11, D12], [D12.T, D22]], format="csr")
                 * self.wire_map_deriv
-            )
+            )  # factor of 2 from derviative of | grad m1 x grad m2 | ^2
+
         else:
             v1, v2 = (wire * v for wire in self.wire_map)
 
             Gv1 = G @ v1
             Gv2 = G @ v2
-
-            p1 = G.T @ (
-                (Av.T @ (Av @ g_m2**2)) * Gv1 - g_m2 * (Av.T @ (Av @ (g_m2 * Gv1)))
-            )
-            p2 = G.T @ (
-                (Av.T @ (Av @ g_m1**2)) * Gv2 - g_m1 * (Av.T @ (Av @ (g_m1 * Gv2)))
-            )
-
+            p1 = G.T @ (d11_mid * Gv1 + d12_mid * Gv2)
+            p2 = G.T @ (d12_mid * Gv1 + d22_mid * Gv2)
             if not self.approx_hessian:
                 p1 += G.T @ (
-                    2 * g_m1 * (Av.T @ (Av @ (g_m2 * Gv2)))
-                    - g_m2 * (Av.T @ (Av @ (g_m1 * Gv2)))
-                    - (Av.T @ (Av @ (g_m1 * g_m2))) * Gv2
+                    -g_m2 * (Av.T @ (Av @ (g_m2 * Gv1)))  # d11*v1 full addition
+                    + 2 * g_m1 * (Av.T @ (Av @ (g_m2 * Gv2)))  # d12*v2 full addition
+                    - g_m2 * (Av.T @ (Av @ (g_m1 * Gv2)))  # d12*v2 continued
                 )
 
                 p2 += G.T @ (
-                    2 * g_m2 * (Av.T @ (Av @ (g_m1 * Gv1)))
-                    - g_m1 * (Av.T @ (Av @ (g_m2 * Gv1)))
-                    - (Av.T @ (Av @ (g_m2 * g_m1))) * Gv1
+                    -g_m1 * (Av.T @ (Av @ (g_m1 * Gv2)))  # d22*v2 full addition
+                    + 2 * g_m2 * (Av.T @ (Av @ (g_m1 * Gv1)))  # d12.T*v1 full addition
+                    - g_m1 * (Av.T @ (Av @ (g_m2 * Gv1)))  # d12.T*v1 fcontinued
                 )
-            return self.wire_map_deriv.T * np.r_[p1, p2]
+            return self.wire_map_deriv.T * scaling @ np.r_[p1, p2]
 
     def set_weights(self, **weights):
         """Adds (or updates) the specified weights to the regularization

From c95e7100e9b44dd881c0f9e03baed68d8b20efe9 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Mon, 13 May 2024 14:37:45 -0700
Subject: [PATCH 06/10] Remove unused code

---
 SimPEG/directives/directives.py         |  8 ++--
 SimPEG/regularization/cross_gradient.py | 53 +------------------------
 2 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/SimPEG/directives/directives.py b/SimPEG/directives/directives.py
index b674253ff4..71f1ec6cdf 100644
--- a/SimPEG/directives/directives.py
+++ b/SimPEG/directives/directives.py
@@ -2780,10 +2780,10 @@ def update(self):
 
         # Add sensitivity weighting to all model objective functions
         for reg in self.reg.objfcts:
-            # if not isinstance(reg, BaseSimilarityMeasure):
-            sub_regs = getattr(reg, "objfcts", [reg])
-            for sub_reg in sub_regs:
-                sub_reg.set_weights(sensitivity=sub_reg.mapping * wr)
+            if not isinstance(reg, BaseSimilarityMeasure):
+                sub_regs = getattr(reg, "objfcts", [reg])
+                for sub_reg in sub_regs:
+                    sub_reg.set_weights(sensitivity=sub_reg.mapping * wr)
 
     def validate(self, directiveList):
         """Validate directive against directives list.
diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index ad188f7e40..00742978bc 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -29,19 +29,14 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         self.approx_hessian = approx_hessian
         self._units = ["metric", "metric"]
         self.normalize = normalize
-
         regmesh = self.regularization_mesh
-        self.set_weights(volume=self.regularization_mesh.vol)
+
         self.scaling = np.ones(2 * regmesh.nC)
 
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
         self._Av = sp.diags(np.sqrt(regmesh.vol)) * regmesh.average_face_to_cell
-        acf = [regmesh.aveCC2Fx, regmesh.aveCC2Fy]
-        if regmesh.dim == 3:
-            acf.append(regmesh.aveCC2Fz)
-        self._average_cell_to_faces = sp.vstack(acf)
 
     @property
     def approx_hessian(self):
@@ -62,7 +57,7 @@ def _model_gradients(self, models):
         """
         gradients = []
 
-        for unit, wire, weights in zip(self.units, self.wire_map, self.W):
+        for unit, wire in zip(self.units, self.wire_map):
             model = wire * models
             if unit == "radian":
                 gradient = []
@@ -316,47 +311,3 @@ def deriv2(self, model, v=None):
                     - g_m1 * (Av.T @ (Av @ (g_m2 * Gv1)))  # d12.T*v1 fcontinued
                 )
             return self.wire_map_deriv.T * scaling @ np.r_[p1, p2]
-
-    def set_weights(self, **weights):
-        """Adds (or updates) the specified weights to the regularization
-
-        Parameters:
-        -----------
-        **kwargs : key, numpy.ndarray
-            Each keyword argument is added to the weights used by the regularization.
-            They can be accessed with their keyword argument.
-
-        Examples
-        --------
-        >>> import discretize
-        >>> from SimPEG.regularization import Smallness
-        >>> mesh = discretize.TensorMesh([2, 3, 2])
-        >>> reg = Smallness(mesh)
-        >>> reg.set_weights(my_weight=np.ones(mesh.n_cells))
-        >>> reg.get_weights('my_weight')
-        array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])
-        """
-        for key, values in weights.items():
-            if values.shape == self._weights_shapes:
-                values = np.r_[values, values]
-
-            self._weights[key] = values
-        self._W = None
-
-    @property
-    def W(self) -> tuple:
-        """
-        Weighting matrix
-        """
-        if getattr(self, "_W", None) is None:
-            weights = np.prod(list(self._weights.values()), axis=0)
-
-            self._W = (
-                sp.diags(
-                    self._average_cell_to_faces @ weights[self.regularization_mesh.nC :]
-                ),
-                sp.diags(
-                    self._average_cell_to_faces @ weights[: self.regularization_mesh.nC]
-                ),
-            )
-        return self._W

From 40ff0236522a3d86651d27a95327015dfa6669fa Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Mon, 13 May 2024 15:23:34 -0700
Subject: [PATCH 07/10] Add missing wire map

---
 SimPEG/regularization/cross_gradient.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 00742978bc..0628d8c8f8 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -214,7 +214,7 @@ def deriv(self, model):
         if np.max(max_derivs) > 1e-12:
             self.scaling = np.max(max_derivs) / max_derivs
 
-        return self.scaling * deriv
+        return self.wire_map_deriv.T * (self.scaling * deriv)
 
     def deriv2(self, model, v=None):
         r"""Hessian of the regularization function evaluated for the model provided.

From 64bc761c9ad0d1e8eba97d49eb60a11296319656 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Tue, 14 May 2024 09:41:39 -0700
Subject: [PATCH 08/10] Remove auto scaling

---
 SimPEG/regularization/cross_gradient.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 0628d8c8f8..603aa14069 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -211,9 +211,6 @@ def deriv(self, model):
             np.ones(n_cells) * np.abs(deriv[n_cells:]).max(),
         ]
 
-        if np.max(max_derivs) > 1e-12:
-            self.scaling = np.max(max_derivs) / max_derivs
-
         return self.wire_map_deriv.T * (self.scaling * deriv)
 
     def deriv2(self, model, v=None):

From b13e5be2b331e34b1750cfe89d86076d4fe8a709 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Tue, 14 May 2024 09:43:15 -0700
Subject: [PATCH 09/10] Remove scaling property

---
 SimPEG/regularization/cross_gradient.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 603aa14069..0d1fd925a0 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -31,8 +31,6 @@ def __init__(self, mesh, wire_map, approx_hessian=True, normalize=False, **kwarg
         self.normalize = normalize
         regmesh = self.regularization_mesh
 
-        self.scaling = np.ones(2 * regmesh.nC)
-
         if regmesh.mesh.dim not in (2, 3):
             raise ValueError("Cross-Gradient is only defined for 2D or 3D")
         self._G = regmesh.cell_gradient
@@ -175,8 +173,6 @@ def __call__(self, model):
         (optional strategy, not used in this script)
 
         """
-        # m1, m2 = (wire * model for wire in self.wire_map)
-        sp.diags(self.scaling)
         Av = self._Av
         G = self._G
         g_m1, g_m2 = self._model_gradients(model)
@@ -211,7 +207,7 @@ def deriv(self, model):
             np.ones(n_cells) * np.abs(deriv[n_cells:]).max(),
         ]
 
-        return self.wire_map_deriv.T * (self.scaling * deriv)
+        return self.wire_map_deriv.T * deriv
 
     def deriv2(self, model, v=None):
         r"""Hessian of the regularization function evaluated for the model provided.
@@ -257,8 +253,6 @@ def deriv2(self, model, v=None):
         Av = self._Av
         G = self._G
 
-        scaling = sp.diags(self.scaling)
-
         g_m1, g_m2 = self._model_gradients(model)
 
         d11_mid = Av.T @ (Av @ g_m2**2)
@@ -283,7 +277,6 @@ def deriv2(self, model, v=None):
 
             return (
                 self.wire_map_deriv.T
-                * scaling
                 @ sp.bmat([[D11, D12], [D12.T, D22]], format="csr")
                 * self.wire_map_deriv
             )  # factor of 2 from derviative of | grad m1 x grad m2 | ^2
@@ -307,4 +300,4 @@ def deriv2(self, model, v=None):
                     + 2 * g_m2 * (Av.T @ (Av @ (g_m1 * Gv1)))  # d12.T*v1 full addition
                     - g_m1 * (Av.T @ (Av @ (g_m2 * Gv1)))  # d12.T*v1 fcontinued
                 )
-            return self.wire_map_deriv.T * scaling @ np.r_[p1, p2]
+            return self.wire_map_deriv.T * np.r_[p1, p2]

From bf3b51a3014d6f0d0c86ccc9669bf9def687c1b2 Mon Sep 17 00:00:00 2001
From: domfournier <dominiquef@mirageoscience.com>
Date: Tue, 14 May 2024 10:10:12 -0700
Subject: [PATCH 10/10] Clean out max deriv

---
 SimPEG/regularization/cross_gradient.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/SimPEG/regularization/cross_gradient.py b/SimPEG/regularization/cross_gradient.py
index 0d1fd925a0..af435f6346 100644
--- a/SimPEG/regularization/cross_gradient.py
+++ b/SimPEG/regularization/cross_gradient.py
@@ -201,11 +201,6 @@ def deriv(self, model):
             (((Av @ g_m1**2) @ Av) * g_m2) @ G
             - (((Av @ (g_m1 * g_m2)) @ Av) * g_m1) @ G,
         ]
-        n_cells = self.regularization_mesh.nC
-        max_derivs = np.r_[
-            np.ones(n_cells) * np.abs(deriv[:n_cells]).max(),
-            np.ones(n_cells) * np.abs(deriv[n_cells:]).max(),
-        ]
 
         return self.wire_map_deriv.T * deriv