diff --git a/mphys/mphys_adflow.py b/mphys/mphys_adflow.py
index 8f25793e..f769721c 100644
--- a/mphys/mphys_adflow.py
+++ b/mphys/mphys_adflow.py
@@ -234,7 +234,7 @@ def setup(self):
 
         # self.declare_partials(of='adflow_states', wrt='*')
 
-    def _set_ap(self, inputs):
+    def _set_ap(self, inputs, print_dict=True):
         tmp = {}
         for (args, kwargs) in self.ap_vars:
             name = args[0]
@@ -246,7 +246,7 @@ def _set_ap(self, inputs):
         #     pp(tmp)
 
         self.ap.setDesignVars(tmp)
-        if self.comm.rank == 0:
+        if self.comm.rank == 0 and print_dict:
             pp(tmp)
 
     def set_ap(self, ap):
@@ -275,7 +275,7 @@ def apply_nonlinear(self, inputs, outputs, residuals):
         solver = self.solver
 
         self._set_states(outputs)
-        self._set_ap(inputs)
+        self._set_ap(inputs, print_dict=False)
 
         ap = self.ap
 
@@ -388,17 +388,36 @@ def solve_nonlinear(self, inputs, outputs):
         outputs["adflow_states"] = solver.getStates()
 
     def linearize(self, inputs, outputs, residuals):
+        solver = self.solver
+        ap = self.ap
 
-        self.solver._setupAdjoint()
-
-        self._set_ap(inputs)
+        self._set_ap(inputs, print_dict=False)
         self._set_states(outputs)
 
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
+
     def apply_linear(self, inputs, outputs, d_inputs, d_outputs, d_residuals, mode):
 
         solver = self.solver
         ap = self.ap
 
+        self._set_ap(inputs, print_dict=False)
+        self._set_states(outputs)
+
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
+
         if mode == "fwd":
             if "adflow_states" in d_residuals:
                 xDvDot = {}
@@ -439,6 +458,24 @@ def apply_linear(self, inputs, outputs, d_inputs, d_outputs, d_residuals, mode):
     def solve_linear(self, d_outputs, d_residuals, mode):
         solver = self.solver
         ap = self.ap
+
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
+
+        # the adjoint might not be set up regardless if we changed APs
+        # this is because the first call with any AP will not have this set up, so we have to check
+        # if we changed APs, then we also freed adjoint memory,
+        # and then again we would need to setup adjoint again
+        # finally, we generally want to avoid extra calls here
+        # because this routine can be call multiple times back to back in a LBGS solver.
+        if not solver.adjointSetup:
+            solver._setupAdjoint()
+
         if self.comm.rank == 0:
             print("Solving linear in mphys_adflow", flush=True)
         if mode == "fwd":
@@ -473,14 +510,14 @@ def setup(self):
 
         # self.declare_partials(of='f_aero', wrt='*')
 
-    def _set_ap(self, inputs):
+    def _set_ap(self, inputs, print_dict=True):
         tmp = {}
         for (args, kwargs) in self.ap_vars:
             name = args[0]
             tmp[name] = inputs[name]
 
         self.ap.setDesignVars(tmp)
-        if self.comm.rank == 0:
+        if self.comm.rank == 0 and print_dict:
             pp(tmp)
 
     def set_ap(self, ap):
@@ -522,6 +559,16 @@ def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
         solver = self.solver
         ap = self.ap
 
+        self._set_ap(inputs, print_dict=False)
+
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
+
         if mode == "fwd":
             if "f_aero" in d_outputs:
                 xDvDot = {}
@@ -638,6 +685,16 @@ def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
         solver = self.solver
         ap = self.options["ap"]
 
+        self._set_ap(inputs)
+
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
+
         if mode == "fwd":
             if "q_convect" in d_outputs:
                 xDvDot = {}
@@ -756,7 +813,7 @@ def setup(self):
 
         # self.declare_partials(of=f_name, wrt='*')
 
-    def _set_ap(self, inputs):
+    def _set_ap(self, inputs, print_dict=True):
         tmp = {}
         for (args, kwargs) in self.ap_vars:
             name = args[0]
@@ -764,7 +821,7 @@ def _set_ap(self, inputs):
 
         self.ap.setDesignVars(tmp)
         # self.options['solver'].setAeroProblem(self.options['ap'])
-        if self.comm.rank == 0:
+        if self.comm.rank == 0 and print_dict:
             pp(tmp)
 
     def mphys_set_ap(self, ap):
@@ -894,6 +951,15 @@ def compute(self, inputs, outputs):
     def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
         solver = self.solver
         ap = self.ap
+        self._set_ap(inputs, print_dict=False)
+
+        # check if we changed APs, then we have to do a bunch of updates
+        if ap != solver.curAP:
+            # AP is changed, so we have to update the AP and
+            # run a residual to make sure all intermediate vairables are up to date
+            # we assume the AP has the last converged state information,
+            # which is automatically set in the getResidual call
+            solver.getResidual(ap)
 
         if mode == "fwd":
             xDvDot = {}
diff --git a/mphys/mphys_dvgeo.py b/mphys/mphys_dvgeo.py
index 532173c6..34b7d07d 100644
--- a/mphys/mphys_dvgeo.py
+++ b/mphys/mphys_dvgeo.py
@@ -54,6 +54,10 @@ def compute(self, inputs, outputs):
             for constraintname in constraintfunc:
                 outputs[constraintname] = constraintfunc[constraintname]
 
+        # we ran a compute so the inputs changed. update the dvcon jac
+        # next time the jacvec product routine is called
+        self.update_jac = True
+
     def nom_add_discipline_coords(self, discipline, points=None):
         # TODO remove one of these methods to keep only one method to add pointsets
 
@@ -157,12 +161,22 @@ def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
         ni = len(list(d_inputs.keys()))
 
         if mode == "rev" and ni > 0:
-            constraintfuncsens = dict()
-            self.DVCon.evalFunctionsSens(constraintfuncsens, includeLinear=True)
-            for constraintname in constraintfuncsens:
-                for dvname in constraintfuncsens[constraintname]:
+
+            # this flag will be set to True after every compute call.
+            # if it is true, we assume the design has changed so we re-run the sensitivity update
+            # there can be hundreds of calls to this routine due to thickness constraints,
+            # as a result, we only run the actual sensitivity comp once and save the jacobians
+            # this might be better suited with the matrix-based API
+            if self.update_jac:
+                self.constraintfuncsens = dict()
+                self.DVCon.evalFunctionsSens(self.constraintfuncsens, includeLinear=True)
+                # set the flag to False so we dont run the update again if this is called w/o a compute in between
+                self.update_jac = False
+
+            for constraintname in self.constraintfuncsens:
+                for dvname in self.constraintfuncsens[constraintname]:
                     if dvname in d_inputs:
-                        dcdx = constraintfuncsens[constraintname][dvname]
+                        dcdx = self.constraintfuncsens[constraintname][dvname]
                         if self.comm.rank == 0:
                             dout = d_outputs[constraintname]
                             jvtmp = np.dot(np.transpose(dcdx), dout)
@@ -175,26 +189,34 @@ def compute_jacvec_product(self, inputs, d_inputs, d_outputs, mode):
             for ptSetName in self.DVGeo.ptSetNames:
                 if ptSetName in self.omPtSetList:
                     dout = d_outputs[ptSetName].reshape(len(d_outputs[ptSetName]) // 3, 3)
-                    # TODO dout is zero when jacvec product is called for the constraints. quite a few unnecessary computations happen here...
-
-                    # TODO totalSensitivityTransProd is broken. does not work with zero surface nodes on a proc
-                    # xdot = self.DVGeo.totalSensitivityTransProd(dout, ptSetName)
-                    xdot = self.DVGeo.totalSensitivity(dout, ptSetName)
-
-                    # loop over dvs and accumulate
-                    xdotg = {}
-                    for k in xdot:
-                        # check if this dv is present
-                        if k in d_inputs:
-                            # do the allreduce
-                            # TODO reove the allreduce when this is fixed in openmdao
-                            # reduce the result ourselves for now. ideally, openmdao will do the reduction itself when this is fixed. this is because the bcast is also done by openmdao (pyoptsparse, but regardless, it is not done here, so reduce should also not be done here)
-                            xdotg[k] = self.comm.allreduce(xdot[k], op=MPI.SUM)
-
-                            # accumulate in the dict
-                            # TODO
-                            # because we only do one point set at a time, we always want the 0th
-                            # entry of this array since dvgeo always behaves like we are passing
-                            # in multiple objective seeds with totalSensitivity. we can remove the [0]
-                            # once we move back to totalSensitivityTransProd
-                            d_inputs[k] += xdotg[k][0]
+
+                    # only do the calc. if d_output is not zero on ANY proc
+                    local_all_zeros = np.all(dout == 0)
+                    global_all_zeros = np.zeros(1, dtype=bool)
+                    # we need to communicate for this check otherwise we may hang
+                    self.comm.Allreduce([local_all_zeros, MPI.BOOL], [global_all_zeros, MPI.BOOL], MPI.LAND)
+
+                    # global_all_zeros is a numpy array of size 1
+                    if not global_all_zeros[0]:
+
+                        # TODO totalSensitivityTransProd is broken. does not work with zero surface nodes on a proc
+                        # xdot = self.DVGeo.totalSensitivityTransProd(dout, ptSetName)
+                        xdot = self.DVGeo.totalSensitivity(dout, ptSetName)
+
+                        # loop over dvs and accumulate
+                        xdotg = {}
+                        for k in xdot:
+                            # check if this dv is present
+                            if k in d_inputs:
+                                # do the allreduce
+                                # TODO reove the allreduce when this is fixed in openmdao
+                                # reduce the result ourselves for now. ideally, openmdao will do the reduction itself when this is fixed. this is because the bcast is also done by openmdao (pyoptsparse, but regardless, it is not done here, so reduce should also not be done here)
+                                xdotg[k] = self.comm.allreduce(xdot[k], op=MPI.SUM)
+
+                                # accumulate in the dict
+                                # TODO
+                                # because we only do one point set at a time, we always want the 0th
+                                # entry of this array since dvgeo always behaves like we are passing
+                                # in multiple objective seeds with totalSensitivity. we can remove the [0]
+                                # once we move back to totalSensitivityTransProd
+                                d_inputs[k] += xdotg[k][0]
diff --git a/setup.py b/setup.py
index e351249a..68e5b753 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 from setuptools import setup, find_packages
 
 __package_name__ = "mphys"
-__package_version__ = "0.3.0"
+__package_version__ = "0.4.0"
 
 
 setup(