From 4f320906e89d5e592472981705a31db9a95e7cc9 Mon Sep 17 00:00:00 2001
From: Axel Larsson <larsson4@llnl.gov>
Date: Thu, 8 Feb 2024 14:28:35 -0800
Subject: [PATCH] AssembleBlock implementation

---
 src/interfaceinteg.cpp | 455 +++++++++++++++++++++++------------------
 1 file changed, 252 insertions(+), 203 deletions(-)
diff --git a/src/interfaceinteg.cpp b/src/interfaceinteg.cpp
index ddfab7a4..5cda4c7c 100644
--- a/src/interfaceinteg.cpp
+++ b/src/interfaceinteg.cpp
@@ -1009,237 +1009,286 @@ namespace mfem
        Array2D<DenseMatrix *> &elmats)
    {
 #ifdef MFEM_THREAD_SAFE
-   // For descriptions of these variables, see the class declaration.
-   Vector shape1, shape2;
-   DenseMatrix dshape1, dshape2;
-   DenseMatrix adjJ;
-   DenseMatrix dshape1_ps, dshape2_ps;
-   Vector nor;
-   Vector nL1, nL2;
-   Vector nM1, nM2;
-   Vector dshape1_dnM, dshape2_dnM;
-   DenseMatrix jmat;
+      // For descriptions of these variables, see the class declaration.
+      Vector shape1, shape2;
+      DenseMatrix dshape1, dshape2;
+      DenseMatrix adjJ;
+      DenseMatrix dshape1_ps, dshape2_ps;
+      Vector nor;
+      Vector nL1, nL2;
+      Vector nM1, nM2;
+      Vector dshape1_dnM, dshape2_dnM;
+      DenseMatrix jmat;
 #endif
 
-   const int dim = el1.GetDim();
-   const int ndofs1 = el1.GetDof();
-   const int ndofs2 = (boundary) ? el2.GetDof() : 0;
-   const int nvdofs = dim*(ndofs1 + ndofs2);
-
-   // Initially 'elmat' corresponds to the term:
-   //    < { sigma(u) . n }, [v] > =
-   //    < { (lambda div(u) I + mu (grad(u) + grad(u)^T)) . n }, [v] >
-   // But eventually, it's going to be replaced by:
-   //    elmat := -elmat + alpha*elmat^T + jmat
-   elmats(0,0)->SetSize(ndof1, ndof1);
-   elmats(0,1)->SetSize(ndof1, ndof2);
-   elmats(1,0)->SetSize(ndof2, ndof1);
-   elmats(1,1)->SetSize(ndof2, ndof2);
-   for (int i = 0; i < 2; i++)
-      for (int j = 0; j < 2; j++) *elmats(i,j) = 0.0;
-   // elmat.SetSize(ndofs);
-   // elmat = 0.0;
-
-   const bool kappa_is_nonzero = (kappa != 0.0);
-   if (kappa_is_nonzero)
-   {
-      jmats.SetSize(2,2);
-      jmats(0,0) = new DenseMatrix(ndof1, ndof1);
-      // only the lower-triangular part of jmat is assembled.
-      jmats(0,1) = NULL;
-      jmats(1,0) = new DenseMatrix(ndof2, ndof1);
-      jmats(1,1) = new DenseMatrix(ndof2, ndof2);
-      for (int i = 0; i < 2; i++)
-         for (int j = 0; j <= i; j++) *jmats(i,j) = 0.0;
-      // jmat.SetSize(ndofs);
-      // jmat = 0.;
-   }
-
-   adjJ.SetSize(dim);
-   shape1.SetSize(ndofs1);
-   dshape1.SetSize(ndofs1, dim);
-   dshape1_ps.SetSize(ndofs1, dim);
-   nor.SetSize(dim);
-   nL1.SetSize(dim);
-   nM1.SetSize(dim);
-   dshape1_dnM.SetSize(ndofs1);
-
-   if (ndofs2)
-   {
-      shape2.SetSize(ndofs2);
-      dshape2.SetSize(ndofs2, dim);
-      dshape2_ps.SetSize(ndofs2, dim);
-      nL2.SetSize(dim);
-      nM2.SetSize(dim);
-      dshape2_dnM.SetSize(ndofs2);
-   }
-
-   const IntegrationRule *ir = IntRule;
-   if (ir == NULL)
-   {
-      // a simple choice for the integration order; is this OK?
-      const int order = 2 * max(el1.GetOrder(), ndofs2 ? el2.GetOrder() : 0);
-      ir = &IntRules.Get(Trans.GetGeometryType(), order);
-   assert(Trans1.GetGeometryType() == Trans2.GetGeometryType());
-   }
+      const int dim = el1.GetDim();
+      const int ndofs1 = el1.GetDof();
+      const int ndofs2 = (boundary) ? el2.GetDof() : 0;
+      const int nvdofs = dim * (ndofs1 + ndofs2);
 
-   for (int pind = 0; pind < ir->GetNPoints(); ++pind)
-   {
-      const IntegrationPoint &ip = ir->IntPoint(pind);
+      // Initially 'elmat' corresponds to the term:
+      //    < { sigma(u) . n }, [v] > =
+      //    < { (lambda div(u) I + mu (grad(u) + grad(u)^T)) . n }, [v] >
+      // But eventually, it's going to be replaced by:
+      //    elmat := -elmat + alpha*elmat^T + jmat
+      elmats(0, 0)->SetSize(ndof1, ndof1);
+      elmats(0, 1)->SetSize(ndof1, ndof2);
+      elmats(1, 0)->SetSize(ndof2, ndof1);
+      elmats(1, 1)->SetSize(ndof2, ndof2);
+      for (int i = 0; i < 2; i++)
+         for (int j = 0; j < 2; j++)
+            *elmats(i, j) = 0.0;
+      // elmat.SetSize(ndofs);
+      // elmat = 0.0;
 
-      // Set the integration point in the face and the neighboring elements
-      Trans1.SetAllIntPoints(&ip);
-      Trans2.SetAllIntPoints(&ip);
+      const bool kappa_is_nonzero = (kappa != 0.0);
+      if (kappa_is_nonzero)
+      {
+         jmats.SetSize(2, 2);
+         jmats(0, 0) = new DenseMatrix(ndof1, ndof1);
+         // only the lower-triangular part of jmat is assembled.
+         jmats(0, 1) = NULL;
+         jmats(1, 0) = new DenseMatrix(ndof2, ndof1);
+         jmats(1, 1) = new DenseMatrix(ndof2, ndof2);
+         for (int i = 0; i < 2; i++)
+            for (int j = 0; j <= i; j++)
+               *jmats(i, j) = 0.0;
+         // jmat.SetSize(ndofs);
+         // jmat = 0.;
+      }
 
-      // Access the neighboring elements' integration points
-      // Note: eip1 and eip2 come from Element1 of Trans1 and Trans2 respectively.
-      const IntegrationPoint &eip1 = Trans1.GetElement1IntPoint();
-      const IntegrationPoint &eip2 = Trans2.GetElement1IntPoint();
+      adjJ.SetSize(dim);
+      shape1.SetSize(ndofs1);
+      dshape1.SetSize(ndofs1, dim);
+      dshape1_ps.SetSize(ndofs1, dim);
+      nor.SetSize(dim);
+      nL1.SetSize(dim);
+      nM1.SetSize(dim);
+      dshape1_dnM.SetSize(ndofs1);
 
-      // computing outward normal vectors.
-      if (dim == 1)
+      if (ndofs2)
       {
-         nor(0) = 2*eip1.x - 1.0;
-         nor2(0) = 2*eip2.x - 1.0;
+         shape2.SetSize(ndofs2);
+         dshape2.SetSize(ndofs2, dim);
+         dshape2_ps.SetSize(ndofs2, dim);
+         nL2.SetSize(dim);
+         nM2.SetSize(dim);
+         dshape2_dnM.SetSize(ndofs2);
       }
-      else
+
+      const IntegrationRule *ir = IntRule;
+      if (ir == NULL)
       {
-         CalcOrtho(Trans1.Jacobian(), nor);
-         CalcOrtho(Trans2.Jacobian(), nor2);
+         // a simple choice for the integration order; is this OK?
+         const int order = 2 * max(el1.GetOrder(), ndofs2 ? el2.GetOrder() : 0);
+         ir = &IntRules.Get(Trans.GetGeometryType(), order);
+         assert(Trans1.GetGeometryType() == Trans2.GetGeometryType());
       }
 
-      el1.CalcShape(eip1, shape1);
-      el1.CalcDShape(eip1, dshape1);
+      for (int pind = 0; pind < ir->GetNPoints(); ++pind)
+      {
+         const IntegrationPoint &ip = ir->IntPoint(pind);
 
-      CalcAdjugate(Trans1.Elem1->Jacobian(), adjJ);
-      Mult(dshape1, adjJ, dshape1_ps);
+         // Set the integration point in the face and the neighboring elements
+         Trans1.SetAllIntPoints(&ip);
+         Trans2.SetAllIntPoints(&ip);
 
+         // Access the neighboring elements' integration points
+         // Note: eip1 and eip2 come from Element1 of Trans1 and Trans2 respectively.
+         const IntegrationPoint &eip1 = Trans1.GetElement1IntPoint();
+         const IntegrationPoint &eip2 = Trans2.GetElement1IntPoint();
 
-      double w, wLM;
-      if (ndofs2)
-      {
-         el2.CalcShape(eip2, shape2);
-         el2.CalcDShape(eip2, dshape2);
-         CalcAdjugate(Trans2.Elem1->Jacobian(), adjJ);
-         Mult(dshape2, adjJ, dshape2_ps);
-
-         w = ip.weight/2;
-         const double w2 = w / Trans2.Elem1->Weight();
-         const double wL2 = w2 * lambda->Eval(*Trans2.Elem1, eip2);
-         const double wM2 = w2 * mu->Eval(*Trans2.Elem1, eip2);
-         nL2.Set(wL2, nor);
-         nM2.Set(wM2, nor);
-         wLM = (wL2 + 2.0*wM2);
-         dshape2_ps.Mult(nM2, dshape2_dnM);
-      }
-      else
-      {
-         w = ip.weight;
-         wLM = 0.0;
+         // computing outward normal vectors.
+         if (dim == 1)
+         {
+            nor(0) = 2 * eip1.x - 1.0;
+            nor2(0) = 2 * eip2.x - 1.0;
+         }
+         else
+         {
+            CalcOrtho(Trans1.Jacobian(), nor);
+            CalcOrtho(Trans2.Jacobian(), nor2);
+         }
+
+         el1.CalcShape(eip1, shape1);
+         el1.CalcDShape(eip1, dshape1);
+
+         CalcAdjugate(Trans1.Elem1->Jacobian(), adjJ);
+         Mult(dshape1, adjJ, dshape1_ps);
+
+         double w, wLM;
+         if (ndofs2)
+         {
+            el2.CalcShape(eip2, shape2);
+            el2.CalcDShape(eip2, dshape2);
+            CalcAdjugate(Trans2.Elem1->Jacobian(), adjJ);
+            Mult(dshape2, adjJ, dshape2_ps);
+
+            w = ip.weight / 2;
+            const double w2 = w / Trans2.Elem1->Weight();
+            const double wL2 = w2 * lambda->Eval(*Trans2.Elem1, eip2);
+            const double wM2 = w2 * mu->Eval(*Trans2.Elem1, eip2);
+            nL2.Set(wL2, nor);
+            nM2.Set(wM2, nor);
+            wLM = (wL2 + 2.0 * wM2);
+            dshape2_ps.Mult(nM2, dshape2_dnM);
+         }
+         else
+         {
+            w = ip.weight;
+            wLM = 0.0;
+         }
+
+         {
+            const double w1 = w / Trans1.Elem1->Weight();
+            const double wL1 = w1 * lambda->Eval(*Trans1.Elem1, eip1);
+            const double wM1 = w1 * mu->Eval(*Trans1.Elem1, eip1);
+            nL1.Set(wL1, nor);
+            nM1.Set(wM1, nor);
+            wLM += (wL1 + 2.0 * wM1);
+            dshape1_ps.Mult(nM1, dshape1_dnM);
+         }
+
+         const double jmatcoef = kappa * (nor * nor) * wLM;
+         // (1,1) block
+         AssembleBlock(
+             dim, ndofs1, ndofs1, 0, 0, jmatcoef, nL1, nM1,
+             shape1, shape1, dshape1_dnM, dshape1_ps, *elmats(0, 0), *jmats(0, 0));
+
+         if (ndofs2 == 0)
+         {
+            continue;
+         }
+
+         // In both elmat and jmat, shape2 appears only with a minus sign.
+         shape2.Neg();
+
+         // (1,2) block
+         AssembleBlock(
+             dim, ndofs1, ndofs2, 0, dim * ndofs1, jmatcoef, nL2, nM2,
+             shape1, shape2, dshape2_dnM, dshape2_ps, *elmats(0, 1), *jmats(0, 1));
+         // (2,1) block
+         AssembleBlock(
+             dim, ndofs2, ndofs1, dim * ndofs1, 0, jmatcoef, nL1, nM1,
+             shape2, shape1, dshape1_dnM, dshape1_ps, *elmats(1, 0), *jmats(1, 0));
+         // (2,2) block
+         AssembleBlock(
+             dim, ndofs2, ndofs2, dim * ndofs1, dim * ndofs1, jmatcoef, nL2, nM2,
+             shape2, shape2, dshape2_dnM, dshape2_ps, *elmats(1, 1), *jmats(1, 1));
       }
 
+      // elmat := -elmat + sigma*elmat^t + jmat
+      Array<int> ndof_array(2);
+      ndof_array[0] = ndof1;
+      ndof_array[1] = ndof2;
+      DenseMatrix *elmat12 = NULL;
+      if (kappa_is_nonzero)
       {
-         const double w1 = w / Trans1.Elem1->Weight();
-         const double wL1 = w1 * lambda->Eval(*Trans1.Elem1, eip1);
-         const double wM1 = w1 * mu->Eval(*Trans1.Elem1, eip1);
-         nL1.Set(wL1, nor);
-         nM1.Set(wM1, nor);
-         wLM += (wL1 + 2.0*wM1);
-         dshape1_ps.Mult(nM1, dshape1_dnM);
-      }
+         for (int I = 0; I < 2; I++)
+         {
+            elmat = elmats(I, I);
+            jmat = jmats(I, I);
+            for (int i = 0; i < ndof_array[I]; i++)
+            {
+               for (int j = 0; j < i; j++)
+               {
+                  double aij = (*elmat)(i, j), aji = (*elmat)(j, i), mij = (*jmat)(i, j);
+                  (*elmat)(i, j) = sigma * aji - aij + mij;
+                  (*elmat)(j, i) = sigma * aij - aji + mij;
+               }
+               (*elmat)(i, i) = (sigma - 1.) * (*elmat)(i, i) + (*jmat)(i, i);
+            } // for (int i = 0; i < ndofs_array[I]; i++)
+         }    // for (int I = 0; I < 2; I++)
+         elmat = elmats(1, 0);
+         jmat = jmats(1, 0);
+         assert(jmat != NULL);
+         elmat12 = elmats(0, 1);
+         for (int i = 0; i < ndof2; i++)
+         {
+            for (int j = 0; j < ndof1; j++)
+            {
+               double aij = (*elmat)(i, j), aji = (*elmat12)(j, i), mij = (*jmat)(i, j);
+               (*elmat)(i, j) = sigma * aji - aij + mij;
+               (*elmat12)(j, i) = sigma * aij - aji + mij;
+            } // for (int j = 0; j < ndofs1; j++)
+         }    // for (int i = 0; i < ndofs2; i++)
+      }       // if (kappa_is_nonzero)
+      else
+      {
+         for (int I = 0; I < 2; I++)
+         {
+            elmat = elmats(I, I);
+            for (int i = 0; i < ndof_array[I]; i++)
+            {
+               for (int j = 0; j < i; j++)
+               {
+                  double aij = (*elmat)(i, j), aji = (*elmat)(j, i);
+                  (*elmat)(i, j) = sigma * aji - aij;
+                  (*elmat)(j, i) = sigma * aij - aji;
+               }
+               (*elmat)(i, i) *= (sigma - 1.);
+            } // for (int i = 0; i < ndofs_array[I]; i++)
+         }    // for (int I = 0; I < 2; I++)
+         elmat = elmats(1, 0);
+         elmat12 = elmats(0, 1);
+         for (int i = 0; i < ndof2; i++)
+         {
+            for (int j = 0; j < ndof1; j++)
+            {
+               double aij = (*elmat)(i, j), aji = (*elmat12)(j, i);
+               (*elmat)(i, j) = sigma * aji - aij;
+               (*elmat12)(j, i) = sigma * aij - aji;
+            } // for (int j = 0; j < ndofs1; j++)
+         }    // for (int i = 0; i < ndofs2; i++)
+      }       // not if (kappa_is_nonzero)
 
-      const double jmatcoef = kappa * (nor*nor) * wLM;
-      // (1,1) block
-      AssembleBlock(
-         dim, ndofs1, ndofs1, 0, 0, jmatcoef, nL1, nM1,
-         shape1, shape1, dshape1_dnM, dshape1_ps, *elmats(0, 0), *jmats(0, 0));
-
-      if (ndofs2 == 0) { continue; }
-
-      // In both elmat and jmat, shape2 appears only with a minus sign.
-      shape2.Neg();
-
-      // (1,2) block
-      AssembleBlock(
-         dim, ndofs1, ndofs2, 0, dim*ndofs1, jmatcoef, nL2, nM2,
-         shape1, shape2, dshape2_dnM, dshape2_ps, *elmats(0, 1), *jmats(0, 1));
-      // (2,1) block
-      AssembleBlock(
-         dim, ndofs2, ndofs1, dim*ndofs1, 0, jmatcoef, nL1, nM1,
-         shape2, shape1, dshape1_dnM, dshape1_ps, *elmats(1, 0), *jmats(1, 0));
-      // (2,2) block
-      AssembleBlock(
-         dim, ndofs2, ndofs2, dim*ndofs1, dim*ndofs1, jmatcoef, nL2, nM2,
-         shape2, shape2, dshape2_dnM, dshape2_ps, *elmats(1,1), *jmats(1,1));
-   
+      if (kappa_is_nonzero)
+         DeletePointers(jmats);
    }
 
-   // elmat := -elmat + sigma*elmat^t + jmat
-   Array<int> ndof_array(2);
-   ndof_array[0] = ndof1;
-   ndof_array[1] = ndof2;
-   DenseMatrix *elmat12 = NULL;
-   if (kappa_is_nonzero)
+   // static method
+   void InterfaceDGElasticityIntegrator::AssembleBlock(const int dim, const int row_ndofs, 
+   const int col_ndofs, const int row_offset, const int col_offset, const double jmatcoef, 
+   const Vector &col_nL, const Vector &col_nM, const Vector &row_shape, const Vector &col_shape, 
+   const Vector &col_dshape_dnM, const DenseMatrix &col_dshape, DenseMatrix &elmat, DenseMatrix &jmat)
    {
-      for (int I = 0; I < 2; I++)
+      // row_offset and col_offset are not needed for elmat.
+      for (int jm = 0, j = 0; jm < dim; ++jm)
       {
-         elmat = elmats(I,I);
-         jmat = jmats(I,I); 
-         for (int i = 0; i < ndof_array[I]; i++)
+         for (int jdof = 0; jdof < col_ndofs; ++jdof, ++j)
          {
-            for (int j = 0; j < i; j++)
+            const double t2 = col_dshape_dnM(jdof);
+            for (int im = 0, i = 0; im < dim; ++im)
             {
-               double aij = (*elmat)(i,j), aji = (*elmat)(j,i), mij = (*jmat)(i,j);
-               (*elmat)(i,j) = sigma*aji - aij + mij;
-               (*elmat)(j,i) = sigma*aij - aji + mij;
+               const double t1 = col_dshape(jdof, jm) * col_nL(im);
+               const double t3 = col_dshape(jdof, im) * col_nM(jm);
+               const double tt = t1 + ((im == jm) ? t2 : 0.0) + t3;
+               for (int idof = 0; idof < row_ndofs; ++idof, ++i)
+               {
+                  elmat(i, j) += row_shape(idof) * tt;
+               }
             }
-            (*elmat)(i,i) = (sigma - 1.) * (*elmat)(i,i) + (*jmat)(i,i);
-         }  // for (int i = 0; i < ndofs_array[I]; i++)
-      }  // for (int I = 0; I < 2; I++)
-      elmat = elmats(1,0);
-      jmat = jmats(1,0);
-      assert(jmat != NULL);
-      elmat12 = elmats(0,1);
-      for (int i = 0; i < ndof2; i++)
+         }
+      }
+
+      if (jmatcoef == 0.0)
       {
-         for (int j = 0; j < ndof1; j++)
-         {
-            double aij = (*elmat)(i,j), aji = (*elmat12)(j,i), mij = (*jmat)(i,j);
-            (*elmat)(i,j) = sigma*aji - aij + mij;
-            (*elmat12)(j,i) = sigma*aij - aji + mij;
-         }  // for (int j = 0; j < ndofs1; j++)
-      }  // for (int i = 0; i < ndofs2; i++)
-   }  // if (kappa_is_nonzero)
-   else
-   {
-      for (int I = 0; I < 2; I++)
+         return;
+      }
+
+      for (int d = 0; d < dim; ++d)
       {
-         elmat = elmats(I,I);
-         for (int i = 0; i < ndof_array[I]; i++)
+         const int jo = d * col_ndofs;
+         const int io = d * row_ndofs;
+         for (int jdof = 0, j = jo; jdof < col_ndofs; ++jdof, ++j)
          {
-            for (int j = 0; j < i; j++)
+            const double sj = jmatcoef * col_shape(jdof);
+            int i_start = (io + row_offset > j + col_offset) ? io : j;
+            for (int i = i_start, idof = i - io; idof < row_ndofs; ++idof, ++i)
             {
-               double aij = (*elmat)(i,j), aji = (*elmat)(j,i);
-               (*elmat)(i,j) = sigma*aji - aij;
-               (*elmat)(j,i) = sigma*aij - aji;
+               jmat(i, j) += row_shape(idof) * sj;
             }
-            (*elmat)(i,i) *= (sigma - 1.);
-         }  // for (int i = 0; i < ndofs_array[I]; i++)
-      }  // for (int I = 0; I < 2; I++)
-      elmat = elmats(1,0);
-      elmat12 = elmats(0,1);
-      for (int i = 0; i < ndof2; i++)
-      {
-         for (int j = 0; j < ndof1; j++)
-         {
-            double aij = (*elmat)(i,j), aji = (*elmat12)(j,i);
-            (*elmat)(i,j) = sigma*aji - aij;
-            (*elmat12)(j,i) = sigma*aij - aji;
-         }  // for (int j = 0; j < ndofs1; j++)
-      }  // for (int i = 0; i < ndofs2; i++)
-   }  // not if (kappa_is_nonzero)
-
-   if (kappa_is_nonzero)
-      DeletePointers(jmats);
-}
+         }
+      }
+   }
 }