diff --git a/CMakeLists.txt b/CMakeLists.txt index 7648e54d3..ab2af3869 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,7 @@ option(USE_XSDK_DEFAULTS "enable the XDSK v0.3.0 default configuration" NO) #requre c++11 without extensions set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSION OFF) -if(NOT ENABLE_CGNS) - set(CMAKE_CXX_STANDARD 11) -endif() +set(CMAKE_CXX_STANDARD 11) xsdk_begin_package() bob_begin_package() @@ -27,8 +25,7 @@ if(USE_XSDK_DEFAULTS) xsdk_compiler_flags() endif() -# require c++14 -option(ENABLE_CGNS "Enable the CGNS reader: requires c++14 extensions" OFF) +option(ENABLE_CGNS "Enable the CGNS reader" OFF) message(STATUS "ENABLE_CGNS: ${ENABLE_CGNS}") # Set some default compiler flags that should always be used @@ -37,10 +34,7 @@ if(NOT USE_XSDK_DEFAULTS) bob_begin_cxx_flags() bob_end_cxx_flags() set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}") - if(ENABLE_CGNS) #takes precedence over SCOREC_ENABLE_CXX11 - message(STATUS "enabling cxx14") - bob_cxx14_flags() - elseif(SCOREC_ENABLE_CXX11) + if(SCOREC_ENABLE_CXX11) bob_cxx11_flags() endif() endif() @@ -60,6 +54,8 @@ message(STATUS "IS_TESTING: ${IS_TESTING}") set(MESHES "${CMAKE_SOURCE_DIR}/pumi-meshes" CACHE STRING "Directory of test meshes") message(STATUS "MESHES: ${MESHES}") +get_filename_component(MESHES ${MESHES} ABSOLUTE) +message(STATUS "Using absolute file path MESHES: ${MESHES}") option(BUILD_EXES "Build executables" ON) message(STATUS "BUILD_EXES: ${BUILD_EXES}") @@ -135,6 +131,7 @@ if(ENABLE_OMEGA_H) endif() if(ENABLE_CGNS) + option(ENABLE_CGNS_MULTI_BASE "Enable the CGNS Multi Base tests" OFF) set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) bob_public_dep(CGNS) #CGNS does not provide cmake targets :( @@ -172,8 +169,10 @@ add_library(core INTERFACE) target_link_libraries(core INTERFACE ${SCOREC_EXPORTED_TARGETS}) if(ENABLE_CGNS) target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen - target_compile_features(core INTERFACE cxx_std_14) + # target_compile_features(core INTERFACE cxx_std_14) + target_compile_features(core INTERFACE cxx_std_11) else() + target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen target_compile_features(core INTERFACE cxx_std_11) endif() scorec_export_library(core) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 4debbb58b..da3b2b419 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -11,6 +11,7 @@ #include "apfNumberingClass.h" #include "apfShape.h" #include "apfFieldData.h" +#include #include #include // @@ -269,9 +270,16 @@ void WriteTags(const CGNS &cgns, const std::vector> &orderedEnts, const std::vector> &ranges, const std::vector &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) + +typedef std::vector VecMeshEntity_t; +typedef std::pair CGRange_t; + +void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const std::vector &ranges, const VecMeshEntity_t &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) { - const auto writeField = [&m, &cgns](apf::Field *f, const auto &orderedEnts, const int &solIndex, const auto &inner, const auto &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { + typedef std::function *fieldData, std::vector &ddata, const int &numComponents, const int &component)> innerLambda_t; + typedef std::function &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex)> postLambda_t; + + const auto writeField = [&m, &cgns](apf::Field *f, const 
VecMeshEntity_t &orderedEnt, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { std::vector data; cgsize_t rmin[3]; @@ -281,7 +289,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData = f->getData(); - for (const auto &e : orderedEnts) + for (const auto &e : orderedEnt) { if (fieldData->hasEntity(e) && m->isOwned(e)) { @@ -310,7 +318,7 @@ void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const std::vector &ranges) { for (int i = 0; i < m->countFields(); ++i) { apf::Field *f = m->getField(i); @@ -335,7 +343,7 @@ void WriteFields(const CGNS &cgns, const std::vectorcountFields(); ++i) { apf::Field *f = m->getField(i); @@ -352,12 +360,12 @@ void WriteFields(const CGNS &cgns, const std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { + const postLambda_t postLambda = [&cgns](const int &solIndex, std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { if (globalSize > 0) { if (cgp_field_write_data(cgns.index, cgns.base, cgns.zone, solIndex, fieldIndex, &rmin[0], &rmax[0], @@ -366,7 +374,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData, std::vector &ddata, const int &numComponents, const int &component) { + const innerLambda_t innerLambda = [](apf::MeshEntity *elem, apf::FieldDataOf *fieldData, std::vector &ddata, const int &numComponents, const int &component) { std::vector vals(numComponents, -12345); fieldData->get(elem, vals.data()); //std::cout << numComponents << " " << component << " " << vals[0] << std::endl; @@ -390,7 +398,7 @@ void WriteFields(const CGNS &cgns, const std::vector WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) { int Cx = -1; int Cy = -1; @@ -412,7 +420,7 @@ auto WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) cgp_error_exit(); } - std::vector orderedVertices; + VecMeshEntity_t orderedVertices; cgsize_t vertexMin[3]; cgsize_t vertexMax[3]; cgsize_t contigRange = -1; @@ -574,7 +582,10 @@ CellElementReturn WriteElements(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbe void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, const int &cellCount, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap, const std::map &apf2cgns, apf::GlobalNumbering *gvn) { - const auto EdgeLoop = [&m](const auto &lambda, apf::MeshTag *edgeTag) { + typedef std::function LambdaMeshEntity_t; + typedef std::vector VecCGNSInfo_t; + + const auto EdgeLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *edgeTag) { apf::MeshIterator *edgeIter = m->begin(1); apf::MeshEntity *edge = nullptr; int vals[1]; @@ -591,7 +602,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(edgeIter); }; - const auto FaceLoop = [&m](const auto &lambda, apf::MeshTag *faceTag) { + const auto FaceLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *faceTag) { apf::MeshIterator *faceIter = m->begin(2); apf::MeshEntity *face = nullptr; int vals[1]; @@ -608,7 +619,8 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(faceIter); }; - const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const auto &Looper, const auto &bcGroup, int &startingLocation) { + + const auto BCEntityAdder = 
[&apf2cgns, &m, &cgns, &gvn](const std::function &Looper, const apf::CGNSInfo &bcGroup, int &startingLocation) { std::map> bcEntTypes; for (const auto &r : apf2cgns) bcEntTypes.insert(std::make_pair(r.first, std::vector())); @@ -715,7 +727,9 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, PCU_Get_Comm()); }; - const auto doVertexBC = [&](const auto &iter) { + typedef std::map, std::vector>::const_iterator MapCGNSInfo_t; + + const auto doVertexBC = [&](const MapCGNSInfo_t &iter) { for (const auto &p : iter->second) { std::vector bcList; @@ -751,7 +765,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doEdgeBC = [&](const auto &iter, int &startingLocation) { + const auto doEdgeBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(EdgeLoop, p, startingLocation); @@ -774,7 +788,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doFaceBC = [&](const auto &iter, int &startingLocation) { + const auto doFaceBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(FaceLoop, p, startingLocation); @@ -797,7 +811,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doCellBC = [&](const auto &iter, const int &) { + const auto doCellBC = [&](const MapCGNSInfo_t &iter, const int &) { for (const auto &p : iter->second) { std::vector bcList; @@ -1009,7 +1023,9 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); + + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); + const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); @@ -1051,11 +1067,11 @@ void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap auto communicator = PCU_Get_Comm(); cgp_mpi_comm(communicator); // - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); + cgp_pio_mode(CGP_INDEPENDENT); CGNS cgns; cgns.fname = std::string(prefix); - if (cgp_open(prefix, CGNS_ENUMV(CG_MODE_WRITE), &cgns.index)) + if (cgp_open(prefix, CG_MODE_WRITE, &cgns.index)) cgp_error_exit(); { diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index acdef4aae..cf0230f3d 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -177,19 +177,19 @@ struct MeshDataGroup if (components.size() == 1) { std::cout << "Scalar Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 3) { std::cout << "Vector Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 9) { std::cout << "Matrix Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si 
<< " " << m.second.fi << std::endl; } else @@ -265,7 +265,7 @@ void Kill(const int fid) } } -auto ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) +std::map> ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) { // Read min required as defined by consecutive range // make one based as ReadElements makes zero based @@ -389,7 +389,7 @@ void SimpleElementPartition(std::vector &numberToReadPerProc, std::vec using Pair = std::pair; using LocalElementRanges = std::vector; // one based -auto ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) +std::tuple, cgsize_t> ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) { std::vector numberToReadPerProc; std::vector startingIndex; @@ -1051,13 +1051,13 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); int cgid = -1; auto comm = PCU_Get_Comm(); cgp_mpi_comm(comm); - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); - cgp_open(fname.c_str(), CGNS_ENUMV(CG_MODE_READ), &cgid); + cgp_pio_mode(CGP_INDEPENDENT); + cgp_open(fname.c_str(), CG_MODE_READ, &cgid); int nbases = -1; cg_nbases(cgid, &nbases); diff --git a/phasta/CGNSFileWritingDev.txt b/phasta/CGNSFileWritingDev.txt new file mode 100644 index 000000000..4441cf355 --- /dev/null +++ b/phasta/CGNSFileWritingDev.txt @@ -0,0 +1,76 @@ +CGNS output from Chef + + + +This document is to describe work done to get CGNS output from Chef. + + + +Before doing that, I am going to list EXPECTATIONS of CGNS and how they align or not with classic Chef/PHASTA vs. PETSc/CEED-PHASTA (defs rank=part=process?.I will use part) + +I) CGNS expects global numbering for mesh nodes and elements and that numbering MUST start from 1 (not zero). + +II) The global numbering of elements is inclusive of both volume elements and boundary elements and also inclusive of all topologies with the numbering-start determined by what order you write them to file (might not be a requirement but simplest way when streaming). + +III) If using parallel writing (which we will have to do for any realistic size mesh), the ownership of the writer must be exclusive (write no data you don?t own), continuous (no skipped global numbers), and linearly increasing with part number (e.g., rank0 starts from 1 and ends on nOwnedByRank0, rank1 starts from nOwnedByRank0+1 and ends on nOwnedByRank1+nOwnedbyRank0 and so on). + + + + + +Going to a separate enumeration to discuss how that translated to our work on that now: + +Starting with the most basic, CGNS has the concept of a Base. We keep life simple and only have 1 base. +CGNS has the concept of a Zone. Someday if we get into overset grids (not likely) we might have more but for now, we only support 1 zone. +Within a Zone we will always be type Unstructured and a few things must be described while others are optional. CGNS provides writer ?functions? 
cg_ or cgp_, and these have a structure where one function establishes the file-node in the file/database and then you are able to call a second function to write the data at that node (this is a little bit like PHASTAIO's notion of write/read header followed by write/read data). cg_ means all parallel processes must have identical data to write, while cgp_ allows each process to write its portion of the data and CGNS collects (interpret collect as MPI-IO collective operations) that data within an HDF5 file. +Chef was co-developed with PHASTA to avoid global numbering and instead number from 0 to n_entity-1 on each rank when parallel, with separate data structures that tracked which rank owned a given entity and which ranks had remote copies of those entities. Chef created data structures for PHASTA to use to manage this partition-specific ownership. Thus, before we can write any parallel distributed data with the CGNS functions described in 3., we needed to create a map from PHASTA's numbering to a numbering that satisfies I)-III). Since that global node numbering is basically the same as PETSc's with a shift by 1, I copied code from PHASTA that did that for use with PETSc solvers (common/gen_ncorp.c) and modified it. That also needed the functionality of commuInt.f to communicate ownership on part boundaries back to all the replicas on other parts (which in turn required a chunk of code from ctypes.f) translated to C. All of this code makes use of the ilwork data structure that helps PHASTA know how to set up and efficiently perform peer-to-peer communication. At the end of this code insertion/translation, we have an ncorp array that maps from PHASTA/Chef numbering to CGNS numbering on each part, and thus we can now describe the arrays that are written (a minimal sketch of building this kind of owner-contiguous numbering appears at the end of this note). +CGNS of course has to store coordinates. It does so as flat double lists, one dimension at a time, so that means CoordinateX, then CoordinateY, then CoordinateZ for us. To be clear, to use the cgp mid-level functions to write these in parallel, PHASTA/Chef's part coordinate list must be sifted down to just its owners using the ncorp described in 4., and that compact ownership array (satisfying I)-III)) is passed through the cgp write functions (both file-node creation and parallel data write); a condensed sketch of this two-step pattern also appears at the end of this note. +Next in our output, though not absolutely required, is Solution. Similarly to step 5., CGNS has a function to create a file-node for Solution and then you add as many fields as needed to it (currently I have only coded Pressure, VelocityX, VelocityY, VelocityZ, and Temperature). Note CGNS is a standard and it mandates the names of these and any additional fields we might want to add, so read the docs. As with 5., these have to be sifted and mapped through ncorp to convert PHASTA/Chef's numbering to a compact array that can be written in parallel using the cgp writers. Note, as of 4.4.0, it looks to be possible to aggregate the writes described in 5. and 6. through cgp_coord_multi_* and cgp_field_multi_* respectively, but this has not been explored yet. +Next in the file is some User Data that was a backdoor to writing some data in parallel and to support parallel read with less work; I may describe it more later, but it is not required by CGNS so I am skipping it for now. +Next is a cell-centered solution file (that just means one value per 3D element or cell) that I put the RankOfWriter field in.
This is likely what the PETSc reader will use to understand the partition that Chef used to write this file; if that part-count matches the PETSc reader and solver, the file can be read and processed to derive all the parallel data structures PETSc/CEED-PHASTA needs. A CGNS issue has been filed to determine if they have a standard for this and, if not, whether there is interest in developing one. +Next is the first 3D element topology connectivity. Basically we create a separate node for each element topology and establish global numbering by rank (easy, as there are no replicas of elements and thus the ownership range was established definitively by the partitioner, so the ownership range just jumps by the number of that element type on a given part). If multi-topology, this repeats for the rest of the 3D element topologies. +Next is the first 2D boundary element topology, which follows the concepts of 9., as do the subsequent 2D boundary element topologies. At this time we have elected to write all the elements of a given topology in a single CGNS file-node even if they are distributed across multiple geometric model surfaces (not the only option). Note, since ALL cgp and cg writes are collective, all ranks, even those without boundary elements (or interior elements of a given topology), must participate. Obviously the same holds for the MPI_Exscan and PCU collectives. +We decided in the first pass to forgo writing ZonalBCs based on nodes in favor of writing them as mesh-sets (CGNS calls them PointLists, abstracting the face numbers to the non-existent point at the centroid of the mesh face), which are face numbers with a particular surfID set in the smd (GUI if Simmetrix model-based) or spj (flat text file if working with a dmg model as we do with the MATLAB->MGEN-MNER or SIMMETRIX->{MDLCONVERT,CONVERT(withExtrude)} workflows) to get to chef inputs. PETSc will then parse these mesh-sets into DMLabels for the boundary of the mesh. Then, it will handle Dirichlet and Neumann boundary conditions as it normally does (based on yaml input as to what type of BC is on a particular surfID number). For now we have a rather rigid prototype code that is limited to processing and writing 6 distinct mesh sets (one for each of the 6 faces of our topological box). It should not be hard to extend and generalize this code, but we took this shortcut in the first version. CGNS clearly supports direct nodal/Dirichlet PointSets but we have CHOSEN not to pursue this in the first pass. +Last but certainly not least is a file-node called ZonalGridConnectivity, which is how CGNS encodes periodic boundary conditions, as can be seen in the first/only leaf under that file-node, Periodic Connectivity. This has been set up rigidly to assume that the faces listed in PointList are ordered in the same way as the faces listed in PointListDonor, and further that surfID=2 is the donor and surfID=1 is the periodic partner of the donor. This is again a shortcut or hardcoded link that assumes the spj file has put surfID=1 on the face that is the periodic match for the face that has surfID=2. These meshes obviously need to be matched meshes, and this creates an issue we still need to resolve (described below). The code currently computes the translation between the two periodic planes. I found the documentation unclear but assumed that the vector points FROM the donor TO the periodic plane.
In the current inputs to the test codes the donor (surfID=2) is at zMax while the periodic plane (surfID=1) is at zMin, so this makes Translation[3]={0,0,-Lz}, but that might be backwards and would certainly be flipped if I got the FROM/TO flipped (here Lz is unsigned as it is the spanwise domain width). I made the code general by using the first element with surfID=2's centroid coordinates minus the first element with surfID=1's centroid coordinates (this picks up a y component of 1e-21 due to roundoff). + + +While the above is functional, the already mentioned ambiguity and the following issues/limitations remain unresolved: + + + +If we feed the current code a matched mesh, ncorp will be computed incorrectly for every point that is on a part boundary that is also matched. The reason for this is that ilwork was set up for PHASTA's needs and capabilities. As noted above, PHASTA has replica nodes as REAL nodes (nodes with local node ranges) that it uses for all on-rank work, and then the on-rank numbers do their parallel assembly with the true OWNER node, which in this case is not the node they physically share on the periodic plane; instead ilwork sets up a communication with the donor for that node. Consequently, if we use the ilwork data structure as it is made for PHASTA, ncorp will map that node to a global node number on the donor plane. Again, ilwork is only used in PHASTA for assembling equations, so this is right for PHASTA but will foul PETSc by providing a connectivity that gives global node numbers with coordinates on the donor plane. +Currently ZonalBC does not support parallel BC writing (cg available but not cgp). James is working with the CGNS development group to develop cgp_ptset_* for reading/writing PointSet data (which is also used for ZonalGridConnectivity), but for now we are doing MPI_Allgather{v} operations so that cg is correct. Note it is Allgather and not Gather because CGNS does not let part 0 write in serial but instead requires all ranks to have the same data and all to call cg with that same data for this to work correctly. We are told by CGNS developers (and this seems like it has to be true) that only part=0 is actually writing, but we have observed that any non-matching data on part!=0 results in a failure of cg. This is a potential scalability issue but seems likely to be addressed through the development of cgp for ZonalBCs. + + +Discussion of ISSUE 1) I just put this question to Jed and James in the GitHub PR, but in doing so I think it is clear that CGNS does need global node numbers for the periodic replicas of the owner/donor nodes. Thus we do need to do one of the following: + +A) Turn off matching (creates another conflict), + +B) suppress it during certain stages of chef's work, or similarly + +C) alter the code to not add this type of mapping in ilwork. + + + +The conflict with A) might be limited to Simmetrix wedge-tet meshes that won't match when there is an unstructured mesh region (like tets). TBH, on small meshes I can't get matching to work anyway. This is what we are currently doing, and it has forced us to re-discover matching by reordering the donor and periodic mesh sets to have their centroids match. To make this tractable, I did an MPI_Gather of the data to part 0 and did a serial sort there (a sketch of this centroid-keyed sort appears at the end of this note). This will eventually hit scalability issues (less of an issue for Q3 meshes as they are 3^d coarser than Q1 meshes), but it is still not pretty.
If we could keep matching on AND we were able to disable it from ilwork so that we build the global numbering that CGNS wants (periodic nodes/replicas have a global node number), then we MIGHT be able to use matching to order the periodic mesh set to match the order of the donor mesh set in parallel and avoid this serial bottleneck. + + + +CWS and KEJ discussed the following organization of options: + +I) Use SCOREC/core matching + +II) Order periodic faces without use of SCOREC/core matching information + + + +I) further breaks into the following steps and branches: + +To have matching available requires one of the following: a) replication of ilwork to an ilworkCGNS structure without accounting for periodic matching, so that it will build an ncorp as if matching were not present, or b) POSSIBLY, if the filterMatching flag is set, existing ilwork will create a PHASTA input set that is lacking periodicity and thus correct for CGNS, and yet the matching information is saved and can be restored for use in the CGNS code to determine matching. +The second aspect is how to do that matching, since inputs coming from the matchedNodeElement reader ONLY have VERTEX matching. Here again 3 options are possible: a) make the matching check for matches of nodes through face connectivity, b) make inputs to MNER richer to include face matching (likely available during mesh generation), c) develop code within MNER to elevate matching information to edges and faces (as is done with classification, though that is far easier because classification is to a geometric model that is simple enough to be on all ranks AND model entities are far fewer; this is currently limited to extrusions anyway, but so is periodicity, so not really a limitation). + +II) also breaks into at least 2 branches: + +distance of centroid collected to rank 0 and sorted (current approach), which we will likely use as long as the mesh size on the periodic plane does not make this intractable, +OR a breadth-first search that starts with a single matched face (seed) and then finds and adds neighboring faces to a list from which the next in some order (could be centroidal distance or other) is chosen. If the mesh is matched, this ordering can proceed in parallel for both the donor and the periodic mesh set. When a face is added that touches a part boundary, existing part boundary adjacency information is used to continue the search on another rank.
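
Appendix sketch 1 (referenced from item 4 above): a minimal illustration, not the gen_ncorp/commuInt code itself, of the owner-contiguous, 1-based numbering required by I)-III). Each rank's starting global ID is the sum of the owned counts on lower ranks, obtained with MPI_Exscan; numOwnedNodes is a placeholder for whatever count the caller has already computed, and propagating the owner's numbers back to the replicas on part boundaries (the commuInt step) is omitted.

#include <mpi.h>
#include <cstdint>
#include <vector>

// Sketch: compute 1-based, owner-contiguous global IDs for the nodes this rank owns.
std::vector<std::int64_t> ownedGlobalIds(std::int64_t numOwnedNodes, MPI_Comm comm)
{
  std::int64_t firstId = 0; // sum of owned counts on lower ranks
  MPI_Exscan(&numOwnedNodes, &firstId, 1, MPI_INT64_T, MPI_SUM, comm);
  int rank = 0;
  MPI_Comm_rank(comm, &rank);
  if (rank == 0) firstId = 0; // MPI_Exscan leaves rank 0's receive buffer undefined
  std::vector<std::int64_t> gid(numOwnedNodes);
  for (std::int64_t i = 0; i < numOwnedNodes; ++i)
    gid[i] = firstId + i + 1; // CGNS numbering starts at 1, not 0
  return gid;
}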
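
Appendix sketch 2 (referenced from item 5 above): the two-step file-node/data pattern described in 3., condensed to a single coordinate array. It assumes an already-open F/B/Z and a coordinate slice already condensed to owned nodes through ncorp; the actual implementation is CGNS_Coordinates in phCGNSgbc.cc.

#ifdef HAVE_CGNS
#include <cgnslib.h>
#include <pcgnslib.h>
#include <vector>

// Sketch: every rank collectively creates the coordinate file-node, then each rank
// writes only its owned, contiguous slice [start, end] (1-based, per I)-III)).
void writeCoordinateX(int F, int B, int Z, cgsize_t start, cgsize_t end,
                      const std::vector<double> &xOwned)
{
  int Cx = -1;
  if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx))
    cgp_error_exit(); // collective file-node creation
  if (cgp_coord_write_data(F, B, Z, Cx, &start, &end, xOwned.data()))
    cgp_error_exit(); // each rank writes only the data it owns
}
#endif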
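
Appendix sketch 3 (referenced from the ISSUE discussion above): a simplified view of the serial rank-0 ordering of periodic faces by centroid. Face and its fields are placeholders for the gathered (centroid, element id) data; the actual code uses the pairsortDI helper in phCGNSgbc.cc with a scalar distance key plus a fallback search for near-ties.

#include <algorithm>
#include <vector>

// Sketch: sort donor faces and periodic faces by their in-plane centroid so that
// donor[i] pairs with periodic[i]; the two lists share these coordinates up to roundoff.
struct Face { double cx, cy; long long elemId; };

static void sortByInPlaneCentroid(std::vector<Face> &faces)
{
  std::sort(faces.begin(), faces.end(), [](const Face &a, const Face &b) {
    if (a.cx != b.cx) return a.cx < b.cx; // lexicographic on the two in-plane coordinates
    return a.cy < b.cy;
  });
}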
diff --git a/phasta/CMakeLists.txt b/phasta/CMakeLists.txt index 0a785e268..4e5ff54ab 100644 --- a/phasta/CMakeLists.txt +++ b/phasta/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES phOutput.cc phLinks.cc phGeomBC.cc + phCGNSgbc.cc phBlock.cc phAdapt.cc phRestart.cc diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc new file mode 100644 index 000000000..e86eaf227 --- /dev/null +++ b/phasta/phCGNSgbc.cc @@ -0,0 +1,1281 @@ +#include +#include "phOutput.h" +#include "phIO.h" +#include "phiotimer.h" +#include +#include +#include +#include +#include +#include +#include +#include "phRestart.h" +#include +#include +#include "apfShape.h" + + +#ifdef HAVE_CGNS +// +#include +#include +// +#endif +typedef int lcorp_t; +#define NCORP_MPI_T MPI_INTEGER +static cgsize_t nDbgCG=50; +static int nDbgI=50; + +namespace { + +template +MPI_Datatype getMpiType(T) { + MPI_Datatype mpitype; + //determine the type based on what is being sent + if( std::is_same::value ) { + mpitype = MPI_DOUBLE; + } else if ( std::is_same::value ) { + mpitype = MPI_INT64_T; + } else if ( std::is_same::value ) { + mpitype = MPI_INT32_T; + } else { + assert(false); + fprintf(stderr, "Unknown type in %s... exiting\n", __func__); + exit(EXIT_FAILURE); + } + return mpitype; +} + +} + +// https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ +// Sort an array according to +// other using pair in STL. Modified to be real-int pair (for distance matching) and in a separate routine, two integers (for idx sort by surfID) +#include +using namespace std; + +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsortDI(double a[], int b[], int n) +{ + pair *pairt = new pair[n]; // when done delete pairt; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } + delete [] pairt; +} + +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsort(int a[], int b[], int n) +{ + pair *pairt = new pair[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. 
+ sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } + delete [] pairt; +} +void pairDeal6sort(int a[], int b[], int n) +{ + int c[6]={0}; + for (int i = 0; i < n; i++) c[a[i]-1]++; // count number each type in a pre-scan + int** p = new int*[6]; + for (int i = 0; i < 6; i++) p[i]=new int[c[i]]; + int** idx = new int*[6]; + for (int i = 0; i < 6; i++) idx[i]=new int[c[i]]; + for (int i = 0; i < 6; i++) c[i]=0; + int isrfM1; + for (int i = 0; i < n; i++) + { + isrfM1=a[i]-1; + p[isrfM1][c[isrfM1]]=b[i]; + idx[isrfM1][c[isrfM1]]=a[i]; + c[isrfM1]++; + } + int igc=0; + for (int j = 0; j < 6; j++){ + for (int i = 0; i < c[j]; i++) { + b[igc] = p[j][i]; + a[igc] = idx[j][i]; + igc++; + } + } + assert(igc==n); + for (int i = 0; i < 6; i++) delete [] p[i]; + for (int i = 0; i < 6; i++) delete [] idx[i]; + delete [] idx; + delete [] p; +} + + +namespace ph { + +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); + + +void commuInt(Output& o, cgsize_t* global) +{ // translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg=0; + int maxseg=1; + int numseg; + for (int itask=0; itaskcount(0); + o.arrays.ncorp = new cgsize_t[num_nodes]; + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + cgsize_t local_start_id; + cgsize_t gid; + + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + o.iownnodes = owned+local; +#ifdef PRINT_EVERYTHING + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); +#endif + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); + + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; + owner_counts[part] = owned+local; +#ifdef PRINT_EVERYTHING + for(i=0;i=0); + for(i=0;i=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } //char code[] = "out"; //int ione = 1; + + if(num_parts > 1) + commuInt(o, o.arrays.ncorp); +if(0==1) { + for (int ipart=0; ipart 1)); + } + return(num_local); +} + +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) +{ + int numtask = ilwork[0]; + int itkbeg = 0; //task offset + int owned = 0; + int i,j,k; + for(i=0;i= 0 && iacc <= 1); + int iother = ilwork[itkbeg+3]-1; //other rank (see ctypes.f for off by one) + int numseg = ilwork[itkbeg+4]; //number of segments + for(j=0;jcount(0); + size_t i = 0; + size_t phGnod = 0; + std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) + // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
We need it here too + // see hierarchic.f but note that is fortran numbering + if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume + if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 + if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 + for (int elem = 0; elem < nelem; ++elem){ + eCenx[elem]=0; + eCeny[elem]=0; + eCenz[elem]=0; + for (int vert = 0; vert < nvert; ++vert){ + phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global + c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global + eCenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eCeny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + eCenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + } + eCenx[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCeny[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCenz[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + } + PCU_ALWAYS_ASSERT(i == nelem*nvert); +} + +void getInterfaceConnectivityCGNS // not extended yet other than transpose +( + Output& o, + int block, + apf::DynamicArray& c +) +{ + int nelem = o.blocks.interface.nElements[block]; + int nvert0 = o.blocks.interface.keys[block].nElementVertices; + int nvert1 = o.blocks.interface.keys[block].nElementVertices1; + c.setSize(nelem * (nvert0 + nvert1)); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert0; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert1; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +// renamed stripped down to just give srfID +void getNaturalBCCodesCGNS(Output& o, int block, int* codes) +{ + int nelem = o.blocks.boundary.nElements[block]; + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][1]; //srfID is the second number so 1 +// if we wanted we could use PHASTA's bit in coding in the first number to us attributes to set +// arbitrary combinations of BCs but leaving that out for now +} + +void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 4: + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TETRA_4), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 5: + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PYRA_5), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 6: + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PENTA_6), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 8: + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(HEXA_8), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 3: + snprintf(Ename, 4, "Tri"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TRI_3), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 4: + 
snprintf(Ename, 5, "Quad"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(QUAD_4), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} + +// renamed and calling the renamed functions above with output writes now to CGNS +void writeBlocksCGNSinterior(int F,int B,int Z, int SCR, Output& o, cgsize_t *e_written) +{ + int E,Fs,Fs2,Fsb,Fsb2; + cgsize_t e_owned, e_start,e_end; + cgsize_t e_startg,e_endg; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + // create a centered solution + if ( cgp_field_write(F, B, Z, SCR, CGNS_ENUMV(Integer), "CellRank", &Fs)) + cgp_error_exit(); + int nblki= o.blocks.interior.getSize(); + int nvMap[4] = {4,5,6,8}; + int nvC,nvert,nvAll,invC,iblkC; + for (int i = 0; i < 4; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + for (int j = 0; j < nblki; ++j) { // check all blocks + BlockKey& k = o.blocks.interior.keys[j]; + nvert = o.blocks.interior.keys[j].nElementVertices; + if(nvC==nvert) { + invC=1; + iblkC=j; + break; + } else invC=0; + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; // = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + if(nvAll!=0) { //nvC present on at least 1 rank + if(invC!=0){ //nvC present on my rank + e_owned = o.blocks.interior.nElements[iblkC]; + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, iblkC, e); + } + else e_owned=0; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology + char Ename[5]; + topoSwitch(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + *e_written=e_endg; + if(invC!=0) free(e); + // create the field data for this process + int* d = NULL; + if(invC!=0){ //nvC present on my rank + d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = part; + // write the solution field data in parallel + } + if (cgp_field_write_data(F, B, Z, SCR, Fs, &e_start, &e_end, d)) + cgp_error_exit(); + if(invC!=0) free(d); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + // create Helper array for number of elements on part of a given topology + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + // create the field data for this process + int nIelVec=e_owned; + cgsize_t partP1=part+1; +if(0==1) printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) + cgp_error_exit(); + +if(0==1){ + printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); +// for (int ne=0; ne=start && en<=end) + dv[en-start]= part; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) + cgp_error_exit(); + // more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if ( 
cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) + dv[en-start]= srfID[ibel]; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) + cgp_error_exit(); +} +void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) +{ + int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + cgsize_t e_owned, e_start,e_end, e_startg,e_endg; + cgsize_t eVolElm=*e_written; + cgsize_t e_belWritten=0; + int nvMap[2] = {3,4}; + int iblkC[2]; + int estart[2]; + for (int i = 0; i < 2; ++i) { // check all topologies + nvAll=0; + invC=0; + nvC=nvMap[i]; + int icountB=0; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + if(nvert==nvC) { + invC=1; + iblkC[icountB]=j; // mark the block numbers (could be more than one) that have current topology + icountB++; + } + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; double* eCenx=NULL; double* eCeny=NULL; double* eCenz=NULL; + if(nvAll!=0) { //nvC present on at least 1 rank + e_owned=0; + if(invC!=0){ //nvC present on my rank + for (int j = 0; j < icountB; ++j) { // combine blocks + estart[j]=e_owned; + e_owned += o.blocks.boundary.nElements[iblkC[j]]; + } + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + eCenx = (double *)malloc( e_owned * sizeof(double)); + eCeny = (double *)malloc( e_owned * sizeof(double)); + eCenz = (double *)malloc( e_owned * sizeof(double)); + for (int j = 0; j < icountB; ++j) {// combine blocks + getBoundaryConnectivityCGNS(o, iblkC[j], &e[estart[j]], &eCenx[estart[j]], + &eCeny[estart[j]], &eCenz[estart[j]]); // stack repeated topologies + getNaturalBCCodesCGNS(o, iblkC[j], &srfID[e_belWritten+estart[j]]); // note e_owned counts all same topo + } + (*nStackedOnRank)++; // no longer have nblkb blocks so count them as you stack them + } + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology + char Ename[6]; + topoSwitchB(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); +if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); +if(0==1){ + for (int ne=0; ne search list srfID=2 list to find true match + vDSmin=vDistSq; + DistFails++; + for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be 
taken a lot then it could be narrowed e.g. j=max(0,i-50), j< i+min(matchFace,i+50), + iclose2=imapD2[j]; + d1=srfID1GCen[(iclose1)*3+0]-srfID2GCen[(iclose2)*3+0]; + d2=srfID1GCen[(iclose1)*3+1]-srfID2GCen[(iclose2)*3+1]; + vDistSq= d1*d1+d2*d2; + if(vDistSqcount(0); + cgsize_t gnod,start,end; + start=o.local_start_id; + end=start+o.iownnodes-1; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// write the solution field data in parallel + if (cg_sol_write(F, B, Z, "Solution", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Pressure", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityX", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityY", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityZ", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Temperature", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) + cgp_error_exit(); + free(p); free(u); free(v); free(w); free(T); free(data); +} +void CGNS_Coordinates(int F,int B,int Z, Output& o) +{ + int Cx,Cy,Cz; + if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateZ", &Cz)) + cgp_error_exit(); + +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. 
+ int num_nodes=o.mesh->count(0); + cgsize_t gnod; + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } +if(0==1) { + printf("%ld, %ld \n", start, end); + for (int ne=0; necount(0); +if(0==1){ // ilwork debugging + for (int ipart=0; ipart PETSc global node number (1-based) +// o.iownnodes => nodes owned by this rank +// o.local_start_id => this rank's first node number (1-based and also which must be a long long int) + long safeArg=o.iownnodes; // cgsize_t could be an int + sizes[0]=PCU_Add_Long(safeArg); + int ncells=m->count(m->getDimension()); // this ranks number of elements + safeArg=ncells; // cgsize_t could be an int + sizes[1]=PCU_Add_Long(safeArg); + sizes[2]=0; + if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || + cg_base_write(F, "Base", 3, 3, &B) ) + cgp_error_exit(); + if ( cg_goto(F,B,"end")) + cgp_error_exit(); + if ( cg_dataclass_write(CGNS_ENUMV(Dimensional))) + cgp_error_exit(); + cg_units_write(CGNS_ENUMV(Kilogram),CGNS_ENUMV(Meter),CGNS_ENUMV(Second),CGNS_ENUMV(Kelvin),CGNS_ENUMV(Degree)); + if ( cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) + cgp_error_exit(); + // create data nodes for coordinates + cg_set_file_type(CG_FILE_HDF5); + CGNS_Coordinates(F,B,Z, o); + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &SCR)) + cgp_error_exit(); +// Paraview will only viz the first sol node created so control that with writeCGNSFiles flag + CirculateSolWriteOrder(F,B,Z,&SVR,&SBVS,&SBVR,o); + CGNS_VertexRank(F,B,Z,SVR, o); + // create Helper array for number of elements on rank + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + // create the field data for this process + int nCoordVec=o.iownnodes; + cgsize_t partP1=part+1; +if(0==1) printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) + cgp_error_exit(); + cgsize_t e_written=0; + writeBlocksCGNSinterior(F,B,Z,SCR,o,&e_written); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z, SBVR, SBVS, o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); 
+ free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); +} +} // namespace diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 983570d43..d701ba26e 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -197,19 +197,21 @@ namespace ph { ph::enterFilteredMatching(m, in, bcs); ph::generateOutput(in, bcs, m, out); ph::exitFilteredMatching(m); - // a path is not needed for inmem - if ( in.writeRestartFiles ) { - if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); - // store the value of the function pointer - FILE* (*fn)(Output& out, const char* path) = out.openfile_write; - // set function pointer for file writing - out.openfile_write = chef::openfile_write; - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart - // reset the function pointer to the original value - out.openfile_write = fn; - } - else { - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + if ( in.writeCGNSFiles ==0 ) { // for now, don't write restarts when writing CGNS since writing restarts is bundled with destroying fields + // a path is not needed for inmem + if ( in.writeRestartFiles ) { + if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); + // store the value of the function pointer + FILE* (*fn)(Output& out, const char* path) = out.openfile_write; + // set function pointer for file writing + out.openfile_write = chef::openfile_write; + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + // reset the function pointer to the original value + out.openfile_write = fn; + } + else { + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + } } if ( ! in.outMeshFileName.empty() ) m->writeNative(in.outMeshFileName.c_str()); @@ -224,6 +226,9 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc + out.writeCGNSFiles=in.writeCGNSFiles; + if ( in.writeCGNSFiles > 0 ) + ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); m->verify(); diff --git a/phasta/phInput.cc b/phasta/phInput.cc index ffc9989c2..65118d677 100644 --- a/phasta/phInput.cc +++ b/phasta/phInput.cc @@ -60,6 +60,7 @@ static void setDefaults(Input& in) in.axisymmetry = 0; in.parmaLoops = 3; //a magical value in.parmaVerbosity = 1; //fairly quiet + in.writeCGNSFiles = 0; // write CGNS Files in.writeGeomBCFiles = 0; // write additional geombc file for vis in streaming in.writeRestartFiles = 0; // write additional restart file for vis in streaming in.writeVTK = 0; @@ -153,6 +154,7 @@ static void formMaps(Input& in, StringMap& stringMap, IntMap& intMap, DblMap& db intMap["parmaLoops"] = &in.parmaLoops; intMap["parmaVerbosity"] = &in.parmaVerbosity; intMap["writeVTK"] = &in.writeVTK; + intMap["writeCGNSFiles"] = &in.writeCGNSFiles; intMap["writeGeomBCFiles"] = &in.writeGeomBCFiles; intMap["writeRestartFiles"] = &in.writeRestartFiles; intMap["ramdisk"] = &in.ramdisk; diff --git a/phasta/phInput.h b/phasta/phInput.h index a6bf88c90..28123f805 100644 --- a/phasta/phInput.h +++ b/phasta/phInput.h @@ -147,6 +147,8 @@ class Input /** \brief write the geombc file during in-memory data transfer between phasta and chef. 
*/ int writeGeomBCFiles; + /* \brief write CGNS files for pre-processing */ + int writeCGNSFiles; /** \brief write the restart file during in-memory data transfer between phasta and chef. */ int writeRestartFiles; diff --git a/phasta/phOutput.cc b/phasta/phOutput.cc index d4b71028b..fd5e73a69 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -997,6 +997,7 @@ Output::~Output() //nOwnedNodes will still be zero. if(!nOwnedNodes) return; + delete [] arrays.ncorp; delete [] arrays.coordinates; delete [] arrays.ilwork; delete [] arrays.ilworkf; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 6a93e9d96..0e3c85351 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -4,6 +4,13 @@ #include "phInput.h" #include "phBlock.h" #include "phBC.h" +#ifdef HAVE_CGNS +// +#include +#include +// +#endif + namespace apf { class Mesh; @@ -136,6 +143,11 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 int* ifather; /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; +/* an array that maps on-rank-node-number (input) to PETSc global-node-number */ +// worked but long int* ncorp; +#ifdef HAVE_CGNS + cgsize_t* ncorp; +#endif }; @@ -153,6 +165,11 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; +#ifdef HAVE_CGNS + cgsize_t local_start_id; /* this rank's first global node number (1 based) */ + int iownnodes; /* how many node this rank owns */ +#endif + int writeCGNSFiles; int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ @@ -168,6 +185,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNS(Output& o, std::string path); } diff --git a/phasta/phRestart.h b/phasta/phRestart.h index cd82967b6..f17b690d6 100644 --- a/phasta/phRestart.h +++ b/phasta/phRestart.h @@ -27,6 +27,8 @@ void detachAndWriteSolution(Input& in, Output& out, void attachZeroSolution(Input& in, apf::Mesh* m); void detachField(apf::Field* f, double*& data, int& size); +void detachField(apf::Mesh* m, const char* fieldname, double*& data, int& size); + } diff --git a/pumi-meshes b/pumi-meshes index c00ba9c16..0629ac309 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit c00ba9c16cacbb361ee538c03a3ec694ddb989f2 +Subproject commit 0629ac3090be4905a3f98d602fb1379eb6b31b00 diff --git a/test/cgns.cc b/test/cgns.cc index 6f853d2e0..a3c57d777 100644 --- a/test/cgns.cc +++ b/test/cgns.cc @@ -119,7 +119,7 @@ pMesh toPumi(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) return pm; } -auto additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) +std::function additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) { // seems essential to make pm first before calling balance or reorder... auto pm = toPumi(prefix, g, mesh); diff --git a/test/matchedNodeElmReader.cc b/test/matchedNodeElmReader.cc index 4dfdfabd0..7b4f11a59 100644 --- a/test/matchedNodeElmReader.cc +++ b/test/matchedNodeElmReader.cc @@ -20,33 +20,6 @@ #include #include -/* from https://github.com/SCOREC/core/issues/205 -0=fully interior of the volume -1-6 =classified on face (not edge or vertex) -11-22 = classified on model edge (not end points which are model vertices) -31-38 = classified on a model vertex. 
-*/ - -/* tags on vertices */ -#define INTERIORTAG 0 -#define FACE 1 -#define FACE_LAST 6 -#define EDGE 11 -#define EDGE_LAST 22 -#define VERTEX 31 -#define VERTEX_LAST 38 - -/* model entity ids */ -//#define INTERIOR_REGION 0 -//int INTERIOR_REGION=0; // initialized but will be checked from read input - -//Manifold single region apf::ModelEntity* getMdlRgn(gmi_model* model) { -//Manifold single region apf::ModelEntity* rgn = reinterpret_cast( -//Manifold single region gmi_find(model, 3, INTERIOR_REGION)); -//Manifold single region PCU_ALWAYS_ASSERT(rgn); -//Manifold single region return rgn; -//Manifold single region } - apf::ModelEntity* getMdlRegion(apf::Mesh2* mesh, int tag) { apf::ModelEntity* region = mesh->findModelEntity(3,tag); @@ -747,17 +720,16 @@ int main(int argc, char** argv) PCU_Comm_Init(); lion_set_verbosity(1); int noVerify=0; // maintain default of verifying if not explicitly requesting it off - if( argc < 11 ) { + if( argc < 10 ) { if( !PCU_Comm_Self() ) { - printf("Usage: %s " - " " + printf("Usage: %s no rank but .rank added to next 6 " + " " " " " " " " " " " " - " " - " " + " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); } @@ -767,14 +739,14 @@ int main(int argc, char** argv) gmi_register_mesh(); gmi_register_null(); - if( argc == 11 ) noVerify=atoi(argv[10]); + if( argc == 11 ) noVerify=atoi(argv[9]); double t0 = PCU_Time(); MeshInfo m; readMesh(argv[2],argv[3],argv[4],argv[5],argv[6],argv[7],argv[8],m); bool isMatched = true; - if( !strcmp(argv[3], "NULL") ) + if( !strcmp(argv[4], "NULL") ) isMatched = false; if(!PCU_Comm_Self()) @@ -822,7 +794,7 @@ int main(int argc, char** argv) outMap.clear(); apf::writeVtkFiles("rendered",mesh); - mesh->writeNative(argv[10]); + mesh->writeNative(argv[9]); if(noVerify != 1) mesh->verify(); mesh->destroyNative(); diff --git a/test/swapDoubles.cc b/test/swapDoubles.cc index beacc27d2..f85c2a531 100644 --- a/test/swapDoubles.cc +++ b/test/swapDoubles.cc @@ -25,5 +25,6 @@ int main(int argc, char** argv) { } delete [] d_orig; delete [] d; + MPI_Finalize(); return 0; } diff --git a/test/testing.cmake b/test/testing.cmake index 069006a26..d256c6b30 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -69,6 +69,90 @@ else() set(GXT dmg) endif() +if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + add_test(NAME chef-CGNS-multitopology1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + + mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + add_test(NAME chef-CGNS-multitopology2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + + mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) + add_test(NAME chef-CGNS-multitopology4-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) +endif() + +if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + 
add_test(NAME chef-CGNS-8hex1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-8hex2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-smallTet1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-smallTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-smallTet2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllHex1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllHex2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllTet-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllTet2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllWedge1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllWedge2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) +endif() + set(MDIR ${MESHES}/phasta/dg) if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND) @@ -213,7 +297,7 @@ mpi_test(matchedNodeElementReader_p1 1 "${MDIR}/1part/geom3D.fathr" "NULL" "${MDIR}/1part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") mpi_test(matchedNodeElementReader_p4 4 ./matchedNodeElmReader @@ -225,7 +309,7 @@ mpi_test(matchedNodeElementReader_p4 4 "${MDIR}/4part/geom3D.fathr" "NULL" "${MDIR}/4part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") set(MDIR ${MESHES}/gmsh) mpi_test(gmshv2TwoQuads 1 @@ -515,7 +599,7 @@ if(ENABLE_ZOLTAN) ) endif() -if(ENABLE_CGNS AND ENABLE_ZOLTAN) +if(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) # # sort of an arbitrary choice set(numProcs 4) @@ -560,7 +644,7 @@ mpi_test(cgns_3d_2 ${numProcs} # # 3D BCS tests # -set(numProcs 5) +set(numProcs 4) # set(CGNSDIR ${MESHES}/cgns/withBCS/3D) # @@ -600,7 +684,7 @@ mpi_test(cgns_bcs_3 ${numProcs} bcs3.smb additional) -endif(ENABLE_CGNS AND ENABLE_ZOLTAN) 
+endif(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) mpi_test(construct 4 ./construct