diff --git a/CMakeLists.txt b/CMakeLists.txt index 7648e54d3..ab2af3869 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,9 +16,7 @@ option(USE_XSDK_DEFAULTS "enable the XDSK v0.3.0 default configuration" NO) #requre c++11 without extensions set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSION OFF) -if(NOT ENABLE_CGNS) - set(CMAKE_CXX_STANDARD 11) -endif() +set(CMAKE_CXX_STANDARD 11) xsdk_begin_package() bob_begin_package() @@ -27,8 +25,7 @@ if(USE_XSDK_DEFAULTS) xsdk_compiler_flags() endif() -# require c++14 -option(ENABLE_CGNS "Enable the CGNS reader: requires c++14 extensions" OFF) +option(ENABLE_CGNS "Enable the CGNS reader" OFF) message(STATUS "ENABLE_CGNS: ${ENABLE_CGNS}") # Set some default compiler flags that should always be used @@ -37,10 +34,7 @@ if(NOT USE_XSDK_DEFAULTS) bob_begin_cxx_flags() bob_end_cxx_flags() set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS}") - if(ENABLE_CGNS) #takes precedence over SCOREC_ENABLE_CXX11 - message(STATUS "enabling cxx14") - bob_cxx14_flags() - elseif(SCOREC_ENABLE_CXX11) + if(SCOREC_ENABLE_CXX11) bob_cxx11_flags() endif() endif() @@ -60,6 +54,8 @@ message(STATUS "IS_TESTING: ${IS_TESTING}") set(MESHES "${CMAKE_SOURCE_DIR}/pumi-meshes" CACHE STRING "Directory of test meshes") message(STATUS "MESHES: ${MESHES}") +get_filename_component(MESHES ${MESHES} ABSOLUTE) +message(STATUS "Using absolute file path MESHES: ${MESHES}") option(BUILD_EXES "Build executables" ON) message(STATUS "BUILD_EXES: ${BUILD_EXES}") @@ -135,6 +131,7 @@ if(ENABLE_OMEGA_H) endif() if(ENABLE_CGNS) + option(ENABLE_CGNS_MULTI_BASE "Enable the CGNS Multi Base tests" OFF) set(SCOREC_USE_CGNS_DEFAULT ${ENABLE_CGNS}) bob_public_dep(CGNS) #CGNS does not provide cmake targets :( @@ -172,8 +169,10 @@ add_library(core INTERFACE) target_link_libraries(core INTERFACE ${SCOREC_EXPORTED_TARGETS}) if(ENABLE_CGNS) target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen - target_compile_features(core INTERFACE cxx_std_14) + # target_compile_features(core INTERFACE cxx_std_14) + target_compile_features(core INTERFACE cxx_std_11) else() + target_link_libraries(core INTERFACE ${CMAKE_DL_LIBS}) #HDF5 uses dlopen target_compile_features(core INTERFACE cxx_std_11) endif() scorec_export_library(core) diff --git a/apf/apfCGNS.cc b/apf/apfCGNS.cc index 4debbb58b..da3b2b419 100644 --- a/apf/apfCGNS.cc +++ b/apf/apfCGNS.cc @@ -11,6 +11,7 @@ #include "apfNumberingClass.h" #include "apfShape.h" #include "apfFieldData.h" +#include #include #include // @@ -269,9 +270,16 @@ void WriteTags(const CGNS &cgns, const std::vector> &orderedEnts, const std::vector> &ranges, const std::vector &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) + +typedef std::vector VecMeshEntity_t; +typedef std::pair CGRange_t; + +void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const std::vector &ranges, const VecMeshEntity_t &orderedVertices, const int &vStart, const int &vEnd, apf::Mesh *m) { - const auto writeField = [&m, &cgns](apf::Field *f, const auto &orderedEnts, const int &solIndex, const auto &inner, const auto &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { + typedef std::function *fieldData, std::vector &ddata, const int &numComponents, const int &component)> innerLambda_t; + typedef std::function &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex)> postLambda_t; + + const auto writeField = [&m, &cgns](apf::Field *f, const 
VecMeshEntity_t &orderedEnt, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const int &numComponents, const int &component, const std::string &fieldName, const int &start, const int &end, int &fieldIndex) { std::vector data; cgsize_t rmin[3]; @@ -281,7 +289,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData = f->getData(); - for (const auto &e : orderedEnts) + for (const auto &e : orderedEnt) { if (fieldData->hasEntity(e) && m->isOwned(e)) { @@ -310,7 +318,7 @@ void WriteFields(const CGNS &cgns, const std::vector &orderedEnts, const int &solIndex, const innerLambda_t &inner, const postLambda_t &post, const std::vector &ranges) { for (int i = 0; i < m->countFields(); ++i) { apf::Field *f = m->getField(i); @@ -335,7 +343,7 @@ void WriteFields(const CGNS &cgns, const std::vectorcountFields(); ++i) { apf::Field *f = m->getField(i); @@ -352,12 +360,12 @@ void WriteFields(const CGNS &cgns, const std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { + const postLambda_t postLambda = [&cgns](const int &solIndex, std::vector &ddata, const cgsize_t *rmin, const cgsize_t *rmax, const int &globalSize, const int &fieldIndex) { if (globalSize > 0) { if (cgp_field_write_data(cgns.index, cgns.base, cgns.zone, solIndex, fieldIndex, &rmin[0], &rmax[0], @@ -366,7 +374,7 @@ void WriteFields(const CGNS &cgns, const std::vector *fieldData, std::vector &ddata, const int &numComponents, const int &component) { + const innerLambda_t innerLambda = [](apf::MeshEntity *elem, apf::FieldDataOf *fieldData, std::vector &ddata, const int &numComponents, const int &component) { std::vector vals(numComponents, -12345); fieldData->get(elem, vals.data()); //std::cout << numComponents << " " << component << " " << vals[0] << std::endl; @@ -390,7 +398,7 @@ void WriteFields(const CGNS &cgns, const std::vector WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) { int Cx = -1; int Cy = -1; @@ -412,7 +420,7 @@ auto WriteVertices(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbering *gvn) cgp_error_exit(); } - std::vector orderedVertices; + VecMeshEntity_t orderedVertices; cgsize_t vertexMin[3]; cgsize_t vertexMax[3]; cgsize_t contigRange = -1; @@ -574,7 +582,10 @@ CellElementReturn WriteElements(const CGNS &cgns, apf::Mesh *m, apf::GlobalNumbe void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, const int &cellCount, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap, const std::map &apf2cgns, apf::GlobalNumbering *gvn) { - const auto EdgeLoop = [&m](const auto &lambda, apf::MeshTag *edgeTag) { + typedef std::function LambdaMeshEntity_t; + typedef std::vector VecCGNSInfo_t; + + const auto EdgeLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *edgeTag) { apf::MeshIterator *edgeIter = m->begin(1); apf::MeshEntity *edge = nullptr; int vals[1]; @@ -591,7 +602,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(edgeIter); }; - const auto FaceLoop = [&m](const auto &lambda, apf::MeshTag *faceTag) { + const auto FaceLoop = [&m](const LambdaMeshEntity_t &lambda, apf::MeshTag *faceTag) { apf::MeshIterator *faceIter = m->begin(2); apf::MeshEntity *face = nullptr; int vals[1]; @@ -608,7 +619,8 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, m->end(faceIter); }; - const auto BCEntityAdder = [&apf2cgns, &m, &cgns, &gvn](const auto &Looper, const auto &bcGroup, int &startingLocation) { + + const auto BCEntityAdder = 
[&apf2cgns, &m, &cgns, &gvn](const std::function &Looper, const apf::CGNSInfo &bcGroup, int &startingLocation) { std::map> bcEntTypes; for (const auto &r : apf2cgns) bcEntTypes.insert(std::make_pair(r.first, std::vector())); @@ -715,7 +727,9 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, PCU_Get_Comm()); }; - const auto doVertexBC = [&](const auto &iter) { + typedef std::map, std::vector>::const_iterator MapCGNSInfo_t; + + const auto doVertexBC = [&](const MapCGNSInfo_t &iter) { for (const auto &p : iter->second) { std::vector bcList; @@ -751,7 +765,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doEdgeBC = [&](const auto &iter, int &startingLocation) { + const auto doEdgeBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(EdgeLoop, p, startingLocation); @@ -774,7 +788,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doFaceBC = [&](const auto &iter, int &startingLocation) { + const auto doFaceBC = [&](const MapCGNSInfo_t &iter, int &startingLocation) { for (const auto &p : iter->second) { const auto se = BCEntityAdder(FaceLoop, p, startingLocation); @@ -797,7 +811,7 @@ void AddBocosToMainBase(const CGNS &cgns, const CellElementReturn &cellResults, } }; - const auto doCellBC = [&](const auto &iter, const int &) { + const auto doCellBC = [&](const MapCGNSInfo_t &iter, const int &) { for (const auto &p : iter->second) { std::vector bcList; @@ -1009,7 +1023,9 @@ void Write2DEdges(CGNS cgns, apf::Mesh *m, const Count &edgeCount, const Count & // Todo split this out into a list of calls to local functions to show process/work flow void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); + + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); + const auto myRank = PCU_Comm_Self(); const Count vertexCount = count(m, 0); @@ -1051,11 +1067,11 @@ void WriteCGNS(const char *prefix, apf::Mesh *m, const apf::CGNSBCMap &cgnsBCMap auto communicator = PCU_Get_Comm(); cgp_mpi_comm(communicator); // - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); + cgp_pio_mode(CGP_INDEPENDENT); CGNS cgns; cgns.fname = std::string(prefix); - if (cgp_open(prefix, CGNS_ENUMV(CG_MODE_WRITE), &cgns.index)) + if (cgp_open(prefix, CG_MODE_WRITE, &cgns.index)) cgp_error_exit(); { diff --git a/mds/mdsCGNS.cc b/mds/mdsCGNS.cc index acdef4aae..cf0230f3d 100644 --- a/mds/mdsCGNS.cc +++ b/mds/mdsCGNS.cc @@ -177,19 +177,19 @@ struct MeshDataGroup if (components.size() == 1) { std::cout << "Scalar Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 3) { std::cout << "Vector Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si << " " << m.second.fi << std::endl; } else if (components.size() == 9) { std::cout << "Matrix Group has " << components.size() << " related componenets: " << std::endl; - for (const auto m : components) + for (const auto &m : components) std::cout << "Field " << m.second.name << " @ " << m.second.si 
<< " " << m.second.fi << std::endl; } else @@ -265,7 +265,7 @@ void Kill(const int fid) } } -auto ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) +std::map> ReadCGNSCoords(int cgid, int base, int zone, int ncoords, int nverts, const std::vector &, const apf::GlobalToVert &globalToVert) { // Read min required as defined by consecutive range // make one based as ReadElements makes zero based @@ -389,7 +389,7 @@ void SimpleElementPartition(std::vector &numberToReadPerProc, std::vec using Pair = std::pair; using LocalElementRanges = std::vector; // one based -auto ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) +std::tuple, cgsize_t> ReadElements(int cgid, int base, int zone, int section, int el_start /* one based */, int el_end, int numElements, int verticesPerElement, LocalElementRanges &localElementRanges) { std::vector numberToReadPerProc; std::vector startingIndex; @@ -1051,13 +1051,13 @@ void ReadBCInfo(const int cgid, const int base, const int zone, const int nBocos apf::Mesh2 *DoIt(gmi_model *g, const std::string &fname, apf::CGNSBCMap &cgnsBCMap, const std::vector> &readMeshData) { - static_assert(std::is_same::value, "cgsize_t not compiled as int"); + PCU_ALWAYS_ASSERT_VERBOSE(sizeof(cgsize_t) == sizeof(int), "cgsize_t is not size of int"); int cgid = -1; auto comm = PCU_Get_Comm(); cgp_mpi_comm(comm); - cgp_pio_mode(CGNS_ENUMV(CGP_INDEPENDENT)); - cgp_open(fname.c_str(), CGNS_ENUMV(CG_MODE_READ), &cgid); + cgp_pio_mode(CGP_INDEPENDENT); + cgp_open(fname.c_str(), CG_MODE_READ, &cgid); int nbases = -1; cg_nbases(cgid, &nbases); diff --git a/phasta/CGNSFileWritingDev.txt b/phasta/CGNSFileWritingDev.txt new file mode 100644 index 000000000..4441cf355 --- /dev/null +++ b/phasta/CGNSFileWritingDev.txt @@ -0,0 +1,76 @@ +CGNS output from Chef + + + +This document is to describe work done to get CGNS output from Chef. + + + +Before doing that, I am going to list EXPECTATIONS of CGNS and how they align or not with classic Chef/PHASTA vs. PETSc/CEED-PHASTA (defs rank=part=process?.I will use part) + +I) CGNS expects global numbering for mesh nodes and elements and that numbering MUST start from 1 (not zero). + +II) The global numbering of elements is inclusive of both volume elements and boundary elements and also inclusive of all topologies with the numbering-start determined by what order you write them to file (might not be a requirement but simplest way when streaming). + +III) If using parallel writing (which we will have to do for any realistic size mesh), the ownership of the writer must be exclusive (write no data you don?t own), continuous (no skipped global numbers), and linearly increasing with part number (e.g., rank0 starts from 1 and ends on nOwnedByRank0, rank1 starts from nOwnedByRank0+1 and ends on nOwnedByRank1+nOwnedbyRank0 and so on). + + + + + +Going to a separate enumeration to discuss how that translated to our work on that now: + +Starting with the most basic, CGNS has the concept of a Base. We keep life simple and only have 1 base. +CGNS has the concept of a Zone. Someday if we get into overset grids (not likely) we might have more but for now, we only support 1 zone. +Within a Zone we will always be type Unstructured and a few things must be described while others are optional. CGNS provides writer ?functions? 
cg_ or cgp_, and these have a structure where one function establishes the file-node in the file/database and then you are able to call a second function to write the data at that node (this is a little bit like PHASTAIO's notion of write/read header followed by write/read data). cg_ means all parallel processes must have identical data to write, while cgp_ allows each process to write its portion of the data and CGNS collects (interpret collect as MPI-IO collective operations) that data within an HDF5 file. +Chef was co-developed with PHASTA to avoid global numbering and instead number from 0 to n_entity-1 on each rank when parallel, with separate data structures that tracked which rank owned a given entity and which ranks had remote copies of those entities. Chef created data structures for PHASTA to use to manage this partition-specific ownership. Thus, before we can write any parallel distributed data with the CGNS functions described in 3., we needed to create a map from PHASTA's numbering to a numbering that satisfies I)-III). Since that global node numbering is basically the same as PETSc's with a shift by 1, I copied code from PHASTA that did that for use with PETSc solvers (common/gen_ncorp.c) and modified it. That also needed the functionality of commuInt.f to communicate ownership on part boundaries back to all the replicas on other parts (which in turn required a chunk of code from ctypes.f) translated to C. All of this code makes use of the ilwork data structure that helps PHASTA know how to set up and efficiently perform peer-to-peer communication. At the end of this code insertion/translation, we have an ncorp array that maps from PHASTA/Chef numbering to CGNS numbering on each part, and thus we can now describe the arrays that are written (a minimal sketch of building this kind of owner-contiguous numbering appears at the end of this note). +CGNS of course has to store coordinates. It does so as flat double lists, one dimension at a time, so that means CoordinateX, then CoordinateY, then CoordinateZ for us. To be clear, to use the cgp mid-level functions to write these in parallel, PHASTA/Chef's part coordinate list must be sifted down to just its owners using the ncorp described in 4., and that compact ownership array (satisfying I)-III)) is passed through the cgp write functions (both file-node creation and parallel data write); a condensed sketch of this two-step pattern also appears at the end of this note. +Next in our output, though not absolutely required, is Solution. Similarly to step 5., CGNS has a function to create a file-node for Solution and then you add as many fields as needed to it (currently I have only coded Pressure, VelocityX, VelocityY, VelocityZ, and Temperature). Note CGNS is a standard and it mandates the names of these and any additional fields we might want to add, so read the docs. As with 5., these have to be sifted and mapped through ncorp to convert PHASTA/Chef's numbering to a compact array that can be written in parallel using the cgp writers. Note, as of 4.4.0, it looks to be possible to aggregate the writes described in 5. and 6. through cgp_coord_multi_* and cgp_field_multi_* respectively, but this has not been explored yet. +Next in the file is some User Data that was a backdoor to writing some data in parallel and to support parallel read with less work; I may describe it more later, but it is not required by CGNS so I am skipping it for now. +Next is a cell-centered solution file (that just means one value per 3D element or cell) that I put the RankOfWriter field in.
This is likely what the PETSc reader will use to understand the partition that Chef used to write this file; if that part-count matches the PETSc reader and solver, the file can be read and processed to derive all the parallel data structures PETSc/CEED-PHASTA needs. A CGNS issue has been filed to determine if they have a standard for this and, if not, whether there is interest in developing one. +Next is the first 3D element topology connectivity. Basically we create a separate node for each element topology and establish global numbering by rank (easy, as there are no replicas of elements and thus the ownership range was established definitively by the partitioner, so the ownership range just jumps by the number of that element type on a given part). If multi-topology, this repeats for the rest of the 3D element topologies. +Next is the first 2D boundary element topology, which follows the concepts of 9., as do the subsequent 2D boundary element topologies. At this time we have elected to write all the elements of a given topology in a single CGNS file-node even if they are distributed across multiple geometric model surfaces (not the only option). Note, since ALL cgp and cg writes are collective, all ranks, even those without boundary elements (or interior elements of a given topology), must participate. Obviously the same holds for the MPI_Exscan and PCU collectives. +We decided in the first pass to forgo writing ZonalBCs based on nodes in favor of writing them as mesh-sets (CGNS calls them PointLists, abstracting the face numbers to the non-existent point at the centroid of the mesh face), which are face numbers with a particular surfID set in the smd (GUI if Simmetrix model-based) or spj (flat text file if working with a dmg model as we do with the MATLAB->MGEN-MNER or SIMMETRIX->{MDLCONVERT,CONVERT(withExtrude)} workflows) to get to chef inputs. PETSc will then parse these mesh-sets into DMLabels for the boundary of the mesh. Then, it will handle Dirichlet and Neumann boundary conditions as it normally does (based on yaml input as to what type of BC is on a particular surfID number). For now we have a rather rigid prototype code that is limited to processing and writing 6 distinct mesh sets (one for each of the 6 faces of our topological box). It should not be hard to extend and generalize this code, but we took this shortcut in the first version. CGNS clearly supports direct nodal/Dirichlet PointSets but we have CHOSEN not to pursue this in the first pass. +Last but certainly not least is a file-node called ZonalGridConnectivity, which is how CGNS encodes periodic boundary conditions, as can be seen in the first/only leaf under that file-node, Periodic Connectivity. This has been set up rigidly to assume that the faces listed in PointList are ordered in the same way as the faces listed in PointListDonor, and further that surfID=2 is the donor and surfID=1 is the periodic partner of the donor. This is again a shortcut or hardcoded link that assumes the spj file has put surfID=1 on the face that is the periodic match for the face that has surfID=2. These meshes obviously need to be matched meshes, and this creates an issue we still need to resolve (described below). The code currently computes the translation between the two periodic planes. I found the documentation unclear but assumed that the vector points FROM the donor TO the periodic plane.
In the current inputs to the test codes the donor (surfID=2) is at zMax while the periodic plane (surfID=1) is at zMin, so this makes Translation[3]={0,0,-Lz}, but that might be backwards and would certainly be flipped if I got the FROM/TO flipped (here Lz is unsigned as it is the spanwise domain width). I made the code general by using the first element with surfID=2's centroid coordinates minus the first element with surfID=1's centroid coordinates (this picks up a y component of 1e-21 due to roundoff). + + +While the above is functional, the already mentioned ambiguity and the following issues/limitations remain unresolved: + + + +If we feed the current code a matched mesh, ncorp will be computed incorrectly for every point that is on a part boundary that is also matched. The reason for this is that ilwork was set up for PHASTA's needs and capabilities. As noted above, PHASTA has replica nodes as REAL nodes (nodes with local node ranges) that it uses for all on-rank work, and then the on-rank numbers do their parallel assembly with the true OWNER node, which in this case is not the node they physically share on the periodic plane; instead ilwork sets up a communication with the donor for that node. Consequently, if we use the ilwork data structure as it is made for PHASTA, ncorp will map that node to a global node number on the donor plane. Again, ilwork is only used in PHASTA for assembling equations, so this is right for PHASTA but will foul PETSc by providing a connectivity that gives global node numbers with coordinates on the donor plane. +Currently ZonalBC does not support parallel BC writing (cg available but not cgp). James is working with the CGNS development group to develop cgp_ptset_* for reading/writing PointSet data (which is also used for ZonalGridConnectivity), but for now we are doing MPI_Allgather{v} operations so that cg is correct. Note it is Allgather and not Gather because CGNS does not let part 0 write in serial but instead requires all ranks to have the same data and all to call cg with that same data for this to work correctly. We are told by CGNS developers (and this seems like it has to be true) that only part=0 is actually writing, but we have observed that any non-matching data on part!=0 results in a failure of cg. This is a potential scalability issue but seems likely to be addressed through the development of cgp for ZonalBCs. + + +Discussion of ISSUE 1) I just put this question to Jed and James in the GitHub PR, but in doing so I think it is clear that CGNS does need global node numbers for the periodic replicas of the owner/donor nodes. Thus we do need to do one of the following: + +A) Turn off matching (creates another conflict), + +B) suppress it during certain stages of chef's work, or similarly + +C) alter the code to not add this type of mapping in ilwork. + + + +The conflict with A) might be limited to Simmetrix wedge-tet meshes that won't match when there is an unstructured mesh region (like tets). TBH, on small meshes I can't get matching to work anyway. This is what we are currently doing, and it has forced us to re-discover matching by reordering the donor and periodic mesh sets to have their centroids match. To make this tractable, I did an MPI_Gather of the data to part 0 and did a serial sort there (a sketch of this centroid-keyed sort appears at the end of this note). This will eventually hit scalability issues (less of an issue for Q3 meshes as they are 3^d coarser than Q1 meshes), but it is still not pretty.
If we could keep matching on AND we were able to disable it from ilwork so that we build the global numbering that CGNS wants (periodic nodes/replicas have a global node number), then we MIGHT be able to use matching to order the periodic mesh set to match the order of the donor mesh set in parallel and avoid this serial bottleneck. + + + +CWS and KEJ discussed the following organization of options: + +I) Use SCOREC/core matching + +II) Order periodic faces without use of SCOREC/core matching information + + + +I) further breaks into the following steps and branches: + +To have matching available requires one of the following: a) replication of ilwork to an ilworkCGNS structure without accounting for periodic matching, so that it will build an ncorp as if matching were not present, or b) POSSIBLY, if the filterMatching flag is set, existing ilwork will create a PHASTA input set that is lacking periodicity and thus correct for CGNS, and yet the matching information is saved and can be restored for use in the CGNS code to determine matching. +The second aspect is how to do that matching, since inputs coming from the matchedNodeElement reader ONLY have VERTEX matching. Here again 3 options are possible: a) make the matching check for matches of nodes through face connectivity, b) make inputs to MNER richer to include face matching (likely available during mesh generation), c) develop code within MNER to elevate matching information to edges and faces (as is done with classification, though that is far easier because classification is to a geometric model that is simple enough to be on all ranks AND model entities are far fewer; this is currently limited to extrusions anyway, but so is periodicity, so not really a limitation). + +II) also breaks into at least 2 branches: + +distance of centroid collected to rank 0 and sorted (current approach), which we will likely use as long as the mesh size on the periodic plane does not make this intractable, +OR a breadth-first search that starts with a single matched face (seed) and then finds and adds neighboring faces to a list from which the next in some order (could be centroidal distance or other) is chosen. If the mesh is matched, this ordering can proceed in parallel for both the donor and the periodic mesh set. When a face is added that touches a part boundary, existing part boundary adjacency information is used to continue the search on another rank.
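
Appendix sketch 1 (referenced from item 4 above): a minimal illustration, not the gen_ncorp/commuInt code itself, of the owner-contiguous, 1-based numbering required by I)-III). Each rank's starting global ID is the sum of the owned counts on lower ranks, obtained with MPI_Exscan; numOwnedNodes is a placeholder for whatever count the caller has already computed, and propagating the owner's numbers back to the replicas on part boundaries (the commuInt step) is omitted.

#include <mpi.h>
#include <cstdint>
#include <vector>

// Sketch: compute 1-based, owner-contiguous global IDs for the nodes this rank owns.
std::vector<std::int64_t> ownedGlobalIds(std::int64_t numOwnedNodes, MPI_Comm comm)
{
  std::int64_t firstId = 0; // sum of owned counts on lower ranks
  MPI_Exscan(&numOwnedNodes, &firstId, 1, MPI_INT64_T, MPI_SUM, comm);
  int rank = 0;
  MPI_Comm_rank(comm, &rank);
  if (rank == 0) firstId = 0; // MPI_Exscan leaves rank 0's receive buffer undefined
  std::vector<std::int64_t> gid(numOwnedNodes);
  for (std::int64_t i = 0; i < numOwnedNodes; ++i)
    gid[i] = firstId + i + 1; // CGNS numbering starts at 1, not 0
  return gid;
}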
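
Appendix sketch 2 (referenced from item 5 above): the two-step file-node/data pattern described in 3., condensed to a single coordinate array. It assumes an already-open F/B/Z and a coordinate slice already condensed to owned nodes through ncorp; the actual implementation is CGNS_Coordinates in phCGNSgbc.cc.

#ifdef HAVE_CGNS
#include <cgnslib.h>
#include <pcgnslib.h>
#include <vector>

// Sketch: every rank collectively creates the coordinate file-node, then each rank
// writes only its owned, contiguous slice [start, end] (1-based, per I)-III)).
void writeCoordinateX(int F, int B, int Z, cgsize_t start, cgsize_t end,
                      const std::vector<double> &xOwned)
{
  int Cx = -1;
  if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx))
    cgp_error_exit(); // collective file-node creation
  if (cgp_coord_write_data(F, B, Z, Cx, &start, &end, xOwned.data()))
    cgp_error_exit(); // each rank writes only the data it owns
}
#endif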
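
Appendix sketch 3 (referenced from the ISSUE discussion above): a simplified view of the serial rank-0 ordering of periodic faces by centroid. Face and its fields are placeholders for the gathered (centroid, element id) data; the actual code uses the pairsortDI helper in phCGNSgbc.cc with a scalar distance key plus a fallback search for near-ties.

#include <algorithm>
#include <vector>

// Sketch: sort donor faces and periodic faces by their in-plane centroid so that
// donor[i] pairs with periodic[i]; the two lists share these coordinates up to roundoff.
struct Face { double cx, cy; long long elemId; };

static void sortByInPlaneCentroid(std::vector<Face> &faces)
{
  std::sort(faces.begin(), faces.end(), [](const Face &a, const Face &b) {
    if (a.cx != b.cx) return a.cx < b.cx; // lexicographic on the two in-plane coordinates
    return a.cy < b.cy;
  });
}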
diff --git a/phasta/CMakeLists.txt b/phasta/CMakeLists.txt index 0a785e268..4e5ff54ab 100644 --- a/phasta/CMakeLists.txt +++ b/phasta/CMakeLists.txt @@ -6,6 +6,7 @@ set(SOURCES phOutput.cc phLinks.cc phGeomBC.cc + phCGNSgbc.cc phBlock.cc phAdapt.cc phRestart.cc diff --git a/phasta/phCGNSgbc.cc b/phasta/phCGNSgbc.cc new file mode 100644 index 000000000..e86eaf227 --- /dev/null +++ b/phasta/phCGNSgbc.cc @@ -0,0 +1,1281 @@ +#include +#include "phOutput.h" +#include "phIO.h" +#include "phiotimer.h" +#include +#include +#include +#include +#include +#include +#include +#include "phRestart.h" +#include +#include +#include "apfShape.h" + + +#ifdef HAVE_CGNS +// +#include +#include +// +#endif +typedef int lcorp_t; +#define NCORP_MPI_T MPI_INTEGER +static cgsize_t nDbgCG=50; +static int nDbgI=50; + +namespace { + +template +MPI_Datatype getMpiType(T) { + MPI_Datatype mpitype; + //determine the type based on what is being sent + if( std::is_same::value ) { + mpitype = MPI_DOUBLE; + } else if ( std::is_same::value ) { + mpitype = MPI_INT64_T; + } else if ( std::is_same::value ) { + mpitype = MPI_INT32_T; + } else { + assert(false); + fprintf(stderr, "Unknown type in %s... exiting\n", __func__); + exit(EXIT_FAILURE); + } + return mpitype; +} + +} + +// https://www.geeksforgeeks.org/sorting-array-according-another-array-using-pair-stl/ +// Sort an array according to +// other using pair in STL. Modified to be real-int pair (for distance matching) and in a separate routine, two integers (for idx sort by surfID) +#include +using namespace std; + +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsortDI(double a[], int b[], int n) +{ + pair *pairt = new pair[n]; // when done delete pairt; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. + sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } + delete [] pairt; +} + +// Function to sort integer array b[] +// according to the order defined by a[] +void pairsort(int a[], int b[], int n) +{ + pair *pairt = new pair[n]; + + // Storing the respective array + // elements in pairs. + for (int i = 0; i < n; i++) + { + pairt[i].first = a[i]; + pairt[i].second = b[i]; + } + + // Sorting the pair array. 
+ sort(pairt, pairt + n); + + // Modifying original arrays + for (int i = 0; i < n; i++) + { + a[i] = pairt[i].first; + b[i] = pairt[i].second; + } + delete [] pairt; +} +void pairDeal6sort(int a[], int b[], int n) +{ + int c[6]={0}; + for (int i = 0; i < n; i++) c[a[i]-1]++; // count number each type in a pre-scan + int** p = new int*[6]; + for (int i = 0; i < 6; i++) p[i]=new int[c[i]]; + int** idx = new int*[6]; + for (int i = 0; i < 6; i++) idx[i]=new int[c[i]]; + for (int i = 0; i < 6; i++) c[i]=0; + int isrfM1; + for (int i = 0; i < n; i++) + { + isrfM1=a[i]-1; + p[isrfM1][c[isrfM1]]=b[i]; + idx[isrfM1][c[isrfM1]]=a[i]; + c[isrfM1]++; + } + int igc=0; + for (int j = 0; j < 6; j++){ + for (int i = 0; i < c[j]; i++) { + b[igc] = p[j][i]; + a[igc] = idx[j][i]; + igc++; + } + } + assert(igc==n); + for (int i = 0; i < 6; i++) delete [] p[i]; + for (int i = 0; i < 6; i++) delete [] idx[i]; + delete [] idx; + delete [] p; +} + + +namespace ph { + +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); +static lcorp_t count_local(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes); + + +void commuInt(Output& o, cgsize_t* global) +{ // translating a commuInt out from PHASTA to c + int numtask=o.arrays.ilwork[0]; + int itkbeg=0; + int maxseg=1; + int numseg; + for (int itask=0; itaskcount(0); + o.arrays.ncorp = new cgsize_t[num_nodes]; + lcorp_t owned; + lcorp_t local; + lcorp_t* owner_counts; + cgsize_t local_start_id; + cgsize_t gid; + + const int num_parts = PCU_Comm_Peers(); + const int part = PCU_Comm_Self() ; + + for(int i=0; i < num_nodes; i++) o.arrays.ncorp[i]=0; + owned = count_owned(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + local = count_local(o.arrays.ilwork, nilwork, o.arrays.ncorp, num_nodes); + o.iownnodes = owned+local; +#ifdef PRINT_EVERYTHING + printf("%d: %d local only nodes\n", part, local); + printf("%d: %d owned nodes\n", part, owned); +#endif + assert( owned <= num_nodes ); + assert( owned+local <= num_nodes ); + + owner_counts = (lcorp_t*) malloc(sizeof(lcorp_t)*num_parts); + for(int i=0; i < num_parts; i++) owner_counts[i]=0; + owner_counts[part] = owned+local; +#ifdef PRINT_EVERYTHING + for(i=0;i=0); + for(i=0;i=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == 0) + { + o.arrays.ncorp[i] = gid; + assert(o.arrays.ncorp[i]>=0); + gid++; + continue; + } + if(o.arrays.ncorp[i] == -1) + o.arrays.ncorp[i] = 0; //commu() adds, so zero slaves + } //char code[] = "out"; //int ione = 1; + + if(num_parts > 1) + commuInt(o, o.arrays.ncorp); +if(0==1) { + for (int ipart=0; ipart 1)); + } + return(num_local); +} + +static lcorp_t count_owned(int* ilwork, int nlwork,cgsize_t* ncorp_tmp, int num_nodes) +{ + int numtask = ilwork[0]; + int itkbeg = 0; //task offset + int owned = 0; + int i,j,k; + for(i=0;i= 0 && iacc <= 1); + int iother = ilwork[itkbeg+3]-1; //other rank (see ctypes.f for off by one) + int numseg = ilwork[itkbeg+4]; //number of segments + for(j=0;jcount(0); + size_t i = 0; + size_t phGnod = 0; + std::vector lnode={0,1,2,3}; // Standard pattern of first 4 (or 3) + // PHASTA's use of volume elements has an lnode array that maps the surface nodes from the volume numbering. 
We need it here too + // see hierarchic.f but note that is fortran numbering + if(nvertVol==4) lnode={0, 2, 1, -1}; // tet is first three but opposite normal of others to go with neg volume + if(nvertVol==5 && nvert==3) lnode={0, 4, 1, -1}; // pyramid tri is a fortran map of 1 5 2 + if(nvertVol==6 && nvert==4) lnode={0, 3, 4, 1}; // wedge quad is a fortran map of 1 4 5 2 + for (int elem = 0; elem < nelem; ++elem){ + eCenx[elem]=0; + eCeny[elem]=0; + eCenz[elem]=0; + for (int vert = 0; vert < nvert; ++vert){ + phGnod=o.arrays.ienb[block][elem][lnode[vert]]; //actually it is on-rank Global + c[i++] = o.arrays.ncorp[phGnod]; // PETSc truely global + eCenx[elem]+=o.arrays.coordinates[0*num_nodes+phGnod]; + eCeny[elem]+=o.arrays.coordinates[1*num_nodes+phGnod]; + eCenz[elem]+=o.arrays.coordinates[2*num_nodes+phGnod]; + } + eCenx[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCeny[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + eCenz[elem]/=nvert; // only necessary if you really want to use this as a correct Centroid rather than comparison + } + PCU_ALWAYS_ASSERT(i == nelem*nvert); +} + +void getInterfaceConnectivityCGNS // not extended yet other than transpose +( + Output& o, + int block, + apf::DynamicArray& c +) +{ + int nelem = o.blocks.interface.nElements[block]; + int nvert0 = o.blocks.interface.keys[block].nElementVertices; + int nvert1 = o.blocks.interface.keys[block].nElementVertices1; + c.setSize(nelem * (nvert0 + nvert1)); + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert0; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienif0[block][elem][vert]]; + for (int elem = 0; elem < nelem; ++elem) + for (int vert = 0; vert < nvert1; ++vert) + c[i++] = o.arrays.ncorp[o.arrays.ienif1[block][elem][vert]]; + PCU_ALWAYS_ASSERT(i == c.getSize()); +} + +// renamed stripped down to just give srfID +void getNaturalBCCodesCGNS(Output& o, int block, int* codes) +{ + int nelem = o.blocks.boundary.nElements[block]; + size_t i = 0; + for (int elem = 0; elem < nelem; ++elem) + codes[i++] = o.arrays.ibcb[block][elem][1]; //srfID is the second number so 1 +// if we wanted we could use PHASTA's bit in coding in the first number to us attributes to set +// arbitrary combinations of BCs but leaving that out for now +} + +void topoSwitch(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 4: + snprintf(Ename, 4, "Tet"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TETRA_4), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 5: + snprintf(Ename, 4, "Pyr"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PYRA_5), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 6: + snprintf(Ename, 4, "Wdg"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(PENTA_6), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 8: + snprintf(Ename, 4, "Hex"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(HEXA_8), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} +void topoSwitchB(char* Ename, int nvert,int F,int B,int Z,int *E, cgsize_t e_startg,cgsize_t e_endg) +{ + int Ep; + switch(nvert){ + case 3: + snprintf(Ename, 4, "Tri"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(TRI_3), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + case 4: + 
snprintf(Ename, 5, "Quad"); + if (cgp_section_write(F, B, Z, Ename, CGNS_ENUMV(QUAD_4), e_startg, e_endg, 0, &Ep)) + cgp_error_exit(); + break; + } +if(0==1) printf("%d %d %d %s %ld %ld %d\n",F,B,Z,Ename,e_startg,e_endg,Ep); + *E=Ep; +} + +// renamed and calling the renamed functions above with output writes now to CGNS +void writeBlocksCGNSinterior(int F,int B,int Z, int SCR, Output& o, cgsize_t *e_written) +{ + int E,Fs,Fs2,Fsb,Fsb2; + cgsize_t e_owned, e_start,e_end; + cgsize_t e_startg,e_endg; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + // create a centered solution + if ( cgp_field_write(F, B, Z, SCR, CGNS_ENUMV(Integer), "CellRank", &Fs)) + cgp_error_exit(); + int nblki= o.blocks.interior.getSize(); + int nvMap[4] = {4,5,6,8}; + int nvC,nvert,nvAll,invC,iblkC; + for (int i = 0; i < 4; ++i) { // check all topologies + nvAll=0; + nvC=nvMap[i]; + for (int j = 0; j < nblki; ++j) { // check all blocks + BlockKey& k = o.blocks.interior.keys[j]; + nvert = o.blocks.interior.keys[j].nElementVertices; + if(nvC==nvert) { + invC=1; + iblkC=j; + break; + } else invC=0; + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; // = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + if(nvAll!=0) { //nvC present on at least 1 rank + if(invC!=0){ //nvC present on my rank + e_owned = o.blocks.interior.nElements[iblkC]; + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + getInteriorConnectivityCGNS(o, iblkC, e); + } + else e_owned=0; + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + e_endg=*e_written + PCU_Add_Long(safeArg); // end for the elements of this topology + e_startg=1+*e_written; // start for the elements of this topology + char Ename[5]; + topoSwitch(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); + *e_written=e_endg; + if(invC!=0) free(e); + // create the field data for this process + int* d = NULL; + if(invC!=0){ //nvC present on my rank + d = (int *)malloc(e_owned * sizeof(int)); + for (int n = 0; n < e_owned; n++) + d[n] = part; + // write the solution field data in parallel + } + if (cgp_field_write_data(F, B, Z, SCR, Fs, &e_start, &e_end, d)) + cgp_error_exit(); + if(invC!=0) free(d); + char UserDataName[11]; + snprintf(UserDataName, 11, "n%sOnRank", Ename); + // create Helper array for number of elements on part of a given topology + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write(UserDataName, CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + // create the field data for this process + int nIelVec=e_owned; + cgsize_t partP1=part+1; +if(0==1) printf("Intr, %s, %d, %d, %d, %d \n", UserDataName, nIelVec,part,Fs,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nIelVec)) + cgp_error_exit(); + +if(0==1){ + printf("interior cnn %s %d %ld %ld \n", Ename,part, e_start, e_end); +// for (int ne=0; ne=start && en<=end) + dv[en-start]= part; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVR, FsR, &start, &end, dv)) + cgp_error_exit(); + // more tricky to put srfID on nodes to see in PV (approximately) through vertex field + if ( 
cgp_field_write(F, B, Z, SBVS, CGNS_ENUMV(Integer), "BoundaryVertexSrfID", &FsS)) + cgp_error_exit(); + // create the field data for this process + for (int inode = 0; inode < o.iownnodes; ++inode) dv[inode]= -1; + if(invC!=0) { + for (int ibel = 0; ibel < e_owned; ++ibel){ + for (int ilv=0; ilv < nvC; ilv++) { + en=e[ibel*nvC+ilv]; + if(en>=start && en<=end) + dv[en-start]= srfID[ibel]; + } + } + } + if (cgp_field_write_data(F, B, Z, SBVS, FsS, &start, &end, dv)) + cgp_error_exit(); +} +void writeBlocksCGNSboundary(int F,int B,int Z, int SBVR, int SBVS, Output& o, int* srfID, int* srfIDidx, double** srfIDCen1, double** srfIDCen2, int* srfID1OnBlk, int* srfID2OnBlk, int* startBelBlk, int* endBelBlk, cgsize_t *e_written, cgsize_t *totBel, int *nStackedOnRank, int nblkb) +{ + int E,Fsb,Fsb2, nvC,nvert,nvAll,invC; + const int num_parts = PCU_Comm_Peers(); + const cgsize_t num_parts_cg=num_parts; + const int part = PCU_Comm_Self() ; + const cgsize_t part_cg=part; + cgsize_t e_owned, e_start,e_end, e_startg,e_endg; + cgsize_t eVolElm=*e_written; + cgsize_t e_belWritten=0; + int nvMap[2] = {3,4}; + int iblkC[2]; + int estart[2]; + for (int i = 0; i < 2; ++i) { // check all topologies + nvAll=0; + invC=0; + nvC=nvMap[i]; + int icountB=0; + for (int j = 0; j < nblkb; ++j) { // check all blocks + BlockKey& k = o.blocks.boundary.keys[j]; + nvert = o.blocks.boundary.keys[j].nBoundaryFaceEdges; + if(nvert==nvC) { + invC=1; + iblkC[icountB]=j; // mark the block numbers (could be more than one) that have current topology + icountB++; + } + } + nvAll= PCU_Add_Int(invC); // add across all + cgsize_t* e=NULL; double* eCenx=NULL; double* eCeny=NULL; double* eCenz=NULL; + if(nvAll!=0) { //nvC present on at least 1 rank + e_owned=0; + if(invC!=0){ //nvC present on my rank + for (int j = 0; j < icountB; ++j) { // combine blocks + estart[j]=e_owned; + e_owned += o.blocks.boundary.nElements[iblkC[j]]; + } + e = (cgsize_t *)malloc(nvC * e_owned * sizeof(cgsize_t)); + eCenx = (double *)malloc( e_owned * sizeof(double)); + eCeny = (double *)malloc( e_owned * sizeof(double)); + eCenz = (double *)malloc( e_owned * sizeof(double)); + for (int j = 0; j < icountB; ++j) {// combine blocks + getBoundaryConnectivityCGNS(o, iblkC[j], &e[estart[j]], &eCenx[estart[j]], + &eCeny[estart[j]], &eCenz[estart[j]]); // stack repeated topologies + getNaturalBCCodesCGNS(o, iblkC[j], &srfID[e_belWritten+estart[j]]); // note e_owned counts all same topo + } + (*nStackedOnRank)++; // no longer have nblkb blocks so count them as you stack them + } + e_startg=1+*e_written; // start for the elements of this topology + long safeArg=e_owned; // e_owned is cgsize_t which could be an 32 or 64 bit int + cgsize_t numBelTP = PCU_Add_Long(safeArg); // number of elements of this topology + e_endg=*e_written + numBelTP; // end for the elements of this topology + char Ename[6]; + topoSwitchB(Ename, nvC,F,B,Z,&E,e_startg,e_endg); + e_start=0; + auto type = getMpiType( cgsize_t() ); + MPI_Exscan(&e_owned, &e_start, 1, type , MPI_SUM, MPI_COMM_WORLD); + e_start+=1+*e_written; // my parts global element start 1-based + e_end=e_start+e_owned-1; // my parts global element stop 1-based + // write the element connectivity in parallel + if (cgp_elements_write_data(F, B, Z, E, e_start, e_end, e)) + cgp_error_exit(); +if(0==1) printf("boundary cnn %d, %ld, %ld \n", part, e_start, e_end); +if(0==1){ + for (int ne=0; ne search list srfID=2 list to find true match + vDSmin=vDistSq; + DistFails++; + for (int j = 0; j < nmatchFace; ++j) { // if this turns out to be 
taken a lot then it could be narrowed e.g. j=max(0,i-50), j< i+min(matchFace,i+50), + iclose2=imapD2[j]; + d1=srfID1GCen[(iclose1)*3+0]-srfID2GCen[(iclose2)*3+0]; + d2=srfID1GCen[(iclose1)*3+1]-srfID2GCen[(iclose2)*3+1]; + vDistSq= d1*d1+d2*d2; + if(vDistSqcount(0); + cgsize_t gnod,start,end; + start=o.local_start_id; + end=start+o.iownnodes-1; + for (int n = 0; n < num_nodes; n++) { + gnod=o.arrays.ncorp[n]; + if(gnod >= start && gnod <= end) { // solution to write + p[icount]= data[0*num_nodes+n]; + u[icount]= data[1*num_nodes+n]; + v[icount]= data[2*num_nodes+n]; + w[icount]= data[3*num_nodes+n]; + T[icount]= data[4*num_nodes+n]; + icount++; + } + } +// write the solution field data in parallel + if (cg_sol_write(F, B, Z, "Solution", CGNS_ENUMV(Vertex), &S) || + cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Pressure", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, p)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityX", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, u)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityY", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, v)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "VelocityZ", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, w)) + cgp_error_exit(); + if ( cgp_field_write(F, B, Z, S, CGNS_ENUMV(RealDouble), "Temperature", &Q)) + cgp_error_exit(); + if (cgp_field_write_data(F, B, Z, S, Q, &start, &end, T)) + cgp_error_exit(); + free(p); free(u); free(v); free(w); free(T); free(data); +} +void CGNS_Coordinates(int F,int B,int Z, Output& o) +{ + int Cx,Cy,Cz; + if (cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateX", &Cx) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateY", &Cy) || + cgp_coord_write(F, B, Z, CGNS_ENUMV(RealDouble), "CoordinateZ", &Cz)) + cgp_error_exit(); + +// condense out vertices owned by another rank in a new array, x, whose slices are ready for CGNS. 
+ int num_nodes=o.mesh->count(0); + cgsize_t gnod; + cgsize_t start=o.local_start_id; + cgsize_t end=start+o.iownnodes-1; + double* x = (double *)malloc(o.iownnodes * sizeof(double)); + for (int j = 0; j < 3; ++j) { + int icount=0; + for (int inode = 0; inode < num_nodes; ++inode){ + gnod=o.arrays.ncorp[inode]; + if(gnod >= start && gnod <= end) { // coordinate to write + x[icount]= o.arrays.coordinates[j*num_nodes+inode]; + icount++; + } + } +if(0==1) { + printf("%ld, %ld \n", start, end); + for (int ne=0; necount(0); +if(0==1){ // ilwork debugging + for (int ipart=0; ipart PETSc global node number (1-based) +// o.iownnodes => nodes owned by this rank +// o.local_start_id => this rank's first node number (1-based and also which must be a long long int) + long safeArg=o.iownnodes; // cgsize_t could be an int + sizes[0]=PCU_Add_Long(safeArg); + int ncells=m->count(m->getDimension()); // this ranks number of elements + safeArg=ncells; // cgsize_t could be an int + sizes[1]=PCU_Add_Long(safeArg); + sizes[2]=0; + if(cgp_mpi_comm(MPI_COMM_WORLD)) cgp_error_exit; + if ( cgp_open(outfile, CG_MODE_WRITE, &F) || + cg_base_write(F, "Base", 3, 3, &B) ) + cgp_error_exit(); + if ( cg_goto(F,B,"end")) + cgp_error_exit(); + if ( cg_dataclass_write(CGNS_ENUMV(Dimensional))) + cgp_error_exit(); + cg_units_write(CGNS_ENUMV(Kilogram),CGNS_ENUMV(Meter),CGNS_ENUMV(Second),CGNS_ENUMV(Kelvin),CGNS_ENUMV(Degree)); + if ( cg_zone_write(F, B, "Zone", sizes, CGNS_ENUMV(Unstructured), &Z)) + cgp_error_exit(); + // create data nodes for coordinates + cg_set_file_type(CG_FILE_HDF5); + CGNS_Coordinates(F,B,Z, o); + if (cg_sol_write(F, B, Z, "CellRank", CGNS_ENUMV(CellCenter), &SCR)) + cgp_error_exit(); +// Paraview will only viz the first sol node created so control that with writeCGNSFiles flag + CirculateSolWriteOrder(F,B,Z,&SVR,&SBVS,&SBVR,o); + CGNS_VertexRank(F,B,Z,SVR, o); + // create Helper array for number of elements on rank + if ( cg_goto(F, B, "Zone_t", 1, NULL) || + cg_user_data_write("User Data") || + cg_gorel(F, "User Data", 0, NULL) || + cgp_array_write("nCoordsOnRank", CGNS_ENUMV(Integer), 1, &num_parts_cg, &Fs2)) + cgp_error_exit(); + // create the field data for this process + int nCoordVec=o.iownnodes; + cgsize_t partP1=part+1; +if(0==1) printf("Coor %d, %d, %d, \n", nCoordVec,part,Fs2); + if ( cgp_array_write_data(Fs2, &partP1, &partP1, &nCoordVec)) + cgp_error_exit(); + cgsize_t e_written=0; + writeBlocksCGNSinterior(F,B,Z,SCR,o,&e_written); + cgsize_t totBel; + int nblkb = o.blocks.boundary.getSize(); + double** srfIDCen1 = new double*[nblkb]; // might not all be used + double** srfIDCen2 = new double*[nblkb]; + int totOnRankBel=0; + for (int i = 0; i < nblkb; ++i) + totOnRankBel += o.blocks.boundary.nElements[i]; + int* srfID = (int *)malloc( totOnRankBel * sizeof(int)); + int* srfID1OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfID2OnBlk = (int *)malloc( nblkb * sizeof(int)); + int* startBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* endBelBlk = (int *)malloc( nblkb * sizeof(int)); + int* srfIDidx = (int *)malloc( totOnRankBel * sizeof(int)); + int nStackedOnRank=0; + writeBlocksCGNSboundary(F,B,Z, SBVR, SBVS, o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, &totBel, &nStackedOnRank, nblkb); + writeCGNSboundary (F,B,Z,o, srfID, srfIDidx, srfIDCen1, srfIDCen2, srfID1OnBlk, srfID2OnBlk, startBelBlk, endBelBlk, &e_written, totOnRankBel, &totBel, nStackedOnRank); + free(srfID); free(srfIDidx); + free(srfID1OnBlk); free(srfID2OnBlk); 
+ free(startBelBlk); free(endBelBlk); + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen1[i]; + for (int i = 0; i < nStackedOnRank; ++i) delete [] srfIDCen2[i]; + delete [] srfIDCen1; delete [] srfIDCen2; + if(cgp_close(F)) cgp_error_exit(); + double t1 = PCU_Time(); + if (!PCU_Comm_Self()) + lion_oprint(1,"CGNS file written in %f seconds\n", t1 - t0); +} +} // namespace diff --git a/phasta/phCook.cc b/phasta/phCook.cc index 983570d43..d701ba26e 100644 --- a/phasta/phCook.cc +++ b/phasta/phCook.cc @@ -197,19 +197,21 @@ namespace ph { ph::enterFilteredMatching(m, in, bcs); ph::generateOutput(in, bcs, m, out); ph::exitFilteredMatching(m); - // a path is not needed for inmem - if ( in.writeRestartFiles ) { - if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); - // store the value of the function pointer - FILE* (*fn)(Output& out, const char* path) = out.openfile_write; - // set function pointer for file writing - out.openfile_write = chef::openfile_write; - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart - // reset the function pointer to the original value - out.openfile_write = fn; - } - else { - ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + if ( in.writeCGNSFiles ==0 ) { // for now, don't write restarts when writing CGNS since writing restarts is bundled with destroying fields + // a path is not needed for inmem + if ( in.writeRestartFiles ) { + if(!PCU_Comm_Self()) lion_oprint(1,"write file-based restart file\n"); + // store the value of the function pointer + FILE* (*fn)(Output& out, const char* path) = out.openfile_write; + // set function pointer for file writing + out.openfile_write = chef::openfile_write; + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + // reset the function pointer to the original value + out.openfile_write = fn; + } + else { + ph::detachAndWriteSolution(in,out,m,subDirPath); //write restart + } } if ( ! in.outMeshFileName.empty() ) m->writeNative(in.outMeshFileName.c_str()); @@ -224,6 +226,9 @@ namespace ph { out.openfile_write = fn; } ph::writeGeomBC(out, subDirPath); //write geombc + out.writeCGNSFiles=in.writeCGNSFiles; + if ( in.writeCGNSFiles > 0 ) + ph::writeCGNS(out, subDirPath); //write CGNS if(!PCU_Comm_Self()) ph::writeAuxiliaryFiles(path, in.timeStepNumber); m->verify(); diff --git a/phasta/phInput.cc b/phasta/phInput.cc index ffc9989c2..65118d677 100644 --- a/phasta/phInput.cc +++ b/phasta/phInput.cc @@ -60,6 +60,7 @@ static void setDefaults(Input& in) in.axisymmetry = 0; in.parmaLoops = 3; //a magical value in.parmaVerbosity = 1; //fairly quiet + in.writeCGNSFiles = 0; // write CGNS Files in.writeGeomBCFiles = 0; // write additional geombc file for vis in streaming in.writeRestartFiles = 0; // write additional restart file for vis in streaming in.writeVTK = 0; @@ -153,6 +154,7 @@ static void formMaps(Input& in, StringMap& stringMap, IntMap& intMap, DblMap& db intMap["parmaLoops"] = &in.parmaLoops; intMap["parmaVerbosity"] = &in.parmaVerbosity; intMap["writeVTK"] = &in.writeVTK; + intMap["writeCGNSFiles"] = &in.writeCGNSFiles; intMap["writeGeomBCFiles"] = &in.writeGeomBCFiles; intMap["writeRestartFiles"] = &in.writeRestartFiles; intMap["ramdisk"] = &in.ramdisk; diff --git a/phasta/phInput.h b/phasta/phInput.h index a6bf88c90..28123f805 100644 --- a/phasta/phInput.h +++ b/phasta/phInput.h @@ -147,6 +147,8 @@ class Input /** \brief write the geombc file during in-memory data transfer between phasta and chef. 
*/ int writeGeomBCFiles; + /* \brief write CGNS files for pre-processing */ + int writeCGNSFiles; /** \brief write the restart file during in-memory data transfer between phasta and chef. */ int writeRestartFiles; diff --git a/phasta/phOutput.cc b/phasta/phOutput.cc index d4b71028b..fd5e73a69 100644 --- a/phasta/phOutput.cc +++ b/phasta/phOutput.cc @@ -997,6 +997,7 @@ Output::~Output() //nOwnedNodes will still be zero. if(!nOwnedNodes) return; + delete [] arrays.ncorp; delete [] arrays.coordinates; delete [] arrays.ilwork; delete [] arrays.ilworkf; diff --git a/phasta/phOutput.h b/phasta/phOutput.h index 6a93e9d96..0e3c85351 100644 --- a/phasta/phOutput.h +++ b/phasta/phOutput.h @@ -4,6 +4,13 @@ #include "phInput.h" #include "phBlock.h" #include "phBC.h" +#ifdef HAVE_CGNS +// +#include +#include +// +#endif + namespace apf { class Mesh; @@ -136,6 +143,11 @@ idx: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 int* ifather; /* an array of integers of size nfather that has nsons in each entry */ int* nsonsArr; +/* an array that maps on-rank-node-number (input) to PETSc global-node-number */ +// worked but long int* ncorp; +#ifdef HAVE_CGNS + cgsize_t* ncorp; +#endif }; @@ -153,6 +165,11 @@ struct Output int nMaxElementNodes; int nEssentialBCNodes; int nOverlapEdges; +#ifdef HAVE_CGNS + cgsize_t local_start_id; /* this rank's first global node number (1 based) */ + int iownnodes; /* how many node this rank owns */ +#endif + int writeCGNSFiles; int nlwork; /* size of arrays.ilwork */ int nlworkf; /* size of arrays.ilworkf */ int nlworkl; /* size of arrays.ilworkl */ @@ -168,6 +185,7 @@ struct Output void generateOutput(Input& in, BCs& bcs, apf::Mesh* mesh, Output& o); void writeGeomBC(Output& o, std::string path, int timestep_or_dat = 0); +void writeCGNS(Output& o, std::string path); } diff --git a/phasta/phRestart.h b/phasta/phRestart.h index cd82967b6..f17b690d6 100644 --- a/phasta/phRestart.h +++ b/phasta/phRestart.h @@ -27,6 +27,8 @@ void detachAndWriteSolution(Input& in, Output& out, void attachZeroSolution(Input& in, apf::Mesh* m); void detachField(apf::Field* f, double*& data, int& size); +void detachField(apf::Mesh* m, const char* fieldname, double*& data, int& size); + } diff --git a/pumi-meshes b/pumi-meshes index c00ba9c16..0629ac309 160000 --- a/pumi-meshes +++ b/pumi-meshes @@ -1 +1 @@ -Subproject commit c00ba9c16cacbb361ee538c03a3ec694ddb989f2 +Subproject commit 0629ac3090be4905a3f98d602fb1379eb6b31b00 diff --git a/test/cgns.cc b/test/cgns.cc index 6f853d2e0..a3c57d777 100644 --- a/test/cgns.cc +++ b/test/cgns.cc @@ -119,7 +119,7 @@ pMesh toPumi(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) return pm; } -auto additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) +std::function additional(const std::string &prefix, gmi_model *g, apf::Mesh2 *mesh) { // seems essential to make pm first before calling balance or reorder... auto pm = toPumi(prefix, g, mesh); diff --git a/test/matchedNodeElmReader.cc b/test/matchedNodeElmReader.cc index 4dfdfabd0..7b4f11a59 100644 --- a/test/matchedNodeElmReader.cc +++ b/test/matchedNodeElmReader.cc @@ -20,33 +20,6 @@ #include #include -/* from https://github.com/SCOREC/core/issues/205 -0=fully interior of the volume -1-6 =classified on face (not edge or vertex) -11-22 = classified on model edge (not end points which are model vertices) -31-38 = classified on a model vertex. 
-*/ - -/* tags on vertices */ -#define INTERIORTAG 0 -#define FACE 1 -#define FACE_LAST 6 -#define EDGE 11 -#define EDGE_LAST 22 -#define VERTEX 31 -#define VERTEX_LAST 38 - -/* model entity ids */ -//#define INTERIOR_REGION 0 -//int INTERIOR_REGION=0; // initialized but will be checked from read input - -//Manifold single region apf::ModelEntity* getMdlRgn(gmi_model* model) { -//Manifold single region apf::ModelEntity* rgn = reinterpret_cast( -//Manifold single region gmi_find(model, 3, INTERIOR_REGION)); -//Manifold single region PCU_ALWAYS_ASSERT(rgn); -//Manifold single region return rgn; -//Manifold single region } - apf::ModelEntity* getMdlRegion(apf::Mesh2* mesh, int tag) { apf::ModelEntity* region = mesh->findModelEntity(3,tag); @@ -747,17 +720,16 @@ int main(int argc, char** argv) PCU_Comm_Init(); lion_set_verbosity(1); int noVerify=0; // maintain default of verifying if not explicitly requesting it off - if( argc < 11 ) { + if( argc < 10 ) { if( !PCU_Comm_Self() ) { - printf("Usage: %s " - " " + printf("Usage: %s no rank but .rank added to next 6 " + " " " " " " " " " " " " - " " - " " + " " "turn off verify mesh if equal 1 (on if you give nothing)\n", argv[0]); } @@ -767,14 +739,14 @@ int main(int argc, char** argv) gmi_register_mesh(); gmi_register_null(); - if( argc == 11 ) noVerify=atoi(argv[10]); + if( argc == 11 ) noVerify=atoi(argv[9]); double t0 = PCU_Time(); MeshInfo m; readMesh(argv[2],argv[3],argv[4],argv[5],argv[6],argv[7],argv[8],m); bool isMatched = true; - if( !strcmp(argv[3], "NULL") ) + if( !strcmp(argv[4], "NULL") ) isMatched = false; if(!PCU_Comm_Self()) @@ -822,7 +794,7 @@ int main(int argc, char** argv) outMap.clear(); apf::writeVtkFiles("rendered",mesh); - mesh->writeNative(argv[10]); + mesh->writeNative(argv[9]); if(noVerify != 1) mesh->verify(); mesh->destroyNative(); diff --git a/test/swapDoubles.cc b/test/swapDoubles.cc index beacc27d2..f85c2a531 100644 --- a/test/swapDoubles.cc +++ b/test/swapDoubles.cc @@ -25,5 +25,6 @@ int main(int argc, char** argv) { } delete [] d_orig; delete [] d; + MPI_Finalize(); return 0; } diff --git a/test/testing.cmake b/test/testing.cmake index 069006a26..d256c6b30 100644 --- a/test/testing.cmake +++ b/test/testing.cmake @@ -69,6 +69,90 @@ else() set(GXT dmg) endif() +if(ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-multitopology1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + add_test(NAME chef-CGNS-multitopology1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/1-1-Chef) + + mpi_test(chef-CGNS-multitopology2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + add_test(NAME chef-CGNS-multitopology2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/2-1-Chef) + + mpi_test(chef-CGNS-multitopology4 4 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) + add_test(NAME chef-CGNS-multitopology4-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/multiTopology/mner/Chef/4-1-Chef) +endif() + +if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND AND ENABLE_CGNS AND SIM_DOT_VERSION VERSION_GREATER 12.0.171000) + set(MDIR ${MESHES}/phasta/cube_CGNS) + mpi_test(chef-CGNS-8hex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + 
add_test(NAME chef-CGNS-8hex1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-8hex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-8hex2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds8Hex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-smallTet1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-smallTet1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-smallTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-smallTet2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mds-SmallestTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllHex1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllHex1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllHex2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllHex2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllHex/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllTet 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllTet-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllTet2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllTet2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllTet/Chef/2-1-Chef) + + mpi_test(chef-CGNS-AllWedge1 1 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + add_test(NAME chef-CGNS-AllWedge1-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/1-1-Chef) + + mpi_test(chef-CGNS-AllWedge2 2 ${CMAKE_CURRENT_BINARY_DIR}/chef + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) + add_test(NAME chef-CGNS-AllWedge2-diff + COMMAND h5diff chefOut.cgns correct.cgns + WORKING_DIRECTORY ${MDIR}/sms2mdsAllWedge/Chef/2-1-Chef) +endif() + set(MDIR ${MESHES}/phasta/dg) if(ENABLE_SIMMETRIX AND SIM_PARASOLID AND SIMMODSUITE_SimAdvMeshing_FOUND) @@ -213,7 +297,7 @@ mpi_test(matchedNodeElementReader_p1 1 "${MDIR}/1part/geom3D.fathr" "NULL" "${MDIR}/1part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") mpi_test(matchedNodeElementReader_p4 4 ./matchedNodeElmReader @@ -225,7 +309,7 @@ mpi_test(matchedNodeElementReader_p4 4 "${MDIR}/4part/geom3D.fathr" "NULL" "${MDIR}/4part/geom3DHead.cnn" - "geom.dmg" "geom.smb") + "geom.smb") set(MDIR ${MESHES}/gmsh) mpi_test(gmshv2TwoQuads 1 @@ -515,7 +599,7 @@ if(ENABLE_ZOLTAN) ) endif() -if(ENABLE_CGNS AND ENABLE_ZOLTAN) +if(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) # # sort of an arbitrary choice set(numProcs 4) @@ -560,7 +644,7 @@ mpi_test(cgns_3d_2 ${numProcs} # # 3D BCS tests # -set(numProcs 5) +set(numProcs 4) # set(CGNSDIR ${MESHES}/cgns/withBCS/3D) # @@ -600,7 +684,7 @@ mpi_test(cgns_bcs_3 ${numProcs} bcs3.smb additional) -endif(ENABLE_CGNS AND ENABLE_ZOLTAN) 
+endif(ENABLE_CGNS AND ENABLE_ZOLTAN AND ENABLE_CGNS_MULTI_BASE) mpi_test(construct 4 ./construct