Skip to content

Commit

Permalink
added draft of the api for checkpointing
Browse files Browse the repository at this point in the history
  • Loading branch information
cnpetra committed Aug 28, 2024
1 parent 7a939b7 commit 181f7f9
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 74 deletions.
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ option(HIOP_USE_EIGEN "Build with Eigen support" ON)
option(HIOP_USE_MPI "Build with MPI support" ON)
option(HIOP_USE_GPU "Build with support for GPUs - CUDA or HIP libraries" OFF)
option(HIOP_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when running tests" OFF)
option(HIOP_USE_RAJA "Build with portability abstraction library RAJA" OFF)
option(HIOP_USE_RAJA "Build with portability abstraction library RAJA" OFF)
option(HIOP_USE_AXOM "Build with AXOM to use Sidre for scalable checkpointing" OFF)
option(HIOP_DEEPCHECKS "Extra checks and asserts in the code with a high penalty on performance" OFF)
option(HIOP_WITH_KRON_REDUCTION "Build Kron Reduction code (requires UMFPACK)" OFF)
option(HIOP_DEVELOPER_MODE "Build with extended warnings and options" OFF)
Expand Down Expand Up @@ -289,6 +290,15 @@ if(HIOP_USE_RAJA)
message(STATUS "Found umpire pkg-config: ${umpire_CONFIG}")
endif()

# Optional AXOM dependency: locate its CMake package configuration file and make the
# `axom` target a usage requirement of the hiop_tpl interface target.
if(HIOP_USE_AXOM)
  # Config-mode search; AXOM_DIR and AXOM_DIR/lib/cmake/ are passed as extra search
  # locations. REQUIRED stops the configuration when the package cannot be found.
  find_package(
    AXOM
    CONFIG
    REQUIRED
    PATHS
      ${AXOM_DIR}
      ${AXOM_DIR}/lib/cmake/
  )
  target_link_libraries(hiop_tpl INTERFACE axom)
  message(STATUS "Found AXOM pkg-config: ${AXOM_CONFIG}")
endif()


if(HIOP_WITH_KRON_REDUCTION)
set(HIOP_UMFPACK_DIR CACHE PATH "Path to UMFPACK directory")
include(FindUMFPACK)
Expand Down
7 changes: 4 additions & 3 deletions src/Interface/hiopInterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,14 +500,15 @@ class hiopInterfaceBase
/**
* This method is called after each iteration and should be implemented by the user to instruct HiOp
* to save a complete state of the algorithm to disk. An axom::sidre::DataStore will be used for IO, which
* can be set by the user upon creation of the HiOp algorithm class. If not set, HiOp will used one.
* can be set by the user upon creation of the HiOp algorithm class. If not set, HiOp will create one
* internally. TODO: mention API methods from the Alg class.
*
* This is feature is useful for IO checkpointing, for example; it allows the internal algorithm to be
* This feature is useful for IO checkpointing, for example; it allows the internal algorithm to be
* restarted at the same state as before saving. @see hiop::hiopAlgorithm::tbd for the method to be used
* to load a previously saved state.
*
* The method provided by the user should return true if HiOp should save the state of the algorithm at
* at current iteration. The argument is passed by HiOp to indicate the current iterate number.
* the current iteration. The argument is passed by HiOp to indicate the current iteration number.
*
* @param[in] iter the current iteration number
*/
Expand Down
45 changes: 45 additions & 0 deletions src/Optimization/hiopAlgFilterIPM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,51 @@ void hiopAlgFilterIPMQuasiNewton::outputIteration(int lsStatus, int lsNum, int u
}
}

// Out-of-class definition of the static constexpr data member. C++14 requires it when the member
// is odr-used; starting with C++17 such members are implicitly inline, which makes this line
// redundant (it is still accepted, but deprecated).
constexpr char hiopAlgFilterIPMQuasiNewton::default_state_filename[];

/**
 * @brief Saves the state of the algorithm to a file (work in progress).
 *
 * @param path_in name of the destination file; when empty, default_state_filename is used instead.
 *
 * @details The state is gathered into a temporary data store by save_state_to_data_store(). The
 * code that writes the data store to disk is still commented out below, so no file is produced
 * for the time being.
 */
void hiopAlgFilterIPMQuasiNewton::save_state_to_file(const ::std::string& path_in)
{
  // Resolved destination; it will be consumed by the IO code sketched below once that is enabled.
  const ::std::string path = path_in.empty() ? ::std::string(default_state_filename) : path_in;

  // The data store has automatic storage duration so that it is released on every exit path,
  // including when save_state_to_data_store() throws (the former new/delete pair leaked it then).
  axom_sidre_DataStore ds(0);

  this->save_state_to_data_store(&ds);

  //TODO: write the data store to `path`, along the lines of
  //::axom::sidre::IOManager writer(this->get_nlp()->get_comm());
  //int num_files;
  //MPI_Comm_size(this->get_nlp()->get_comm(), &num_files);
  //writer.write(ds.getRoot(), num_files, path, ::axom::sidre::Group::getDefaultIOProtocol());
}

/**
 * Loads the state of the algorithm from a file. Not implemented yet: only the name of the file
 * is resolved at this point.
 *
 * @param path_in name of the file to load from; an empty string selects default_state_filename.
 */
void hiopAlgFilterIPMQuasiNewton::load_state_from_file(const ::std::string& path_in)
{
  ::std::string path;
  if(path_in.empty()) {
    // no name given by the caller: fall back to the default file name
    path = default_state_filename;
  } else {
    path = path_in;
  }
  //todo
}

/**
 * Saves the state of the algorithm to a data store (work in progress).
 *
 * @param ds opaque pointer to the data store; not used by the current implementation.
 */
void hiopAlgFilterIPMQuasiNewton::save_state_to_data_store(void* ds)
{
  // Sketch of the intended implementation (not enabled yet):
  //   Group* nlp_group = ds->getRoot()->createGroup("hiop solver");
  //   -> create views for each member that needs to be saved, for example
  //   destination = nlp_group->createViewAndAllocate("x", ::axom::sidre::DOUBLE_ID, size);

  // Pointer returned by local_data_host() for the x component of the current iterate; it is
  // only fetched for now and will be the source of the copy into the "x" view.
  auto&& x_vec = it_curr->get_x();
  const double* x = x_vec->local_data_host();
}

/**
 * Loads the state of the algorithm from a data store (work in progress).
 *
 * @param ds opaque pointer to the data store; not used by the current implementation.
 */
void hiopAlgFilterIPMQuasiNewton::load_state_from_data_store(const void* ds)
{
  // NOTE(review): the sketch below is the same as in the save routine; loading presumably needs
  // to look up an existing group and read its views rather than create them -- TODO confirm.
  //   Group* nlp_group = ds->getRoot()->createGroup("hiop solver");
  //   destination = nlp_group->createViewAndAllocate("x", ::axom::sidre::DOUBLE_ID, size);

  // Pointer returned by local_data_host() for the x component of the current iterate; it is
  // only fetched for now.
  auto&& x_vec = it_curr->get_x();
  const double* x = x_vec->local_data_host();
}


/******************************************************************************************************
* FULL NEWTON IPM
Expand Down
102 changes: 32 additions & 70 deletions src/Optimization/hiopAlgFilterIPM.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
#include "hiopPDPerturbation.hpp"
#include "hiopFactAcceptor.hpp"

#include "Checkpointing.hpp"

#include "hiopTimer.hpp"

namespace hiop
Expand Down Expand Up @@ -118,73 +120,6 @@ class hiopAlgFilterIPMBase {
return filter.contains(theta, logbar_obj);
}

//notes on checkpointing
// 1. need to allow user to pass axom::sidre::DataStore. HiOp will put all the info into a group
// Question: should a DataStore be passed any time restarting is invoked (this is a bit cumbersome),
// or just once, by calling the HiOp algorithm class
//
//
//
//
// (for when checkpointing is used without the user setting a data store, so HiOp will create it and do the IO)
// 2. need to allow user to pass a string with the file where the DataStore will be
// written to/read from. A default name will be used for an empty filename.

/**
 * Setter for the axom::sidre DataStore used to manage the data associated with the state of
 * the algorithm. If the setter is not called by the user, the DataStore will be created
 * internally by the iteration checkpointing object.
 *
 * @param[in] mng the DataStore, which is forwarded to the checkpointing helper iter_chkpnt_
 */
inline void set_state_data_manager(axom::sidre::DataStore& mng)
{
iter_chkpnt_.set_data_manger(mng);
}

/**
 * The method saves the state of the algorithm in the axom::sidre::DataStore object that was
 * previously provided via set_state_data_manager(). If that setter has not been previously
 * called, HiOp will create such an instance and will save it on disk under a default name.
 *
 * The work is delegated to the checkpointing helper iter_chkpnt_.
 */
inline void save_state()
{
iter_chkpnt_.save(this);
}

/**
 * The method saves the state of the algorithm in the file specified by the string argument. If
 * the string is empty, a file with a default name will be created.
 *
 * HiOp uses an axom::sidre::DataStore object that is created internally and shares the IO code
 * with the save_state() overload that takes no arguments. This method disregards previous calls
 * to set_state_data_manager().
 *
 * @param[in] filename name of the file to save to; forwarded to the checkpointing helper
 */
inline void save_state(const std::string& filename)
{
iter_chkpnt_.save(filename, this);
}


/**
 * This method loads the state of the algorithm from an axom::sidre::DataStore that was previously
 * provided via set_state_data_manager(). This DataStore instance needs to be properly
 * initialized and have a group called "HiOpState".
 *
 * The work is delegated to the checkpointing helper iter_chkpnt_.
 */
inline void load_state()
{
iter_chkpnt_.load(this);
}

/**
 * This method loads the state of the algorithm from the file whose name is passed as a string
 * argument. HiOp expects that the file contains an axom::sidre::DataStore that was previously
 * saved using one of the save_state() methods.
 *
 * @param[in] filename name of the file to load from; forwarded to the checkpointing helper
 */
inline void load_state(const std::string& filename)
{
iter_chkpnt_.load(filename, this);
}


/**
 * Setter for the primal steplength.
 *
 * @param[in] alpha_primal the value to be stored in the _alpha_primal member
 */
inline void set_alpha_primal(const double alpha_primal) { _alpha_primal = alpha_primal; }

Expand Down Expand Up @@ -314,9 +249,6 @@ class hiopAlgFilterIPMBase {
hiopNlpFormulation* nlp;
hiopFilter filter;

/// Helper for saving/loading algorithm state to disk.
Checkpointing iter_chkpnt_;

hiopLogBarProblem* logbar;

/* Iterate, search directions (managed by this (algorithm) class) */
Expand Down Expand Up @@ -411,6 +343,36 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase
virtual ~hiopAlgFilterIPMQuasiNewton();

virtual hiopSolveStatus run();

/**
 * Work in progress: save the state of the algorithm to, and load it from, a data store.
 *
 * @param sidre_data_store opaque pointer to the data store; presumably an axom::sidre::DataStore,
 * as the parameter name suggests -- TODO confirm once the Sidre types appear in the signatures.
 */
virtual void save_state_to_data_store(void* sidre_data_store);
virtual void load_state_from_data_store(const void* sidre_data_store);

/// Name of the file used by save_state_to_file() and load_state_from_file() when the path
/// argument is the empty string.
static constexpr char default_state_filename[] = "hiop_qn_state.sidre";

/**
 * @brief Save the state of the algorithm to a file.
 * @param path the name of the file
 *
 * @details
 * Internally, HiOp uses an axom::sidre::DataStore, which is saved to the file. If the argument is
 * the empty string, HiOp will attempt saving the state to the path specified by the
 * default_state_filename static member.
 *
 * NOTE: work in progress -- the code that writes the file is still commented out in the
 * implementation.
 */
void save_state_to_file(const ::std::string& path="");

/**
 * @brief Load the state of the algorithm from a file.
 * @param path the name of the file to load from
 *
 * @details
 * The file should contain an axom::sidre::DataStore that was previously saved using
 * save_state_to_file(). If the argument is the empty string, HiOp will attempt loading the state
 * from the path specified by the default_state_filename static member.
 *
 * NOTE: work in progress -- the implementation is currently a stub.
 */
void load_state_from_file(const ::std::string& path="");

private:
virtual void outputIteration(int lsStatus, int lsNum, int use_soc = 0, int use_fr = 0);
private:
Expand Down

0 comments on commit 181f7f9

Please sign in to comment.