From 64f3814d358e01ad31459cb60e097784820c5bd5 Mon Sep 17 00:00:00 2001 From: Brett Viren Date: Wed, 9 Jun 2021 17:36:34 -0400 Subject: [PATCH] Factor and fix numpy save/load methods into 'helper' --- gen/src/DepoBagger.cxx | 3 + gen/src/DepoFanout.cxx | 4 ++ sio/inc/WireCellSio/NumpyDepoLoader.h | 12 +++- sio/src/NumpyDepoLoader.cxx | 35 ++++-------- sio/src/NumpyDepoSaver.cxx | 10 +++- sio/src/NumpyFrameSaver.cxx | 12 ++-- util/inc/WireCellUtil/NumpyHelper.h | 50 +++++++++++++++++ util/test/check_numpy_depos.cxx | 51 +++++++++++++++++ util/test/test_cnpy_eigen.cxx | 80 +++++++++++++++++++++------ util/test/test_cnpy_eigen2.cxx | 59 ++++++++++++++++++++ 10 files changed, 268 insertions(+), 48 deletions(-) create mode 100644 util/inc/WireCellUtil/NumpyHelper.h create mode 100644 util/test/check_numpy_depos.cxx create mode 100644 util/test/test_cnpy_eigen2.cxx diff --git a/gen/src/DepoBagger.cxx b/gen/src/DepoBagger.cxx index b20ad4568..3d53ef9a7 100644 --- a/gen/src/DepoBagger.cxx +++ b/gen/src/DepoBagger.cxx @@ -1,5 +1,6 @@ #include "WireCellGen/DepoBagger.h" #include "WireCellIface/SimpleDepoSet.h" +#include "WireCellUtil/Logging.h" #include "WireCellUtil/NamedFactory.h" WIRECELL_FACTORY(DepoBagger, WireCell::Gen::DepoBagger, WireCell::IDepoCollector, WireCell::IConfigurable) @@ -33,6 +34,8 @@ void Gen::DepoBagger::configure(const WireCell::Configuration& cfg) bool Gen::DepoBagger::operator()(const input_pointer& depo, output_queue& deposetqueue) { if (!depo) { // EOS + Log::logptr_t log(Log::logger("sim")); + log->debug("bagged {} depos at {}", m_depos.size(), m_count); // even if empyt, must send out something to retain sync. auto out = std::make_shared(m_count, m_depos); deposetqueue.push_back(out); diff --git a/gen/src/DepoFanout.cxx b/gen/src/DepoFanout.cxx index b9da90c19..4e6a27869 100644 --- a/gen/src/DepoFanout.cxx +++ b/gen/src/DepoFanout.cxx @@ -43,6 +43,10 @@ std::vector Gen::DepoFanout::output_types() bool Gen::DepoFanout::operator()(const input_pointer& in, output_vector& outv) { // Note: if "in" indicates EOS, just pass it on + if (!in) { + auto log = Log::logger("sio"); + log->debug("depo fanout sees EOS"); + } outv.resize(m_multiplicity); for (size_t ind = 0; ind < m_multiplicity; ++ind) { diff --git a/sio/inc/WireCellSio/NumpyDepoLoader.h b/sio/inc/WireCellSio/NumpyDepoLoader.h index eb5bb36ef..fbbbac3a6 100644 --- a/sio/inc/WireCellSio/NumpyDepoLoader.h +++ b/sio/inc/WireCellSio/NumpyDepoLoader.h @@ -1,4 +1,14 @@ -/** Load depos from a Numpy file. */ +/** Load depos from a Numpy file. + + Depos should consist of arrays named depo_data_N and depo_info_N + where N is an integer count. + + For a given value of N the data array should be shaped (Ndepo, 7), + and info is shaped (Ndepo, 4). + + See also @ref NumpyDepoSaver. + +*/ #ifndef WIRECELLSIO_NUMPYDEPOLOADER #define WIRECELLSIO_NUMPYDEPOLOADER diff --git a/sio/src/NumpyDepoLoader.cxx b/sio/src/NumpyDepoLoader.cxx index 36046b769..c9fd74d4a 100644 --- a/sio/src/NumpyDepoLoader.cxx +++ b/sio/src/NumpyDepoLoader.cxx @@ -4,7 +4,7 @@ #include "WireCellUtil/Array.h" #include "WireCellUtil/Logging.h" #include "WireCellUtil/Point.h" -#include "WireCellUtil/cnpy.h" +#include "WireCellUtil/NumpyHelper.h" #include "WireCellIface/SimpleDepo.h" #include @@ -46,7 +46,6 @@ void Sio::NumpyDepoLoader::configure(const WireCell::Configuration& config) using DataArray = Eigen::Array; using InfoArray = Eigen::Array; - bool Sio::NumpyDepoLoader::next() { auto log = Log::logger("sio"); @@ -58,10 +57,12 @@ bool Sio::NumpyDepoLoader::next() const std::string info_name = String::format("depo_info_%d", m_load_count-1); const std::string fname = m_cfg["filename"].asString(); - cnpy::NpyArray data_arr, info_arr; + + Array::array_xxf data; + Array::array_xxi info; try { - data_arr = cnpy::npz_load(fname, data_name); - info_arr = cnpy::npz_load(fname, info_name); + WireCell::Numpy::load2d(data, data_name, fname); + WireCell::Numpy::load2d(info, info_name, fname); } catch (std::runtime_error) { log->debug("NumpyDepoLoader: {}:{}: no such array", @@ -69,41 +70,27 @@ bool Sio::NumpyDepoLoader::next() return false; } - // In [4]: f['depo_data_0'].shape - // Out[4]: (7, 1785) - - if (data_arr.shape[0] != 7) { + if (data.cols() != 7) { log->warn("NumpyDepoLoader: {}:{}: depo data is not size 7", fname, data_name); return false; } - if (info_arr.shape[0] != 4) { + if (info.cols() != 4) { log->warn("NumpyDepoLoader: {}:{}: depo info is not size 4", fname, info_name); return false; } - const auto ndatas = data_arr.shape[1]; - const auto ninfos = info_arr.shape[1]; + const size_t ndatas = data.rows(); + const size_t ninfos = info.rows(); if (ndatas != ninfos) { log->warn("NumpyDepoLoader: {}: mismatch ndepo={} ninfo={}", fname, ndatas, ninfos); return false; } - const auto ndepos = ndatas; + const size_t ndepos = ndatas; log->debug("load {} depos from frame {}", ndepos, data_name); - // data: time, charge, x, y, z, dlong, dtran - // info: ID, pdg, gen, child - - // Get as eigen arrays - auto data = Eigen::Map(data_arr.data(), - ndepos, 7); - auto info = Eigen::Map(info_arr.data(), - ndepos, 4); - assert(data.cols() == 7); // make sure we understand - assert(info.cols() == 4); // how to call eigen::map! - std::vector sdepos; for (size_t ind=0; ind < ndepos; ++ind) { diff --git a/sio/src/NumpyDepoSaver.cxx b/sio/src/NumpyDepoSaver.cxx index 26a92cf43..353864311 100644 --- a/sio/src/NumpyDepoSaver.cxx +++ b/sio/src/NumpyDepoSaver.cxx @@ -2,7 +2,7 @@ #include "WireCellUtil/NamedFactory.h" #include "WireCellUtil/Array.h" -#include "WireCellUtil/cnpy.h" +#include "WireCellUtil/NumpyHelper.h" #include #include @@ -13,6 +13,7 @@ WIRECELL_FACTORY(NumpyDepoSaver, WireCell::Sio::NumpyDepoSaver, WireCell::IDepoFilter, WireCell::IConfigurable) using namespace WireCell; +using WireCell::Numpy::save2d; Sio::NumpyDepoSaver::NumpyDepoSaver() : m_save_count(0) @@ -75,6 +76,7 @@ bool Sio::NumpyDepoSaver::operator()(const WireCell::IDepo::pointer& indepo, Wir auto fdepos = flatten_depos(m_depos); const size_t nfdepos = fdepos.size(); + /// Dimensions are such that we have a vecotr of N-tuples // time, charge, x, y, z, dlong, dtran const size_t ndata = 7; Array::array_xxf data(nfdepos, ndata); @@ -103,8 +105,10 @@ bool Sio::NumpyDepoSaver::operator()(const WireCell::IDepo::pointer& indepo, Wir const std::string fname = m_cfg["filename"].asString(); const std::string mode = "a"; - cnpy::npz_save(fname, data_name, data.data(), {ndata, ndepos}, mode); - cnpy::npz_save(fname, info_name, info.data(), {ninfo, ndepos}, mode); + + save2d(data, data_name, fname, mode); + save2d(info, info_name, fname, mode); + m_depos.clear(); ++m_save_count; diff --git a/sio/src/NumpyFrameSaver.cxx b/sio/src/NumpyFrameSaver.cxx index f1f1c2f0a..10dab6b13 100644 --- a/sio/src/NumpyFrameSaver.cxx +++ b/sio/src/NumpyFrameSaver.cxx @@ -2,7 +2,7 @@ #include "WireCellAux/FrameTools.h" #include "WireCellUtil/NamedFactory.h" -#include "WireCellUtil/cnpy.h" +#include "WireCellUtil/NumpyHelper.h" #include #include @@ -13,6 +13,7 @@ WIRECELL_FACTORY(NumpyFrameSaver, WireCell::Sio::NumpyFrameSaver, WireCell::IFrameFilter, WireCell::IConfigurable) using namespace WireCell; +using WireCell::Numpy::save2d; Sio::NumpyFrameSaver::NumpyFrameSaver() : m_save_count(0) @@ -130,11 +131,13 @@ bool Sio::NumpyFrameSaver::operator()(const IFrame::pointer& inframe, IFrame::po const std::string aname = String::format("frame_%s_%d", tag.c_str(), m_save_count); if (digitize) { Array::array_xxs sarr = arr.cast(); - const short* sdata = sarr.data(); - cnpy::npz_save(fname, aname, sdata, {ncols, nrows}, mode); + // const short* sdata = sarr.data(); + // cnpy::npz_save(fname, aname, sdata, {ncols, nrows}, mode); + save2d(sarr, aname, fname, mode); } else { - cnpy::npz_save(fname, aname, arr.data(), {ncols, nrows}, mode); + // cnpy::npz_save(fname, aname, arr.data(), {ncols, nrows}, mode); + save2d(arr, aname, fname, mode); } l->debug("NumpyFrameSaver: saved {} with {} channels {} ticks @t={} ms qtot={}", aname, nrows, ncols, inframe->time() / units::ms, arr.sum()); @@ -143,6 +146,7 @@ bool Sio::NumpyFrameSaver::operator()(const IFrame::pointer& inframe, IFrame::po { // the channel array const std::string aname = String::format("channels_%s_%d", tag.c_str(), m_save_count); cnpy::npz_save(fname, aname, channels.data(), {nrows}, mode); + } { // the tick array diff --git a/util/inc/WireCellUtil/NumpyHelper.h b/util/inc/WireCellUtil/NumpyHelper.h new file mode 100644 index 000000000..ceefc716e --- /dev/null +++ b/util/inc/WireCellUtil/NumpyHelper.h @@ -0,0 +1,50 @@ +/** NumpyHelper + + Some helper functions for using with Numpy files. + + Eigen default 2D array order is column-major. + + Numpy is row-major. + + */ + +#ifndef WIRECELL_UTIL_NUMPYHELPER +#define WIRECELL_UTIL_NUMPYHELPER + +#include "WireCellUtil/cnpy.h" + +#include + +#include +#include + +namespace WireCell::Numpy { + + + template + void save2d(ARRAY& array, std::string aname, std::string fname, std::string mode = "w") { + using ROWM = Eigen::Array; + using Scalar = typename ARRAY::Scalar; + ROWM rowm = array; // assure we have row major + const Scalar* data = rowm.data(); + std::vector shape(2); + shape[0] = static_cast(array.rows()); + shape[1] = static_cast(array.cols()); + cnpy::npz_save(fname.c_str(), aname, data, + shape, mode); + } + + template + void load2d(ARRAY& array, std::string aname, std::string fname) { + using ROWM = Eigen::Array; + using Scalar = typename ARRAY::Scalar; + + cnpy::NpyArray np = cnpy::npz_load(fname, aname); + + ROWM temp = Eigen::Map(np.data(), + np.shape[0], np.shape[1]); + array = temp; + } + +} +#endif diff --git a/util/test/check_numpy_depos.cxx b/util/test/check_numpy_depos.cxx new file mode 100644 index 000000000..86f54a344 --- /dev/null +++ b/util/test/check_numpy_depos.cxx @@ -0,0 +1,51 @@ + +#include "WireCellUtil/NumpyHelper.h" +#include +#include + +using FArray = Eigen::Array; +using IArray = Eigen::Array; + +using WireCell::Numpy::load2d; + +int main(int argc, char** argv) + +{ + if (argc < 3) { + std::cerr << "usage: check_numpy_depos depo-file.npz [ []]\n"; + } + const std::string fname = argv[1]; + const std::string nname = argv[2]; + size_t row_beg=0, nrows = 10; + if (argc > 3) { + row_beg = atoi(argv[3]); + } + if (argc > 4) { + nrows = atoi(argv[4]); + } + + FArray data; + IArray info; + load2d(data, "depo_data_" + nname, fname); + load2d(info, "depo_info_" + nname, fname); + + assert (data.cols() == 7); + assert (info.cols() == 4); + + const size_t ndepos = data.rows(); + assert(ndepos == (size_t)info.rows()); + + const size_t row_end = std::min(row_beg + nrows, ndepos); + + for (size_t irow=row_beg; irow -typedef Eigen::Array ArrayXXs; +using ntype = short; + +// default Eigen ordering +using ArrayXXsColM = Eigen::Array; +// non-default Eigen ordering (default for numpy) +using ArrayXXsRowM = Eigen::Array; -const int Nchanc = 480; -// const int Nchani = 800; -const int Ntick = 2000; +const int Nrows = 3; +const int Ncols = 4; int main(int argc, char* argv[]) { @@ -14,17 +27,52 @@ int main(int argc, char* argv[]) name += ".npz"; // ArrayXXs a = ArrayXXs::Random(Nchanc,Ntick); - ArrayXXs a = ArrayXXs::Zero(Nchanc, Ntick); - a(0, 1000) = 1000; - a(400, 0) = 400; - const short* data = a.data(); - cnpy::npz_save(name.c_str(), "a", data, {Ntick, Nchanc}, "w"); - /* - >>> import numpy - >>> f = numpy.load("eigentest.npz") - >>> a = f['a'] - >>> print a.shape, a[0,400], a[1000,0] - (2000, 480) 400 1000 - */ + ArrayXXsColM colm = ArrayXXsColM::Zero(Nrows, Ncols); + for (int irow = 0; irow < Nrows; ++irow) { + for (int icol=0; icol < Ncols; ++icol) { + ntype val = icol + irow*Ncols; + colm(irow, icol) = val; + } + } + + /// + /// Convert to Numpy's row-major from Eigen's column-major. + /// + ArrayXXsRowM rowm = colm; + const ntype* data = rowm.data(); + cnpy::npz_save(name.c_str(), "a", data, {Nrows, Ncols}, "w"); + + + /* With python, confirm:: + + python -c'import numpy; a=numpy.load("./build/util/test_cnpy_eigen2.npz")["a"]; print(f"{a.shape}\n{a}")' + (3, 4) + [[ 0 1 2 3] + [ 4 5 6 7] + [ 8 9 10 11]] + */ + + // + // Load back in to a non-default row-major Eigen array. + // + cnpy::NpyArray np = cnpy::npz_load(name, "a"); + assert(np.shape[0] == 3); + assert(np.shape[1] == 4); + auto eig = Eigen::Map(np.data(), + Nrows, Ncols); + + // + // Convert to Eigen defaujlt of column-major and check everythign + // is as expected. + // + ArrayXXsColM eig2 = eig; + for (int irow = 0; irow < Nrows; ++irow) { + for (int icol=0; icol < Ncols; ++icol) { + ntype val = icol + irow*Ncols; + assert(eig(irow, icol) == val); + assert(eig2(irow, icol) == val); + } + } + return 0; } diff --git a/util/test/test_cnpy_eigen2.cxx b/util/test/test_cnpy_eigen2.cxx new file mode 100644 index 000000000..5e11687b3 --- /dev/null +++ b/util/test/test_cnpy_eigen2.cxx @@ -0,0 +1,59 @@ +/** + Eigen defaults column-major ordering (FORTRAN). + + Cnpy defaults to row-major ordering (C). + + We thus must make a transpose before saving and after loading. + + This is like test_cnpy_eigen.cxx but uses the numpy helpers. + + */ + +#include "WireCellUtil/NumpyHelper.h" +#include "WireCellUtil/Array.h" + +using ntype = short; + +using ArrayType = Eigen::Array; + +const int Nrows = 3; +const int Ncols = 4; + +int main(int argc, char* argv[]) +{ + std::string fname = argv[0]; + fname += ".npz"; + std::string aname = "a"; + + // ArrayXXs a = ArrayXXs::Random(Nchanc,Ntick); + ArrayType arr = ArrayType::Zero(Nrows, Ncols); + for (int irow = 0; irow < Nrows; ++irow) { + for (int icol=0; icol < Ncols; ++icol) { + ntype val = icol + irow*Ncols; + arr(irow, icol) = val; + } + } + + WireCell::Numpy::save2d(arr, aname, fname); + + /* With python, confirm:: + + python -c'import numpy; a=numpy.load("./build/util/test_cnpy_eigen2.npz")["a"]; print(f"{a.shape}\n{a}")' + (3, 4) + [[ 0 1 2 3] + [ 4 5 6 7] + [ 8 9 10 11]] + */ + + ArrayType arr2; + WireCell::Numpy::load2d(arr2, aname, fname); + + for (int irow = 0; irow < Nrows; ++irow) { + for (int icol=0; icol < Ncols; ++icol) { + ntype val = icol + irow*Ncols; + assert(arr2(irow, icol) == val); + } + } + + return 0; +}