Skip to content

Commit

Permalink
Factor and fix numpy save/load methods into 'helper'
Browse files Browse the repository at this point in the history
  • Loading branch information
brettviren committed Jun 9, 2021
1 parent 3426fe4 commit 64f3814
Show file tree
Hide file tree
Showing 10 changed files with 268 additions and 48 deletions.
3 changes: 3 additions & 0 deletions gen/src/DepoBagger.cxx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "WireCellGen/DepoBagger.h"
#include "WireCellIface/SimpleDepoSet.h"
#include "WireCellUtil/Logging.h"

#include "WireCellUtil/NamedFactory.h"
WIRECELL_FACTORY(DepoBagger, WireCell::Gen::DepoBagger, WireCell::IDepoCollector, WireCell::IConfigurable)
Expand Down Expand Up @@ -33,6 +34,8 @@ void Gen::DepoBagger::configure(const WireCell::Configuration& cfg)
bool Gen::DepoBagger::operator()(const input_pointer& depo, output_queue& deposetqueue)
{
if (!depo) { // EOS
Log::logptr_t log(Log::logger("sim"));
log->debug("bagged {} depos at {}", m_depos.size(), m_count);
// even if empyt, must send out something to retain sync.
auto out = std::make_shared<SimpleDepoSet>(m_count, m_depos);
deposetqueue.push_back(out);
Expand Down
4 changes: 4 additions & 0 deletions gen/src/DepoFanout.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ std::vector<std::string> Gen::DepoFanout::output_types()
bool Gen::DepoFanout::operator()(const input_pointer& in, output_vector& outv)
{
// Note: if "in" indicates EOS, just pass it on
if (!in) {
auto log = Log::logger("sio");
log->debug("depo fanout sees EOS");
}

outv.resize(m_multiplicity);
for (size_t ind = 0; ind < m_multiplicity; ++ind) {
Expand Down
12 changes: 11 additions & 1 deletion sio/inc/WireCellSio/NumpyDepoLoader.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
/** Load depos from a Numpy file. */
/** Load depos from a Numpy file.
Depos should consist of arrays named depo_data_N and depo_info_N
where N is an integer count.
For a given value of N the data array should be shaped (Ndepo, 7),
and info is shaped (Ndepo, 4).
See also @ref NumpyDepoSaver.
*/

#ifndef WIRECELLSIO_NUMPYDEPOLOADER
#define WIRECELLSIO_NUMPYDEPOLOADER
Expand Down
35 changes: 11 additions & 24 deletions sio/src/NumpyDepoLoader.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "WireCellUtil/Array.h"
#include "WireCellUtil/Logging.h"
#include "WireCellUtil/Point.h"
#include "WireCellUtil/cnpy.h"
#include "WireCellUtil/NumpyHelper.h"
#include "WireCellIface/SimpleDepo.h"

#include <string>
Expand Down Expand Up @@ -46,7 +46,6 @@ void Sio::NumpyDepoLoader::configure(const WireCell::Configuration& config)
using DataArray = Eigen::Array<float, Eigen::Dynamic, 7>;
using InfoArray = Eigen::Array<int, Eigen::Dynamic, 4>;


bool Sio::NumpyDepoLoader::next()
{
auto log = Log::logger("sio");
Expand All @@ -58,52 +57,40 @@ bool Sio::NumpyDepoLoader::next()
const std::string info_name = String::format("depo_info_%d", m_load_count-1);

const std::string fname = m_cfg["filename"].asString();
cnpy::NpyArray data_arr, info_arr;

Array::array_xxf data;
Array::array_xxi info;
try {
data_arr = cnpy::npz_load(fname, data_name);
info_arr = cnpy::npz_load(fname, info_name);
WireCell::Numpy::load2d(data, data_name, fname);
WireCell::Numpy::load2d(info, info_name, fname);
}
catch (std::runtime_error) {
log->debug("NumpyDepoLoader: {}:{}: no such array",
fname, data_name);
return false;
}

// In [4]: f['depo_data_0'].shape
// Out[4]: (7, 1785)

if (data_arr.shape[0] != 7) {
if (data.cols() != 7) {
log->warn("NumpyDepoLoader: {}:{}: depo data is not size 7",
fname, data_name);
return false;
}
if (info_arr.shape[0] != 4) {
if (info.cols() != 4) {
log->warn("NumpyDepoLoader: {}:{}: depo info is not size 4",
fname, info_name);
return false;
}

const auto ndatas = data_arr.shape[1];
const auto ninfos = info_arr.shape[1];
const size_t ndatas = data.rows();
const size_t ninfos = info.rows();
if (ndatas != ninfos) {
log->warn("NumpyDepoLoader: {}: mismatch ndepo={} ninfo={}",
fname, ndatas, ninfos);
return false;
}
const auto ndepos = ndatas;
const size_t ndepos = ndatas;
log->debug("load {} depos from frame {}", ndepos, data_name);

// data: time, charge, x, y, z, dlong, dtran
// info: ID, pdg, gen, child

// Get as eigen arrays
auto data = Eigen::Map<DataArray>(data_arr.data<float>(),
ndepos, 7);
auto info = Eigen::Map<InfoArray>(info_arr.data<int>(),
ndepos, 4);
assert(data.cols() == 7); // make sure we understand
assert(info.cols() == 4); // how to call eigen::map!

std::vector<SimpleDepo*> sdepos;
for (size_t ind=0; ind < ndepos; ++ind) {

Expand Down
10 changes: 7 additions & 3 deletions sio/src/NumpyDepoSaver.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include "WireCellUtil/NamedFactory.h"
#include "WireCellUtil/Array.h"
#include "WireCellUtil/cnpy.h"
#include "WireCellUtil/NumpyHelper.h"

#include <string>
#include <vector>
Expand All @@ -13,6 +13,7 @@
WIRECELL_FACTORY(NumpyDepoSaver, WireCell::Sio::NumpyDepoSaver, WireCell::IDepoFilter, WireCell::IConfigurable)

using namespace WireCell;
using WireCell::Numpy::save2d;

Sio::NumpyDepoSaver::NumpyDepoSaver()
: m_save_count(0)
Expand Down Expand Up @@ -75,6 +76,7 @@ bool Sio::NumpyDepoSaver::operator()(const WireCell::IDepo::pointer& indepo, Wir
auto fdepos = flatten_depos(m_depos);
const size_t nfdepos = fdepos.size();

/// Dimensions are such that we have a vecotr of N-tuples
// time, charge, x, y, z, dlong, dtran
const size_t ndata = 7;
Array::array_xxf data(nfdepos, ndata);
Expand Down Expand Up @@ -103,8 +105,10 @@ bool Sio::NumpyDepoSaver::operator()(const WireCell::IDepo::pointer& indepo, Wir

const std::string fname = m_cfg["filename"].asString();
const std::string mode = "a";
cnpy::npz_save(fname, data_name, data.data(), {ndata, ndepos}, mode);
cnpy::npz_save(fname, info_name, info.data(), {ninfo, ndepos}, mode);

save2d(data, data_name, fname, mode);
save2d(info, info_name, fname, mode);

m_depos.clear();

++m_save_count;
Expand Down
12 changes: 8 additions & 4 deletions sio/src/NumpyFrameSaver.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include "WireCellAux/FrameTools.h"
#include "WireCellUtil/NamedFactory.h"
#include "WireCellUtil/cnpy.h"
#include "WireCellUtil/NumpyHelper.h"

#include <string>
#include <vector>
Expand All @@ -13,6 +13,7 @@
WIRECELL_FACTORY(NumpyFrameSaver, WireCell::Sio::NumpyFrameSaver, WireCell::IFrameFilter, WireCell::IConfigurable)

using namespace WireCell;
using WireCell::Numpy::save2d;

Sio::NumpyFrameSaver::NumpyFrameSaver()
: m_save_count(0)
Expand Down Expand Up @@ -130,11 +131,13 @@ bool Sio::NumpyFrameSaver::operator()(const IFrame::pointer& inframe, IFrame::po
const std::string aname = String::format("frame_%s_%d", tag.c_str(), m_save_count);
if (digitize) {
Array::array_xxs sarr = arr.cast<short>();
const short* sdata = sarr.data();
cnpy::npz_save(fname, aname, sdata, {ncols, nrows}, mode);
// const short* sdata = sarr.data();
// cnpy::npz_save(fname, aname, sdata, {ncols, nrows}, mode);
save2d(sarr, aname, fname, mode);
}
else {
cnpy::npz_save(fname, aname, arr.data(), {ncols, nrows}, mode);
// cnpy::npz_save(fname, aname, arr.data(), {ncols, nrows}, mode);
save2d(arr, aname, fname, mode);
}
l->debug("NumpyFrameSaver: saved {} with {} channels {} ticks @t={} ms qtot={}", aname, nrows, ncols,
inframe->time() / units::ms, arr.sum());
Expand All @@ -143,6 +146,7 @@ bool Sio::NumpyFrameSaver::operator()(const IFrame::pointer& inframe, IFrame::po
{ // the channel array
const std::string aname = String::format("channels_%s_%d", tag.c_str(), m_save_count);
cnpy::npz_save(fname, aname, channels.data(), {nrows}, mode);

}

{ // the tick array
Expand Down
50 changes: 50 additions & 0 deletions util/inc/WireCellUtil/NumpyHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/** NumpyHelper
Some helper functions for using with Numpy files.
Eigen default 2D array order is column-major.
Numpy is row-major.
*/

#ifndef WIRECELL_UTIL_NUMPYHELPER
#define WIRECELL_UTIL_NUMPYHELPER

#include "WireCellUtil/cnpy.h"

#include <Eigen/Core>

#include <string>
#include <vector>

namespace WireCell::Numpy {


template <typename ARRAY>
void save2d(ARRAY& array, std::string aname, std::string fname, std::string mode = "w") {
using ROWM = Eigen::Array<typename ARRAY::Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
using Scalar = typename ARRAY::Scalar;
ROWM rowm = array; // assure we have row major
const Scalar* data = rowm.data();
std::vector<size_t> shape(2);
shape[0] = static_cast<size_t>(array.rows());
shape[1] = static_cast<size_t>(array.cols());
cnpy::npz_save<Scalar>(fname.c_str(), aname, data,
shape, mode);
}

template <typename ARRAY>
void load2d(ARRAY& array, std::string aname, std::string fname) {
using ROWM = Eigen::Array<typename ARRAY::Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
using Scalar = typename ARRAY::Scalar;

cnpy::NpyArray np = cnpy::npz_load(fname, aname);

ROWM temp = Eigen::Map<ROWM>(np.data<Scalar>(),
np.shape[0], np.shape[1]);
array = temp;
}

}
#endif
51 changes: 51 additions & 0 deletions util/test/check_numpy_depos.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

#include "WireCellUtil/NumpyHelper.h"
#include <iostream>
#include <string>

using FArray = Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic>;
using IArray = Eigen::Array<int, Eigen::Dynamic, Eigen::Dynamic>;

using WireCell::Numpy::load2d;

int main(int argc, char** argv)

{
if (argc < 3) {
std::cerr << "usage: check_numpy_depos depo-file.npz <SetN> [<FirstDepo> [<NDepos>]]\n";
}
const std::string fname = argv[1];
const std::string nname = argv[2];
size_t row_beg=0, nrows = 10;
if (argc > 3) {
row_beg = atoi(argv[3]);
}
if (argc > 4) {
nrows = atoi(argv[4]);
}

FArray data;
IArray info;
load2d(data, "depo_data_" + nname, fname);
load2d(info, "depo_info_" + nname, fname);

assert (data.cols() == 7);
assert (info.cols() == 4);

const size_t ndepos = data.rows();
assert(ndepos == (size_t)info.rows());

const size_t row_end = std::min(row_beg + nrows, ndepos);

for (size_t irow=row_beg; irow<row_end; ++irow) {

std::cerr
<< "row=" << irow
<< " t=" << data(irow,0)
<< " id=" << info(irow,0)
<< " gen=" << info(irow,2)
<< " child=" << info(irow,3)
<< std::endl;
}
return 0;
}
Loading

0 comments on commit 64f3814

Please sign in to comment.